将 Yahoo Mall 商品列表 CSV 中的商品图片下载到本地,并按 SKU(code 列)命名文件夹保存
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. const fs = require('fs');
  2. const path = require('path');
  3. const { execSync } = require('child_process');
  /**
   * Bootstraps the third-party packages this script needs.
   *
   * Writes a minimal package.json next to the script when none exists, then
   * looks for every required package inside node_modules and runs
   * `npm install` for the ones that are missing. Terminates the process with
   * status 1 when the installation fails.
   */
  function ensureDependencies() {
    const dependencies = {
      'csv-parse': '^4.16.3',
      'axios': '^1.6.0'
    };
    const packageJsonPath = path.join(__dirname, 'package.json');
    const nodeModulesPath = path.join(__dirname, 'node_modules');

    // Create package.json only when there is none; an existing one is left alone
    if (!fs.existsSync(packageJsonPath)) {
      console.log('创建package.json...');
      const packageJson = {
        name: 'yahoo-goods-export',
        version: '1.0.0',
        private: true,
        dependencies: dependencies
      };
      fs.writeFileSync(packageJsonPath, JSON.stringify(packageJson, null, 2));
    }

    // Check each package on its own: a node_modules directory that exists but
    // lacks one of them would otherwise skip installation and crash on require.
    const missingSpecs = Object.keys(dependencies)
      .filter(name => !fs.existsSync(path.join(nodeModulesPath, name)))
      // Double quotes keep the caret of the version range literal under cmd.exe
      .map(name => `"${name}@${dependencies[name]}"`);

    if (missingSpecs.length === 0) {
      return;
    }

    console.log('安装依赖,这可能需要几分钟...');
    try {
      execSync(`npm install ${missingSpecs.join(' ')}`, { cwd: __dirname, stdio: 'inherit' });
      console.log('依赖安装完成');
    } catch (error) {
      console.error('依赖安装失败:', error.message);
      process.exit(1);
    }
  }
  // Run the bootstrap first so the packages below are present on disk.
  ensureDependencies();

  // Third-party modules are loaded here, after the bootstrap, rather than at the top of the file.
  const { parse } = require('csv-parse/sync');
  const axios = require('axios');
  /**
   * Downloads one image to disk, retrying on failure.
   *
   * The response body is streamed into `<filepath>.part` and renamed to
   * `filepath` only after the write has completed, so a failed or interrupted
   * transfer never leaves a truncated file at `filepath` (the CSV loop in this
   * file treats an existing file as already downloaded).
   *
   * @param {string} url - Address of the image to fetch.
   * @param {string} filepath - Destination path of the finished file.
   * @param {number} [retries=3] - Maximum number of attempts.
   * @returns {Promise<boolean>} Resolves to true once the file is in place.
   * @throws {Error} When the last attempt fails.
   */
  async function downloadImage(url, filepath, retries = 3) {
    const partialPath = `${filepath}.part`;

    for (let attempt = 1; attempt <= retries; attempt++) {
      try {
        const response = await axios({
          url,
          method: 'GET',
          responseType: 'stream',
          timeout: 50000, // 50 seconds
          maxContentLength: 50 * 1024 * 1024, // 50 MB
          headers: {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
          }
        });

        await new Promise((resolve, reject) => {
          const writer = fs.createWriteStream(partialPath);
          let failure = null;

          // Tear down both ends on the first error so no socket or descriptor lingers
          const abort = error => {
            failure = failure || error;
            response.data.destroy();
            writer.destroy();
          };

          response.data.on('error', abort);
          writer.on('error', abort);
          // Settle on 'close' (descriptor released) so the rename/unlink below
          // never races with a file that is still being opened or written.
          writer.on('close', () => {
            if (failure) {
              reject(failure);
            } else {
              resolve();
            }
          });
          response.data.pipe(writer);
        });

        // Publish the finished file under its final name
        fs.renameSync(partialPath, filepath);
        return true;
      } catch (error) {
        // Drop whatever was written so far; a missing partial file is fine
        try {
          fs.unlinkSync(partialPath);
        } catch (cleanupError) {
          if (cleanupError.code !== 'ENOENT') {
            console.error(`清理临时文件失败: ${partialPath}`, cleanupError.message);
          }
        }

        console.error(`下载失败 (尝试 ${attempt}/${retries}): ${url}`, error.message);
        if (attempt === retries) {
          throw new Error(`下载失败 (已重试${retries}次): ${error.message}`);
        }
        // Linear back-off before the next attempt: 2 s, 4 s, ...
        await new Promise(resolve => setTimeout(resolve, 2000 * attempt));
      }
    }
  }
  /**
   * Picks the file extension for a downloaded image from its URL.
   * Only the URL path is inspected, so query strings and fragments never end
   * up in the file name. Falls back to ".jpg" when the path has no extension.
   */
  function getImageExtension(imageUrl) {
    let urlPath;
    try {
      urlPath = new URL(imageUrl).pathname;
    } catch (error) {
      // Not an absolute URL: strip a query string / fragment by hand
      urlPath = imageUrl.split(/[?#]/)[0];
    }
    return path.extname(urlPath) || '.jpg';
  }

  /**
   * Reads the product CSV and downloads every product image into
   * `goods/<code>/image_<n><ext>` next to this script.
   *
   * The index of the last handled row is stored in progress.json after each
   * row, and a later run resumes from that row (images already on disk are
   * skipped). The progress file is removed once the whole CSV was walked.
   * Failures of single images or rows are logged and do not stop the run.
   *
   * @param {string} [csvFilePath] - CSV file to read; defaults to
   *   '2025-02-07商城商品.csv' in the script directory.
   * @returns {Promise<void>}
   * @throws {Error} When the CSV file is missing, is empty, or lacks the
   *   'code' or 'item-image-urls' header column.
   */
  async function processCSV(csvFilePath = path.join(__dirname, '2025-02-07商城商品.csv')) {
    const progressFile = path.join(__dirname, 'progress.json');

    // Restore the resume position; an unreadable progress file just restarts from the top
    let lastProcessedRow = 0;
    if (fs.existsSync(progressFile)) {
      try {
        const progress = JSON.parse(fs.readFileSync(progressFile, 'utf8'));
        lastProcessedRow = progress.lastRow || 0;
        console.log(`从上次进度继续:第 ${lastProcessedRow} 行`);
      } catch (error) {
        console.error('读取进度文件失败:', error);
      }
    }

    if (!fs.existsSync(csvFilePath)) {
      throw new Error(`CSV文件不存在: ${csvFilePath}`);
    }

    console.log('开始处理CSV文件...');
    console.log(`CSV文件路径: ${csvFilePath}`);
    const fileContent = fs.readFileSync(csvFilePath);
    const records = parse(fileContent, {
      skip_empty_lines: true,
      relax_quotes: true,
      relax_column_count: true,
      encoding: 'utf8',
      bom: true
    });
    console.log(`总行数: ${records.length}`);

    if (records.length === 0) {
      throw new Error(`CSV文件为空: ${csvFilePath}`);
    }

    // Locate the two columns by header name (row 0 is the header row)
    const headers = records[0];
    const codeIndex = headers.indexOf('code');
    const imageUrlsIndex = headers.indexOf('item-image-urls');
    console.log('code列索引:', codeIndex);
    console.log('item-image-urls列索引:', imageUrlsIndex);

    // Fail loudly on a wrong header: with index -1 every row would be skipped
    // as "empty" and the run would still report success.
    const missingColumns = [];
    if (codeIndex === -1) {
      missingColumns.push('code');
    }
    if (imageUrlsIndex === -1) {
      missingColumns.push('item-image-urls');
    }
    if (missingColumns.length > 0) {
      throw new Error(`CSV表头缺少必需的列: ${missingColumns.join(', ')}`);
    }

    const goodsDir = path.join(__dirname, 'goods');
    if (!fs.existsSync(goodsDir)) {
      fs.mkdirSync(goodsDir);
    }

    // Row 0 is the header, so data starts at 1; otherwise resume at the stored row
    for (let i = Math.max(1, lastProcessedRow); i < records.length; i++) {
      console.log(`\n正在处理第 ${i + 1}/${records.length} 行`);
      try {
        const record = records[i];
        // relax_column_count lets short rows through, so check the width here
        if (!record || record.length < Math.max(codeIndex, imageUrlsIndex) + 1) {
          console.log(`第${i + 1}行: 数据不完整,跳过`);
          continue;
        }

        const folderName = record[codeIndex];
        const imageUrls = record[imageUrlsIndex];
        if (!folderName || folderName.trim() === '') {
          console.log(`第${i + 1}行: code列为空,跳过`);
          continue;
        }
        if (!imageUrls || imageUrls.trim() === '') {
          console.log(`第${i + 1}行: item-image-urls列为空,跳过`);
          continue;
        }

        // One folder per product code
        const folderPath = path.join(goodsDir, folderName.trim());
        if (!fs.existsSync(folderPath)) {
          fs.mkdirSync(folderPath, { recursive: true });
          console.log(`创建文件夹: ${folderPath}`);
        }

        // The cell holds a ';'-separated list of URLs
        const imageUrlList = imageUrls
          .split(';')
          .map(url => url.trim())
          .filter(url => url.length > 0);
        console.log(`[${i + 1}/${records.length}] 文件夹 ${folderName} 找到${imageUrlList.length}个图片链接`);

        for (let j = 0; j < imageUrlList.length; j++) {
          try {
            const imageUrl = imageUrlList[j];
            const filename = `image_${j + 1}${getImageExtension(imageUrl)}`;
            const filepath = path.join(folderPath, filename);
            if (fs.existsSync(filepath)) {
              console.log(`[${i + 1}/${records.length}] 图片已存在,跳过: ${filepath}`);
              continue;
            }
            console.log(`[${i + 1}/${records.length}] 正在下载: ${imageUrl}`);
            await downloadImage(imageUrl, filepath);
            console.log(`[${i + 1}/${records.length}] 已保存到: ${filepath}`);
            // Pause between downloads to avoid hammering the image host
            await new Promise(resolve => setTimeout(resolve, 1500));
          } catch (error) {
            // A single failed image must not abort the rest of the row
            console.error(`图片下载失败 [行 ${i + 1}, 图片 ${j + 1}]:`, error.message);
          }
        }

        fs.writeFileSync(progressFile, JSON.stringify({ lastRow: i }));
      } catch (error) {
        console.error(`处理第 ${i + 1} 行时出错:`, error);
        // Record the position anyway, then carry on with the next row
        fs.writeFileSync(progressFile, JSON.stringify({ lastRow: i }));
      }
    }

    console.log('\nCSV处理完成!');
    // A finished run needs no resume point
    if (fs.existsSync(progressFile)) {
      fs.unlinkSync(progressFile);
    }
  }
  // Entry point: run the export and report a fatal error.
  processCSV().catch(error => {
    console.error('程序出错:', error);
    // Without this the process would still exit with status 0 after a fatal
    // error, hiding the failure from the calling shell or scheduler.
    process.exitCode = 1;
  });