将 Yahoo Mall 商品列表 CSV 中的商品图片下载到本地,并按 SKU(code 列)命名文件夹
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. const fs = require('fs');
  2. const path = require('path');
  3. const { execSync } = require('child_process');
  /**
   * Makes sure the third-party packages this script needs are installed next
   * to it, creating a minimal package.json first when none exists.
   *
   * Each required package is checked individually, so an existing
   * node_modules directory that lacks one of them (or a pre-existing
   * package.json that does not list them) still triggers an install.
   *
   * Terminates the process with exit code 1 when `npm install` fails.
   */
  function ensureDependencies() {
    const dependencies = {
      'csv-parse': '^4.16.3',
      'axios': '^1.6.0'
    };
    const packageJsonPath = path.join(__dirname, 'package.json');
    const nodeModulesPath = path.join(__dirname, 'node_modules');

    // Create package.json when it does not exist yet.
    if (!fs.existsSync(packageJsonPath)) {
      console.log('创建package.json...');
      const packageJson = {
        name: 'yahoo-goods-export',
        version: '1.0.0',
        private: true,
        dependencies: dependencies
      };
      fs.writeFileSync(packageJsonPath, JSON.stringify(packageJson, null, 2));
    }

    // A package counts as installed when its own package.json is present.
    const missing = Object.keys(dependencies).filter(
      (name) => !fs.existsSync(path.join(nodeModulesPath, name, 'package.json'))
    );
    if (missing.length === 0) {
      return;
    }

    console.log('安装依赖,这可能需要几分钟...');
    // Specs are double-quoted so the caret in the version range is not eaten
    // by cmd.exe, where an unquoted ^ is the escape character.
    const specs = missing
      .map((name) => `"${name}@${dependencies[name]}"`)
      .join(' ');
    try {
      execSync(`npm install ${specs}`, { cwd: __dirname, stdio: 'inherit' });
      console.log('依赖安装完成');
    } catch (error) {
      console.error('依赖安装失败:', error.message);
      process.exit(1);
    }
  }
  // Install anything that is missing first; the third-party modules below can
  // only be loaded once that has happened, which is why these requires are not
  // at the top of the file.
  ensureDependencies();

  // NOTE(review): confirm that the 'csv-parse/sync' entry point resolves under
  // the '^4.16.3' range pinned in ensureDependencies — TODO verify.
  const { parse } = require('csv-parse/sync');
  const axios = require('axios');
  /**
   * Downloads one image to disk, retrying on failure.
   *
   * The response is streamed into `<filepath>.part` and renamed to `filepath`
   * only after the write has finished. A failed or interrupted attempt
   * therefore never leaves a truncated file at `filepath`, which callers that
   * skip already-existing files would otherwise mistake for a finished
   * download.
   *
   * @param {string} url - Image URL to fetch.
   * @param {string} filepath - Destination path of the finished file.
   * @param {number} [retries=3] - Maximum number of attempts.
   * @returns {Promise<boolean>} Resolves to true once the file has been saved.
   * @throws {Error} When the last attempt fails; the underlying error is
   *   attached as `cause` on runtimes that support it.
   */
  async function downloadImage(url, filepath, retries = 3) {
    const partialPath = `${filepath}.part`;

    for (let attempt = 1; attempt <= retries; attempt++) {
      try {
        const response = await axios({
          url,
          method: 'GET',
          responseType: 'stream',
          timeout: 50000, // 50 second timeout
          maxContentLength: 50 * 1024 * 1024, // 50 MB size limit
          headers: {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
          }
        });

        await new Promise((resolve, reject) => {
          const writer = fs.createWriteStream(partialPath);
          // Tear down both ends so neither the socket nor the file handle
          // lingers after a failure on either side.
          const abort = (error) => {
            response.data.destroy();
            writer.destroy();
            reject(error);
          };
          response.data.on('error', abort);
          writer.on('error', abort);
          writer.on('finish', resolve);
          response.data.pipe(writer);
        });

        // Publish the file under its final name only once it is complete.
        fs.renameSync(partialPath, filepath);
        return true;
      } catch (error) {
        // Best-effort removal of whatever the failed attempt wrote.
        try {
          if (fs.existsSync(partialPath)) {
            fs.unlinkSync(partialPath);
          }
        } catch (cleanupError) {
          console.error(`清理临时文件失败: ${partialPath}`, cleanupError.message);
        }

        console.error(`下载失败 (尝试 ${attempt}/${retries}): ${url}`, error.message);
        if (attempt === retries) {
          throw new Error(`下载失败 (已重试${retries}次): ${error.message}`, { cause: error });
        }
        // Back off a little longer after each failed attempt.
        await new Promise(resolve => setTimeout(resolve, 2000 * attempt));
      }
    }
  }
  /**
   * Picks the file extension for a downloaded image from its URL.
   *
   * Only the path component of the URL is inspected, so a query string or
   * fragment never ends up in the filename. Falls back to '.jpg' when the path
   * has no plain alphanumeric extension.
   *
   * @param {string} imageUrl - Image URL taken from the CSV.
   * @returns {string} Extension including the leading dot, e.g. '.png'.
   */
  function imageExtensionFromUrl(imageUrl) {
    let pathname;
    try {
      pathname = new URL(imageUrl).pathname;
    } catch (error) {
      // Not an absolute URL: cut off the query string / fragment by hand.
      pathname = String(imageUrl).split(/[?#]/)[0];
    }
    const extension = path.posix.extname(pathname);
    return /^\.[A-Za-z0-9]+$/.test(extension) ? extension : '.jpg';
  }

  /**
   * Reads the row index stored by a previous run.
   *
   * @param {string} progressFile - Path of the JSON progress file.
   * @returns {number} The stored row index, or 0 when the file is missing,
   *   unreadable, or does not hold a non-negative integer.
   */
  function readLastProcessedRow(progressFile) {
    if (!fs.existsSync(progressFile)) {
      return 0;
    }
    try {
      const progress = JSON.parse(fs.readFileSync(progressFile, 'utf8'));
      const storedRow = Number(progress.lastRow);
      // Anything but a non-negative integer would make the main loop skip
      // every row, so treat it as "no progress".
      const lastRow = Number.isInteger(storedRow) && storedRow >= 0 ? storedRow : 0;
      console.log(`从上次进度继续:第 ${lastRow} 行`);
      return lastRow;
    } catch (error) {
      console.error('读取进度文件失败:', error);
      return 0;
    }
  }

  /**
   * Downloads every image referenced by one CSV row into the row's folder.
   *
   * Rows that are too short, have an empty code or image column, or whose code
   * would resolve outside `goodsDir` are skipped with a log line. A failed
   * image is logged and does not stop the remaining images of the row.
   *
   * @param {string[]} record - One parsed CSV row.
   * @param {number} rowNumber - 1-based row number, used in log output.
   * @param {number} totalRows - Total number of rows, used in log output.
   * @param {{codeIndex: number, imageUrlsIndex: number}} columns - Column positions.
   * @param {string} goodsDir - Directory that holds the per-code folders.
   * @returns {Promise<void>}
   */
  async function downloadRecordImages(record, rowNumber, totalRows, columns, goodsDir) {
    const { codeIndex, imageUrlsIndex } = columns;

    if (!record || record.length < Math.max(codeIndex, imageUrlsIndex) + 1) {
      console.log(`第${rowNumber}行: 数据不完整,跳过`);
      return;
    }

    const folderName = record[codeIndex];
    const imageUrls = record[imageUrlsIndex];
    if (!folderName || folderName.trim() === '') {
      console.log(`第${rowNumber}行: code列为空,跳过`);
      return;
    }
    if (!imageUrls || imageUrls.trim() === '') {
      console.log(`第${rowNumber}行: item-image-urls列为空,跳过`);
      return;
    }

    // The code comes straight from the CSV; refuse values such as '..' that
    // would place the folder outside goodsDir.
    const folderPath = path.join(goodsDir, folderName.trim());
    const relativePath = path.relative(goodsDir, folderPath);
    const escapesGoodsDir =
      relativePath === '' ||
      relativePath === '..' ||
      relativePath.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relativePath);
    if (escapesGoodsDir) {
      console.log(`第${rowNumber}行: code列的值无法作为goods下的文件夹名,跳过`);
      return;
    }

    if (!fs.existsSync(folderPath)) {
      fs.mkdirSync(folderPath, { recursive: true });
      console.log(`创建文件夹: ${folderPath}`);
    }

    // The image column holds a ';'-separated list of URLs.
    const imageUrlList = imageUrls
      .split(';')
      .map((url) => url.trim())
      .filter(Boolean);
    console.log(`[${rowNumber}/${totalRows}] 文件夹 ${folderName} 找到${imageUrlList.length}个图片链接`);

    for (const [index, imageUrl] of imageUrlList.entries()) {
      try {
        const filename = `image_${index + 1}${imageExtensionFromUrl(imageUrl)}`;
        const filepath = path.join(folderPath, filename);
        if (fs.existsSync(filepath)) {
          console.log(`[${rowNumber}/${totalRows}] 图片已存在,跳过: ${filepath}`);
          continue;
        }
        console.log(`[${rowNumber}/${totalRows}] 正在下载: ${imageUrl}`);
        await downloadImage(imageUrl, filepath);
        console.log(`[${rowNumber}/${totalRows}] 已保存到: ${filepath}`);
        // Pause between downloads to avoid hammering the server.
        await new Promise((resolve) => setTimeout(resolve, 1500));
      } catch (error) {
        console.error(`图片下载失败 [行 ${rowNumber}, 图片 ${index + 1}]:`, error.message);
      }
    }
  }

  /**
   * Reads the product CSV and downloads each row's images into
   * `goods/<code>/image_<n><ext>` next to this script.
   *
   * Progress is written to progress.json after every row so an interrupted run
   * can be resumed; the file is removed once all rows have been handled. On
   * resume the stored row itself is processed again, which is harmless because
   * images that already exist are skipped.
   *
   * @param {string} [csvFilePath] - CSV file to process. Defaults to
   *   '2025-02-07商城商品.csv' in the script's directory.
   * @returns {Promise<void>}
   * @throws {Error} When the CSV file is missing, empty, or lacks the `code`
   *   or `item-image-urls` column.
   */
  async function processCSV(csvFilePath = path.join(__dirname, '2025-02-07商城商品.csv')) {
    const progressFile = path.join(__dirname, 'progress.json');
    const lastProcessedRow = readLastProcessedRow(progressFile);

    if (!fs.existsSync(csvFilePath)) {
      throw new Error(`CSV文件不存在: ${csvFilePath}`);
    }

    console.log('开始处理CSV文件...');
    console.log(`CSV文件路径: ${csvFilePath}`);
    const fileContent = fs.readFileSync(csvFilePath);
    const records = parse(fileContent, {
      skip_empty_lines: true,
      relax_quotes: true,
      relax_column_count: true,
      encoding: 'utf8',
      bom: true
    });
    console.log(`总行数: ${records.length}`);
    if (records.length === 0) {
      throw new Error(`CSV文件为空: ${csvFilePath}`);
    }

    // Locate the two columns by header name.
    const headers = records[0].map((header) => String(header).trim());
    const codeIndex = headers.indexOf('code');
    const imageUrlsIndex = headers.indexOf('item-image-urls');
    console.log('code列索引:', codeIndex);
    console.log('item-image-urls列索引:', imageUrlsIndex);
    // Without this check an index of -1 makes every row look empty and the
    // whole file is skipped without any error.
    if (codeIndex === -1 || imageUrlsIndex === -1) {
      throw new Error('CSV缺少必需的列: code 或 item-image-urls');
    }

    const goodsDir = path.join(__dirname, 'goods');
    fs.mkdirSync(goodsDir, { recursive: true });

    // Row 0 is the header, so data starts at index 1.
    for (let i = Math.max(1, lastProcessedRow); i < records.length; i++) {
      console.log(`\n正在处理第 ${i + 1}/${records.length} 行`);
      try {
        await downloadRecordImages(
          records[i],
          i + 1,
          records.length,
          { codeIndex, imageUrlsIndex },
          goodsDir
        );
      } catch (error) {
        console.error(`处理第 ${i + 1} 行时出错:`, error);
      }
      // Record progress whether the row succeeded, failed, or was skipped.
      fs.writeFileSync(progressFile, JSON.stringify({ lastRow: i }));
    }

    console.log('\nCSV处理完成!');
    // Nothing left to resume, so drop the progress file.
    if (fs.existsSync(progressFile)) {
      fs.unlinkSync(progressFile);
    }
  }
  // Run the program. A fatal error is logged and also reflected in the exit
  // status, so shells and schedulers can tell that the run failed.
  processCSV().catch((error) => {
    console.error('程序出错:', error);
    process.exitCode = 1;
  });