const fs = require('fs');
const path = require('path');
const { pipeline } = require('stream');
const { execSync } = require('child_process');

/**
 * Reports whether a package directory with a package.json exists in any of the
 * node_modules directories listed in this module's lookup paths.
 *
 * Plain file-system probes are used so the module loader is not consulted
 * before the packages have had a chance to be installed.
 *
 * @param {string} name - Package name, e.g. 'axios'.
 * @returns {boolean} true when a matching package.json was found.
 */
function isPackageInstalled(name) {
  return module.paths.some(dir => fs.existsSync(path.join(dir, name, 'package.json')));
}

/**
 * Checks for the third-party packages this script needs and installs the
 * missing ones with npm.
 *
 * Creates a minimal package.json next to the script when none exists, then
 * runs `npm install` for every dependency that cannot be found. Checking each
 * package (instead of only the presence of node_modules) covers the case where
 * node_modules exists but does not contain them. Exits the process with code 1
 * when the installation fails.
 */
function ensureDependencies() {
  // NOTE(review): 'csv-parse/sync' with a named `parse` export (used below)
  // looks like the csv-parse v5 API; confirm the ^4.16.3 range provides it.
  const dependencies = {
    'csv-parse': '^4.16.3',
    'axios': '^1.6.0'
  };

  const packageJsonPath = path.join(__dirname, 'package.json');

  // Create package.json when it does not exist yet.
  if (!fs.existsSync(packageJsonPath)) {
    console.log('创建package.json...');
    const packageJson = {
      name: 'yahoo-goods-export',
      version: '1.0.0',
      private: true,
      dependencies: dependencies
    };
    fs.writeFileSync(packageJsonPath, JSON.stringify(packageJson, null, 2));
  }

  const missing = Object.keys(dependencies).filter(name => !isPackageInstalled(name));
  if (missing.length === 0) {
    return;
  }

  // The specs are built from the constants above only. They are double-quoted
  // so the '^' of the version range is not interpreted by the shell.
  const specs = missing.map(name => `"${name}@${dependencies[name]}"`).join(' ');

  console.log('安装依赖,这可能需要几分钟...');
  try {
    execSync(`npm install ${specs}`, {
      cwd: __dirname,
      stdio: 'inherit'
    });
    console.log('依赖安装完成');
  } catch (error) {
    console.error('依赖安装失败:', error.message);
    process.exit(1);
  }
}

// Make sure the dependencies are installed...
ensureDependencies();

// ...and only then load them.
const { parse } = require('csv-parse/sync');
const axios = require('axios');

/**
 * Best-effort removal of a partially written download.
 *
 * @param {string} partialPath - Path of the temporary file to delete.
 */
function discardPartialFile(partialPath) {
  try {
    fs.unlinkSync(partialPath);
  } catch (error) {
    // A missing file simply means nothing was written; anything else is
    // reported but must not mask the download error being handled.
    if (error.code !== 'ENOENT') {
      console.error(`无法删除临时文件: ${partialPath}`, error.message);
    }
  }
}

/**
 * Downloads `url` to `filepath`, retrying on failure.
 *
 * The body is streamed into `<filepath>.part` and renamed to `filepath` only
 * after the stream finished successfully, so an interrupted download never
 * leaves a truncated file under the final name (processCSV treats an existing
 * file as already downloaded).
 *
 * @param {string} url - Image URL to fetch.
 * @param {string} filepath - Destination path of the image.
 * @param {number} [retries=3] - Total number of attempts; values below 1 or
 *   non-numeric values are treated as a single attempt.
 * @returns {Promise<boolean>} Resolves to true once the file is in place.
 * @throws {Error} When every attempt failed; the last failure is attached as
 *   `cause` where the runtime supports it.
 */
async function downloadImage(url, filepath, retries = 3) {
  const maxAttempts = Math.max(1, Math.floor(Number(retries)) || 1);
  const partialPath = `${filepath}.part`;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const response = await axios({
        url,
        method: 'GET',
        responseType: 'stream',
        timeout: 50000, // 50 second timeout
        maxContentLength: 50 * 1024 * 1024, // 50MB size limit
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
      });

      // pipeline() destroys both streams when either side fails.
      await new Promise((resolve, reject) => {
        pipeline(response.data, fs.createWriteStream(partialPath), error => {
          if (error) {
            reject(error);
          } else {
            resolve();
          }
        });
      });

      fs.renameSync(partialPath, filepath);
      return true;
    } catch (error) {
      discardPartialFile(partialPath);
      console.error(`下载失败 (尝试 ${attempt}/${maxAttempts}): ${url}`, error.message);
      if (attempt === maxAttempts) {
        throw new Error(`下载失败 (已重试${maxAttempts}次): ${error.message}`, { cause: error });
      }
      // Back off a little longer after every failed attempt.
      await new Promise(resolve => setTimeout(resolve, 2000 * attempt));
    }
  }
}

/**
 * Reads the row index stored in the progress file.
 *
 * @param {string} progressFile - Path of progress.json.
 * @returns {number} The stored row index, or 0 when the file is missing,
 *   unreadable, or does not hold a positive integer.
 */
function readLastProcessedRow(progressFile) {
  if (!fs.existsSync(progressFile)) {
    return 0;
  }
  try {
    const progress = JSON.parse(fs.readFileSync(progressFile, 'utf8'));
    const savedRow = Number(progress.lastRow);
    const lastRow = Number.isInteger(savedRow) && savedRow > 0 ? savedRow : 0;
    console.log(`从上次进度继续:第 ${lastRow} 行`);
    return lastRow;
  } catch (error) {
    console.error('读取进度文件失败:', error);
    return 0;
  }
}

/**
 * Picks the file extension for a downloaded image.
 *
 * The extension is taken from the URL's path component so query strings and
 * fragments do not end up in the file name.
 *
 * @param {string} imageUrl - Image URL from the CSV.
 * @returns {string} Extension including the dot; '.jpg' when the URL has no
 *   short alphanumeric extension.
 */
function getImageExtension(imageUrl) {
  let pathname;
  try {
    pathname = new URL(imageUrl).pathname;
  } catch (error) {
    // Not an absolute URL: strip query string / fragment by hand.
    pathname = imageUrl.split(/[?#]/)[0];
  }
  const extension = path.extname(pathname);
  return /^\.[A-Za-z0-9]{1,5}$/.test(extension) ? extension : '.jpg';
}

/**
 * Builds the per-product folder path and makes sure it stays inside goodsDir.
 *
 * @param {string} goodsDir - Absolute path of the goods directory.
 * @param {string} folderName - Trimmed value of the CSV `code` column.
 * @returns {string|null} The folder path, or null when the name would resolve
 *   to goodsDir itself or to a location outside of it.
 */
function resolveProductDir(goodsDir, folderName) {
  const folderPath = path.join(goodsDir, folderName);
  const relative = path.relative(goodsDir, folderPath);
  const escapes =
    relative === '' ||
    relative === '..' ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);
  return escapes ? null : folderPath;
}

/**
 * Reads the product CSV next to this script and downloads every image listed
 * in the `item-image-urls` column into `goods/<code>/image_<n><ext>`.
 *
 * Progress is written to progress.json after each processed row and the file
 * is removed once the loop finishes. On restart the loop begins at the stored
 * row, so that row is visited again; images that already exist are skipped.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the CSV file is missing, empty, or lacks the `code` or
 *   `item-image-urls` header.
 */
async function processCSV() {
  const csvFilePath = path.join(__dirname, '2025-02-07商城商品.csv');
  const progressFile = path.join(__dirname, 'progress.json');

  // Load saved progress.
  const lastProcessedRow = readLastProcessedRow(progressFile);

  if (!fs.existsSync(csvFilePath)) {
    throw new Error(`CSV文件不存在: ${csvFilePath}`);
  }

  console.log('开始处理CSV文件...');
  console.log(`CSV文件路径: ${csvFilePath}`);

  const fileContent = fs.readFileSync(csvFilePath);
  const records = parse(fileContent, {
    skip_empty_lines: true,
    relax_quotes: true,
    relax_column_count: true,
    encoding: 'utf8',
    bom: true
  });

  console.log(`总行数: ${records.length}`);

  if (records.length === 0) {
    throw new Error(`CSV文件为空: ${csvFilePath}`);
  }

  // Locate the columns by header name.
  const headers = records[0];
  const codeIndex = headers.indexOf('code');
  const imageUrlsIndex = headers.indexOf('item-image-urls');

  console.log('code列索引:', codeIndex);
  console.log('item-image-urls列索引:', imageUrlsIndex);

  // Without both columns every row would be skipped silently; fail loudly.
  const missingColumns = [];
  if (codeIndex === -1) {
    missingColumns.push('code');
  }
  if (imageUrlsIndex === -1) {
    missingColumns.push('item-image-urls');
  }
  if (missingColumns.length > 0) {
    throw new Error(`CSV缺少必需的列: ${missingColumns.join(', ')}`);
  }

  // Create the goods directory.
  const goodsDir = path.join(__dirname, 'goods');
  if (!fs.existsSync(goodsDir)) {
    fs.mkdirSync(goodsDir);
  }

  // Row 0 is the header, so data starts at 1; resume from the stored row.
  for (let i = Math.max(1, lastProcessedRow); i < records.length; i++) {
    console.log(`\n正在处理第 ${i + 1}/${records.length} 行`);

    try {
      const record = records[i];

      // relax_column_count allows short rows; skip those lacking our columns.
      if (!record || record.length < Math.max(codeIndex, imageUrlsIndex) + 1) {
        console.log(`第${i + 1}行: 数据不完整,跳过`);
        continue;
      }

      const folderName = record[codeIndex];
      const imageUrls = record[imageUrlsIndex];

      if (!folderName || folderName.trim() === '') {
        console.log(`第${i + 1}行: code列为空,跳过`);
        continue;
      }

      if (!imageUrls || imageUrls.trim() === '') {
        console.log(`第${i + 1}行: item-image-urls列为空,跳过`);
        continue;
      }

      // The code comes from the CSV; never let it point outside goods/.
      const folderPath = resolveProductDir(goodsDir, folderName.trim());
      if (folderPath === null) {
        console.log(`第${i + 1}行: code列不是有效的文件夹名,跳过`);
        continue;
      }

      if (!fs.existsSync(folderPath)) {
        fs.mkdirSync(folderPath, { recursive: true });
        console.log(`创建文件夹: ${folderPath}`);
      }

      // The column holds a ';'-separated list of URLs.
      const imageUrlList = imageUrls
        .split(';')
        .map(url => url.trim())
        .filter(url => url.length > 0);

      console.log(`[${i + 1}/${records.length}] 文件夹 ${folderName} 找到${imageUrlList.length}个图片链接`);

      // Download the images one by one; a failed image does not stop the row.
      for (let j = 0; j < imageUrlList.length; j++) {
        try {
          const imageUrl = imageUrlList[j];
          const filename = `image_${j + 1}${getImageExtension(imageUrl)}`;
          const filepath = path.join(folderPath, filename);

          if (fs.existsSync(filepath)) {
            console.log(`[${i + 1}/${records.length}] 图片已存在,跳过: ${filepath}`);
            continue;
          }

          console.log(`[${i + 1}/${records.length}] 正在下载: ${imageUrl}`);
          await downloadImage(imageUrl, filepath);
          console.log(`[${i + 1}/${records.length}] 已保存到: ${filepath}`);

          // Pause between downloads to avoid hammering the server.
          await new Promise(resolve => setTimeout(resolve, 1500));
        } catch (error) {
          console.error(`图片下载失败 [行 ${i + 1}, 图片 ${j + 1}]:`, error.message);
        }
      }

      // Persist progress.
      fs.writeFileSync(progressFile, JSON.stringify({ lastRow: i }));
    } catch (error) {
      console.error(`处理第 ${i + 1} 行时出错:`, error);
      // Persist progress, then carry on with the next row.
      fs.writeFileSync(progressFile, JSON.stringify({ lastRow: i }));
    }
  }

  console.log('\nCSV处理完成!');
  // All rows handled: the progress file is no longer needed.
  if (fs.existsSync(progressFile)) {
    fs.unlinkSync(progressFile);
  }
}

// Run the script.
processCSV().catch(error => {
  console.error('程序出错:', error);
});