|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229 |
- const fs = require('fs');
- const path = require('path');
- const { execSync } = require('child_process');
-
/**
 * Makes sure the third-party packages this script requires are installed
 * next to this file.
 *
 * Creates a minimal package.json when none exists, then runs `npm install`
 * for every required package that has no folder under node_modules. Checking
 * each package individually (instead of only checking that node_modules
 * exists) covers a node_modules directory that was created for other packages
 * or by an interrupted install.
 *
 * Side effects: may write package.json, may run `npm install` synchronously,
 * and terminates the process with exit code 1 when the install command fails.
 */
function ensureDependencies() {
  // NOTE(review): this file later loads `csv-parse/sync`; presumably that entry
  // point only exists from csv-parse 5.x, so the `^4.16.3` range may need
  // bumping — confirm against the csv-parse release notes before changing it.
  const dependencies = {
    'csv-parse': '^4.16.3',
    'axios': '^1.6.0'
  };

  const packageJsonPath = path.join(__dirname, 'package.json');
  const nodeModulesPath = path.join(__dirname, 'node_modules');

  // Create a package.json only when the directory has none; an existing one is left untouched here.
  if (!fs.existsSync(packageJsonPath)) {
    console.log('创建package.json...');
    const packageJson = {
      name: 'yahoo-goods-export',
      version: '1.0.0',
      private: true,
      dependencies
    };
    fs.writeFileSync(packageJsonPath, JSON.stringify(packageJson, null, 2));
  }

  const missingPackages = Object.keys(dependencies).filter(
    (name) => !fs.existsSync(path.join(nodeModulesPath, name))
  );
  if (missingPackages.length === 0) {
    return;
  }

  // Name the packages explicitly so they get installed even when a pre-existing
  // package.json does not list them. The specs are double-quoted so that shells
  // which treat `^` specially (cmd.exe) pass the version range through unchanged.
  const installSpecs = missingPackages
    .map((name) => `"${name}@${dependencies[name]}"`)
    .join(' ');

  console.log('安装依赖,这可能需要几分钟...');
  try {
    execSync(`npm install ${installSpecs}`, { cwd: __dirname, stdio: 'inherit' });
    console.log('依赖安装完成');
  } catch (error) {
    console.error('依赖安装失败:', error.message);
    process.exit(1);
  }
}
-
- // Run the dependency check/install step before any third-party package is required.
- ensureDependencies();
-
- // Load the third-party packages now that the install step above has run.
- const { parse } = require('csv-parse/sync');
- const axios = require('axios');
-
/**
 * Downloads `url` to `filepath`, retrying with a growing delay on failure.
 *
 * The response body is streamed to `<filepath>.part` and renamed to `filepath`
 * only after the whole transfer has completed. A failed or interrupted
 * download therefore never leaves a truncated file at `filepath`, which
 * matters because callers in this file skip any image whose file already
 * exists.
 *
 * @param {string} url - Address of the image to fetch.
 * @param {string} filepath - Destination path of the finished file.
 * @param {number} [retries=3] - Maximum number of attempts.
 * @returns {Promise<boolean>} Resolves to `true` once the file has been saved.
 * @throws {Error} When the last attempt fails; the underlying error is attached as `cause`.
 */
async function downloadImage(url, filepath, retries = 3) {
  const { pipeline } = require('stream');
  const partialPath = `${filepath}.part`;

  // Remove whatever an unsuccessful attempt wrote; a missing file is not an error.
  const discardPartial = () => {
    try {
      fs.unlinkSync(partialPath);
    } catch (cleanupError) {
      if (cleanupError.code !== 'ENOENT') {
        console.error(`清理临时文件失败: ${partialPath}`, cleanupError.message);
      }
    }
  };

  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      const response = await axios({
        url,
        method: 'GET',
        responseType: 'stream',
        timeout: 50000, // 50-second timeout
        // NOTE(review): presumably axios does not enforce this 50MB cap for
        // `responseType: 'stream'` — confirm against the axios documentation.
        maxContentLength: 50 * 1024 * 1024,
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
      });

      // pipeline() destroys both streams when either side fails or closes early,
      // so neither the response nor the file handle is left open.
      await new Promise((resolve, reject) => {
        pipeline(response.data, fs.createWriteStream(partialPath), (error) => {
          if (error) {
            reject(error);
          } else {
            resolve();
          }
        });
      });

      fs.renameSync(partialPath, filepath);
      return true;
    } catch (error) {
      discardPartial();
      console.error(`下载失败 (尝试 ${attempt}/${retries}): ${url}`, error.message);

      if (attempt === retries) {
        throw new Error(`下载失败 (已重试${retries}次): ${error.message}`, { cause: error });
      }

      // Back off a little longer after every failed attempt before retrying.
      await new Promise((resolve) => setTimeout(resolve, 2000 * attempt));
    }
  }
}
-
/**
 * Returns the row index stored in the progress file, or 0 when the file is
 * missing, unreadable, or does not hold a positive integer `lastRow`.
 */
function readSavedRow(progressFile) {
  if (!fs.existsSync(progressFile)) {
    return 0;
  }

  try {
    const progress = JSON.parse(fs.readFileSync(progressFile, 'utf8'));
    const savedRow = Number.isInteger(progress.lastRow) && progress.lastRow > 0 ? progress.lastRow : 0;
    console.log(`从上次进度继续:第 ${savedRow} 行`);
    return savedRow;
  } catch (error) {
    console.error('读取进度文件失败:', error);
    return 0;
  }
}

/**
 * Builds the local file name `image_<position><ext>` for an image URL.
 * The extension is taken from the URL's path only, so a query string or
 * fragment never ends up in the file name; `.jpg` is used when the path has
 * no extension.
 */
function imageFileNameFor(imageUrl, position) {
  let urlPath;
  try {
    urlPath = new URL(imageUrl).pathname;
  } catch (parseError) {
    // Not an absolute URL: fall back to cutting off the query/fragment by hand.
    urlPath = imageUrl.split(/[?#]/)[0];
  }
  return `image_${position}${path.extname(urlPath) || '.jpg'}`;
}

/**
 * Handles one CSV row: validates it, creates the product folder under
 * `goodsDir`, and downloads every listed image that is not on disk yet.
 * A failed image is logged and does not stop the remaining ones.
 */
async function downloadRowImages({ record, rowNumber, totalRows, codeIndex, imageUrlsIndex, goodsDir }) {
  if (!record || record.length < Math.max(codeIndex, imageUrlsIndex) + 1) {
    console.log(`第${rowNumber}行: 数据不完整,跳过`);
    return;
  }

  const folderName = record[codeIndex];
  const imageUrls = record[imageUrlsIndex];

  if (!folderName || folderName.trim() === '') {
    console.log(`第${rowNumber}行: code列为空,跳过`);
    return;
  }

  if (!imageUrls || imageUrls.trim() === '') {
    console.log(`第${rowNumber}行: item-image-urls列为空,跳过`);
    return;
  }

  const folderPath = path.join(goodsDir, folderName.trim());
  if (!fs.existsSync(folderPath)) {
    fs.mkdirSync(folderPath, { recursive: true });
    console.log(`创建文件夹: ${folderPath}`);
  }

  // The column holds several links separated by ';'.
  const imageUrlList = imageUrls
    .split(';')
    .map((url) => url.trim())
    .filter((url) => url.length > 0);

  console.log(`[${rowNumber}/${totalRows}] 文件夹 ${folderName} 找到${imageUrlList.length}个图片链接`);

  for (const [index, imageUrl] of imageUrlList.entries()) {
    try {
      const filepath = path.join(folderPath, imageFileNameFor(imageUrl, index + 1));

      if (fs.existsSync(filepath)) {
        console.log(`[${rowNumber}/${totalRows}] 图片已存在,跳过: ${filepath}`);
        continue;
      }

      console.log(`[${rowNumber}/${totalRows}] 正在下载: ${imageUrl}`);
      await downloadImage(imageUrl, filepath);
      console.log(`[${rowNumber}/${totalRows}] 已保存到: ${filepath}`);

      // Pause between downloads so requests are not sent back to back.
      await new Promise((resolve) => setTimeout(resolve, 1500));
    } catch (error) {
      console.error(`图片下载失败 [行 ${rowNumber}, 图片 ${index + 1}]:`, error.message);
    }
  }
}

/**
 * Reads the product CSV next to this file and downloads every row's images
 * into `goods/<code>/image_<n><ext>`.
 *
 * The index of the last handled row is written to `progress.json` after each
 * row so an interrupted run can resume; the file is deleted once all rows
 * have been handled.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the CSV file is missing, is empty, or lacks the
 *   `code` / `item-image-urls` header columns.
 */
async function processCSV() {
  const csvFilePath = path.join(__dirname, '2025-02-07商城商品.csv');
  const progressFile = path.join(__dirname, 'progress.json');

  const lastProcessedRow = readSavedRow(progressFile);

  if (!fs.existsSync(csvFilePath)) {
    throw new Error(`CSV文件不存在: ${csvFilePath}`);
  }

  console.log('开始处理CSV文件...');
  console.log(`CSV文件路径: ${csvFilePath}`);

  const fileContent = fs.readFileSync(csvFilePath);
  const records = parse(fileContent, {
    skip_empty_lines: true,
    relax_quotes: true,
    relax_column_count: true,
    encoding: 'utf8',
    bom: true
  });

  console.log(`总行数: ${records.length}`);

  if (records.length === 0) {
    throw new Error(`CSV文件为空: ${csvFilePath}`);
  }

  // Locate the two columns by header name (the first record is the header row).
  const headers = records[0];
  const codeIndex = headers.indexOf('code');
  const imageUrlsIndex = headers.indexOf('item-image-urls');

  console.log('code列索引:', codeIndex);
  console.log('item-image-urls列索引:', imageUrlsIndex);

  // Fail loudly on a missing header: with an index of -1 every row would be
  // skipped as "empty" and the run would still report success.
  const missingColumns = [];
  if (codeIndex === -1) {
    missingColumns.push('code');
  }
  if (imageUrlsIndex === -1) {
    missingColumns.push('item-image-urls');
  }
  if (missingColumns.length > 0) {
    throw new Error(`CSV缺少必需的列: ${missingColumns.join(', ')}`);
  }

  const goodsDir = path.join(__dirname, 'goods');
  fs.mkdirSync(goodsDir, { recursive: true });

  // Resume at the saved row itself rather than the one after it: images that
  // are already on disk are skipped, so this only retries what is still missing.
  const firstRow = Math.max(1, lastProcessedRow);
  for (let i = firstRow; i < records.length; i++) {
    console.log(`\n正在处理第 ${i + 1}/${records.length} 行`);
    try {
      await downloadRowImages({
        record: records[i],
        rowNumber: i + 1,
        totalRows: records.length,
        codeIndex,
        imageUrlsIndex,
        goodsDir
      });
    } catch (error) {
      // A broken row must not stop the remaining rows.
      console.error(`处理第 ${i + 1} 行时出错:`, error);
    }

    fs.writeFileSync(progressFile, JSON.stringify({ lastRow: i }));
  }

  console.log('\nCSV处理完成!');
  // Nothing left to resume once every row has been handled.
  if (fs.existsSync(progressFile)) {
    fs.unlinkSync(progressFile);
  }
}
-
// Run the export. A fatal error is logged and reported through a non-zero exit
// code so that shells and schedulers can tell the run failed.
processCSV().catch((error) => {
  console.error('程序出错:', error);
  process.exitCode = 1;
});
|