|
|
@@ -0,0 +1,229 @@ |
|
|
|
const fs = require('fs'); |
|
|
|
const path = require('path'); |
|
|
|
const { execSync } = require('child_process'); |
|
|
|
|
|
|
|
/**
 * Makes sure the third-party packages this script requires are installed
 * next to it.
 *
 * Creates a minimal package.json when none exists, then installs every
 * required package that is missing from the local node_modules directory.
 * Terminates the process with exit code 1 when the installation fails.
 *
 * @returns {void}
 */
function ensureDependencies() {
  // Packages required by this script, with the version ranges to install.
  const dependencies = {
    'csv-parse': '^4.16.3',
    'axios': '^1.6.0'
  };

  const packageJsonPath = path.join(__dirname, 'package.json');
  const nodeModulesPath = path.join(__dirname, 'node_modules');

  // Create package.json when it does not exist yet.
  if (!fs.existsSync(packageJsonPath)) {
    console.log('创建package.json...');
    const packageJson = {
      name: 'yahoo-goods-export',
      version: '1.0.0',
      private: true,
      dependencies: dependencies
    };
    fs.writeFileSync(packageJsonPath, JSON.stringify(packageJson, null, 2));
  }

  // A node_modules directory can exist without containing these packages
  // (for example when package.json predates this script), so each package is
  // checked individually instead of only testing for the directory.
  const missing = Object.keys(dependencies).filter(
    (name) => !fs.existsSync(path.join(nodeModulesPath, name, 'package.json'))
  );
  if (missing.length === 0) {
    return;
  }

  console.log('安装依赖,这可能需要几分钟...');
  try {
    // Naming the packages explicitly installs them even when an existing
    // package.json does not list them. The double quotes keep "^" literal in
    // cmd.exe, where an unquoted caret is an escape character.
    const specs = missing
      .map((name) => `"${name}@${dependencies[name]}"`)
      .join(' ');
    execSync(`npm install ${specs}`, { cwd: __dirname, stdio: 'inherit' });
    console.log('依赖安装完成');
  } catch (error) {
    console.error('依赖安装失败:', error.message);
    process.exit(1);
  }
}
|
|
|
|
|
|
|
// Install any missing packages up front; the require calls further down
// in this file depend on them being present.
ensureDependencies();
|
|
|
|
|
|
|
// Load the third-party dependencies only now, after the install check above has run.
|
|
|
const { parse } = require('csv-parse/sync'); |
|
|
|
const axios = require('axios'); |
|
|
|
|
|
|
|
/**
 * Downloads one image to disk, retrying on failure.
 *
 * The response is streamed into `<filepath>.part` and renamed to `filepath`
 * only after the write stream has closed, so an interrupted download never
 * leaves a truncated file at `filepath`.
 *
 * @param {string} url - Image URL to fetch.
 * @param {string} filepath - Destination path of the finished file.
 * @param {number} [retries=3] - Maximum number of attempts.
 * @returns {Promise<boolean>} Resolves to true once the file has been saved.
 * @throws {Error} When the last attempt fails; the underlying error is
 *   attached as `cause`.
 */
async function downloadImage(url, filepath, retries = 3) {
  // Data is written here first and moved into place only on success.
  const partialPath = `${filepath}.part`;

  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      const response = await axios({
        url,
        method: 'GET',
        responseType: 'stream',
        timeout: 50000, // 50 second timeout
        maxContentLength: 50 * 1024 * 1024, // 50 MB size limit passed to axios
        headers: {
          'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
      });

      await new Promise((resolve, reject) => {
        const writer = fs.createWriteStream(partialPath);

        // Reject first, then tear down both streams so neither the socket
        // nor the file descriptor stays open after a failure.
        const fail = (error) => {
          reject(error);
          response.data.destroy();
          writer.destroy();
        };

        response.data.on('error', fail);
        writer.on('error', fail);
        // 'close' fires after the descriptor is released, which makes the
        // rename below safe; after a failure the promise is already rejected
        // and this resolve is a no-op.
        writer.on('close', resolve);

        response.data.pipe(writer);
      });

      fs.renameSync(partialPath, filepath);
      return true;
    } catch (error) {
      console.error(`下载失败 (尝试 ${attempt}/${retries}): ${url}`, error.message);

      // Best-effort cleanup. A leftover .part file is harmless: it is
      // truncated by the next attempt and is never treated as a finished image.
      try {
        fs.unlinkSync(partialPath);
      } catch (cleanupError) {
        // Nothing to remove, or the file is still being released.
      }

      if (attempt === retries) {
        throw new Error(`下载失败 (已重试${retries}次): ${error.message}`, { cause: error });
      }

      // Back off a little longer after each failed attempt.
      await new Promise(resolve => setTimeout(resolve, 2000 * attempt));
    }
  }
}
|
|
|
|
|
|
|
/**
 * Builds the local file name for the image at the given 1-based position.
 * The extension is taken from the URL path only, so a query string or
 * fragment never ends up in the file name; '.jpg' is used when the path has
 * no extension.
 */
function buildImageFilename(imageUrl, position) {
  let pathname;
  try {
    pathname = new URL(imageUrl).pathname;
  } catch (error) {
    // Not an absolute URL: drop any query string or fragment by hand.
    pathname = imageUrl.split(/[?#]/)[0];
  }
  return `image_${position}${path.extname(pathname) || '.jpg'}`;
}

/**
 * Reads the product CSV next to this script and downloads every image listed
 * in the 'item-image-urls' column into goods/<code>/image_<n><ext>.
 *
 * After each processed row the row index is written to progress.json. A later
 * run starts again at that recorded row (the row is processed once more, and
 * images already on disk are skipped). The progress file is deleted after the
 * last row.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the CSV file is missing, has no rows, or lacks the
 *   'code' or 'item-image-urls' header.
 */
async function processCSV() {
  const csvFilePath = path.join(__dirname, '2025-02-07商城商品.csv');
  const progressFile = path.join(__dirname, 'progress.json');

  // Restore the row index saved by a previous run, if any.
  let lastProcessedRow = 0;
  if (fs.existsSync(progressFile)) {
    try {
      const progress = JSON.parse(fs.readFileSync(progressFile, 'utf8'));
      // Anything other than a positive integer would corrupt the loop bound
      // below, so fall back to starting from the first data row.
      lastProcessedRow = Number.isInteger(progress.lastRow) && progress.lastRow > 0
        ? progress.lastRow
        : 0;
      console.log(`从上次进度继续:第 ${lastProcessedRow} 行`);
    } catch (error) {
      console.error('读取进度文件失败:', error);
    }
  }

  if (!fs.existsSync(csvFilePath)) {
    throw new Error(`CSV文件不存在: ${csvFilePath}`);
  }

  console.log('开始处理CSV文件...');
  console.log(`CSV文件路径: ${csvFilePath}`);

  const fileContent = fs.readFileSync(csvFilePath);
  const records = parse(fileContent, {
    skip_empty_lines: true,
    relax_quotes: true,
    relax_column_count: true,
    encoding: 'utf8',
    bom: true
  });

  console.log(`总行数: ${records.length}`);

  if (records.length === 0) {
    throw new Error('CSV文件为空');
  }

  // Locate the two columns this script needs in the header row.
  const headers = records[0];
  const codeIndex = headers.indexOf('code');
  const imageUrlsIndex = headers.indexOf('item-image-urls');

  console.log('code列索引:', codeIndex);
  console.log('item-image-urls列索引:', imageUrlsIndex);

  // Without this check a missing column would silently skip every row.
  if (codeIndex === -1 || imageUrlsIndex === -1) {
    throw new Error('CSV缺少必需的列: code 或 item-image-urls');
  }

  // Root folder that receives one sub-folder per product code.
  const goodsDir = path.join(__dirname, 'goods');
  fs.mkdirSync(goodsDir, { recursive: true });

  // Row 0 is the header, so data rows start at index 1.
  for (let i = Math.max(1, lastProcessedRow); i < records.length; i++) {
    console.log(`\n正在处理第 ${i + 1}/${records.length} 行`);
    try {
      const record = records[i];

      // Rows too short to contain both columns are skipped.
      if (!record || record.length < Math.max(codeIndex, imageUrlsIndex) + 1) {
        console.log(`第${i + 1}行: 数据不完整,跳过`);
        continue;
      }

      const folderName = record[codeIndex];
      const imageUrls = record[imageUrlsIndex];

      if (!folderName || folderName.trim() === '') {
        console.log(`第${i + 1}行: code列为空,跳过`);
        continue;
      }

      if (!imageUrls || imageUrls.trim() === '') {
        console.log(`第${i + 1}行: item-image-urls列为空,跳过`);
        continue;
      }

      const folderPath = path.join(goodsDir, folderName.trim());

      // The code comes from the CSV; refuse values that would resolve to a
      // location outside the goods folder.
      const relativeFolder = path.relative(goodsDir, folderPath);
      if (
        relativeFolder === '..' ||
        relativeFolder.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relativeFolder)
      ) {
        console.log(`第${i + 1}行: code列不是合法的文件夹名,跳过`);
        continue;
      }

      if (!fs.existsSync(folderPath)) {
        fs.mkdirSync(folderPath, { recursive: true });
        console.log(`创建文件夹: ${folderPath}`);
      }

      // The cell holds a ';'-separated list of image URLs.
      const imageUrlList = imageUrls
        .split(';')
        .map(url => url.trim())
        .filter(url => url && url.length > 0);

      console.log(`[${i + 1}/${records.length}] 文件夹 ${folderName} 找到${imageUrlList.length}个图片链接`);

      for (let j = 0; j < imageUrlList.length; j++) {
        try {
          const imageUrl = imageUrlList[j];
          const filename = buildImageFilename(imageUrl, j + 1);
          const filepath = path.join(folderPath, filename);

          if (fs.existsSync(filepath)) {
            console.log(`[${i + 1}/${records.length}] 图片已存在,跳过: ${filepath}`);
            continue;
          }

          console.log(`[${i + 1}/${records.length}] 正在下载: ${imageUrl}`);
          await downloadImage(imageUrl, filepath);
          console.log(`[${i + 1}/${records.length}] 已保存到: ${filepath}`);

          // Pause between downloads to avoid sending requests too frequently.
          await new Promise(resolve => setTimeout(resolve, 1500));
        } catch (error) {
          // One failed image must not stop the remaining images of the row.
          console.error(`图片下载失败 [行 ${i + 1}, 图片 ${j + 1}]:`, error.message);
        }
      }

      // Record progress after the row has been handled.
      fs.writeFileSync(progressFile, JSON.stringify({ lastRow: i }));
    } catch (error) {
      console.error(`处理第 ${i + 1} 行时出错:`, error);
      // Record progress, then carry on with the next row.
      fs.writeFileSync(progressFile, JSON.stringify({ lastRow: i }));
    }
  }

  console.log('\nCSV处理完成!');

  // The run finished, so the resume marker is no longer needed.
  if (fs.existsSync(progressFile)) {
    fs.unlinkSync(progressFile);
  }
}
|
|
|
|
|
|
|
// Entry point: run the export and report any fatal error.
processCSV().catch(error => {
  console.error('程序出错:', error);
  // Report the failure to the calling shell; without this the process would
  // still exit with status 0 after a fatal error.
  process.exitCode = 1;
});