- 新增 template.html 文件,包含商品价格变动通知的 HTML 模板,旨在提供用户友好的通知格式 - 新增 productInfoExample.js 文件,示例化商品信息的抓取和处理逻辑,便于开发者理解和使用爬虫服务 - 这些更改旨在增强项目的可用性和示例性,帮助开发者快速上手商品信息抓取功能 (master)
@@ -0,0 +1,236 @@ | |||
const ProductApiClient = require("../src/services/productApiClient"); | |||
const axios = require("axios"); | |||
// Client for the locally running crawler service.
const localClient = new ProductApiClient({
  // Adjust baseURL to match the actual deployment environment.
  baseURL: "http://localhost:8991",
});

// Connection settings for the remote (external network) server.
const serverClient = {
  baseURL: "http://192.168.1.107:8080",
  // 30 seconds, in milliseconds. The previous expression (10000 * 30) evaluated
  // to 300 seconds, contradicting the documented 30-second intent.
  timeout: 1000 * 30,
  params: {
    pageNum: 1,
    pageSize: 500,
  },
};
axios.defaults.baseURL = serverClient.baseURL;
axios.defaults.timeout = serverClient.timeout;

// Interval for refreshing the goods list and crawl configuration: 24 hours, in milliseconds.
const frequency = 1000 * 60 * 60 * 24;

// Crawl settings plus mutable scheduler state.
const config = {
  platform: "amazon", // platform name
  needScreenshot: true, // whether a screenshot is requested
  warnTimeRange: 2, // monitoring interval, in hours
  goodsList: [], // goods to crawl
  isRunning: false, // true while a crawl run is in progress
  timer: null, // handle returned by setInterval
};
/**
 * Fetch product information for one goods record through the local crawler client.
 *
 * Failures are logged and reported as `null`; this function does not throw
 * for a request error.
 *
 * @param {Object} goods - Goods record; `goodsSkuUrl`, `goodsSkuSn` and `platform` are read.
 * @param {boolean} [isRetry=false] - Whether this call is a retry (only changes log wording).
 * @returns {Promise<Object|null>} The value resolved by `localClient.getProductInfo`, or `null` on failure.
 */
async function fetchProductInfo(goods, isRetry = false) {
  try {
    console.log(`${isRetry ? "重试" : "开始"}抓取商品: ${goods.goodsSkuSn}`);
    const productInfo = await localClient.getProductInfo({
      url: goods.goodsSkuUrl,
      // Fall back to the configured default platform: the client rejects an
      // empty platform, so a record without one would otherwise always fail.
      platform: goods.platform || config.platform,
      needScreenshot: config.needScreenshot,
    });
    console.log(
      `${isRetry ? "重试" : "商品"} 抓取成功: ${goods.goodsSkuSn} - ${new Date().toLocaleString()}`
    );
    console.log(productInfo);
    return productInfo;
  } catch (error) {
    console.error(
      `抓取失败: ${goods.goodsSkuSn} - ${new Date().toLocaleString()}`,
      error.message
    );
    return null;
  }
}
/**
 * Post the first crawled SKU entry for a goods record to the server.
 *
 * Any failure (invalid crawl result or request error) is logged and reported
 * as `false`; this function does not throw.
 *
 * @param {Object} goods - Goods record; `goodsSkuUrl` and `goodsSkuSn` are read.
 * @param {Array<Object>} productInfo - Crawl result; only the first element is saved.
 * @returns {Promise<boolean>} `true` when the POST completed, otherwise `false`.
 */
async function saveProductInfo(goods, productInfo) {
  try {
    console.log(
      `开始保存商品信息: ${goods.goodsSkuSn} - ${new Date().toLocaleString()}`
    );
    // Validate up front so an empty result or a missing price is reported with
    // a clear reason instead of an opaque TypeError from destructuring.
    const [firstEntry] = Array.isArray(productInfo) ? productInfo : [];
    if (!firstEntry) {
      throw new Error("抓取结果为空");
    }
    const { title, price, sku, remark, screenshotUrl } = firstEntry;
    if (price == null) {
      throw new Error("抓取结果缺少价格");
    }
    const res = await axios.post(
      `${serverClient.baseURL}/system/operationWarnresult/receiveLatestGoodsInfo`,
      {
        title,
        price: String(price),
        sku,
        url: goods.goodsSkuUrl,
        remark,
        screenshotUrl,
      }
    );
    console.log(res.data);
    return true;
  } catch (saveError) {
    console.error(
      `商品信息保存失败: ${goods.goodsSkuSn} - ${new Date().toLocaleString()}`,
      saveError.message
    );
    return false;
  }
}
/**
 * Crawl one goods record and save the result, retrying the crawl on failure.
 *
 * The crawl is attempted up to `maxAttempts` times; the first attempt that
 * yields a truthy result is saved and ends processing. If every attempt
 * fails, the record is skipped.
 *
 * @param {Object} goods - Goods record to process.
 * @param {number} [maxAttempts=2] - Total crawl attempts (the default keeps the
 *   original behaviour of one try plus one retry).
 * @returns {Promise<void>}
 */
async function processProduct(goods, maxAttempts = 2) {
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    // Every attempt after the first is flagged as a retry for logging.
    const productInfo = await fetchProductInfo(goods, attempt > 1);
    if (productInfo) {
      await saveProductInfo(goods, productInfo);
      return;
    }
  }
  // All attempts failed: skip this record.
}
/**
 * Load the crawl configuration from the server and update `config.warnTimeRange`.
 *
 * The first returned row supplies the monitoring interval in hours. A missing,
 * non-numeric or non-positive value falls back to 2 hours, because the value is
 * later multiplied into a timer interval and 0 or NaN would make the timer
 * fire continuously.
 *
 * @returns {Promise<boolean>} `true` when the configuration was fetched and
 *   applied, `false` when the request or response handling failed (the
 *   previous `config.warnTimeRange` is then left untouched).
 */
async function fetchConfig() {
  const defaultHours = 2;
  try {
    console.log(`开始获取抓取配置: ${new Date().toLocaleString()}`);
    const res = await axios.get(
      `${serverClient.baseURL}/system/operationWarnconfig/noVerifyList`,
      { params: serverClient.params }
    );
    console.log(res.data);
    const { rows } = res.data;
    // Number() also accepts numeric strings such as "4" coming from the server.
    const hours = rows.length > 0 ? Number(rows[0].warnTimeRange) : defaultHours;
    config.warnTimeRange = Number.isFinite(hours) && hours > 0 ? hours : defaultHours;
    console.log(`抓取频率设置为 ${config.warnTimeRange} 小时`);
    return true;
  } catch (error) {
    console.error(`获取抓取配置失败: ${new Date().toLocaleString()}`, error.message);
    return false;
  }
}
/**
 * Fetch the goods list from the server and process every entry sequentially.
 *
 * A run is skipped when `config.isRunning` is already set. The flag is always
 * cleared in `finally`, so a failed run does not block later runs.
 *
 * @returns {Promise<void>}
 */
async function fetchGoodsListAndProcess() {
  if (config.isRunning) {
    console.log(`上一次任务尚未完成,跳过本次执行: ${new Date().toLocaleString()}`);
    return;
  }

  config.isRunning = true;
  console.log(`开始执行抓取任务: ${new Date().toLocaleString()}`);
  try {
    const res = await axios.get(
      `${serverClient.baseURL}/system/operationGoods/noVerifyList`,
      {
        params: {
          ...serverClient.params,
          isDisabled: 1,
        },
      }
    );
    console.log(res.data);
    const { rows } = res.data;
    // Reject a malformed response before touching config.goodsList so the
    // previous list is not replaced by undefined.
    if (!Array.isArray(rows)) {
      throw new TypeError("商品列表响应缺少 rows 数组");
    }
    config.goodsList = rows;

    // Sequential on purpose: one product at a time.
    for (const goods of rows) {
      try {
        await processProduct(goods);
      } catch (itemError) {
        // One broken record must not abort the rest of the batch.
        console.error(`处理商品失败: ${new Date().toLocaleString()}`, itemError.message);
      }
    }
    console.log("所有商品抓取完成", new Date().toLocaleString());
  } catch (error) {
    console.error(`获取商品列表失败: ${new Date().toLocaleString()}`, error.message);
  } finally {
    config.isRunning = false;
  }
}
/**
 * Process-level handler for uncaught exceptions: logs the error and, when no
 * timer is currently set, tries to start the scheduler again.
 * Declared as a named function so it can be registered exactly once.
 *
 * @param {Error} error - The uncaught exception.
 */
function handleUncaughtException(error) {
  console.error(`未捕获的异常: ${new Date().toLocaleString()}`, error);
  if (!config.timer) {
    startScheduler().catch((restartError) => {
      console.error(`调度器重启失败: ${new Date().toLocaleString()}`, restartError);
    });
  }
}

/**
 * Convert a monitoring interval in hours to a setInterval delay in milliseconds.
 * Invalid or non-positive values fall back to 2 hours, and the result is capped
 * at 2147483647 ms because Node resets larger delays to 1 ms.
 *
 * @param {*} hours - Interval in hours (number or numeric string).
 * @returns {number} Delay in milliseconds.
 */
function hoursToIntervalMs(hours) {
  const value = Number(hours);
  const safeHours = Number.isFinite(value) && value > 0 ? value : 2;
  return Math.min(safeHours * 60 * 60 * 1000, 2147483647);
}

/**
 * (Re)create the interval timer from the current `config.warnTimeRange`.
 * Each tick refreshes the configuration, runs a crawl, and re-arms the timer
 * when the configured interval has changed.
 */
function scheduleCrawlTimer() {
  // Clear any previous timer so only one is ever active.
  if (config.timer) {
    clearInterval(config.timer);
  }
  const intervalMs = hoursToIntervalMs(config.warnTimeRange);
  console.log(`设置定时任务,每 ${config.warnTimeRange} 小时执行一次,下次执行时间: ${new Date(Date.now() + intervalMs).toLocaleString()}`);
  config.timer = setInterval(async () => {
    try {
      console.log(`定时任务触发: ${new Date().toLocaleString()}`);
      // Refresh the configuration first; the interval may have changed.
      await fetchConfig();
      await fetchGoodsListAndProcess();
      if (hoursToIntervalMs(config.warnTimeRange) !== intervalMs) {
        console.log(`监控频率已变更为 ${config.warnTimeRange} 小时,重新设置定时器`);
        // Only the timer is re-armed. Restarting the whole scheduler here
        // would immediately repeat the crawl that has just finished.
        scheduleCrawlTimer();
      }
    } catch (error) {
      // Keep a failed tick from becoming an unhandled rejection.
      console.error(`定时任务执行失败: ${new Date().toLocaleString()}`, error);
    }
  }, intervalMs);
}

/**
 * Start the scheduled crawl: fetch the configuration, run one crawl
 * immediately, then arm the interval timer.
 *
 * Safe to call more than once: the previous timer is cleared and the
 * uncaught-exception handler is registered only once.
 *
 * @returns {Promise<void>}
 */
async function startScheduler() {
  // Register the crash guard once; repeated calls must not stack listeners.
  if (!process.listeners('uncaughtException').includes(handleUncaughtException)) {
    process.on('uncaughtException', handleUncaughtException);
  }
  await fetchConfig();
  await fetchGoodsListAndProcess();
  scheduleCrawlTimer();
}
// Start the scheduler; report a startup failure instead of leaving the
// returned promise unhandled.
startScheduler().catch((error) => {
  console.error(`抓取服务启动失败: ${new Date().toLocaleString()}`, error);
});
// Startup banner. These lines run before the first awaited request inside
// startScheduler resolves, so the interval printed is the value config holds
// at this moment.
console.log(`抓取服务已启动: ${new Date().toLocaleString()}`);
console.log(`初始监控频率: ${config.warnTimeRange} 小时`);
@@ -1,28 +0,0 @@ | |||
const express = require('express'); | |||
const cors = require('cors'); | |||
const helmet = require('helmet'); | |||
const { errorHandler } = require('./middlewares/errorHandler'); | |||
const routes = require('./routes'); | |||
// Build the Express application.
const app = express();

// Global middleware, registered in this order.
const globalMiddleware = [
  helmet(), // security headers
  cors(), // cross-origin support
  express.json(), // JSON body parsing
  express.urlencoded({ extended: true }), // URL-encoded body parsing
];
for (const middleware of globalMiddleware) {
  app.use(middleware);
}

// API routes.
app.use('/api', routes);

// Error-handling middleware, registered after the routes.
app.use(errorHandler);

// Start listening; the port comes from the environment with 8991 as fallback.
const PORT = process.env.PORT || 8991;
const announceStartup = () => {
  console.log(`服务器运行在 http://localhost:${PORT}`);
};
app.listen(PORT, announceStartup);
module.exports = app; |
@@ -1,45 +0,0 @@ | |||
const crawlerService = require('../services/crawlerService'); | |||
/**
 * Crawler controller: HTTP handlers that delegate to `crawlerService`.
 */
class CrawlerController {
  /**
   * Handle a product-info request.
   *
   * Reads `url`, `screenshot` and `allSkus` from the query string. Responds
   * with 400 when `url` is not a non-empty string; otherwise responds with
   * `{ success: true, data }`. Errors from the crawler are forwarded to `next`.
   *
   * @param {Request} req - Request object.
   * @param {Response} res - Response object.
   * @param {NextFunction} next - Next middleware function.
   */
  async getProductInfo(req, res, next) {
    try {
      const { url, screenshot, allSkus } = req.query;

      // A repeated or bracketed query key can arrive as an array or object.
      // The crawler calls url.split(...), so anything but a non-empty string
      // is rejected here as a client error instead of surfacing as a 500.
      if (typeof url !== 'string' || url.trim() === '') {
        return res.status(400).json({
          success: false,
          error: {
            message: '商品URL是必需的',
            statusCode: 400
          }
        });
      }

      // The flags arrive as strings; only the literal 'true' enables them.
      const productInfo = await crawlerService.crawlProductInfo(
        url,
        screenshot === 'true',
        allSkus === 'true'
      );

      res.json({
        success: true,
        data: productInfo
      });
    } catch (error) {
      next(error);
    }
  }
}
module.exports = new CrawlerController(); |
@@ -1,25 +0,0 @@ | |||
/**
 * Global Express error-handling middleware.
 *
 * Logs the error and answers with `{ success: false, error: { message, statusCode } }`.
 * The stack trace is included only when NODE_ENV is 'development'.
 *
 * @param {Error} err - Error object; `statusCode` and `message` are read.
 * @param {Request} req - Request object.
 * @param {Response} res - Response object.
 * @param {NextFunction} next - Next middleware function.
 */
const errorHandler = (err, req, res, next) => {
  console.error('错误:', err);

  // Once the response has started, a second status/body cannot be written;
  // hand the error to Express's default handler instead.
  if (res.headersSent) {
    return next(err);
  }

  // Only accept a real HTTP error status. Anything else (missing, 0,
  // non-numeric, a 2xx/3xx code) is reported as 500.
  const candidate = Number(err.statusCode);
  const statusCode =
    Number.isInteger(candidate) && candidate >= 400 && candidate <= 599
      ? candidate
      : 500;
  const message = err.message || '服务器内部错误';

  res.status(statusCode).json({
    success: false,
    error: {
      message,
      statusCode,
      ...(process.env.NODE_ENV === 'development' && { stack: err.stack })
    }
  });
};
module.exports = { errorHandler }; |
@@ -16,7 +16,7 @@ const rateLimiter = createRateLimiter({ | |||
*/ | |||
router.get('/product/info', rateLimiter, validateRequest, async (req, res, next) => { | |||
try { | |||
const { url, platform, needScreenshot, includeAllSkus } = req.query; | |||
const { url, platform, needScreenshot } = req.query; | |||
// 创建爬虫实例 | |||
const crawler = CrawlerFactory.createCrawler(platform, config[platform]); | |||
@@ -24,8 +24,7 @@ router.get('/product/info', rateLimiter, validateRequest, async (req, res, next) | |||
// 抓取商品信息 | |||
const data = await crawler.crawl( | |||
url, | |||
needScreenshot === 'true', | |||
includeAllSkus === 'true' | |||
needScreenshot === 'true' | |||
); | |||
res.json({ |
@@ -1,12 +0,0 @@ | |||
const express = require('express'); | |||
const router = express.Router(); | |||
const crawlerController = require('../controllers/crawlerController'); | |||
/**
 * @route GET /api/product
 * @desc Fetch product information
 * @access Public
 */
// Call through the controller instance rather than passing the bare method
// reference, so `this` stays bound if the handler ever uses instance state.
router.get('/product', (req, res, next) => crawlerController.getProductInfo(req, res, next));
module.exports = router; |
@@ -0,0 +1,204 @@ | |||
const axios = require('axios'); | |||
/**
 * Product crawler API SDK.
 * Client wrapper around the crawler service's HTTP endpoints.
 */
class CrawlerApiSDK {
  /**
   * @param {Object} [options] - Configuration options.
   * @param {string} [options.baseURL='http://localhost:8991'] - API base URL.
   * @param {number} [options.timeout=60000] - Request timeout in milliseconds.
   * @param {Object} [options.headers] - Extra request headers, merged over the JSON defaults.
   * @param {boolean} [options.debug=false] - Log requests, responses and errors to the console.
   */
  constructor(options = {}) {
    this.options = {
      baseURL: options.baseURL || 'http://localhost:8991',
      timeout: options.timeout || 60000,
      headers: options.headers || {},
      debug: options.debug || false
    };

    // Dedicated axios instance for this SDK.
    this.httpClient = axios.create({
      baseURL: this.options.baseURL,
      timeout: this.options.timeout,
      headers: {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        ...this.options.headers
      }
    });

    // Request interceptor: debug logging only.
    this.httpClient.interceptors.request.use(
      config => {
        if (this.options.debug) {
          console.log('API请求:', {
            method: config.method.toUpperCase(),
            url: config.url,
            params: config.params,
            headers: config.headers
          });
        }
        return config;
      },
      error => {
        if (this.options.debug) {
          console.error('API请求错误:', error);
        }
        return Promise.reject(error);
      }
    );

    // Response interceptor: unwrap the body so callers receive `response.data`
    // directly, and normalise failures.
    this.httpClient.interceptors.response.use(
      response => {
        if (this.options.debug) {
          console.log('API响应:', {
            status: response.status,
            statusText: response.statusText,
            url: response.config.url,
            data: response.data
          });
        }
        return response.data;
      },
      error => this._handleRequestError(error)
    );
  }

  /**
   * Turn an API `error` field into a message. The field may be a plain string
   * or an object carrying `message`; anything else yields the fallback.
   * @private
   * @param {*} errorField - The `error` value from a response body.
   * @param {string} fallback - Message used when none can be extracted.
   * @returns {string} Human-readable message.
   */
  _describeError(errorField, fallback) {
    if (typeof errorField === 'string' && errorField) {
      return errorField;
    }
    if (errorField && typeof errorField === 'object' && errorField.message) {
      return String(errorField.message);
    }
    return fallback;
  }

  /**
   * Convert an axios failure into a rejected promise carrying an Error with
   * `statusCode`, `code` and `originalError` properties.
   * @private
   * @param {Error} error - Error raised by axios.
   * @returns {Promise<never>} Always rejects with the enhanced error.
   */
  _handleRequestError(error) {
    if (this.options.debug) {
      console.error('API响应错误:', error);
    }

    let errorMessage = '请求失败';
    let errorCode = 'UNKNOWN_ERROR';
    let statusCode = 500;

    if (error.response) {
      // The server answered with an error status.
      statusCode = error.response.status;
      // Derive the code from the status even when the body is empty.
      errorCode = `HTTP_${statusCode}`;
      const errorData = error.response.data;
      if (errorData) {
        errorMessage = this._describeError(errorData.error, errorMessage);
        errorCode = errorData.code || errorCode;
      }
    } else if (error.request) {
      // The request was sent but no response arrived.
      errorMessage = '服务器无响应,请检查网络连接';
      errorCode = 'CONNECTION_ERROR';
      statusCode = 0;
    } else {
      // The request could not be set up at all.
      errorMessage = error.message || errorMessage;
      errorCode = 'REQUEST_SETUP_ERROR';
    }

    const enhancedError = new Error(errorMessage);
    enhancedError.statusCode = statusCode;
    enhancedError.code = errorCode;
    enhancedError.originalError = error;

    return Promise.reject(enhancedError);
  }

  /**
   * Fetch product information.
   * @param {Object} params - Request parameters.
   * @param {string} params.url - Product URL.
   * @param {string} params.platform - Platform name, e.g. 'amazon' (sent lower-cased).
   * @param {boolean} [params.needScreenshot=false] - Whether a screenshot is requested.
   * @returns {Promise<Object>} The `data` field of the API response.
   * @throws {Error} On invalid parameters, a failed request, or a response without `success`.
   */
  async getProductInfo(params) {
    if (!params || typeof params !== 'object') {
      throw new Error('参数必须是一个对象');
    }
    if (!params.url) {
      throw new Error('商品URL不能为空');
    }
    if (!params.platform) {
      throw new Error('平台名称不能为空');
    }

    const queryParams = {
      url: params.url,
      platform: params.platform.toLowerCase(),
      needScreenshot: params.needScreenshot === true ? 'true' : 'false'
    };

    const response = await this.httpClient.get('/api/product/info', {
      params: queryParams
    });

    if (!response || !response.success) {
      // `error` may be an object, so extract its message instead of letting
      // Error() stringify it as "[object Object]".
      throw new Error(this._describeError(response && response.error, '获取商品信息失败'));
    }

    return response.data;
  }

  /**
   * Fetch the list of supported platforms.
   * @returns {Promise<string[]>} The `data` field of the API response.
   * @throws {Error} On a failed request or a response without `success`.
   */
  async getSupportedPlatforms() {
    const response = await this.httpClient.get('/api/platforms');

    if (!response || !response.success) {
      throw new Error(this._describeError(response && response.error, '获取平台列表失败'));
    }

    return response.data;
  }

  /**
   * Health check.
   * @returns {Promise<Object>} Body returned by GET /health.
   * @throws {Error} If the request fails.
   */
  async healthCheck() {
    return this.httpClient.get('/health');
  }
}
module.exports = CrawlerApiSDK; |
@@ -1,196 +0,0 @@ | |||
const { chromium } = require('playwright'); | |||
const path = require('path'); | |||
const fs = require('fs').promises; | |||
const FormData = require('form-data'); | |||
const fetch = require('node-fetch'); | |||
/**
 * Amazon product information crawler service (Playwright).
 */
class CrawlerService {
  constructor() {
    // Endpoint that receives uploaded screenshots.
    this.UPLOAD_URL = 'https://apibase.sohomall.jp/uploaders?scene=goods';
    // Pages that already have a console listener attached, so repeated calls
    // on the same page do not stack listeners.
    this.consoleLoggedPages = new WeakSet();
  }

  /**
   * Launch a headless Chromium browser.
   * @returns {Promise<Object>} The launched browser.
   */
  async initBrowser() {
    return await chromium.launch({ headless: true });
  }

  /**
   * Ensure the `screenshots` directory exists under the current working directory.
   * @returns {Promise<string>} Absolute path of the directory.
   */
  async createScreenshotDir() {
    const dir = path.join(process.cwd(), 'screenshots');
    await fs.mkdir(dir, { recursive: true });
    return dir;
  }

  /**
   * Forward the page's console messages to the Node console, once per page.
   * @param {Object} page - Playwright page.
   */
  attachConsoleLogger(page) {
    if (this.consoleLoggedPages.has(page)) return;
    this.consoleLoggedPages.add(page);
    page.on('console', msg => console.log('Browser Console:', msg.text()));
  }

  /**
   * Read the currently displayed SKU: title, price (minus any coupon amount),
   * ASIN, URL and a remark holding the original price and the coupon amount.
   * @param {Object} page - Playwright page showing the product.
   * @returns {Promise<Object>} `{ title, price, sku, url, remark }`.
   */
  async getSingleSkuInfo(page) {
    this.attachConsoleLogger(page);

    await page.waitForTimeout(500);
    let couponValue = 0;
    const couponTrigger = await page.$('.a-declarative[data-action="a-modal"], .couponLabelText');
    if (couponTrigger) {
      // Best effort: a coupon that cannot be opened or parsed counts as 0.
      try {
        await couponTrigger.click();
        await page.waitForTimeout(500);
      } catch (clickError) {
        console.log('优惠券点击失败:', clickError.message);
      }
      try {
        const couponText = await page.$eval('.couponLabelText', el => el.textContent.trim());
        const m = couponText.match(/¥\s*([\d,]+)/);
        couponValue = m ? parseInt(m[1].replace(/,/g, ''), 10) : 0;
        console.log('Found coupon value:', couponValue);
      } catch (parseError) {
        console.log('优惠券解析失败:', parseError.message);
      }
      // Try to close the modal; fall back to Escape.
      try { await page.click('button.a-modal-close', { timeout: 1000 }); } catch { await page.keyboard.press('Escape'); }
    }

    // Runs inside the browser: everything it needs must be defined in the callback.
    return await page.evaluate(couponValue => {
      const textOf = selector => {
        const el = document.querySelector(selector);
        return el ? el.textContent.trim() : null;
      };
      const title = textOf('#productTitle');
      let priceText = textOf('span.a-price > span.a-offscreen')
        || textOf('#priceblock_dealprice')
        || textOf('#priceblock_saleprice')
        || textOf('#priceblock_ourprice')
        || null;
      if (priceText !== null) {
        if (priceText.includes('ポイント')) priceText = priceText.split('ポイント')[0].trim();
        // Strip "JP¥" before the bare "¥"; the other order leaves a stray "JP".
        priceText = priceText.replace('JP¥', '').replace('¥', '');
      }
      const m = priceText !== null ? priceText.match(/\s*([\d,]+)/) : null;
      let priceVal = m ? parseInt(m[1].replace(/,/g, ''), 10) : null;
      if (Number.isNaN(priceVal)) priceVal = null;
      if (priceVal != null) priceVal -= couponValue;
      console.log('priceText', priceText);
      console.log('priceVal', priceVal);
      console.log('couponValue', couponValue);
      const price = priceVal != null ? `${priceVal.toLocaleString()}` : priceText;
      const url = window.location.href;
      const asin = url.match(/\/dp\/([A-Z0-9]{10})/)?.[1] || null;
      return { title, price, sku: asin, url, remark: `Original Price: JP¥${priceText ?? 'N/A'} Coupon Price: JP¥${couponValue}` };
    }, couponValue);
  }

  /**
   * Read every SKU combination by clicking through the Cartesian product of
   * the variant button groups.
   * @param {Object} page - Playwright page showing the product.
   * @returns {Promise<Array<Object>>} One info object per combination, each with a `variants` array.
   */
  async getAllSkuInfo(page) {
    // Wait for the SKU groups to load.
    await page.waitForSelector('.a-cardui-body #twister-plus-inline-twister > .a-section');
    const groupEls = await page.$$('.a-cardui-body #twister-plus-inline-twister > .a-section');
    const groups = [];
    for (const groupEl of groupEls) {
      const btns = await groupEl.$$('.a-button-inner .a-button-input');
      if (btns.length) groups.push(btns);
    }
    if (!groups.length) return [await this.getSingleSkuInfo(page)];

    // Build the Cartesian product of all groups.
    const cartesian = (arr1, arr2) => arr1.flatMap(a => arr2.map(b => [...a, b]));
    let combos = groups[0].map(b => [b]);
    for (let i = 1; i < groups.length; i++) combos = cartesian(combos, groups[i]);

    const results = [];
    for (const combo of combos) {
      // Click one button per dimension, in order.
      for (const btn of combo) {
        await btn.click();
        await page.waitForLoadState('networkidle');
      }
      const info = await this.getSingleSkuInfo(page);
      // getAttribute returns a promise, so it must be awaited before the `||`
      // fallback; otherwise the promise is always truthy and `title` is never read.
      info.variants = await Promise.all(
        combo.map(async btn => (await btn.getAttribute('aria-label')) || (await btn.getAttribute('title')))
      );
      results.push(info);
    }
    return results;
  }

  /**
   * Read an image file and encode it as a PNG data URL.
   * @param {string} imagePath - Image path.
   * @returns {Promise<string|null>} Data URL, or `null` when reading fails.
   */
  async convertImageToBase64(imagePath) {
    try {
      const imageBuffer = await fs.readFile(imagePath);
      return `data:image/png;base64,${imageBuffer.toString('base64')}`;
    } catch (error) {
      console.error('转换图片到base64失败:', error);
      return null;
    }
  }

  /**
   * Upload an image file to `UPLOAD_URL`.
   * @param {string} imagePath - Image path.
   * @returns {Promise<string|null>} The `url` field of the JSON response, or `null` on failure.
   */
  async uploadImage(imagePath) {
    try {
      const form = new FormData();
      form.append('file', await fs.readFile(imagePath), {
        filename: path.basename(imagePath),
        contentType: 'image/png'
      });

      const response = await fetch(this.UPLOAD_URL, {
        method: 'POST',
        body: form
      });

      if (!response.ok) {
        throw new Error(`上传失败: ${response.statusText}`);
      }

      const result = await response.json();
      return result.url; // assumes the server response contains a `url` field — TODO confirm
    } catch (error) {
      console.error('上传图片失败:', error);
      return null;
    }
  }

  /**
   * Main entry point: crawl product information from a product page.
   *
   * The browser is always closed, including when navigation or extraction throws.
   *
   * @param {string} url - Product URL; the query string is dropped before navigation.
   * @param {boolean} [needScreenshot=false] - Take a full-page screenshot, upload it and
   *   attach the resulting URL to every item as `screenshotUrl`.
   * @param {boolean} [includeAllSkus=false] - Crawl every SKU combination instead of the displayed one.
   * @returns {Promise<Array<Object>>} Product info items.
   */
  async crawlProductInfo(url, needScreenshot = false, includeAllSkus = false) {
    const browser = await this.initBrowser();
    try {
      const context = await browser.newContext({ locale: 'ja-JP', userAgent: 'Mozilla/5.0' });
      const page = await context.newPage();
      await page.goto(url.split('?')[0], { waitUntil: 'networkidle' });

      const data = includeAllSkus
        ? await this.getAllSkuInfo(page)
        : [await this.getSingleSkuInfo(page)];

      if (needScreenshot) {
        const dir = await this.createScreenshotDir();
        const filename = `${Date.now()}.png`;
        const shot = path.join(dir, filename);
        await page.screenshot({ path: shot, fullPage: true });

        // Upload the screenshot and attach its URL to every item.
        const imageUrl = await this.uploadImage(shot);
        data.forEach(item => {
          item.screenshotUrl = imageUrl;
        });

        // Remove the temporary file.
        try {
          await fs.unlink(shot);
        } catch (error) {
          console.error('删除临时截图文件失败:', error);
        }
      }

      return data;
    } finally {
      // Release the browser on success and on failure alike.
      await browser.close();
    }
  }
}
module.exports = new CrawlerService(); |
@@ -210,7 +210,7 @@ class AmazonCrawler extends BaseCrawler { | |||
// 等待弹窗消失 | |||
await this.page.waitForTimeout(500); | |||
} catch (clickError) { | |||
console.log('优惠券点击或处理失败:', clickError.message); | |||
console.log('没有优惠券', clickError.message); | |||
// 如果点击失败,尝试按ESC键关闭可能的弹窗 | |||
try { | |||
await this.page.keyboard.press('Escape'); | |||
@@ -322,52 +322,13 @@ class AmazonCrawler extends BaseCrawler { | |||
} | |||
} | |||
/** | |||
* 获取所有SKU组合信息 | |||
* @returns {Promise<Array>} SKU信息数组 | |||
*/ | |||
async getAllSkuInfo() { | |||
try { | |||
const groups = await this.getVariants(); | |||
if (!groups.length) return [await this.getSingleSkuInfo()]; | |||
// 生成笛卡尔积组合 | |||
const cartesian = (arr1, arr2) => arr1.flatMap(a => arr2.map(b => [...a, b])); | |||
let combos = groups[0].map(b => [b]); | |||
for (let i = 1; i < groups.length; i++) { | |||
combos = cartesian(combos, groups[i]); | |||
} | |||
const results = []; | |||
for (const combo of combos) { | |||
// 依次点击每个维度按钮 | |||
for (const btn of combo) { | |||
await btn.click(); | |||
await this.page.waitForLoadState('networkidle'); | |||
} | |||
// 获取当前组合信息 | |||
const info = await this.getSingleSkuInfo(); | |||
info.variants = await Promise.all( | |||
combo.map(btn => btn.getAttribute('aria-label') || btn.getAttribute('title')) | |||
); | |||
results.push(info); | |||
} | |||
return results; | |||
} catch (error) { | |||
throw new CrawlerError('获取所有SKU信息失败', 'ALL_SKU_INFO_GET_ERROR', 'amazon', error); | |||
} | |||
} | |||
/** | |||
* 主方法:抓取商品信息 | |||
* @param {string} url - 商品URL | |||
* @param {boolean} needScreenshot - 是否需要截图 | |||
* @param {boolean} includeAllSkus - 是否包含所有SKU | |||
* @returns {Promise<Array>} 商品信息数组 | |||
*/ | |||
async crawl(url, needScreenshot = false, includeAllSkus = false) { | |||
async crawl(url, needScreenshot = false) { | |||
try { | |||
await this.initBrowser(); | |||
@@ -389,9 +350,8 @@ class AmazonCrawler extends BaseCrawler { | |||
// 导航到目标页面 | |||
await this.navigateWithRetry(url.split('?')[0]); | |||
const data = includeAllSkus | |||
? await this.getAllSkuInfo() | |||
: [await this.getSingleSkuInfo()]; | |||
// 只获取单个SKU信息 | |||
const data = [await this.getSingleSkuInfo()]; | |||
if (needScreenshot) { | |||
try { |
@@ -1,6 +1,6 @@ | |||
const { chromium } = require('playwright'); | |||
const path = require('path'); | |||
const fs = require('fs').promises; | |||
const { chromium } = require("playwright"); | |||
const path = require("path"); | |||
const fs = require("fs").promises; | |||
/** | |||
* 基础爬虫类 | |||
@@ -19,13 +19,14 @@ class BaseCrawler { | |||
* @returns {Promise<void>} | |||
*/ | |||
async initBrowser() { | |||
this.browser = await chromium.launch({ | |||
this.browser = await chromium.launch({ | |||
headless: false, | |||
args: ['--no-sandbox', '--disable-setuid-sandbox'] | |||
args: ["--no-sandbox", "--disable-setuid-sandbox"], | |||
}); | |||
this.context = await this.browser.newContext({ | |||
locale: 'ja-JP', | |||
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' | |||
this.context = await this.browser.newContext({ | |||
locale: "ja-JP", | |||
userAgent: | |||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", | |||
}); | |||
this.page = await this.context.newPage(); | |||
} | |||
@@ -48,60 +49,10 @@ class BaseCrawler { | |||
* @returns {Promise<string>} 截图目录路径 | |||
*/ | |||
async createScreenshotDir() { | |||
const dir = path.join(process.cwd(), 'screenshots'); | |||
const dir = path.join(process.cwd(), "screenshots"); | |||
await fs.mkdir(dir, { recursive: true }); | |||
return dir; | |||
} | |||
/** | |||
* 获取商品信息 | |||
* @param {string} url - 商品URL | |||
* @returns {Promise<Object>} 商品信息 | |||
*/ | |||
async crawl(url) { | |||
throw new Error('Method not implemented'); | |||
} | |||
/** | |||
* 解析价格 | |||
* @param {string} priceText - 价格文本 | |||
* @returns {Promise<number>} 解析后的价格 | |||
*/ | |||
async parsePrice(priceText) { | |||
throw new Error('Method not implemented'); | |||
} | |||
/** | |||
* 处理优惠券 | |||
* @returns {Promise<number>} 优惠券金额 | |||
*/ | |||
async handleCoupon() { | |||
throw new Error('Method not implemented'); | |||
} | |||
/** | |||
* 获取商品标题 | |||
* @returns {Promise<string>} 商品标题 | |||
*/ | |||
async getTitle() { | |||
throw new Error('Method not implemented'); | |||
} | |||
/** | |||
* 获取商品SKU | |||
* @returns {Promise<string>} 商品SKU | |||
*/ | |||
async getSku() { | |||
throw new Error('Method not implemented'); | |||
} | |||
/** | |||
* 获取商品变体信息 | |||
* @returns {Promise<Array>} 变体信息数组 | |||
*/ | |||
async getVariants() { | |||
throw new Error('Method not implemented'); | |||
} | |||
} | |||
module.exports = BaseCrawler; | |||
module.exports = BaseCrawler; |
@@ -17,25 +17,6 @@ class CrawlerError extends Error { | |||
this.originalError = originalError; | |||
this.timestamp = new Date().toISOString(); | |||
} | |||
/** | |||
* 转换为JSON对象 | |||
* @returns {Object} 错误信息对象 | |||
*/ | |||
toJSON() { | |||
return { | |||
name: this.name, | |||
message: this.message, | |||
code: this.code, | |||
platform: this.platform, | |||
timestamp: this.timestamp, | |||
stack: this.stack, | |||
originalError: this.originalError ? { | |||
message: this.originalError.message, | |||
stack: this.originalError.stack | |||
} : null | |||
}; | |||
} | |||
} | |||
module.exports = CrawlerError; |
@@ -0,0 +1,118 @@ | |||
const axios = require('axios'); | |||
/**
 * Product API client service.
 * Calls the crawler API to obtain product information.
 */
class ProductApiClient {
  /**
   * @param {Object} [options] - Configuration options.
   * @param {string} [options.baseURL='http://localhost:8991'] - API base URL.
   * @param {number} [options.timeout=60000] - Request timeout in milliseconds.
   * @param {Object} [options.headers] - Request headers; defaults to JSON content/accept headers.
   */
  constructor(options = {}) {
    this.baseURL = options.baseURL || 'http://localhost:8991';
    this.timeout = options.timeout || 60000;
    this.headers = options.headers || {
      'Content-Type': 'application/json',
      'Accept': 'application/json'
    };

    // Dedicated axios instance for this client.
    this.client = axios.create({
      baseURL: this.baseURL,
      timeout: this.timeout,
      headers: this.headers
    });

    // Unwrap the response body and normalise failures.
    this.client.interceptors.response.use(
      response => response.data,
      error => this.handleError(error)
    );
  }

  /**
   * Convert an axios failure into a rejected promise carrying an Error with
   * `statusCode` and `originalError` properties.
   * @private
   * @param {Error} error - Error raised by axios.
   * @returns {Promise<never>} Always rejects with the enhanced error.
   */
  handleError(error) {
    let errorMessage = '获取商品信息失败';
    let statusCode = 500;

    if (error.response) {
      // The server answered with an error status.
      statusCode = error.response.status;
      const errorData = error.response.data;
      if (errorData && errorData.error) {
        errorMessage = typeof errorData.error === 'string'
          ? errorData.error
          : errorData.error.message || errorMessage;
      }
    } else if (error.request) {
      // The request was sent but no response arrived.
      errorMessage = '服务器无响应,请检查网络连接';
      statusCode = 0;
    } else if (error.message) {
      // The request could not be set up; keep the underlying reason.
      errorMessage = error.message;
    }

    const enhancedError = new Error(errorMessage);
    enhancedError.statusCode = statusCode;
    enhancedError.originalError = error;

    return Promise.reject(enhancedError);
  }

  /**
   * Fetch product information.
   * @param {Object} params - Request parameters.
   * @param {string} params.url - Product URL.
   * @param {string} params.platform - Platform name (amazon).
   * @param {boolean} [params.needScreenshot=false] - Whether a screenshot is requested.
   * @returns {Promise<Object>} The `data` field of the API response.
   * @throws {Error} On invalid parameters or a failed request.
   */
  async getProductInfo(params) {
    // Guard against a missing argument so the caller gets a clear message
    // rather than a TypeError from reading `params.url`.
    if (!params || typeof params !== 'object') {
      throw new Error('参数必须是一个对象');
    }
    if (!params.url) {
      throw new Error('商品URL不能为空');
    }
    if (!params.platform) {
      throw new Error('平台名称不能为空');
    }

    // The API expects the screenshot flag as the string 'true' or 'false'.
    const queryParams = {
      url: params.url,
      platform: params.platform,
      needScreenshot: params.needScreenshot === true ? 'true' : 'false'
    };

    try {
      const response = await this.client.get('/api/product/info', { params: queryParams });
      return response.data;
    } catch (error) {
      console.error('获取商品信息失败:', error);
      throw error;
    }
  }

  /**
   * Fetch the list of supported platforms.
   * @returns {Promise<string[]>} The `data` field of the API response.
   * @throws {Error} If the request fails.
   */
  async getSupportedPlatforms() {
    try {
      const response = await this.client.get('/api/platforms');
      return response.data;
    } catch (error) {
      console.error('获取平台列表失败:', error);
      throw error;
    }
  }
}
module.exports = ProductApiClient; |
@@ -0,0 +1,97 @@ | |||
<!DOCTYPE html> | |||
<html lang="zh-CN"> | |||
<head> | |||
<meta charset="UTF-8"> | |||
<title>商品价格变动通知</title> | |||
</head> | |||
<body | |||
style="margin: 0; padding: 0; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif; background-color: #f5f7fa; color: #333333;"> | |||
<table width="100%" cellpadding="0" cellspacing="0" style="background-color: #f5f7fa; padding: 30px 0;"> | |||
<tr> | |||
<td align="center"> | |||
<table width="700" cellpadding="0" cellspacing="0" | |||
style="background-color: #ffffff; border-radius: 8px; overflow: hidden; box-shadow: 0 0 10px rgba(0,0,0,0.05);"> | |||
<!-- Header --> | |||
<tr style="background-color: #004b9b;"> | |||
<td style="padding: 20px 30px; color: white; font-size: 20px; font-weight: bold;"> | |||
商品价格变动通知 | |||
<a style="color: white; text-decoration: none; font-size: 12px; float: right; border: 1px solid white; padding: 5px; border-radius: 5px;" | |||
href="https://digital.sohomall.jp/system/operationGoods/operationWarnresult">立即处理</a> | |||
</td> | |||
</tr> | |||
<!-- Content Table --> | |||
<tr> | |||
<td style="padding: 30px;"> | |||
<table width="100%" cellpadding="8" cellspacing="0" | |||
style="border-collapse: collapse; font-size: 14px;"> | |||
<tr> | |||
<td | |||
style="background-color: #f2f4f6; font-weight: bold; vertical-align: top; width: 160px;"> | |||
监控平台</td> | |||
<td>Amazon</td> | |||
</tr> | |||
<tr> | |||
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">商品名称 | |||
</td> | |||
<td>【メモリ DDR4】GIGASTONE 16GBx2枚 (32GB Kit) DDR4 2666MHz (2400MHz or 2133MHz) | |||
PC4-21300 (PC4-19200/17000) CL19 1.2V UDIMM 288 ピン アンバッファー 非 ECC PC デスクトップ専用 | |||
メモリモジュール アップグレード</td> | |||
</tr> | |||
<tr> | |||
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">商品编号 | |||
</td> | |||
<td>B08KGRRSKH</td> | |||
</tr> | |||
<tr> | |||
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">商品链接 | |||
</td> | |||
<td><a href="https://www.amazon.co.jp/dp/B08KGRRSKH" | |||
style="color: #004b9b; text-decoration: none;">点击查看商品</a></td> | |||
</tr> | |||
<tr> | |||
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">原价格 | |||
</td> | |||
<td><span style="color: #666;">¥8280.00</span></td> | |||
</tr> | |||
<tr> | |||
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">新价格 | |||
</td> | |||
<td><span style="color: #d32f2f; font-weight: bold;">¥8280.00</span></td> | |||
</tr> | |||
<tr> | |||
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">预警时间 | |||
</td> | |||
<td>2025-05-15 10:47:19</td> | |||
</tr> | |||
<tr> | |||
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">商品快照 | |||
</td> | |||
<td> | |||
<img src="https://img.beical.com/goods/1747273525708-7D65E7A038F.png" alt="商品快照" | |||
width="300" style="border: 1px solid #ddd; border-radius: 4px;" /> | |||
</td> | |||
</tr> | |||
</table> | |||
</td> | |||
</tr> | |||
<!-- Footer --> | |||
<tr> | |||
<td | |||
style="padding: 20px 30px; font-size: 12px; color: #999999; text-align: center; background-color: #f9f9f9;"> | |||
时区:日本东京 金额单位:日元 <br> | |||
本邮件由系统(digital.sohomall.jp)自动发出,请勿直接回复。如有问题,请联系软件团队。 | |||
</td> | |||
</tr> | |||
</table> | |||
</td> | |||
</tr> | |||
</table> | |||
</body> | |||
</html> |