Bladeren bron

feat: 新增商品价格变动通知模板和商品信息示例

- 新增 template.html 文件,包含商品价格变动通知的 HTML 模板,旨在提供用户友好的通知格式
- 新增 productInfoExample.js 文件,示例化商品信息的抓取和处理逻辑,便于开发者理解和使用爬虫服务
- 这些更改旨在增强项目的可用性和示例性,帮助开发者快速上手商品信息抓取功能
master
lizhuang 1 maand geleden
bovenliggende
commit
994ef3ace8

+ 236
- 0
examples/productInfoExample.js Bestand weergeven

@@ -0,0 +1,236 @@
const ProductApiClient = require("../src/services/productApiClient");
const axios = require("axios");

// Client for the locally running crawler API service.
const localClient = new ProductApiClient({
// Adjust baseURL to match the actual deployment environment.
baseURL: "http://localhost:8991",
});

// Connection settings for the backend that supplies the goods list / crawl config
// and receives the crawl results.
// NOTE(review): the original comment called this the "external" server, but the
// address is a private LAN IP — confirm which host is intended.
const serverClient = {
baseURL: "http://192.168.1.107:8080",
timeout: 10000 * 30, // evaluates to 300000 ms (300 s); the original comment said 30 s — TODO confirm which is intended
params: {
pageNum: 1,
pageSize: 500,
},
};

// Apply the backend settings as global axios defaults.
axios.defaults.baseURL = serverClient.baseURL;
axios.defaults.timeout = serverClient.timeout;

// Intended run frequency for fetching the goods list and crawl config.
// NOTE(review): not referenced by any code visible in this file.
const frequency = 1000 * 60 * 60 * 24; // 24 hours

// Mutable crawl settings and scheduler state used by the functions in this file.
const config = {
platform: "amazon", // platform
needScreenshot: true, // whether a screenshot should be captured
warnTimeRange: 2, // monitoring interval, in hours
goodsList: [], // goods list
isRunning: false, // whether a crawl run is currently in progress
timer: null, // interval timer handle
};

/**
 * Crawl one product through the local crawler API.
 * Failures are logged and reported as `null` instead of being thrown.
 * @param {Object} goods - Goods record (reads goodsSkuSn, goodsSkuUrl, platform)
 * @param {boolean} isRetry - Whether this call is a retry attempt (only affects log wording)
 * @returns {Promise<Object|null>} The crawled product info, or null when the crawl failed
 */
async function fetchProductInfo(goods, isRetry = false) {
  const timestamp = () => new Date().toLocaleString();

  try {
    const startLabel = isRetry ? "重试" : "开始";
    console.log(`${startLabel}抓取商品: ${goods.goodsSkuSn}`);

    const request = {
      url: goods.goodsSkuUrl,
      platform: goods.platform,
      needScreenshot: config.needScreenshot,
    };
    const result = await localClient.getProductInfo(request);

    const doneLabel = isRetry ? "重试" : "商品";
    console.log(`${doneLabel} 抓取成功: ${goods.goodsSkuSn} - ${timestamp()}`);
    console.log(result);
    return result;
  } catch (err) {
    console.error(`抓取失败: ${goods.goodsSkuSn} - ${timestamp()}`, err.message);
    return null;
  }
}

/**
 * Post the first crawled entry for a product to the backend.
 * Any failure (including an empty result list) is logged and reported as `false`.
 * @param {Object} goods - Goods record (reads goodsSkuSn, goodsSkuUrl)
 * @param {Object} productInfo - Crawl result; its element at index 0 is the one saved
 * @returns {Promise<boolean>} true when the POST completed, false otherwise
 */
async function saveProductInfo(goods, productInfo) {
  const now = () => new Date().toLocaleString();

  try {
    console.log(`开始保存商品信息: ${goods.goodsSkuSn} - ${now()}`);

    const { title, price, sku, remark, screenshotUrl } = productInfo[0];
    const endpoint =
      serverClient.baseURL + "/system/operationWarnresult/receiveLatestGoodsInfo";
    const payload = {
      title,
      price: price.toString(), // the price is sent as a string
      sku,
      url: goods.goodsSkuUrl,
      remark,
      screenshotUrl,
    };

    const response = await axios.post(endpoint, payload);
    console.log(response.data);
    return true;
  } catch (saveError) {
    console.error(
      `商品信息保存失败: ${goods.goodsSkuSn} - ${now()}`,
      saveError.message
    );
    return false;
  }
}

/**
 * Crawl and save a single product, allowing one retry.
 * The first attempt that yields a result is saved and ends processing;
 * if both attempts fail the product is skipped.
 * @param {Object} goods - Goods record
 * @returns {Promise<void>}
 */
async function processProduct(goods) {
  // Attempt order: initial crawl first, then a single retry.
  for (const isRetry of [false, true]) {
    const productInfo = isRetry
      ? await fetchProductInfo(goods, true)
      : await fetchProductInfo(goods);

    if (productInfo) {
      await saveProductInfo(goods, productInfo);
      return;
    }
  }
  // Both attempts failed: nothing is saved for this product.
}

/**
 * Fetch the crawl configuration from the backend and update
 * `config.warnTimeRange` (monitoring interval in hours).
 *
 * The value of the first returned row is used. It is only accepted when it is
 * a positive finite number (numeric strings are converted); anything else
 * falls back to the 2-hour default. Storing an invalid value would later
 * produce a NaN timer delay, which Node.js silently turns into 1 ms.
 *
 * @returns {Promise<boolean>} true when the configuration request succeeded,
 *   false when it failed (the previous `config.warnTimeRange` is then kept)
 */
async function fetchConfig() {
  const DEFAULT_WARN_HOURS = 2;

  try {
    console.log(`开始获取抓取配置: ${new Date().toLocaleString()}`);
    const res = await axios.get(serverClient.baseURL + "/system/operationWarnconfig/noVerifyList", {
      params: serverClient.params,
    });
    console.log(res.data);

    const { rows } = res.data;
    const requestedHours = rows.length > 0 ? Number(rows[0].warnTimeRange) : DEFAULT_WARN_HOURS;
    const isUsable = Number.isFinite(requestedHours) && requestedHours > 0;
    if (!isUsable) {
      console.warn(`抓取配置中的监控频率无效,使用默认值 ${DEFAULT_WARN_HOURS} 小时`);
    }
    config.warnTimeRange = isUsable ? requestedHours : DEFAULT_WARN_HOURS;

    console.log(`抓取频率设置为 ${config.warnTimeRange} 小时`);
    return true;
  } catch (error) {
    console.error(`获取抓取配置失败: ${new Date().toLocaleString()}`, error.message);
    return false;
  }
}

/**
 * Fetch the goods list from the backend and crawl every item in order.
 *
 * Re-entrancy is guarded by `config.isRunning`: a call made while a previous
 * run is still active logs a message and returns immediately. The flag is
 * always reset when the run ends.
 *
 * A response without a `rows` array is treated as an empty list, and an
 * error thrown while processing one item is logged without aborting the
 * remaining items.
 *
 * @returns {Promise<void>}
 */
async function fetchGoodsListAndProcess() {
  if (config.isRunning) {
    console.log(`上一次任务尚未完成,跳过本次执行: ${new Date().toLocaleString()}`);
    return;
  }

  config.isRunning = true;
  console.log(`开始执行抓取任务: ${new Date().toLocaleString()}`);

  try {
    const res = await axios.get(serverClient.baseURL + "/system/operationGoods/noVerifyList", {
      params: {
        ...serverClient.params,
        isDisabled: 1,
      },
    });
    console.log(res.data);

    const rows = res.data && res.data.rows;
    config.goodsList = Array.isArray(rows) ? rows : [];

    // Items are processed strictly one after another.
    for (const goods of config.goodsList) {
      try {
        await processProduct(goods);
      } catch (itemError) {
        // Keep going: one broken item must not stop the rest of the run.
        console.error(
          `商品处理失败: ${new Date().toLocaleString()}`,
          itemError && itemError.message
        );
      }
    }

    console.log("所有商品抓取完成", new Date().toLocaleString());
  } catch (error) {
    console.error(`获取商品列表失败: ${new Date().toLocaleString()}`, error.message);
  } finally {
    config.isRunning = false;
  }
}

/**
 * Start (or restart) the periodic crawl.
 *
 * By default the configuration is fetched and one crawl run is executed
 * immediately; afterwards an interval timer repeats both steps every
 * `config.warnTimeRange` hours. When a tick finds that the configured
 * interval changed, the timer is re-armed with the new delay without running
 * the crawl a second time.
 *
 * Safeguards:
 * - The delay is derived from a validated hour count (default 2 h) and
 *   clamped to the largest delay Node.js timers accept; NaN or oversized
 *   delays would otherwise be coerced to 1 ms.
 * - The `uncaughtException` handler is installed only once, no matter how
 *   often the scheduler is restarted.
 * - Errors inside a tick are caught and logged instead of escaping as an
 *   unhandled rejection.
 *
 * @param {boolean} [runImmediately=true] - Fetch the config and crawl once
 *   before arming the timer. Internal restarts pass false.
 * @returns {Promise<void>}
 */
async function startScheduler(runImmediately = true) {
  const MS_PER_HOUR = 60 * 60 * 1000;
  const DEFAULT_HOURS = 2;
  const MAX_TIMER_DELAY_MS = 2147483647; // upper bound for Node.js timer delays

  // Convert the configured hour count into a delay that is safe for setInterval.
  const resolveIntervalMs = () => {
    const hours = Number(config.warnTimeRange);
    const safeHours = Number.isFinite(hours) && hours > 0 ? hours : DEFAULT_HOURS;
    return Math.min(safeHours * MS_PER_HOUR, MAX_TIMER_DELAY_MS);
  };

  if (runImmediately) {
    await fetchConfig();
    await fetchGoodsListAndProcess();
  }

  // Drop any previously armed timer before creating a new one.
  if (config.timer) {
    clearInterval(config.timer);
    config.timer = null;
  }

  const intervalMs = resolveIntervalMs();
  const effectiveHours = intervalMs / MS_PER_HOUR;
  console.log(`设置定时任务,每 ${effectiveHours} 小时执行一次,下次执行时间: ${new Date(Date.now() + intervalMs).toLocaleString()}`);

  config.timer = setInterval(async () => {
    try {
      console.log(`定时任务触发: ${new Date().toLocaleString()}`);
      // Re-read the configuration: the interval may have been changed.
      await fetchConfig();
      await fetchGoodsListAndProcess();

      if (resolveIntervalMs() !== intervalMs) {
        console.log(`监控频率已变更为 ${config.warnTimeRange} 小时,重新设置定时器`);
        // Re-arm only; the crawl for this tick has already run.
        await startScheduler(false);
      }
    } catch (error) {
      console.error(`定时任务执行失败: ${new Date().toLocaleString()}`, error);
    }
  }, intervalMs);

  // Install the crash guard once; re-registering on every restart leaks listeners.
  // NOTE(review): Node.js documentation advises against resuming normal
  // operation after 'uncaughtException' — confirm this keep-alive is wanted.
  if (!startScheduler.crashHandlerInstalled) {
    startScheduler.crashHandlerInstalled = true;
    process.on('uncaughtException', (error) => {
      console.error(`未捕获的异常: ${new Date().toLocaleString()}`, error);
      // Try to resume the schedule when no timer is currently armed.
      if (!config.timer) {
        startScheduler().catch((restartError) => {
          console.error(`调度器重启失败: ${new Date().toLocaleString()}`, restartError);
        });
      }
    });
  }
}

// Start the scheduler. The returned promise is handled explicitly so that a
// failed start is reported instead of surfacing as an unhandled rejection.
startScheduler().catch((error) => {
  console.error(`调度器启动失败: ${new Date().toLocaleString()}`, error);
});

// Startup banner. These lines run before the first configuration fetch has
// completed, so the frequency printed here is the built-in default.
console.log(`抓取服务已启动: ${new Date().toLocaleString()}`);
console.log(`初始监控频率: ${config.warnTimeRange} 小时`);

+ 0
- 28
src/app.js Bestand weergeven

@@ -1,28 +0,0 @@
const express = require('express');
const cors = require('cors');
const helmet = require('helmet');
const { errorHandler } = require('./middlewares/errorHandler');
const routes = require('./routes');

// Create the Express application.
const app = express();

// Middleware configuration.
app.use(helmet()); // security headers
app.use(cors()); // cross-origin (CORS) support
app.use(express.json()); // JSON body parsing
app.use(express.urlencoded({ extended: true })); // URL-encoded body parsing

// Route configuration: mount the routes module under /api.
app.use('/api', routes);

// Error-handling middleware, registered after the routes.
app.use(errorHandler);

// Start the server on PORT from the environment, falling back to 8991.
const PORT = process.env.PORT || 8991;
app.listen(PORT, () => {
console.log(`服务器运行在 http://localhost:${PORT}`);
});

module.exports = app;

+ 0
- 45
src/controllers/crawlerController.js Bestand weergeven

@@ -1,45 +0,0 @@
const crawlerService = require('../services/crawlerService');

/**
 * HTTP controller for the crawler endpoints.
 */
class CrawlerController {
  /**
   * Handle a product-info request.
   * Reads `url`, `screenshot` and `allSkus` from the query string; the two
   * flags are enabled only by the literal string 'true'. Responds with 400
   * when `url` is missing, otherwise with the crawl result. Any thrown error
   * is forwarded to `next`.
   * @param {Request} req - Request object
   * @param {Response} res - Response object
   * @param {NextFunction} next - Next middleware function
   */
  async getProductInfo(req, res, next) {
    try {
      const { url, screenshot, allSkus } = req.query;

      // Guard clause: the product URL is mandatory.
      if (!url) {
        const badRequest = {
          success: false,
          error: {
            message: '商品URL是必需的',
            statusCode: 400
          }
        };
        return res.status(400).json(badRequest);
      }

      const wantsScreenshot = screenshot === 'true';
      const wantsAllSkus = allSkus === 'true';
      const productInfo = await crawlerService.crawlProductInfo(
        url,
        wantsScreenshot,
        wantsAllSkus
      );

      res.json({ success: true, data: productInfo });
    } catch (err) {
      next(err);
    }
  }
}

module.exports = new CrawlerController();

+ 0
- 25
src/middlewares/errorHandler.js Bestand weergeven

@@ -1,25 +0,0 @@
/**
 * Global Express error-handling middleware.
 * Logs the error and answers with a JSON body carrying the message and
 * status code (500 / a generic message when the error provides none). The
 * stack is included only when NODE_ENV is 'development'.
 * @param {Error} err - Error object
 * @param {Request} req - Request object
 * @param {Response} res - Response object
 * @param {NextFunction} next - Next middleware function
 */
const errorHandler = (err, req, res, next) => {
  console.error('错误:', err);

  // Fall back to defaults when the error carries no status or message.
  const statusCode = err.statusCode || 500;
  const errorBody = {
    message: err.message || '服务器内部错误',
    statusCode
  };

  if (process.env.NODE_ENV === 'development') {
    errorBody.stack = err.stack;
  }

  res.status(statusCode).json({ success: false, error: errorBody });
};

module.exports = { errorHandler };

+ 2
- 3
src/routes/crawler.js Bestand weergeven

@@ -16,7 +16,7 @@ const rateLimiter = createRateLimiter({
*/
router.get('/product/info', rateLimiter, validateRequest, async (req, res, next) => {
try {
const { url, platform, needScreenshot, includeAllSkus } = req.query;
const { url, platform, needScreenshot } = req.query;
// 创建爬虫实例
const crawler = CrawlerFactory.createCrawler(platform, config[platform]);
@@ -24,8 +24,7 @@ router.get('/product/info', rateLimiter, validateRequest, async (req, res, next)
// 抓取商品信息
const data = await crawler.crawl(
url,
needScreenshot === 'true',
includeAllSkus === 'true'
needScreenshot === 'true'
);
res.json({

+ 0
- 12
src/routes/index.js Bestand weergeven

@@ -1,12 +0,0 @@
const express = require('express');
const router = express.Router();
const crawlerController = require('../controllers/crawlerController');

/**
 * Registers the product-info handler of the crawler controller.
 * @route GET /api/product
 * @desc Get product information
 * @access Public
 */
router.get('/product', crawlerController.getProductInfo);

module.exports = router;

+ 204
- 0
src/services/CrawlerApiSDK.js Bestand weergeven

@@ -0,0 +1,204 @@
const axios = require('axios');

/**
 * Product crawler API SDK.
 * Client-side wrapper around the crawler service's HTTP interface.
 */
class CrawlerApiSDK {
  /**
   * Create an SDK instance backed by its own axios client.
   * @param {Object} options - Configuration options
   * @param {string} options.baseURL - API base URL (default 'http://localhost:8991')
   * @param {number} options.timeout - Request timeout in milliseconds (default 60000)
   * @param {Object} options.headers - Custom request headers, merged over the JSON defaults
   * @param {boolean} options.debug - Whether to log requests, responses and errors
   */
  constructor(options = {}) {
    this.options = {
      baseURL: options.baseURL || 'http://localhost:8991',
      timeout: options.timeout || 60000,
      headers: options.headers || {},
      debug: options.debug || false
    };

    // Dedicated axios instance so the global axios defaults stay untouched.
    this.httpClient = axios.create({
      baseURL: this.options.baseURL,
      timeout: this.options.timeout,
      headers: {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        ...this.options.headers
      }
    });

    // Request interceptor: debug logging only.
    this.httpClient.interceptors.request.use(
      config => {
        if (this.options.debug) {
          console.log('API请求:', {
            method: String(config.method || '').toUpperCase(),
            url: config.url,
            params: config.params,
            headers: config.headers
          });
        }
        return config;
      },
      error => {
        if (this.options.debug) {
          console.error('API请求错误:', error);
        }
        return Promise.reject(error);
      }
    );

    // Response interceptor: unwrap the body so callers receive `response.data`
    // directly, and normalise failures.
    this.httpClient.interceptors.response.use(
      response => {
        if (this.options.debug) {
          console.log('API响应:', {
            status: response.status,
            statusText: response.statusText,
            url: response.config.url,
            data: response.data
          });
        }
        return response.data;
      },
      error => this._handleRequestError(error)
    );
  }

  /**
   * Pick a readable message out of an API `error` field, which may be either
   * a string or an object carrying a `message` property.
   * @private
   * @param {*} errorField - The `error` value of an API payload
   * @param {string} fallback - Message used when nothing usable is present
   * @returns {string} The extracted message
   */
  _extractErrorMessage(errorField, fallback) {
    if (typeof errorField === 'string') {
      return errorField || fallback;
    }
    if (errorField && errorField.message) {
      return errorField.message;
    }
    return fallback;
  }

  /**
   * Return the `data` of a `{ success, data, error }` payload, or throw when
   * the payload does not report success.
   * @private
   * @param {Object} payload - Unwrapped response body
   * @param {string} fallbackMessage - Message used when the payload has no usable error text
   * @returns {*} The payload's `data`
   * @throws {Error} If the payload is missing or `success` is falsy
   */
  _unwrapPayload(payload, fallbackMessage) {
    if (!payload || !payload.success) {
      throw new Error(this._extractErrorMessage(payload && payload.error, fallbackMessage));
    }
    return payload.data;
  }

  /**
   * Convert an axios failure into an Error enriched with `statusCode`, `code`
   * and `originalError`.
   * @private
   * @param {Error} error - Error raised by axios
   * @returns {Promise<never>} A promise rejected with the enriched error
   */
  _handleRequestError(error) {
    if (this.options.debug) {
      console.error('API响应错误:', error);
    }

    let errorMessage = '请求失败';
    let errorCode = 'UNKNOWN_ERROR';
    let statusCode = 500;

    if (error.response) {
      // The server answered with an error status code.
      statusCode = error.response.status;
      const errorData = error.response.data;

      if (errorData) {
        if (errorData.error) {
          errorMessage = this._extractErrorMessage(errorData.error, errorMessage);
        }
        errorCode = errorData.code || `HTTP_${statusCode}`;
      }
    } else if (error.request) {
      // The request was sent but no response was received.
      errorMessage = '服务器无响应,请检查网络连接';
      errorCode = 'CONNECTION_ERROR';
      statusCode = 0;
    } else {
      // The request could not even be set up.
      errorMessage = error.message || errorMessage;
      errorCode = 'REQUEST_SETUP_ERROR';
    }

    const enhancedError = new Error(errorMessage);
    enhancedError.statusCode = statusCode;
    enhancedError.code = errorCode;
    enhancedError.originalError = error;

    return Promise.reject(enhancedError);
  }

  /**
   * Fetch product information.
   * @param {Object} params - Request parameters
   * @param {string} params.url - Product URL
   * @param {string} params.platform - Platform name, e.g. 'amazon' (sent lower-cased)
   * @param {boolean} [params.needScreenshot=false] - Whether a screenshot is wanted; only `true` enables it
   * @returns {Promise<Object>} Product information
   * @throws {Error} If the parameters are invalid or the request fails
   */
  async getProductInfo(params) {
    if (!params || typeof params !== 'object') {
      throw new Error('参数必须是一个对象');
    }

    if (!params.url) {
      throw new Error('商品URL不能为空');
    }

    if (!params.platform) {
      throw new Error('平台名称不能为空');
    }

    // Query values are sent as strings.
    const queryParams = {
      url: params.url,
      platform: String(params.platform).toLowerCase(),
      needScreenshot: params.needScreenshot === true ? 'true' : 'false'
    };

    const payload = await this.httpClient.get('/api/product/info', {
      params: queryParams
    });
    return this._unwrapPayload(payload, '获取商品信息失败');
  }

  /**
   * Fetch the list of supported platforms.
   * @returns {Promise<string[]>} Supported platforms
   * @throws {Error} If the request fails
   */
  async getSupportedPlatforms() {
    const payload = await this.httpClient.get('/api/platforms');
    return this._unwrapPayload(payload, '获取平台列表失败');
  }

  /**
   * Health check.
   * @returns {Promise<Object>} The body returned by the /health endpoint
   * @throws {Error} If the request fails
   */
  async healthCheck() {
    return this.httpClient.get('/health');
  }
}

module.exports = CrawlerApiSDK;

+ 0
- 196
src/services/crawlerService.js Bestand weergeven

@@ -1,196 +0,0 @@
const { chromium } = require('playwright');
const path = require('path');
const fs = require('fs').promises;
const FormData = require('form-data');
const fetch = require('node-fetch');

/**
 * Amazon product information crawler service (Playwright).
 */
class CrawlerService {
  constructor() {
    // Endpoint that uploaded screenshots are POSTed to.
    this.UPLOAD_URL = 'https://apibase.sohomall.jp/uploaders?scene=goods';
    // Pages that already forward their console output, so the listener is
    // attached once per page instead of once per scrape.
    this.consoleHookedPages = new WeakSet();
  }

  /**
   * Launch a headless Chromium instance.
   * @returns {Promise<Object>} The launched browser
   */
  async initBrowser() {
    return await chromium.launch({ headless: true });
  }

  /**
   * Ensure the `screenshots` directory exists under the current working directory.
   * @returns {Promise<string>} Path of the directory
   */
  async createScreenshotDir() {
    const dir = path.join(process.cwd(), 'screenshots');
    await fs.mkdir(dir, { recursive: true });
    return dir;
  }

  /**
   * Scrape the currently displayed SKU: title, price (minus any coupon
   * value), ASIN and URL.
   * @param {Object} page - Playwright page showing the product
   * @returns {Promise<Object>} `{ title, price, sku, url, remark }`
   */
  async getSingleSkuInfo(page) {
    // Forward browser console output to the Node console (once per page).
    if (!this.consoleHookedPages.has(page)) {
      this.consoleHookedPages.add(page);
      page.on('console', msg => console.log('Browser Console:', msg.text()));
    }
    await page.waitForTimeout(500);

    let couponValue = 0;
    const couponTrigger = await page.$('.a-declarative[data-action="a-modal"], .couponLabelText');
    if (couponTrigger) {
      try {
        await couponTrigger.click();
        await page.waitForTimeout(500);
      } catch {
        // Best effort: the coupon label can still be read without the click.
      }
      try {
        const couponText = await page.$eval('.couponLabelText', el => el.textContent.trim());
        const m = couponText.match(/¥\s*([\d,]+)/);
        couponValue = m ? parseInt(m[1].replace(/,/g, ''), 10) : 0;
        console.log('Found coupon value:', couponValue);
      } catch {
        // Best effort: no readable coupon label means no coupon deduction.
      }
      // Try to close the modal; fall back to Escape when the close button is unavailable.
      try {
        await page.click('button.a-modal-close', { timeout: 1000 });
      } catch {
        await page.keyboard.press('Escape');
      }
    }

    // The callback below runs inside the browser page.
    return await page.evaluate(couponValue => {
      const title = document.querySelector('#productTitle')?.textContent.trim() || null;
      let priceText = document.querySelector('span.a-price > span.a-offscreen')?.textContent.trim()
        || document.querySelector('#priceblock_dealprice')?.textContent.trim()
        || document.querySelector('#priceblock_saleprice')?.textContent.trim()
        || document.querySelector('#priceblock_ourprice')?.textContent.trim()
        || null;
      if (priceText?.includes('ポイント')) priceText = priceText.split('ポイント')[0].trim();
      if (priceText != null) {
        // Strip the longer 'JP¥' prefix first; removing '¥' first would leave 'JP' behind.
        priceText = priceText.replace('JP¥', '').replace('¥', '');
      }
      const m = priceText?.match(/\s*([\d,]+)/);
      let priceVal = m ? parseInt(m[1].replace(/,/g, ''), 10) : null;
      if (priceVal != null) priceVal -= couponValue;
      console.log('priceText', priceText);
      console.log('priceVal', priceVal);
      console.log('couponValue', couponValue);
      const price = priceVal != null ? `${priceVal.toLocaleString()}` : priceText;
      const url = window.location.href;
      const asin = url.match(/\/dp\/([A-Z0-9]{10})/)?.[1] || null;
      return { title, price, sku: asin, url, remark: `Original Price: JP¥${priceText} Coupon Price: JP¥${couponValue}` };
    }, couponValue);
  }

  /**
   * Scrape every SKU combination (cartesian product of the variant groups).
   * Falls back to the single displayed SKU when no variant buttons are found.
   * @param {Object} page - Playwright page showing the product
   * @returns {Promise<Array>} One info object per combination, each with a `variants` array
   */
  async getAllSkuInfo(page) {
    // Wait for the SKU groups to load.
    await page.waitForSelector('.a-cardui-body #twister-plus-inline-twister > .a-section');
    const groupEls = await page.$$('.a-cardui-body #twister-plus-inline-twister > .a-section');
    const groups = [];
    for (const groupEl of groupEls) {
      const btns = await groupEl.$$('.a-button-inner .a-button-input');
      if (btns.length) groups.push(btns);
    }
    if (!groups.length) return [await this.getSingleSkuInfo(page)];

    // Build the cartesian product of all variant groups.
    const cartesian = (arr1, arr2) => arr1.flatMap(a => arr2.map(b => [...a, b]));
    let combos = groups[0].map(b => [b]);
    for (let i = 1; i < groups.length; i++) combos = cartesian(combos, groups[i]);

    const results = [];
    for (const combo of combos) {
      // Click the button of each dimension in turn.
      for (const btn of combo) {
        await btn.click();
        await page.waitForLoadState('networkidle');
      }
      const info = await this.getSingleSkuInfo(page);
      // getAttribute resolves asynchronously, so each lookup is awaited before
      // falling back from aria-label to title.
      info.variants = await Promise.all(
        combo.map(async btn =>
          (await btn.getAttribute('aria-label')) || (await btn.getAttribute('title'))
        )
      );
      results.push(info);
    }
    return results;
  }

  /**
   * Convert an image file to a base64 data URL.
   * @param {string} imagePath - Image path
   * @returns {Promise<string|null>} `data:image/png;base64,...`, or null when reading fails
   */
  async convertImageToBase64(imagePath) {
    try {
      const imageBuffer = await fs.readFile(imagePath);
      return `data:image/png;base64,${imageBuffer.toString('base64')}`;
    } catch (error) {
      console.error('转换图片到base64失败:', error);
      return null;
    }
  }

  /**
   * Upload an image to the upload endpoint.
   * @param {string} imagePath - Image path
   * @returns {Promise<string|null>} URL of the uploaded image, or null when the upload fails
   */
  async uploadImage(imagePath) {
    try {
      const form = new FormData();
      form.append('file', await fs.readFile(imagePath), {
        filename: path.basename(imagePath),
        contentType: 'image/png'
      });

      const response = await fetch(this.UPLOAD_URL, {
        method: 'POST',
        body: form
      });

      if (!response.ok) {
        throw new Error(`上传失败: ${response.statusText}`);
      }

      const result = await response.json();
      return result.url; // assumes the server response contains a `url` field
    } catch (error) {
      console.error('上传图片失败:', error);
      return null;
    }
  }

  /**
   * Main entry point: crawl product information.
   * The browser is always closed, including when navigation or scraping throws.
   * @param {string} url - Product URL (the query string is dropped before navigating)
   * @param {boolean} [needScreenshot=false] - Capture and upload a full-page screenshot
   * @param {boolean} [includeAllSkus=false] - Scrape every SKU combination instead of only the displayed one
   * @returns {Promise<Array>} Array of product info objects
   */
  async crawlProductInfo(url, needScreenshot = false, includeAllSkus = false) {
    const browser = await this.initBrowser();
    try {
      const context = await browser.newContext({ locale: 'ja-JP', userAgent: 'Mozilla/5.0' });
      const page = await context.newPage();
      await page.goto(url.split('?')[0], { waitUntil: 'networkidle' });

      const data = includeAllSkus
        ? await this.getAllSkuInfo(page)
        : [await this.getSingleSkuInfo(page)];

      if (needScreenshot) {
        const dir = await this.createScreenshotDir();
        const filename = `${Date.now()}.png`;
        const shot = path.join(dir, filename);
        await page.screenshot({ path: shot, fullPage: true });
        // Upload the screenshot and attach its URL to every entry.
        const imageUrl = await this.uploadImage(shot);
        data.forEach(item => {
          item.screenshotUrl = imageUrl;
        });

        // Remove the temporary screenshot file.
        try {
          await fs.unlink(shot);
        } catch (error) {
          console.error('删除临时截图文件失败:', error);
        }
      }

      return data;
    } finally {
      await browser.close();
    }
  }
}

module.exports = new CrawlerService();

+ 4
- 44
src/services/crawlers/amazon/AmazonCrawler.js Bestand weergeven

@@ -210,7 +210,7 @@ class AmazonCrawler extends BaseCrawler {
// 等待弹窗消失
await this.page.waitForTimeout(500);
} catch (clickError) {
console.log('优惠券点击或处理失败:', clickError.message);
console.log('没有优惠券', clickError.message);
// 如果点击失败,尝试按ESC键关闭可能的弹窗
try {
await this.page.keyboard.press('Escape');
@@ -322,52 +322,13 @@ class AmazonCrawler extends BaseCrawler {
}
}

/**
* 获取所有SKU组合信息
* @returns {Promise<Array>} SKU信息数组
*/
async getAllSkuInfo() {
try {
const groups = await this.getVariants();
if (!groups.length) return [await this.getSingleSkuInfo()];

// 生成笛卡尔积组合
const cartesian = (arr1, arr2) => arr1.flatMap(a => arr2.map(b => [...a, b]));
let combos = groups[0].map(b => [b]);
for (let i = 1; i < groups.length; i++) {
combos = cartesian(combos, groups[i]);
}

const results = [];
for (const combo of combos) {
// 依次点击每个维度按钮
for (const btn of combo) {
await btn.click();
await this.page.waitForLoadState('networkidle');
}
// 获取当前组合信息
const info = await this.getSingleSkuInfo();
info.variants = await Promise.all(
combo.map(btn => btn.getAttribute('aria-label') || btn.getAttribute('title'))
);
results.push(info);
}
return results;
} catch (error) {
throw new CrawlerError('获取所有SKU信息失败', 'ALL_SKU_INFO_GET_ERROR', 'amazon', error);
}
}

/**
* 主方法:抓取商品信息
* @param {string} url - 商品URL
* @param {boolean} needScreenshot - 是否需要截图
* @param {boolean} includeAllSkus - 是否包含所有SKU
* @returns {Promise<Array>} 商品信息数组
*/
async crawl(url, needScreenshot = false, includeAllSkus = false) {
async crawl(url, needScreenshot = false) {
try {
await this.initBrowser();
@@ -389,9 +350,8 @@ class AmazonCrawler extends BaseCrawler {
// 导航到目标页面
await this.navigateWithRetry(url.split('?')[0]);

const data = includeAllSkus
? await this.getAllSkuInfo()
: [await this.getSingleSkuInfo()];
// 只获取单个SKU信息
const data = [await this.getSingleSkuInfo()];

if (needScreenshot) {
try {

+ 11
- 60
src/services/crawlers/base/BaseCrawler.js Bestand weergeven

@@ -1,6 +1,6 @@
const { chromium } = require('playwright');
const path = require('path');
const fs = require('fs').promises;
const { chromium } = require("playwright");
const path = require("path");
const fs = require("fs").promises;

/**
* 基础爬虫类
@@ -19,13 +19,14 @@ class BaseCrawler {
* @returns {Promise<void>}
*/
async initBrowser() {
this.browser = await chromium.launch({
this.browser = await chromium.launch({
headless: false,
args: ['--no-sandbox', '--disable-setuid-sandbox']
args: ["--no-sandbox", "--disable-setuid-sandbox"],
});
this.context = await this.browser.newContext({
locale: 'ja-JP',
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
this.context = await this.browser.newContext({
locale: "ja-JP",
userAgent:
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
});
this.page = await this.context.newPage();
}
@@ -48,60 +49,10 @@ class BaseCrawler {
* @returns {Promise<string>} 截图目录路径
*/
async createScreenshotDir() {
const dir = path.join(process.cwd(), 'screenshots');
const dir = path.join(process.cwd(), "screenshots");
await fs.mkdir(dir, { recursive: true });
return dir;
}

/**
 * Fetch product information.
 * This implementation always rejects with Error('Method not implemented');
 * presumably concrete crawlers override it — confirm against the subclasses.
 * @param {string} url - Product URL
 * @returns {Promise<Object>} Product information
 */
async crawl(url) {
throw new Error('Method not implemented');
}

/**
 * Parse a price.
 * This implementation always rejects with Error('Method not implemented');
 * presumably concrete crawlers override it — confirm against the subclasses.
 * @param {string} priceText - Price text
 * @returns {Promise<number>} Parsed price
 */
async parsePrice(priceText) {
throw new Error('Method not implemented');
}

/**
 * Handle the coupon.
 * This implementation always rejects with Error('Method not implemented');
 * presumably concrete crawlers override it — confirm against the subclasses.
 * @returns {Promise<number>} Coupon amount
 */
async handleCoupon() {
throw new Error('Method not implemented');
}

/**
 * Get the product title.
 * This implementation always rejects with Error('Method not implemented');
 * presumably concrete crawlers override it — confirm against the subclasses.
 * @returns {Promise<string>} Product title
 */
async getTitle() {
throw new Error('Method not implemented');
}

/**
 * Get the product SKU.
 * This implementation always rejects with Error('Method not implemented');
 * presumably concrete crawlers override it — confirm against the subclasses.
 * @returns {Promise<string>} Product SKU
 */
async getSku() {
throw new Error('Method not implemented');
}

/**
 * Get the product variant information.
 * This implementation always rejects with Error('Method not implemented');
 * presumably concrete crawlers override it — confirm against the subclasses.
 * @returns {Promise<Array>} Array of variant information
 */
async getVariants() {
throw new Error('Method not implemented');
}
}

module.exports = BaseCrawler;
module.exports = BaseCrawler;

+ 0
- 19
src/services/errors/CrawlerError.js Bestand weergeven

@@ -17,25 +17,6 @@ class CrawlerError extends Error {
this.originalError = originalError;
this.timestamp = new Date().toISOString();
}

/**
 * Build a plain-object representation of this error.
 * Contains name, message, code, platform, timestamp and stack, plus the
 * message and stack of `originalError` when one is set (null otherwise).
 * @returns {Object} Error information object
 */
toJSON() {
return {
name: this.name,
message: this.message,
code: this.code,
platform: this.platform,
timestamp: this.timestamp,
stack: this.stack,
originalError: this.originalError ? {
message: this.originalError.message,
stack: this.originalError.stack
} : null
};
}
}

module.exports = CrawlerError;

+ 118
- 0
src/services/productApiClient.js Bestand weergeven

@@ -0,0 +1,118 @@
const axios = require('axios');

/**
 * Product API client service.
 * Calls the crawler API to obtain product information.
 */
class ProductApiClient {
  /**
   * Create a client backed by its own axios instance.
   * @param {Object} options - Configuration options
   * @param {string} options.baseURL - API base URL (default 'http://localhost:8991')
   * @param {number} options.timeout - Request timeout in milliseconds (default 60000)
   * @param {Object} options.headers - Request headers; merged over the JSON
   *   defaults, so custom headers no longer drop Content-Type / Accept
   */
  constructor(options = {}) {
    this.baseURL = options.baseURL || 'http://localhost:8991';
    this.timeout = options.timeout || 60000;
    this.headers = {
      'Content-Type': 'application/json',
      'Accept': 'application/json',
      ...(options.headers || {})
    };

    this.client = axios.create({
      baseURL: this.baseURL,
      timeout: this.timeout,
      headers: this.headers
    });

    // Response interceptor: hand callers the response body directly and
    // normalise failures.
    this.client.interceptors.response.use(
      response => response.data,
      error => this.handleError(error)
    );
  }

  /**
   * Convert an axios failure into an Error carrying `statusCode` and
   * `originalError`.
   * @private
   * @param {Error} error - Error raised by axios
   * @returns {Promise<never>} A promise rejected with the enriched error
   */
  handleError(error) {
    let errorMessage = '获取商品信息失败';
    let statusCode = 500;

    if (error.response) {
      // The server answered with an error status code.
      statusCode = error.response.status;
      const errorData = error.response.data;
      if (errorData && errorData.error) {
        errorMessage = typeof errorData.error === 'string'
          ? errorData.error
          : errorData.error.message || errorMessage;
      }
    } else if (error.request) {
      // The request was sent but no response was received.
      errorMessage = '服务器无响应,请检查网络连接';
      statusCode = 0;
    }

    const enhancedError = new Error(errorMessage);
    enhancedError.statusCode = statusCode;
    enhancedError.originalError = error;
    return Promise.reject(enhancedError);
  }

  /**
   * Fetch product information.
   * @param {Object} params - Request parameters
   * @param {string} params.url - Product URL
   * @param {string} params.platform - Platform name (amazon)
   * @param {boolean} [params.needScreenshot=false] - Whether a screenshot is wanted; only `true` enables it
   * @returns {Promise<Object>} The `data` field of the API response body
   * @throws {Error} If the parameters are invalid or the request fails
   */
  async getProductInfo(params) {
    // Reject a missing argument with a clear message instead of a TypeError.
    if (!params || typeof params !== 'object') {
      throw new Error('参数必须是一个对象');
    }
    if (!params.url) {
      throw new Error('商品URL不能为空');
    }
    if (!params.platform) {
      throw new Error('平台名称不能为空');
    }

    // Query values are sent as strings.
    const queryParams = {
      url: params.url,
      platform: params.platform,
      needScreenshot: params.needScreenshot === true ? 'true' : 'false'
    };

    try {
      const response = await this.client.get('/api/product/info', { params: queryParams });
      return response.data;
    } catch (error) {
      console.error('获取商品信息失败:', error);
      throw error;
    }
  }

  /**
   * Fetch the list of supported platforms.
   * @returns {Promise<string[]>} Platform list
   * @throws {Error} If the request fails
   */
  async getSupportedPlatforms() {
    try {
      const response = await this.client.get('/api/platforms');
      return response.data;
    } catch (error) {
      console.error('获取平台列表失败:', error);
      throw error;
    }
  }
}

module.exports = ProductApiClient;

+ 97
- 0
template.html Bestand weergeven

@@ -0,0 +1,97 @@
<!DOCTYPE html>
<html lang="zh-CN">

<head>
<meta charset="UTF-8">
<title>商品价格变动通知</title>
</head>

<body
style="margin: 0; padding: 0; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif; background-color: #f5f7fa; color: #333333;">
<table width="100%" cellpadding="0" cellspacing="0" style="background-color: #f5f7fa; padding: 30px 0;">
<tr>
<td align="center">
<table width="700" cellpadding="0" cellspacing="0"
style="background-color: #ffffff; border-radius: 8px; overflow: hidden; box-shadow: 0 0 10px rgba(0,0,0,0.05);">
<!-- Header -->
<tr style="background-color: #004b9b;">
<td style="padding: 20px 30px; color: white; font-size: 20px; font-weight: bold;">
商品价格变动通知

<a style="color: white; text-decoration: none; font-size: 12px; float: right; border: 1px solid white; padding: 5px; border-radius: 5px;"
href="https://digital.sohomall.jp/system/operationGoods/operationWarnresult">立即处理</a>
</td>
</tr>

<!-- Content Table -->
<tr>
<td style="padding: 30px;">
<table width="100%" cellpadding="8" cellspacing="0"
style="border-collapse: collapse; font-size: 14px;">
<tr>
<td
style="background-color: #f2f4f6; font-weight: bold; vertical-align: top; width: 160px;">
监控平台</td>
<td>Amazon</td>
</tr>
<tr>
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">商品名称
</td>
<td>【メモリ DDR4】GIGASTONE 16GBx2枚 (32GB Kit) DDR4 2666MHz (2400MHz or 2133MHz)
PC4-21300 (PC4-19200/17000) CL19 1.2V UDIMM 288 ピン アンバッファー 非 ECC PC デスクトップ専用
メモリモジュール アップグレード</td>
</tr>
<tr>
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">商品编号
</td>
<td>B08KGRRSKH</td>
</tr>
<tr>
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">商品链接
</td>
<td><a href="https://www.amazon.co.jp/dp/B08KGRRSKH"
style="color: #004b9b; text-decoration: none;">点击查看商品</a></td>
</tr>
<tr>
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">原价格
</td>
<td><span style="color: #666;">¥8280.00</span></td>
</tr>
<tr>
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">新价格
</td>
<td><span style="color: #d32f2f; font-weight: bold;">¥8280.00</span></td>
</tr>
<tr>
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">预警时间
</td>
<td>2025-05-15 10:47:19</td>
</tr>
<tr>
<td style="background-color: #f2f4f6; font-weight: bold; vertical-align: top;">商品快照
</td>
<td>
<img src="https://img.beical.com/goods/1747273525708-7D65E7A038F.png" alt="商品快照"
width="300" style="border: 1px solid #ddd; border-radius: 4px;" />
</td>
</tr>
</table>
</td>
</tr>

<!-- Footer -->
<tr>
<td
style="padding: 20px 30px; font-size: 12px; color: #999999; text-align: center; background-color: #f9f9f9;">
时区:日本东京 金额单位:日元 <br>
本邮件由系统(digital.sohomall.jp)自动发出,请勿直接回复。如有问题,请联系软件团队。
</td>
</tr>

</table>
</td>
</tr>
</table>
</body>

</html>

Laden…
Annuleren
Opslaan