- 将爬虫配置中的截图选项添加到配置文件中,包含视口尺寸、图片质量和格式,以增强截图功能的灵活性
- 在亚马逊爬虫中引入截图配置,确保在截图时使用配置中的选项,提高代码的可维护性和一致性
- 修改商品信息示例中的请求超时时间,从 30 秒增加到 60 秒,提升请求的稳定性
- 该更改旨在提高爬虫的功能性和代码的可读性,符合 SOLID 原则和命名规范

master
/**
 * Connection settings for the backend API client.
 *
 * NOTE(review): assumes an axios-style client where `timeout` is expressed in
 * milliseconds — confirm against the code that consumes this object.
 */
const serverClient = {
  // baseURL: "http://192.168.1.107:8080", // local network
  baseURL: "https://digital.sohomall.jp/prod-api", // public endpoint
  // 60 seconds. The previous expression `10000 * 60` evaluated to 600000,
  // which did not match the documented 60-second timeout.
  timeout: 60 * 1000,
  params: {
    pageNum: 1,
    pageSize: 1000,
  },
};
}, | }, | ||||
page: { | page: { | ||||
locale: 'ja-JP', | locale: 'ja-JP', | ||||
}, | |||||
// 截图配置 | |||||
screenshot: { | |||||
// 视口尺寸 | |||||
viewport: { | |||||
width: 1920, | |||||
height: 1080 | |||||
}, | |||||
// 截图选项 | |||||
options: { | |||||
fullPage: false, | |||||
quality: 60, // 图片质量 0-100 | |||||
type: 'png' // 图片格式: 'png' | 'jpeg' | |||||
} | |||||
} | } | ||||
}, | }, | ||||
scene: "digital-yy", | scene: "digital-yy", | ||||
timeout: 600000, | timeout: 600000, | ||||
}; | }; | ||||
// 截图配置 | |||||
this.screenshotConfig = { | |||||
// 截图选项 | |||||
options: { | |||||
fullPage: false, | |||||
quality: 60, // 图片质量 0-100 | |||||
type: "jpeg", | |||||
}, | |||||
}; | |||||
} | } | ||||
/** | /** | ||||
return dir; | return dir; | ||||
} | } | ||||
/** | |||||
* 设置页面视口尺寸 | |||||
* @param {number} width - 宽度 | |||||
* @param {number} height - 高度 | |||||
* @returns {Promise<void>} | |||||
*/ | |||||
async setViewportSize(width, height) { | |||||
try { | |||||
await this.page.setViewportSize({ width, height }); | |||||
} catch (error) { | |||||
throw new CrawlerError( | |||||
"设置视口尺寸失败", | |||||
"VIEWPORT_SIZE_ERROR", | |||||
"amazon", | |||||
error | |||||
); | |||||
} | |||||
} | |||||
/** | /** | ||||
* 上传图片到服务器 | * 上传图片到服务器 | ||||
* @param {string} imagePath - 图片路径 | * @param {string} imagePath - 图片路径 | ||||
throw new Error("上传响应格式错误"); | throw new Error("上传响应格式错误"); | ||||
} | } | ||||
console.log(response.data.url); | |||||
return response.data.url; | return response.data.url; | ||||
} catch (error) { | } catch (error) { | ||||
if (error.response) { | if (error.response) { | ||||
// 等待页面完全加载 | // 等待页面完全加载 | ||||
await this.page.waitForLoadState("networkidle"); | await this.page.waitForLoadState("networkidle"); | ||||
// 获取截图配置 | |||||
const screenshotOptions = this.screenshotConfig.options || { | |||||
fullPage: false, | |||||
}; | |||||
// 等待页面加载完成 | |||||
await this.page.waitForTimeout(500); | |||||
// 截取全页面 | // 截取全页面 | ||||
await this.page.screenshot({ | await this.page.screenshot({ | ||||
path: shot, | path: shot, | ||||
fullPage: true, | |||||
...screenshotOptions, | |||||
timeout: this.timeouts.elementWait, | timeout: this.timeouts.elementWait, | ||||
}); | }); | ||||
* @returns {Promise<void>} | * @returns {Promise<void>} | ||||
*/ | */ | ||||
async initBrowser() { | async initBrowser() { | ||||
// 获取截图配置 | |||||
const screenshotConfig = this.config?.common?.screenshot || {}; | |||||
const viewportConfig = screenshotConfig.viewport || { width: 1920, height: 1080 }; | |||||
this.browser = await chromium.launch({ | this.browser = await chromium.launch({ | ||||
headless: false, | headless: false, | ||||
args: ["--no-sandbox", "--disable-setuid-sandbox"], | args: ["--no-sandbox", "--disable-setuid-sandbox"], | ||||
locale: "ja-JP", | locale: "ja-JP", | ||||
userAgent: | userAgent: | ||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", | ||||
// 使用配置中的视口尺寸 | |||||
viewport: viewportConfig | |||||
}); | }); | ||||
this.page = await this.context.newPage(); | this.page = await this.context.newPage(); | ||||
} | } |