Browse Source

refactor: 调整爬虫配置和浏览器启动参数

- 修改爬虫配置文件:上传超时时间增加至 150000 毫秒,页面加载超时时间增加至 600000 毫秒,元素等待时间增加至 100000 毫秒,网络空闲时间增加至 50000 毫秒,重试延迟时间增加至 6000 毫秒
- 更新亚马逊爬虫的上传配置,超时时间调整为 600000 毫秒
- 将浏览器启动参数中的 headless 设置为 false,以便于调试和观察爬虫行为
- 这些更改旨在提高爬虫的稳定性和调试便利性
master
lizhuang 1 month ago
parent
commit
90cfc8f287

+ 5
- 5
src/config/crawler.config.js View File

upload: { upload: {
url: 'https://apibase.sohomall.jp/uploaders', url: 'https://apibase.sohomall.jp/uploaders',
scene: 'goods', scene: 'goods',
timeout: 30000
timeout: 30000 * 5
}, },
browser: { browser: {
headless: true, headless: true,
variants: '.a-cardui-body #twister-plus-inline-twister > .a-section' variants: '.a-cardui-body #twister-plus-inline-twister > .a-section'
}, },
timeouts: { timeouts: {
pageLoad: 60000,
elementWait: 10000,
networkIdle: 5000
pageLoad: 600000,
elementWait: 100000,
networkIdle: 50000
}, },
retry: { retry: {
maxAttempts: 3, maxAttempts: 3,
delay: 2000
delay: 6000
}, },
headers: { headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',

+ 1
- 1
src/services/crawlers/amazon/AmazonCrawler.js View File

this.uploadConfig = config.common?.upload || { this.uploadConfig = config.common?.upload || {
url: 'https://apibase.sohomall.jp/uploaders', url: 'https://apibase.sohomall.jp/uploaders',
scene: 'goods', scene: 'goods',
timeout: 30000
timeout: 600000
}; };
} }



+ 1
- 1
src/services/crawlers/base/BaseCrawler.js View File

*/ */
async initBrowser() { async initBrowser() {
this.browser = await chromium.launch({ this.browser = await chromium.launch({
headless: true,
headless: false,
args: ['--no-sandbox', '--disable-setuid-sandbox'] args: ['--no-sandbox', '--disable-setuid-sandbox']
}); });
this.context = await this.browser.newContext({ this.context = await this.browser.newContext({

Loading…
Cancel
Save