Allow crawler to also send the configured cookies (#3472)

The crawler should open pages with the same setup in order to get full results. In my case, an authentication cookie is needed to properly open the page and see its full content (including crawlable links).
This commit is contained in:
dammg 2021-10-07 20:19:00 +02:00 committed by GitHub
parent 0b3bcf7d73
commit ad44d6290d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 20 additions and 0 deletions

View File

@ -18,6 +18,9 @@ module.exports = {
this.basicAuth = options.browsertime
? options.browsertime.basicAuth
: undefined;
// options.browsertime may be absent (the basicAuth lookup guards for that
// too), so check it before reading the configured cookie(s). Falsy cookie
// values are still normalised to undefined.
this.cookie =
  options.browsertime && options.browsertime.cookie
    ? options.browsertime.cookie
    : undefined;
},
processMessage(message, queue) {
const make = this.make;
@ -44,6 +47,23 @@ module.exports = {
crawler.respectRobotsTxt = false;
}
/**
 * Register a single `name=value` cookie string with the crawler's cookie jar.
 *
 * Only the first `=` separates the name from the value, so a value that itself
 * contains `=` (for example base64 padding) is kept intact instead of the
 * whole cookie being dropped. Strings without any `=` are ignored.
 *
 * @param {string} cookie - Cookie in `name=value` form.
 */
function addCookie(cookie) {
  const separatorIndex = cookie.indexOf('=');
  if (separatorIndex === -1) {
    // Not a name=value pair; nothing sensible to add.
    return;
  }
  crawler.cookies.add(
    cookie.slice(0, separatorIndex),
    cookie.slice(separatorIndex + 1)
  );
}
// Hand the configured cookie(s) to the crawler: a list is forwarded entry by
// entry, a single non-empty string is forwarded as-is, anything else is
// ignored.
if (Array.isArray(this.cookie)) {
  for (const entry of this.cookie) {
    addCookie(entry);
  }
} else if (typeof this.cookie === 'string' && this.cookie) {
  addCookie(this.cookie);
}
crawler.addFetchCondition(queueItem => {
const extension = path.extname(queueItem.path);
// Don't try to download these, based on file name.