无法登录网站以使用应用程序脚本抓取我的个人资料名称
Failed to log in to a website to scrape my profile name using apps script
我一直在尝试使用我的凭据登录此网站,以便使用 Google Apps Script 抓取我的个人资料名称。状态代码为 200,并且我可以看到脚本能够获取 cookie。但是,我得到的结果是 undefined,而不是个人资料名称。
这就是我正在尝试的方式:
/**
 * Logs in with the placeholder credentials and tries to read the profile name.
 *
 * Steps: GET the login page and read the `fkey` input, POST the login form
 * with that key, then log the status code, the `Set-Cookie` header and the
 * `title` attribute of the profile element looked up in the POST response body.
 */
function loginAndParseProfile() {
  const loginUrl = 'https://whosebug.com/users/login?ssrc=head&returnurl=https%3a%2f%2fwhosebug.com%2f';
  const userAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36";

  // Step 1: load the login form and pull the fkey token out of it.
  const loginForm = UrlFetchApp.fetch(loginUrl, {
    "method": "get",
    "headers": { "User-Agent": userAgent }
  });
  const $form = Cheerio.load(loginForm.getContentText());
  const fkey = $form("input[name='fkey']").first().attr('value');

  // Step 2: submit the login form together with the token read above.
  const loginResponse = UrlFetchApp.fetch(loginUrl, {
    "method": "post",
    "payload": {
      'fkey': fkey,
      'ssrc': 'head',
      'email': 'emailaddress',
      'password': 'password',
      'oauth_version': '',
      'oauth_server': ''
    },
    "muteHttpExceptions": true,
    "headers": {
      "Content-Type": "application/x-www-form-urlencoded",
      "User-Agent": userAgent
    }
  });
  console.log(loginResponse.getResponseCode());
  console.log(loginResponse.getAllHeaders()['Set-Cookie']);

  // Step 3: look for the profile element in the body returned by the POST.
  const $page = Cheerio.load(loginResponse.getContentText());
  const profileTitle = $page('a.my-profile > [class^="gravatar-wrapper"]').first().attr('title');
  console.log(profileTitle);
}
我怎样才能让这个脚本正常工作?
通过将 followRedirects 设置为 false 来禁用重定向:
// Options for the login POST. followRedirects is switched off so that the
// response to the POST itself (and its Set-Cookie header) can be read.
var options = {
  'followRedirects': false,
  'method': 'post',
  'muteHttpExceptions': true,
  'headers': {
    'Content-Type': 'application/x-www-form-urlencoded',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36'
  },
  'payload': payload
};
从 POST /users/login 请求的响应中获取 acct cookie:
// getAllHeaders() only wraps a header in an array when it has several values;
// a single Set-Cookie arrives as a plain string, so normalise to an array.
const setCookieHeaders = [].concat(resp.getAllHeaders()['Set-Cookie'] || []);
const acctCookie = setCookieHeaders.find(cookie => cookie.includes('acct=t='));
if (!acctCookie) {
  // Fail with a readable message instead of "Cannot read property 'match' of undefined".
  throw new Error('No acct cookie in the response to POST /users/login - check the credentials');
}
// Keep only the "acct=t=..." name/value pair and drop attributes such as
// path or expires; this also works when the cookie carries no attributes.
const acct = acctCookie.match(/acct=t=[^;\s]*/)[0];
发出 GET / 请求并附带 acct cookie,然后获取您的个人资料名称:
// Request the home page with the acct cookie attached, then read the
// title attribute of the profile element and log it.
const cookieHeaders = { Cookie: acct };
const profileRequest = UrlFetchApp.fetch('https://whosebug.com', { method: 'get', headers: cookieHeaders });
const homePageHtml = profileRequest.getContentText();
const $main = Cheerio.load(homePageHtml);
const profileBadge = $main('a.my-profile > [class^="gravatar-wrapper"]').first();
const myName = profileBadge.attr('title');
console.log(myName);
如果您的凭据正确,这应该会输出 robots.txt。
我一直在尝试使用我的凭据登录此网站,以便使用 Google Apps Script 抓取我的个人资料名称。状态代码为 200,并且我可以看到脚本能够获取 cookie。但是,我得到的结果是 undefined,而不是个人资料名称。
这就是我正在尝试的方式:
/**
 * Logs in with the placeholder credentials and tries to read the profile name.
 *
 * Steps: GET the login page and read the `fkey` input, POST the login form
 * with that key, then log the status code, the `Set-Cookie` header and the
 * `title` attribute of the profile element looked up in the POST response body.
 */
function loginAndParseProfile() {
  const loginUrl = 'https://whosebug.com/users/login?ssrc=head&returnurl=https%3a%2f%2fwhosebug.com%2f';
  const userAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36";

  // Step 1: load the login form and pull the fkey token out of it.
  const loginForm = UrlFetchApp.fetch(loginUrl, {
    "method": "get",
    "headers": { "User-Agent": userAgent }
  });
  const $form = Cheerio.load(loginForm.getContentText());
  const fkey = $form("input[name='fkey']").first().attr('value');

  // Step 2: submit the login form together with the token read above.
  const loginResponse = UrlFetchApp.fetch(loginUrl, {
    "method": "post",
    "payload": {
      'fkey': fkey,
      'ssrc': 'head',
      'email': 'emailaddress',
      'password': 'password',
      'oauth_version': '',
      'oauth_server': ''
    },
    "muteHttpExceptions": true,
    "headers": {
      "Content-Type": "application/x-www-form-urlencoded",
      "User-Agent": userAgent
    }
  });
  console.log(loginResponse.getResponseCode());
  console.log(loginResponse.getAllHeaders()['Set-Cookie']);

  // Step 3: look for the profile element in the body returned by the POST.
  const $page = Cheerio.load(loginResponse.getContentText());
  const profileTitle = $page('a.my-profile > [class^="gravatar-wrapper"]').first().attr('title');
  console.log(profileTitle);
}
我怎样才能让这个脚本正常工作?
通过将 followRedirects 设置为 false 来禁用重定向:
var options = {
"method" : "post",
'payload': payload,
'muteHttpExceptions': true,
"headers": {
"Content-Type": "application/x-www-form-urlencoded",
"User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36",
},
'followRedirects': false
};
从 POST /users/login 请求的响应中获取 acct cookie:
const acct = resp.getAllHeaders()['Set-Cookie']
.find(cookie => cookie.includes('acct=t='))
.match(/(acct=t=.*?)\s/)[1];
发出 GET / 请求并附带 acct cookie,然后获取您的个人资料名称:
const profileRequest = UrlFetchApp.fetch('https://whosebug.com', {
method: 'get',
headers: {
Cookie: acct
}
});
const $main = Cheerio.load(profileRequest.getContentText());
const myName = $main('a.my-profile > [class^="gravatar-wrapper"]').first().attr('title');
console.log(myName);
如果您的凭据正确,这应该会输出 robots.txt。