node.js 中的 Http 请求重定向和 cookie 处理
Http request redirect and cookie handling in node.js
我正在 node.js 中编写一个应用程序,它需要向外部网站发出 http 请求,并且需要包括登录这些网站和管理其 cookie 的功能,以便 session id 始终存在于所有后续请求的 headers 中。
在 Java 中执行类似任务时,使用 java.net.CookieHandler 和 java.net.CookieManager 以及 java.net.HttpURLConnection 来发出请求很简单(如果有帮助,我可以提供示例代码,但现在不想让这个帖子过于冗长,因为重点应该放在 node.js 实现上):每次发出请求时,cookie 都会按预期根据 Set-Cookie 响应 header 正确更新和维护。
对于 node.js 应用程序,我尝试将 restler ^3.2.2 用于 http 请求,并使用 cookie-manager ^0.0.19。这似乎需要在发送每个请求时在请求 header 中手动设置 cookie,并在每次请求完成时根据响应 header 更新 cookie。登录请求示例代码:
// Login request example: POSTs to host1 with the cookies currently stored for
// host1, then saves the cookies returned in the response.
// host1, postVars and the three callbacks are not defined in this snippet.
var _ = require('lodash'),
restler = require('restler'),
CM = require('cookie-manager'),
cm = new CM();
var url = 'https://' + host1 + '/page';
// NOTE(review): no var/let/const here, so this assignment creates an implicit global.
restlerOptions = {
//Set the cookie for host1 in the request header
headers : {'Cookie': cm.prepare( host1 )},
// NOTE(review): lower-case key; the later snippet spells it `followRedirects` - confirm which spelling restler reads.
followredirects: true,
timeout: 5000,
multipart: false,
//post vars defined elsewhere for the request
data: postVars
};
//Various callback functions defined elsewhere for each request
restler.post(url,restlerOptions).on('complete',function(data,res){
if (res.headers["set-cookie"] != null){
//Loop through response cookies and add to cookie store for host1
// Only the text before the first ';' of each Set-Cookie value is kept;
// the kept parts are joined with ';' into one string before being stored.
cm.store(
host1,_.map(res.headers["set-cookie"], function(cookie){
return cookie.split(';')[0];
}, "").join(";")
);
}
successcallback(data,res);
}).on("timeout",function(){
timeoutcallback();
}).on("error",function(err){
errorcallback(err);
});
我面临的问题是重定向:有时第三方网站的登录页面会重定向到新的主机/子域等。此时应该向新主机发出后续的 GET 请求,并且应该为重定向的主机单独管理一组新的 cookie。最终重定向应该返回到原始主机,并且原始主机的 cookie 应该仍在使用中。此过程的预期请求 headers 示例:
Req1 headers:
POST https://host1/page HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0 //Cookie already present for host
Req1 response:
HTTP/1.1 302 Found
Set-cookie: host1-cookie1=val1
Set-cookie: host1-cookie2=val2
Location: https://host2/page
Req2 headers:
GET https://host2/page HTTP/1.1
Host: host2
<no cookie> //No cookie set yet for new host
Req2 response:
HTTP/1.1 302 Found
Set-cookie: host2-cookie1=val3
Set-cookie: host2-cookie2=val4
Location: https://host1/result
Req3 headers:
GET https://host1/result HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie1=val1; host1-cookie2=val2; //Cookies from Req1 response appended for host1
Req3 response:
HTTP/1.1 200 OK
Set-cookie: host1-cookie3=val5
Set-cookie: host1-cookie4=val6
Req4 headers:
GET https://host1/newpage HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie1=val1; host1-cookie2=val2; host1-cookie3=val5; host1-cookie4=val6 //All cookies set as expected for host1
我看到 3 个问题:
- 重定向之后的后续请求使用的是 POST(而不是 GET)
- 与原始请求 header 设置的相同 cookie 用于所有后续请求,无论 follow-up 主机更改或重定向响应 headers 设置的任何 cookie (似乎只有在收到状态为 200 的响应后它们才会被设置。)
- 我上面使用的 cookie 设置代码应该遍历所有 "Set-cookie" header,并在 cookie 中设置每个字符串的第一部分。但是,它似乎只在遇到第一个 "Set-Cookie" header 时才这样做。
示例如下:
Req1 headers:
POST https://host1/page HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0 //Cookie already present for host
Req1 response:
HTTP/1.1 302 Found
Set-cookie: host1-cookie1=val1
Set-cookie: host1-cookie2=val2
Location: https://host2/page
Req2 headers:
POST https://host2/page HTTP/1.1 //This should be GET not POST!
Host: host2
Cookie: host1-cookie0=val0 //This should not be set!
Req2 response:
HTTP/1.1 302 Found
Set-cookie: host2-cookie1=val3
Set-cookie: host2-cookie2=val4
Location: https://host1/result
Req3 headers:
POST https://host1/result HTTP/1.1 //This should be GET not POST!
Host: host1
Cookie: host1-cookie0=val0 //Req1 response cookies not set!
Req3 response:
HTTP/1.1 200 OK
Set-cookie: host1-cookie3=val5
Set-cookie: host1-cookie4=val6
Req4 headers:
GET https://host1/newpage HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie3=val5; //Only first cookie from Req3 response is appended
这是 restler/cookie-manager 以这种方式使用库的限制,还是该方法需要更智能(例如,不使用自动重定向,并使用新 cookie 以 GET 方式手动发送后续请求)?
尽管正在构建的应用程序被限制为在 node.js 中运行,但对所使用的库没有任何限制,因此如果切换到其他 http/cookie 管理库是明智的,我愿意这样做。
1) 为了防止自动重定向使用 POST 发出后续请求,我必须将 "followRedirects: false" 添加到初始请求选项,然后如果响应代码是 [301,302,303] 之一,则手动重新提交 GET 请求。
2) 由于重定向是手动完成的,因此我能够根据每个请求的新域手动设置 cookie。
3) 无需从每个 "Set-cookie" header 中提取值并将它们附加到单个字符串中 - cookie-manager 会自动执行此操作。
有效的新代码(结合上面的修复 1、2、3):
// Login request with manual redirect handling: POSTs the login form to host1
// with the cookies stored for host1 and automatic redirects disabled, stores
// the response cookies, then either follows the redirect with a GET (via
// redirectCallback) or reports success.
// host1, postVars and the three callbacks are defined elsewhere.
var _ = require('lodash'),
    restler = require('restler'),
    CM = require('cookie-manager'),
    cm = new CM();
var url = 'https://' + host1 + '/page';
// Declared with var so the options object is not an implicit global.
var restlerOptions = {
    //Set the cookie for host1 in the request header
    headers : {'Cookie': cm.prepare( host1 )},
    // Redirects are followed manually so they can be re-issued as GET with per-host cookies
    followRedirects: false,
    timeout: 5000,
    multipart: false,
    //post vars defined elsewhere for the request
    data: postVars
};
//Various callback functions defined elsewhere for each request
restler.post(url,restlerOptions).on('complete',function(data,res){
    // restler also emits 'complete' for failed requests (data is then an Error);
    // those are reported through the 'error'/'timeout' handlers below.
    if (data instanceof Error || !res){
        return;
    }
    // Not every response carries Set-Cookie headers
    if (res.headers["set-cookie"] != null){
        cm.store(host1, res.headers["set-cookie"]);
    }
    if ([301,302,303].indexOf(res.statusCode) > -1){
        // Function name is case-sensitive: it is declared as redirectCallback
        redirectCallback(res.headers["location"],successcallback,errorcallback,timeoutcallback);
    } else {
        successcallback(data);
    }
}).on("timeout",function(){
    timeoutcallback();
}).on("error",function(err){
    errorcallback(err);
});
/**
 * Follows a redirect manually by issuing a GET request to the redirect target.
 *
 * The request carries only the cookies stored for the target host, and cookies
 * set by the response are stored under that same host. Further 301/302/303
 * responses are followed recursively; any other response is handed to
 * successcallback.
 *
 * NOTE(review): the Location value is used as-is, so a relative Location is
 * not resolved against the current URL, and there is no cap on the number of
 * redirects followed.
 *
 * @param {string} url - Redirect target, taken from the Location header.
 * @param {function} successcallback - Called with the response data of the final (non-redirect) response.
 * @param {function} errorcallback - Called with the error when the request emits 'error'.
 * @param {function} timeoutcallback - Called with no arguments when the request emits 'timeout'.
 */
function redirectCallback(url,successcallback,errorcallback,timeoutcallback){
    //getHostFromUrl strips page/queryparams from URL - cookie manager doesn't seem to do this automatically
    var host = getHostFromUrl(url);
    var options = {
        //Set the cookie for new host in the request header
        headers : {'Cookie': cm.prepare( host )},
        followRedirects: false,
        timeout: 5000
    };
    // Use the options built above, not the login request's restlerOptions:
    // those carry the original host's Cookie header and the POST data.
    restler.get(url,options).on('complete',function(data,res){
        // restler also emits 'complete' for failed requests (data is then an Error);
        // those are reported through the 'error'/'timeout' handlers below.
        if (data instanceof Error || !res){
            return;
        }
        // Not every response carries Set-Cookie headers
        if (res.headers["set-cookie"] != null){
            cm.store(host, res.headers["set-cookie"]);
        }
        if ([301,302,303].indexOf(res.statusCode) > -1){
            redirectCallback(res.headers["location"],successcallback,errorcallback,timeoutcallback);
        } else {
            successcallback(data);
        }
    }).on("timeout",function(){
        timeoutcallback();
    }).on("error",function(err){
        errorcallback(err);
    });
}
我正在 node.js 中编写一个应用程序,它需要向外部网站发出 http 请求,并且需要包括登录这些网站和管理其 cookie 的功能,以便 session id 始终存在于所有后续请求的 headers 中。
在 Java 中执行类似任务时,使用 java.net.CookieHandler 和 java.net.CookieManager 以及 java.net.HttpURLConnection 来发出请求很简单(如果有帮助,我可以提供示例代码,但现在不想让这个帖子过于冗长,因为重点应该放在 node.js 实现上):每次发出请求时,cookie 都会按预期根据 Set-Cookie 响应 header 正确更新和维护。
对于 node.js 应用程序,我尝试将 restler ^3.2.2 用于 http 请求,并使用 cookie-manager ^0.0.19。这似乎需要在发送每个请求时在请求 header 中手动设置 cookie,并在每次请求完成时根据响应 header 更新 cookie。登录请求示例代码:
// Login request example: POSTs to host1 with the cookies currently stored for
// host1, then saves the cookies returned in the response.
// host1, postVars and the three callbacks are not defined in this snippet.
var _ = require('lodash'),
restler = require('restler'),
CM = require('cookie-manager'),
cm = new CM();
var url = 'https://' + host1 + '/page';
// NOTE(review): no var/let/const here, so this assignment creates an implicit global.
restlerOptions = {
//Set the cookie for host1 in the request header
headers : {'Cookie': cm.prepare( host1 )},
// NOTE(review): lower-case key; the later snippet spells it `followRedirects` - confirm which spelling restler reads.
followredirects: true,
timeout: 5000,
multipart: false,
//post vars defined elsewhere for the request
data: postVars
};
//Various callback functions defined elsewhere for each request
restler.post(url,restlerOptions).on('complete',function(data,res){
if (res.headers["set-cookie"] != null){
//Loop through response cookies and add to cookie store for host1
// Only the text before the first ';' of each Set-Cookie value is kept;
// the kept parts are joined with ';' into one string before being stored.
cm.store(
host1,_.map(res.headers["set-cookie"], function(cookie){
return cookie.split(';')[0];
}, "").join(";")
);
}
successcallback(data,res);
}).on("timeout",function(){
timeoutcallback();
}).on("error",function(err){
errorcallback(err);
});
我面临的问题是重定向:有时第三方网站的登录页面会重定向到新的主机/子域等。此时应该向新主机发出后续的 GET 请求,并且应该为重定向的主机单独管理一组新的 cookie。最终重定向应该返回到原始主机,并且原始主机的 cookie 应该仍在使用中。此过程的预期请求 headers 示例:
Req1 headers:
POST https://host1/page HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0 //Cookie already present for host
Req1 response:
HTTP/1.1 302 Found
Set-cookie: host1-cookie1=val1
Set-cookie: host1-cookie2=val2
Location: https://host2/page
Req2 headers:
GET https://host2/page HTTP/1.1
Host: host2
<no cookie> //No cookie set yet for new host
Req2 response:
HTTP/1.1 302 Found
Set-cookie: host2-cookie1=val3
Set-cookie: host2-cookie2=val4
Location: https://host1/result
Req3 headers:
GET https://host1/result HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie1=val1; host1-cookie2=val2; //Cookies from Req1 response appended for host1
Req3 response:
HTTP/1.1 200 OK
Set-cookie: host1-cookie3=val5
Set-cookie: host1-cookie4=val6
Req4 headers:
GET https://host1/newpage HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie1=val1; host1-cookie2=val2; host1-cookie3=val5; host1-cookie4=val6 //All cookies set as expected for host1
我看到 3 个问题:
- 重定向之后的后续请求使用的是 POST(而不是 GET)
- 与原始请求 header 设置的相同 cookie 用于所有后续请求,无论 follow-up 主机更改或重定向响应 headers 设置的任何 cookie (似乎只有在收到状态为 200 的响应后它们才会被设置。)
- 我上面使用的 cookie 设置代码应该遍历所有 "Set-cookie" header,并在 cookie 中设置每个字符串的第一部分。但是,它似乎只在遇到第一个 "Set-Cookie" header 时才这样做。
示例如下:
Req1 headers:
POST https://host1/page HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0 //Cookie already present for host
Req1 response:
HTTP/1.1 302 Found
Set-cookie: host1-cookie1=val1
Set-cookie: host1-cookie2=val2
Location: https://host2/page
Req2 headers:
POST https://host2/page HTTP/1.1 //This should be GET not POST!
Host: host2
Cookie: host1-cookie0=val0 //This should not be set!
Req2 response:
HTTP/1.1 302 Found
Set-cookie: host2-cookie1=val3
Set-cookie: host2-cookie2=val4
Location: https://host1/result
Req3 headers:
POST https://host1/result HTTP/1.1 //This should be GET not POST!
Host: host1
Cookie: host1-cookie0=val0 //Req1 response cookies not set!
Req3 response:
HTTP/1.1 200 OK
Set-cookie: host1-cookie3=val5
Set-cookie: host1-cookie4=val6
Req4 headers:
GET https://host1/newpage HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie3=val5; //Only first cookie from Req3 response is appended
这是以这种方式使用 restler/cookie-manager 库的限制,还是该方法需要更智能(例如,不使用自动重定向,并使用新 cookie 以 GET 方式手动发送后续请求)? 尽管正在构建的应用程序被限制为在 node.js 中运行,但对所使用的库没有任何限制,因此如果切换到其他 http/cookie 管理库是明智的,我愿意这样做。
1) 为了防止自动重定向使用 POST 发出后续请求,我必须将 "followRedirects: false" 添加到初始请求选项,然后如果响应代码是 [301,302,303] 之一,则手动重新提交 GET 请求。
2) 由于重定向是手动完成的,因此我能够根据每个请求的新域手动设置 cookie。
3) 无需从每个 "Set-cookie" header 中提取值并将它们附加到单个字符串中 - cookie-manager 会自动执行此操作。
有效的新代码(结合上面的修复 1、2、3):
// Login request with manual redirect handling: POSTs the login form to host1
// with the cookies stored for host1 and automatic redirects disabled, stores
// the response cookies, then either follows the redirect with a GET (via
// redirectCallback) or reports success.
// host1, postVars and the three callbacks are defined elsewhere.
var _ = require('lodash'),
    restler = require('restler'),
    CM = require('cookie-manager'),
    cm = new CM();
var url = 'https://' + host1 + '/page';
// Declared with var so the options object is not an implicit global.
var restlerOptions = {
    //Set the cookie for host1 in the request header
    headers : {'Cookie': cm.prepare( host1 )},
    // Redirects are followed manually so they can be re-issued as GET with per-host cookies
    followRedirects: false,
    timeout: 5000,
    multipart: false,
    //post vars defined elsewhere for the request
    data: postVars
};
//Various callback functions defined elsewhere for each request
restler.post(url,restlerOptions).on('complete',function(data,res){
    // restler also emits 'complete' for failed requests (data is then an Error);
    // those are reported through the 'error'/'timeout' handlers below.
    if (data instanceof Error || !res){
        return;
    }
    // Not every response carries Set-Cookie headers
    if (res.headers["set-cookie"] != null){
        cm.store(host1, res.headers["set-cookie"]);
    }
    if ([301,302,303].indexOf(res.statusCode) > -1){
        // Function name is case-sensitive: it is declared as redirectCallback
        redirectCallback(res.headers["location"],successcallback,errorcallback,timeoutcallback);
    } else {
        successcallback(data);
    }
}).on("timeout",function(){
    timeoutcallback();
}).on("error",function(err){
    errorcallback(err);
});
/**
 * Follows a redirect manually by issuing a GET request to the redirect target.
 *
 * The request carries only the cookies stored for the target host, and cookies
 * set by the response are stored under that same host. Further 301/302/303
 * responses are followed recursively; any other response is handed to
 * successcallback.
 *
 * NOTE(review): the Location value is used as-is, so a relative Location is
 * not resolved against the current URL, and there is no cap on the number of
 * redirects followed.
 *
 * @param {string} url - Redirect target, taken from the Location header.
 * @param {function} successcallback - Called with the response data of the final (non-redirect) response.
 * @param {function} errorcallback - Called with the error when the request emits 'error'.
 * @param {function} timeoutcallback - Called with no arguments when the request emits 'timeout'.
 */
function redirectCallback(url,successcallback,errorcallback,timeoutcallback){
    //getHostFromUrl strips page/queryparams from URL - cookie manager doesn't seem to do this automatically
    var host = getHostFromUrl(url);
    var options = {
        //Set the cookie for new host in the request header
        headers : {'Cookie': cm.prepare( host )},
        followRedirects: false,
        timeout: 5000
    };
    // Use the options built above, not the login request's restlerOptions:
    // those carry the original host's Cookie header and the POST data.
    restler.get(url,options).on('complete',function(data,res){
        // restler also emits 'complete' for failed requests (data is then an Error);
        // those are reported through the 'error'/'timeout' handlers below.
        if (data instanceof Error || !res){
            return;
        }
        // Not every response carries Set-Cookie headers
        if (res.headers["set-cookie"] != null){
            cm.store(host, res.headers["set-cookie"]);
        }
        if ([301,302,303].indexOf(res.statusCode) > -1){
            redirectCallback(res.headers["location"],successcallback,errorcallback,timeoutcallback);
        } else {
            successcallback(data);
        }
    }).on("timeout",function(){
        timeoutcallback();
    }).on("error",function(err){
        errorcallback(err);
    });
}