node.js 中的 Http 请求重定向和 cookie 处理

Http request redirect and cookie handling in node.js

我正在用 node.js 编写一个应用程序,它需要向外部网站发出 HTTP 请求,并且需要能够登录这些网站并管理它们的 cookie,以便 session id 始终出现在所有后续请求的 headers 中。

在 Java 中执行类似任务时,使用 java.net.CookieHandler、java.net.CookieManager 配合 java.net.HttpURLConnection 来发出请求很简单(如果有帮助,我可以提供示例代码,但现在不想让这个帖子过于冗长,因为重点应该放在 node.js 实现上):每次发出请求时,cookie 都会根据 Set-Cookie 响应 header 按预期正确更新和维护。

对于 node.js 应用程序,我尝试使用 restler(^3.2.2)发出 http 请求,并使用 cookie-manager(^0.0.19)管理 cookie。这似乎需要在发送每个请求时在请求 header 中手动设置 cookie,并在每次请求完成时根据响应 header 更新 cookie。登录请求示例代码:

var _ = require('lodash'),
    restler = require('restler'),
    CM = require('cookie-manager'),
    cm = new CM();

// Build the login POST for host1, sending whatever cookie string the cookie
// manager currently prepares for that host.
var url = 'https://' + host1 + '/page';
    // NOTE(review): the statement above ends with ';', so despite the
    // indentation the assignment below is NOT part of the `var` declaration;
    // `restlerOptions` is assigned without being declared in this snippet.
    restlerOptions = {
        //Set the cookie for host1 in the request header
        headers : {'Cookie': cm.prepare( host1 )},
        // NOTE(review): restler's documented option appears to be spelled
        // `followRedirects` (camelCase) - confirm whether this lowercase key
        // is honoured at all.
        followredirects: true,
        timeout: 5000,
        multipart: false,
        //post vars defined elsewhere for the request
        data: postVars
        };

//Various callback functions defined elsewhere for each request
// The 'complete' handler stores the response cookies for host1 and then hands
// the result to successcallback; 'timeout' and 'error' are forwarded to their
// own callbacks.
restler.post(url,restlerOptions).on('complete',function(data,res){
    // Only touch the cookie store when the response carries Set-Cookie headers.
    if (res.headers["set-cookie"] != null){
        //Loop through response cookies and add to cookie store for host1
        cm.store(
            // Keep only the text before the first ';' of each Set-Cookie value
            // (dropping attributes such as Path/Expires) and join the pieces
            // with ';' into a single string.
            // NOTE(review): the purpose of the third argument `""` passed to
            // _.map is unclear - verify against the lodash version in use.
            host1,_.map(res.headers["set-cookie"], function(cookie){
                    return cookie.split(';')[0];
                }, "").join(";")
        );
    }
    successcallback(data,res);
}).on("timeout",function(){
    timeoutcallback();
}).on("error",function(err){
    errorcallback(err);
});

我面临的问题是重定向:有时第三方网站的登录页面会重定向到新的主机/子域等。正确的行为应该是:后续的 GET 请求发往新主机,并为重定向到的主机单独管理一份新的 cookie;最终的重定向应该回到原始主机,并且原始主机的 cookie 应该仍在使用中。此过程的示例请求 headers:

Req1 headers:
POST https://host1/page HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0 //Cookie already present for host

Req1 response:
HTTP/1.1 302 Found
Set-cookie: host1-cookie1=val1
Set-cookie: host1-cookie2=val2
Location: https://host2/page

Req2 headers:
GET https://host2/page HTTP/1.1
Host: host2
<no cookie> //No cookie set yet for new host

Req2 response:
HTTP/1.1 302 Found
Set-cookie: host2-cookie1=val3
Set-cookie: host2-cookie2=val4
Location: https://host1/result

Req3 headers:
GET https://host1/result HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie1=val1; host1-cookie2=val2; //Cookies from Req1 response appended for host1

Req3 response:
HTTP/1.1 200 OK
Set-cookie: host1-cookie3=val5
Set-cookie: host1-cookie4=val6

Req4 headers:
GET https://host1/newpage HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie1=val1; host1-cookie2=val2; host1-cookie3=val5; host1-cookie4=val6 //All cookies set as expected for host1

我看到 3 个问题:

  1. 重定向之后的后续请求仍然使用 POST(而不是 GET)
  2. 与原始请求 header 设置的相同 cookie 用于所有后续请求,无论 follow-up 主机更改或重定向响应 headers 设置的任何 cookie (似乎只有在收到状态为 200 的响应后它们才会被设置。)
  3. 我上面使用的 cookie 设置代码应该遍历所有 "Set-cookie" header,并在 cookie 中设置每个字符串的第一部分。但是,它似乎只在遇到第一个 "Set-Cookie" header 时才这样做。

示例如下:

Req1 headers:
POST https://host1/page HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0 //Cookie already present for host

Req1 response:
HTTP/1.1 302 Found
Set-cookie: host1-cookie1=val1
Set-cookie: host1-cookie2=val2
Location: https://host2/page

Req2 headers:
POST https://host2/page HTTP/1.1 //This should be GET not POST!
Host: host2
Cookie: host1-cookie0=val0 //This should not be set!

Req2 response:
HTTP/1.1 302 Found
Set-cookie: host2-cookie1=val3
Set-cookie: host2-cookie2=val4
Location: https://host1/result

Req3 headers:
POST https://host1/result HTTP/1.1 //This should be GET not POST!
Host: host1
Cookie: host1-cookie0=val0 //Req1 response cookies not set!

Req3 response:
HTTP/1.1 200 OK
Set-cookie: host1-cookie3=val5
Set-cookie: host1-cookie4=val6

Req4 headers:
GET https://host1/newpage HTTP/1.1
Host: host1
Cookie: host1-cookie0=val0; host1-cookie3=val5; //Only first cookie from Req3 response is appended

这是以这种方式使用 restler/cookie-manager 库时的局限,还是我的做法需要更智能一些(例如,不使用自动重定向,而是带上新的 cookie 以 GET 方式手动发送后续请求)?尽管正在构建的应用程序必须在 node.js 中运行,但对所使用的库没有任何限制,因此如果切换到其他 http/cookie 管理库是明智的,我愿意这样做。

1) 为了防止自动重定向时后续请求仍然使用 POST,我必须在初始请求选项中添加 "followRedirects: false",然后在响应状态码为 [301,302,303] 之一时,手动以 GET 方式重新提交请求。

2) 由于重定向是手动完成的,因此我能够根据每个请求的新域手动设置 cookie。

3) 无需从每个 "Set-cookie" header 中提取值并将它们附加到单个字符串中 - cookie-manager 会自动执行此操作。

有效的新代码(结合上面的修复 1、2、3):

var _ = require('lodash'),
    restler = require('restler'),
    CM = require('cookie-manager'),
    cm = new CM();

// Build the login POST for host1. Automatic redirects are disabled so that
// each redirect hop can be re-issued manually as a GET carrying the cookies
// stored for that hop's own host (see the 'complete' handler below).
var url = 'https://' + host1 + '/page',
    // Declared in the same `var` statement as `url`; previously a stray ';'
    // turned this assignment into an implicit global.
    restlerOptions = {
        //Set the cookie for host1 in the request header
        headers : {'Cookie': cm.prepare( host1 )},
        followRedirects: false,
        timeout: 5000,
        multipart: false,
        //post vars defined elsewhere for the request
        data: postVars
    };

//Various callback functions defined elsewhere for each request
restler.post(url,restlerOptions).on('complete',function(data,res){
    // restler documents that 'complete' fires whether or not the request
    // succeeded, passing an Error as the result on failure. Those cases are
    // reported by the 'error' handler below, so skip them here instead of
    // dereferencing a missing response.
    if (data instanceof Error || !res) {
        return;
    }

    // Store the cookies set by this response under host1; skip the call when
    // the response carries no Set-Cookie header.
    var setCookie = res.headers["set-cookie"];
    if (setCookie != null) {
        cm.store(host1, setCookie);
    }

    if ([301,302,303].indexOf(res.statusCode) > -1){
        // Follow the redirect manually as a GET against the Location target.
        redirectCallback(res.headers["location"],successcallback,errorcallback,timeoutcallback);
    } else {
        successcallback(data);
    }
}).on("timeout",function(){
    timeoutcallback();
}).on("error",function(err){
    errorcallback(err);
});

/**
 * Follows a single redirect hop manually with a GET request, sending the
 * cookies stored for the target host and storing any cookies the response
 * sets for that host. Calls itself again while the response status is 301,
 * 302 or 303; otherwise passes the response data to successcallback.
 *
 * @param {string} url - Redirect target (the Location header value). It is
 *     passed to getHostFromUrl as-is; relative Location values are not
 *     resolved here.
 * @param {Function} successcallback - Called with the response data once a
 *     non-redirect response arrives.
 * @param {Function} errorcallback - Called with the error from the 'error' event.
 * @param {Function} timeoutcallback - Called with no arguments on 'timeout'.
 */
function redirectCallback(url,successcallback,errorcallback,timeoutcallback){
    //getHostFromUrl strips page/queryparams from URL - cookie manager doesn't seem to do this automatically
    var host = getHostFromUrl(url),
        options = {
            //Set the cookie for new host in the request header
            headers : {'Cookie': cm.prepare( host )},
            followRedirects: false,
            timeout: 5000
        };

    // Use the options built above for THIS host. Passing the outer
    // `restlerOptions` would resend the original host's cookie and POST data.
    restler.get(url,options).on('complete',function(data,res){
        // restler documents that 'complete' also fires on failure with an
        // Error as the result; the 'error' handler below reports those cases.
        if (data instanceof Error || !res) {
            return;
        }

        // Skip the store call when the response carries no Set-Cookie header.
        var setCookie = res.headers["set-cookie"];
        if (setCookie != null) {
            cm.store(host, setCookie);
        }

        if ([301,302,303].indexOf(res.statusCode) > -1){
            redirectCallback(res.headers["location"],successcallback,errorcallback,timeoutcallback);
        } else {
            successcallback(data);
        }
    }).on("timeout",function(){
        timeoutcallback();
    }).on("error",function(err){
        errorcallback(err);
    });
}