GET请求获取页面内容

GET request to obtain page's content

我正在尝试用 C 实现与 Python 的 requests.get('http://test.com') 等效的功能。

我首先使用 getaddrinfo() 将主机名(它有 4 个 IP)解析为 IP 地址,然后设置 server.sin_addr.s_addr = inet_addr(ip); 并成功连接(没有报错)。但是当我尝试通过发送 "GET / HTTP/1.1\r\n\r\n" 请求来获取页面时,服务器返回的基本上是 404 错误(找不到页面)的内容。这是函数:


/*
 * Resolve www.test.com to an IPv4 address, open a TCP connection to port 80,
 * send a bare "GET /" request and print the first chunk of the reply.
 *
 * Takes no arguments and returns nothing. Every failure is reported with
 * puts() and ends the function early after releasing whatever was acquired.
 *
 * NOTE(review): no WSAStartup() call is visible in this function; presumably
 * the caller initialises Winsock beforehand - confirm.
 */
void foo ()
{
    struct addrinfo     hints;
    struct addrinfo     *result = NULL;

    ZeroMemory(&hints, sizeof(hints));
    /* IPv4 only: ai_addr is read as a sockaddr_in below, which would be
     * wrong for an IPv6 result that AF_UNSPEC is allowed to return. */
    hints.ai_family     = AF_INET;
    hints.ai_socktype   = SOCK_STREAM;
    hints.ai_protocol   = IPPROTO_TCP;

    if (getaddrinfo("www.test.com", "80", &hints, &result) != 0 || result == NULL)
    {
        puts("getaddrinfo failed");
        return;
    }

    /* Only the first resolved address is used. inet_ntoa() returns a pointer
     * to a static buffer, so the text is copied out before anything else. */
    struct sockaddr_in  *sockaddr_ipv4 = (struct sockaddr_in *)result->ai_addr;
    char ip [256];
    strcpy(ip, inet_ntoa(sockaddr_ipv4->sin_addr));
    freeaddrinfo(result);   /* the list is not needed once the address is copied */

    struct sockaddr_in server;
    ZeroMemory(&server, sizeof(server));
    server.sin_addr.s_addr = inet_addr(ip);
    server.sin_family = AF_INET;
    server.sin_port = htons( 80 );

    SOCKET s = socket(AF_INET , SOCK_STREAM , 0 );
    if (s == INVALID_SOCKET)
    {
        puts("socket failed");
        return;
    }

    if (connect(s , (struct sockaddr *)&server , sizeof(server)) < 0)
    {
        puts("connect error");
        closesocket(s);
        return;
    }

    /* NOTE(review): the request is kept exactly as posted. It is an HTTP/1.1
     * request without a Host header, which is what the answer following this
     * listing identifies as the cause of the 404 reply. */
    const char *message = "GET / HTTP/1.1\r\n\r\n";
    if( send(s , message , (int)strlen(message) , 0) < 0)
    {
        puts("Send failed");
        closesocket(s);
        return;
    }

    /* Read at most sizeof - 1 bytes so there is always room for the
     * terminator; a single recv() may return only part of the response. */
    char server_reply[2000];
    int recv_size = recv(s , server_reply , (int)sizeof(server_reply) - 1 , 0);
    if (recv_size == SOCKET_ERROR)
    {
        puts("recv failed");
    }
    else
    {
        server_reply[recv_size] = '\0';
        puts(server_reply);
    }

    closesocket(s);
    system("PAUSE");
}

结果

HTTP/1.1 404 Not Found Date: Fri, 15 Sep 2017 03:19:41 GMT Content-Type: text/html; charset=UTF-8 Server: ghs Content-Length: 1561 X-XSS-Protection: 1; mode=block X-Frame-Options: SAMEORIGIN

Error 404 (Not Found)!!1
{margin:0;padding:0}html,code{font:15px/22px arial,sans-serif}html{backgrou nd:#fff;color:#222;padding:15px}body{margin:7% auto 0;max-width:390px;min-height :180px;padding:30px 0 15px} > body{background:url(//www.google.com/images/error s/robot.png) 100% 5px no-repeat;padding-right:205px}p{margin:11px 0 22px;overflo w:hidden}ins{color:#777;text-decoration:none}a img{border:0}@media screen and (m ax-width:772px){body{background:none;margin-top:0;max-width:none;padding-right:0 }}#logo{background:url(//www.google.com/images/branding/googlelogo/1x/googlelogo _color_150x54dp.png) no-repeat;margin-left:-5px}@media only screen and (min-reso lution:192dpi){#logo{background:url(//www.google.com/images/branding/googlelogo/ 2x/googlelogo_color_150x54dp.png) no-repeat 0% 0%/100% 100%;-moz-border-image:ur l(//www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png) 0}}@media only screen and (-webkit-min-device-pixel-ratio:2){#logo{background:ur l(//www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png) no-repeat;-webkit-background-size:100% 100%}}#logo{display:inline-block;height:5 4px;width:150px}

404. Thata?Ts an error.

The requested URL / was not found on this server. Thata? Ts all we know.


我做错了什么?我该如何处理?

在 HTTP/1.1 中,您必须指定 Host 头;而在 HTTP/1.0 中则不需要。因此,您必须把请求改成下面两种形式之一:要么使用 HTTP/1.0,要么在 HTTP/1.1 请求中加上 Host 头:

 GET / HTTP/1.0\r\n\r\n

 GET / HTTP/1.1\r\n
 Host: the.hostname.com\r\n\r\n

HTTP/1.1 之所以强制要求 Host 头,是因为虚拟主机(同一个 IP 地址上托管多个主机名)已经变得非常普遍。