GET请求获取页面内容
GET request to obtain page's content
我正在尝试用 C 实现与下面这行 Python 代码等效的功能:
requests.get('http://test.com')
我首先使用 getaddrinfo()
将主机名(它有 4 个 IP 地址)解析为 IP
,然后执行 server.sin_addr.s_addr = inet_addr(ip);
并成功连接(没有报错)。但是当我尝试通过发送 "GET / HTTP/1.1\r\n\r\n"
请求来显示页面时,返回的基本上是 404 错误(找不到页面)的内容。这是函数:
/*
 * Resolves www.test.com, opens a TCP connection to port 80, sends a bare
 * "GET / HTTP/1.1" request and prints the first chunk of the reply.
 *
 * WSAStartup() is not called here; presumably the caller has already
 * initialised Winsock -- confirm before use.
 */
void foo ()
{
    const char *host = "www.test.com";
    const char *port = "80";
    /*
     * NOTE(review): HTTP/1.1 requires a Host header. This request omits it,
     * so a server that hosts several sites cannot tell which one is meant
     * and may answer 400/404. Either send "GET / HTTP/1.0\r\n\r\n" or add
     * "Host: www.test.com\r\n" before the final blank line.
     */
    const char *message = "GET / HTTP/1.1\r\n\r\n";
    struct addrinfo hints;
    struct addrinfo *result = NULL;
    struct addrinfo *ai;
    SOCKET s = INVALID_SOCKET;
    char server_reply[2000];
    int recv_size;

    ZeroMemory(&hints, sizeof(hints));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_protocol = IPPROTO_TCP;

    if (getaddrinfo(host, port, &hints, &result) != 0)
    {
        puts("getaddrinfo failed");
        return;
    }

    /*
     * Connect straight from the resolver's results instead of converting
     * the first one to text and back: the list may start with an IPv6
     * entry, and any single address may be unreachable.
     */
    for (ai = result; ai != NULL; ai = ai->ai_next)
    {
        s = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (s == INVALID_SOCKET)
        {
            continue;
        }
        if (connect(s, ai->ai_addr, (int)ai->ai_addrlen) == 0)
        {
            break;
        }
        closesocket(s);
        s = INVALID_SOCKET;
    }
    freeaddrinfo(result);

    if (s == INVALID_SOCKET)
    {
        puts("connect error");
        return;
    }

    if (send(s, message, (int)strlen(message), 0) == SOCKET_ERROR)
    {
        puts("Send failed");
        closesocket(s);
        return;
    }

    /* Leave one byte free so the terminator always fits in the buffer. */
    recv_size = recv(s, server_reply, (int)sizeof(server_reply) - 1, 0);
    if (recv_size == SOCKET_ERROR)
    {
        puts("recv failed");
    }
    else
    {
        /* Only a single recv() is issued, so a long reply is truncated. */
        server_reply[recv_size] = '\0';
        puts(server_reply);
    }

    closesocket(s);
    system("PAUSE");
}
结果
HTTP/1.1 404 Not Found Date: Fri, 15 Sep 2017 03:19:41 GMT
Content-Type: text/html; charset=UTF-8 Server: ghs Content-Length:
1561 X-XSS-Protection: 1; mode=block X-Frame-Options: SAMEORIGIN
Error 404 (Not Found)!!1
{margin:0;padding:0}html,code{font:15px/22px arial,sans-serif}html{backgrou
nd:#fff;color:#222;padding:15px}body{margin:7% auto
0;max-width:390px;min-height :180px;padding:30px 0 15px} >
body{background:url(//www.google.com/images/error s/robot.png) 100%
5px no-repeat;padding-right:205px}p{margin:11px 0 22px;overflo
w:hidden}ins{color:#777;text-decoration:none}a img{border:0}@media
screen and (m
ax-width:772px){body{background:none;margin-top:0;max-width:none;padding-right:0
}}#logo{background:url(//www.google.com/images/branding/googlelogo/1x/googlelogo
_color_150x54dp.png) no-repeat;margin-left:-5px}@media only screen and (min-reso
lution:192dpi){#logo{background:url(//www.google.com/images/branding/googlelogo/
2x/googlelogo_color_150x54dp.png) no-repeat 0% 0%/100%
100%;-moz-border-image:ur
l(//www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png)
0}}@media only screen and
(-webkit-min-device-pixel-ratio:2){#logo{background:ur
l(//www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png)
no-repeat;-webkit-background-size:100%
100%}}#logo{display:inline-block;height:5 4px;width:150px}
404. That's an error.
The requested URL
/
was not found on this server. That's all we
know.
我做错了什么?我该如何处理?
在 HTTP/1.1 中,您必须指定 Host
header;而在 HTTP/1.0 中则不需要。因此,您必须将请求改为:
GET / HTTP/1.0\r\n\r\n
或
GET / HTTP/1.1\r\n
Host: the.hostname.com\r\n\r\n
HTTP/1.1 之所以引入这一要求,是因为基于名称的虚拟主机已经非常普遍:同一个 IP 地址上可能托管多个站点,服务器需要依靠 Host header 来区分请求的是哪一个。
我正在尝试用 C 实现与下面这行 Python 代码等效的功能:
requests.get('http://test.com')
我首先使用 getaddrinfo()
将主机名(它有 4 个 IP 地址)解析为 IP
,然后执行 server.sin_addr.s_addr = inet_addr(ip);
并成功连接(没有报错)。但是当我尝试通过发送 "GET / HTTP/1.1\r\n\r\n"
请求来显示页面时,返回的基本上是 404 错误(找不到页面)的内容。这是函数:
/*
 * Resolves www.test.com, opens a TCP connection to port 80, sends a bare
 * "GET / HTTP/1.1" request and prints the first chunk of the reply.
 *
 * WSAStartup() is not called here; presumably the caller has already
 * initialised Winsock -- confirm before use.
 */
void foo ()
{
    const char *host = "www.test.com";
    const char *port = "80";
    /*
     * NOTE(review): HTTP/1.1 requires a Host header. This request omits it,
     * so a server that hosts several sites cannot tell which one is meant
     * and may answer 400/404. Either send "GET / HTTP/1.0\r\n\r\n" or add
     * "Host: www.test.com\r\n" before the final blank line.
     */
    const char *message = "GET / HTTP/1.1\r\n\r\n";
    struct addrinfo hints;
    struct addrinfo *result = NULL;
    struct addrinfo *ai;
    SOCKET s = INVALID_SOCKET;
    char server_reply[2000];
    int recv_size;

    ZeroMemory(&hints, sizeof(hints));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_protocol = IPPROTO_TCP;

    if (getaddrinfo(host, port, &hints, &result) != 0)
    {
        puts("getaddrinfo failed");
        return;
    }

    /*
     * Connect straight from the resolver's results instead of converting
     * the first one to text and back: the list may start with an IPv6
     * entry, and any single address may be unreachable.
     */
    for (ai = result; ai != NULL; ai = ai->ai_next)
    {
        s = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (s == INVALID_SOCKET)
        {
            continue;
        }
        if (connect(s, ai->ai_addr, (int)ai->ai_addrlen) == 0)
        {
            break;
        }
        closesocket(s);
        s = INVALID_SOCKET;
    }
    freeaddrinfo(result);

    if (s == INVALID_SOCKET)
    {
        puts("connect error");
        return;
    }

    if (send(s, message, (int)strlen(message), 0) == SOCKET_ERROR)
    {
        puts("Send failed");
        closesocket(s);
        return;
    }

    /* Leave one byte free so the terminator always fits in the buffer. */
    recv_size = recv(s, server_reply, (int)sizeof(server_reply) - 1, 0);
    if (recv_size == SOCKET_ERROR)
    {
        puts("recv failed");
    }
    else
    {
        /* Only a single recv() is issued, so a long reply is truncated. */
        server_reply[recv_size] = '\0';
        puts(server_reply);
    }

    closesocket(s);
    system("PAUSE");
}
结果
HTTP/1.1 404 Not Found Date: Fri, 15 Sep 2017 03:19:41 GMT Content-Type: text/html; charset=UTF-8 Server: ghs Content-Length: 1561 X-XSS-Protection: 1; mode=block X-Frame-Options: SAMEORIGIN
Error 404 (Not Found)!!1
{margin:0;padding:0}html,code{font:15px/22px arial,sans-serif}html{backgrou nd:#fff;color:#222;padding:15px}body{margin:7% auto 0;max-width:390px;min-height :180px;padding:30px 0 15px} > body{background:url(//www.google.com/images/error s/robot.png) 100% 5px no-repeat;padding-right:205px}p{margin:11px 0 22px;overflo w:hidden}ins{color:#777;text-decoration:none}a img{border:0}@media screen and (m ax-width:772px){body{background:none;margin-top:0;max-width:none;padding-right:0 }}#logo{background:url(//www.google.com/images/branding/googlelogo/1x/googlelogo _color_150x54dp.png) no-repeat;margin-left:-5px}@media only screen and (min-reso lution:192dpi){#logo{background:url(//www.google.com/images/branding/googlelogo/ 2x/googlelogo_color_150x54dp.png) no-repeat 0% 0%/100% 100%;-moz-border-image:ur l(//www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png) 0}}@media only screen and (-webkit-min-device-pixel-ratio:2){#logo{background:ur l(//www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png) no-repeat;-webkit-background-size:100% 100%}}#logo{display:inline-block;height:5 4px;width:150px}404. That's an error.
The requested URL
/
was not found on this server. That's all we know.
我做错了什么?我该如何处理?
在 HTTP/1.1 中,您必须指定 Host
header;而在 HTTP/1.0 中则不需要。因此,您必须将请求改为:
GET / HTTP/1.0\r\n\r\n
或
GET / HTTP/1.1\r\n
Host: the.hostname.com\r\n\r\n
HTTP/1.1 之所以引入这一要求,是因为基于名称的虚拟主机已经非常普遍:同一个 IP 地址上可能托管多个站点,服务器需要依靠 Host header 来区分请求的是哪一个。