在客户端使用 getaddrinfo 时 connect 返回 -1

Connect returns -1 with getaddrinfo on client side

我在 Linux 上运行客户端和服务器代码。

服务器使用 TCP/IP,地址族为 AF_INET,套接字类型为 SOCK_STREAM。

起初,在客户端,我(错误地?)使用 bind 从 getaddrinfo 返回的链表中挑出正确的 addrinfo,并返回相应的 sockaddr_in。客户端中有一个类似下面这样的函数:

/*
 * Resolve `hostname` to an IPv4 stream address and return the first
 * candidate that a freshly created socket can bind() to.
 *
 * hostname: host name or numeric address handed to getaddrinfo().
 * servinfo: used only as local storage for the result list; it is passed
 *           by value, so the caller never receives the list allocated here.
 *
 * Returns a pointer to the sockaddr inside the getaddrinfo() result list,
 * or NULL when no candidate could be bound. Exits the process when the
 * lookup itself fails.
 *
 * NOTE: on success the returned pointer refers to memory owned by the
 * result list, so the list cannot be released here and the caller has no
 * handle to pass to freeaddrinfo() -- the list is leaked.
 */
struct sockaddr_in *hostname_to_ip(char *hostname, struct addrinfo *servinfo)
{
    int check_sfd;
    struct addrinfo hints, *p;

    memset(&hints, 0, sizeof(struct addrinfo));
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;

    int res = getaddrinfo(hostname, NULL, &hints, &servinfo);
    if (res != 0) {
        fprintf(stderr, "Error: error in getaddrinfo on hostname: %s\n", gai_strerror(res));
        exit(EXIT_FAILURE);
    }

    // getaddrinfo returned a linked list of relevant addresses
    // loop through the addresses and return the first one available
    for (p = servinfo; p != NULL; p = p->ai_next) {
        check_sfd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
        if (check_sfd < 0)
            continue;

        int bound = bind(check_sfd, p->ai_addr, p->ai_addrlen);

        // The socket only served as a probe; release it on every path so
        // no descriptor is leaked.
        close(check_sfd);

        if (bound == 0) { // bind successful
            return (struct sockaddr_in *) p->ai_addr;
        }
    }

    // Nothing in the list is handed back to the caller, so it can be freed.
    freeaddrinfo(servinfo);
    return NULL;
}

之后我把这个返回值传给 connect 来建立连接。

这种形式的代码运行良好,客户端和服务器之间的连接成功建立(所有功能都正常)。但是,由于没有正确使用 freeaddrinfo,出现了内存泄漏。

所以我决定将代码更改为以下内容:

/*
 * Resolve `hostname` to IPv4 stream addresses and connect to the first one
 * that accepts the connection.
 *
 * hostname:   host name or numeric address handed to getaddrinfo().
 * connection: out parameter; receives the connected socket descriptor.
 *
 * Exits the process if the lookup fails or no address can be connected to.
 */
void hostname_to_ip(char *hostname, int *connection)
{
    int check_sfd;
    int last_errno = 0;
    struct addrinfo hints, *p, *servinfo;

    memset(&hints, 0, sizeof(struct addrinfo));
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;

    // NOTE: the service argument is NULL, so getaddrinfo() does not fill in
    // a caller-chosen port in the returned socket addresses.
    int res = getaddrinfo(hostname, NULL, &hints, &servinfo);
    if (res != 0) {
        fprintf(stderr, "Error: error in getaddrinfo on hostname: %s\n", gai_strerror(res));
        exit(EXIT_FAILURE);
    }

    // getaddrinfo returned a linked list of relevant addresses
    // loop through the addresses and return the first one available
    for (p = servinfo; p != NULL; p = p->ai_next) {
        check_sfd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
        if (check_sfd == -1) {
            last_errno = errno;
            continue;
        }

        if (connect(check_sfd, p->ai_addr, p->ai_addrlen) == 0) { // connection successful
            *connection = check_sfd;
            break;
        }

        // Remember why connect() failed before close() can overwrite errno.
        last_errno = errno;
        close(check_sfd);
    }

    if (p == NULL) { // couldn't connect to any item in the linked list
        printf("Error: couldn't connect to hostname. %s\n", strerror(last_errno));
        exit(EXIT_FAILURE);
    }

    // freeaddrinfo() takes the list head itself, not its address.
    freeaddrinfo(servinfo);
}

但是,现在我似乎无法连接到服务器,因为 connect 总是返回 -1(p 一开始并不是 NULL,所以只是对该套接字的 connect 失败了)。错误是 connection refused。

当我将 ai_socktype 更改为 SOCK_DGRAM 时,connect 会成功,但客户端稍后在发送数据时失败(这说得通,因为服务器使用的是 SOCK_STREAM),这很奇怪。

有什么想法吗?客户端和服务器的代码很长,所以我没有全部附上,但我尝试添加所有相关信息。有什么不足的可以评论,我会补上的。

这是客户端的 strace 信息:

execve("./pcc_client", ["./pcc_client", "localhost", "2001", "1000"], [/* 71 vars */]) = 0
brk(NULL)                               = 0x2446000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=88205, ...}) = 0
mmap(NULL, 88205, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f0a0f4fa000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "7ELF[=13=][=13=][=13=][=13=][=13=][=13=][=13=][=13=][=13=]>[=13=][=13=][=13=][=13=]P\t[=13=][=13=][=13=][=13=][=13=]"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=1868984, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f0a0f4f9000
mmap(NULL, 3971488, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f0a0ef21000
mprotect(0x7f0a0f0e1000, 2097152, PROT_NONE) = 0
mmap(0x7f0a0f2e1000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1c0000) = 0x7f0a0f2e1000
mmap(0x7f0a0f2e7000, 14752, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f0a0f2e7000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f0a0f4f8000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f0a0f4f7000
arch_prctl(ARCH_SET_FS, 0x7f0a0f4f8700) = 0
mprotect(0x7f0a0f2e1000, 16384, PROT_READ) = 0
mprotect(0x601000, 4096, PROT_READ)     = 0
mprotect(0x7f0a0f510000, 4096, PROT_READ) = 0
munmap(0x7f0a0f4fa000, 88205)           = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 3
getsockname(3, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("0.0.0.0")}, [16]) = 0
socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0) = 4
connect(4, {sa_family=AF_LOCAL, sun_path="/var/run/nscd/socket"}, 110) = -1 ENOENT (No such file or directory)
close(4)                                = 0
socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0) = 4
connect(4, {sa_family=AF_LOCAL, sun_path="/var/run/nscd/socket"}, 110) = -1 ENOENT (No such file or directory)
close(4)                                = 0
brk(NULL)                               = 0x2446000
brk(0x2467000)                          = 0x2467000
open("/etc/nsswitch.conf", O_RDONLY|O_CLOEXEC) = 4
fstat(4, {st_mode=S_IFREG|0644, st_size=529, ...}) = 0
read(4, "# /etc/nsswitch.conf\n#\n# Example"..., 4096) = 529
read(4, "", 4096)                       = 0
close(4)                                = 0
open("/etc/host.conf", O_RDONLY|O_CLOEXEC) = 4
fstat(4, {st_mode=S_IFREG|0644, st_size=92, ...}) = 0
read(4, "# The \"order\" line is only used "..., 4096) = 92
read(4, "", 4096)                       = 0
close(4)                                = 0
getpid()                                = 12014
open("/etc/resolv.conf", O_RDONLY|O_CLOEXEC) = 4
fstat(4, {st_mode=S_IFREG|0644, st_size=184, ...}) = 0
read(4, "# Dynamic resolv.conf(5) file fo"..., 4096) = 184
read(4, "", 4096)                       = 0
close(4)                                = 0
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 4
fstat(4, {st_mode=S_IFREG|0644, st_size=88205, ...}) = 0
mmap(NULL, 88205, PROT_READ, MAP_PRIVATE, 4, 0) = 0x7f0a0f4fa000
close(4)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
open("/lib/x86_64-linux-gnu/libnss_files.so.2", O_RDONLY|O_CLOEXEC) = 4
read(4, "7ELF[=13=][=13=][=13=][=13=][=13=][=13=][=13=][=13=][=13=][=13=]>[=13=][=13=][=13=][=13=]0![=13=][=13=][=13=][=13=][=13=][=13=]"..., 832) = 832
fstat(4, {st_mode=S_IFREG|0644, st_size=47600, ...}) = 0
mmap(NULL, 2168600, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 4, 0) = 0x7f0a0ed0f000
mprotect(0x7f0a0ed1a000, 2093056, PROT_NONE) = 0
mmap(0x7f0a0ef19000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 4, 0xa000) = 0x7f0a0ef19000
mmap(0x7f0a0ef1b000, 22296, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f0a0ef1b000
close(4)                                = 0
mprotect(0x7f0a0ef19000, 4096, PROT_READ) = 0
munmap(0x7f0a0f4fa000, 88205)           = 0
open("/etc/hosts", O_RDONLY|O_CLOEXEC)  = 4
fstat(4, {st_mode=S_IFREG|0644, st_size=233, ...}) = 0
read(4, "127.0.0.1\tlocalhost\n127.0.1.1\tmi"..., 4096) = 233
read(4, "", 4096)                       = 0
close(4)                                = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 2), ...}) = 0
write(1, "x\n", 2x
)                      = 2
socket(PF_INET, SOCK_STREAM, IPPROTO_TCP) = 4
write(1, "4\n", 24
)                      = 2
write(1, "\n", 1
)                       = 1
connect(4, {sa_family=AF_INET, sin_port=htons(53511), sin_addr=inet_addr("127.0.0.1")}, 16) = -1 ECONNREFUSED (Connection refused)
close(4)                                = 0
write(1, "Error: couldn't connect to hostn"..., 56Error: couldn't connect to hostname. Connection refused
) = 56
exit_group(1)                           = ?
+++ exited with 1 +++

谢谢!

感谢@MarkPlotnick,我发现问题与链表中返回地址的未设置(或错误设置)端口号有关。以下代码更新修复了它(因为我知道调用函数时的端口号):

/*
 * Resolve `hostname` to IPv4 stream addresses, patch `port_number` into
 * each candidate and connect to the first one that accepts the connection.
 *
 * hostname:          host name or numeric address handed to getaddrinfo().
 * connection_socket: out parameter; receives the connected descriptor.
 * port_number:       TCP port in host byte order; must be in 0..65535.
 *
 * Exits the process if the port is out of range, the lookup fails, or no
 * address can be connected to.
 */
void hostname_to_ip(char *hostname, int *connection_socket, int port_number)
{
    int check_sfd;
    int last_errno = 0;
    struct addrinfo hints, *p, *servinfo;

    // htons() takes a 16-bit value; reject anything it would silently truncate.
    if (port_number < 0 || port_number > 65535) {
        fprintf(stderr, "Error: invalid port number: %d\n", port_number);
        exit(EXIT_FAILURE);
    }

    memset(&hints, 0, sizeof(struct addrinfo));
    // The loop below writes the port through a struct sockaddr_in, so only
    // IPv4 results may be requested; an IPv6 entry would be corrupted.
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = 0;
    hints.ai_protocol = 0;

    int res = getaddrinfo(hostname, NULL, &hints, &servinfo);
    if (res != 0) {
        fprintf(stderr, "Error: error in getaddrinfo on hostname: %s\n", gai_strerror(res));
        exit(EXIT_FAILURE);
    }

    // getaddrinfo returned a linked list of relevant addresses
    // loop through the addresses and return the first one available
    for (p = servinfo; p != NULL; p = p->ai_next) {
        check_sfd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
        if (check_sfd == -1) {
            last_errno = errno;
            continue;
        }

        // No service was given to getaddrinfo(), so set the port by hand.
        struct sockaddr_in *sockstruct = (struct sockaddr_in *) p->ai_addr;
        sockstruct->sin_port = htons((unsigned short) port_number);

        if (connect(check_sfd, p->ai_addr, p->ai_addrlen) != -1) { // successfully connected
            *connection_socket = check_sfd;
            break;
        }

        // Remember why connect() failed before close() can overwrite errno.
        last_errno = errno;
        close(check_sfd);
    }

    if (p == NULL) { // couldn't connect to any item in the linked list
        printf("Error: couldn't connect to hostname. %s\n", strerror(last_errno));
        exit(EXIT_FAILURE);
    }

    freeaddrinfo(servinfo);
}

只要编译器配合并且您不需要支持 IPv6,您的修复就能奏效。但更好的做法是一开始就把端口号作为 "service" 参数传给 getaddrinfo。为此必须先把端口号转换成字符串,但这样就可以重新使用 AF_UNSPEC,也不必对 getaddrinfo 返回的链表中的 sockaddr 做强制类型转换或修改:

/*
 * Resolve `hostname` (IPv4 or IPv6) for the given TCP port and connect to
 * the first address that accepts the connection.
 *
 * hostname:          host name or numeric address handed to getaddrinfo().
 * connection_socket: out parameter; receives the connected descriptor.
 * port_number:       TCP port in host byte order.
 *
 * Returns normally only on success. Prints a message naming the host and
 * exits the process if the lookup fails or no address can be connected to.
 */
void hostname_to_ip(char *hostname, int *connection_socket,
                    unsigned short port_number)
{
    int check_sfd;
    int last_errno = 0;
    struct addrinfo hints, *p, *servinfo;
    char port_number_s[sizeof("65535")];

    memset(&hints, 0, sizeof(struct addrinfo));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = AI_NUMERICSERV;   // the service string is a decimal port
    hints.ai_protocol = 0;

    // Cast so the argument type matches %u exactly after default promotion.
    snprintf(port_number_s, sizeof port_number_s, "%u", (unsigned) port_number);

    int res = getaddrinfo(hostname, port_number_s, &hints, &servinfo);
    if (res == EAI_SYSTEM) {
        // For EAI_SYSTEM the real cause is in errno, not in the gai code.
        fprintf(stderr, "Error looking up %s: %s\n",
                hostname, strerror(errno));
        exit(1);
    } else if (res != 0) {
        fprintf(stderr, "Error looking up %s: %s\n",
                hostname, gai_strerror(res));
        exit(1);
    } else if (servinfo == NULL) {
        fprintf(stderr, "Error looking up %s: No addresses found\n",
                hostname);
        exit(1);
    }

    // getaddrinfo returned a linked list of relevant addresses
    // loop through the addresses and return the first one we can connect to
    for (p = servinfo; p != NULL; p = p->ai_next) {
        check_sfd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
        if (check_sfd == -1) {
            last_errno = errno;
            continue;
        }

        // connect() returns 0 on success and -1 on failure.
        if (connect(check_sfd, p->ai_addr, p->ai_addrlen) == 0) {
            *connection_socket = check_sfd;
            freeaddrinfo(servinfo);
            return;
        }

        // Remember why connect() failed before close() can overwrite errno.
        last_errno = errno;
        close(check_sfd);
    }
    // If we get here, we couldn't connect to any of the addresses.
    fprintf(stderr, "Couldn't connect to %s: %s\n", hostname, strerror(last_errno));
    exit(1);
}

我还修复了其他一些细微的错误:

  • port_number 应该是 unsigned short 因为它在 TCP 中就是这样。
  • 如果 getaddrinfo 返回 EAI_SYSTEM,则需要打印 strerror(errno) 而不是 gai_strerror(res)。
  • 在某些系统上,getaddrinfo 可能报告成功,却没有返回任何地址。
  • 始终在有关连接到网络的错误消息中打印您尝试与之交谈的对等方的主机名。与文件操作失败时始终打印文件名的原理相同。
  • 如果在 connect 成功时直接在循环内部提前 return,就无需在循环之后再次检查是否成功。在即将打印错误消息并退出程序时,也不需要释放内存。

后续练习,难度从小到大:

  • 最好让函数在失败时返回 -1、在成功时返回已打开的套接字,而不是写入输出参数或直接退出程序。
  • 使用 getnameinfo 打印 "Connecting to 128.52.0.2..." 消息,就像 telnet 那样。
  • 使用非阻塞套接字同时连接所有地址,然后用 select 或 poll(任选其一)取得第一个连接成功的套接字,并关闭其余的。