Linux 套接字超时在 WSL 上有效,但在 Ubuntu 上无效

Linux Socket timeout works on WSL, but not on Ubuntu

我尝试 运行 没有服务器的 TCP 客户端。这个想法只是定期尝试连接。 为此,客户端尝试连接到本地主机上的端口 1500。

一段代码:

    // Excerpt: create a TCP socket, call connect() towards port 1500 at the
    // address given in argv[1], then use select() + SO_ERROR to decide whether
    // the connection was established. The `continue` statements target an
    // enclosing loop that is not part of this excerpt.

    // Create socket
    // Only the success case is reported; a failed socket() is not handled here.
    if ((create_socket=socket (AF_INET, SOCK_STREAM, PF_UNSPEC)) > 0)
      printf ("Socket created\n");
    address.sin_family = AF_INET;
    address.sin_port = htons (1500);
    inet_aton (argv[1], &address.sin_addr);

    // Connect to server
    // Note: the return value of connect() is discarded in this excerpt.
    connect ( create_socket,
                    (struct sockaddr *) &address,
                    sizeof (address));

    // Watch the socket for writability, waiting at most 2 seconds.
    FD_ZERO(&fdset);
    FD_SET(create_socket, &fdset);
    tv.tv_sec = 2;             /* 2 seconds timeout */
    tv.tv_usec = 0;

    rv = select(create_socket + 1, NULL, &fdset, NULL, &tv);
    if (rv == 1)
    {
        // The socket was reported writable: read its pending error code.
        int so_error;
        socklen_t len = sizeof so_error;

        getsockopt(create_socket, SOL_SOCKET, SO_ERROR, &so_error, &len);

        // A pending error of 0 is treated as "connected".
        if (so_error == 0)
        {
          printf ("Connection with server (%s) established \n",
          inet_ntoa (address.sin_addr));
        }
        else
        {
          printf("Error on connect: unsuccessfull\n");
          close (create_socket);
          continue;
        }
    }
    else if (rv == 0)
    {
      // select() returned 0: nothing became writable within the timeout.
      printf("Timeout on connect\n");
      close (create_socket);
      continue;
    }
    else
    {
      // Any other return value of select() is treated as an error.
      printf("Error on connect\n");
      close (create_socket);
      continue;
    }

我在 WSL 的 Ubuntu 18.04 中运行了这段代码。在那里,代码会等待 select 设定的 2 秒超时,并返回相应的返回值(超时时为 0,连接成功时为 1)。connect 的返回值在 WSL 和 VMware 上都是 -1。而在 Ubuntu 18(VMware)中,select 这一行没有任何停顿:即使没有任何服务器在监听该端口,我也总是立即得到返回值 1。

为什么会有这种差异?

该代码后面有类似的行为:

    // Excerpt: give recv() a 2 second receive timeout, then read messages in a
    // loop. The loop's closing `while` is not part of this excerpt.
    tv.tv_sec = 2;
    tv.tv_usec = 0;
    if (setsockopt(create_socket, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof tv) < 0)
    {
      printf("Error on setsockopt SO_RCVTIMEO");
      exit(EXIT_FAILURE);
    }
    // INNER LOOP: Receive data
    do
    {
      // At most BUF-1 bytes are read so that one byte stays free for the terminator.
      size = recv(create_socket, buffer, BUF-1, 0);
      if( size > 0)
      {
        // Terminate the received bytes so they can be printed as a C string.
        buffer[size] = '\0';
        printf ("Message received: %s\n", buffer);
      }
      else if (size == -1)
      {
      // on VMware, errno is 107 if there is no server, but coming to that line was not intended
        printf ("Timeout\n");
      }
      else //
      {
        // size == 0: reported as the server having gone offline.
        printf("Server offline\n");
        // GO BACK TO OUTER LOOP and reconnect
        break;
      }

在这里,在 WSL 中,recv 在等待传入数据时最多阻塞 2 秒(但前提是上面的代码块(connect、select)表明连接有效)。而在 VMware 中,recv 会立即返回(即使根本没有连接)。

它只是偶然在 WSL 上工作吗?

参数包含服务器IP,为127.0.0.1。 lsof 显示没有连接。


2020-11-18更新

这是要求的完整代码:

#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

#include <arpa/inet.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#define BUF 1024
using namespace std;

int main (int argc, char **argv) {
  int create_socket;
  char *buffer =  (char*)malloc(BUF);
  struct sockaddr_in address;
  int size;
  int rv;
  struct timeval tv;
  fd_set fdset;

  // HERE STARTS THE OUTER LOOP - Connect and restart connection
  do
  {
    // Create socket
    if ((create_socket=socket (AF_INET, SOCK_STREAM, PF_UNSPEC)) > 0)
      printf ("Socket created\n");
    address.sin_family = AF_INET;
    address.sin_port = htons (15000);
    inet_aton ("127.0.0.1", &address.sin_addr);

    // Connect to server
    int flags = fcntl(create_socket, F_GETFL, 0);
    if (flags == -1) return false;
    rv = connect ( create_socket,
                    (struct sockaddr *) &address,
                    sizeof (address));

    printf ("Connect. rv = %i\n", rv);

    if (rv == -1)
    {
      switch (errno)
      {
        case ECONNREFUSED:  printf ("errno = %i (ECONNREFUSED)\n", errno); break;
        default:  printf ("errno = %i (ECONNREFUSED)\n", errno); break;
      }

    }

    FD_ZERO(&fdset);
    FD_SET(create_socket, &fdset);

    tv.tv_sec = 2;
    tv.tv_usec = 0;
    rv = select(create_socket + 1, NULL, &fdset, NULL, &tv);
    if (rv == 1)
    {
        int so_error;
        socklen_t len = sizeof so_error;

        getsockopt(create_socket, SOL_SOCKET, SO_ERROR, &so_error, &len);

        if (so_error == 0)
        {
         printf ("Connection with server (%s) established \n",
          inet_ntoa (address.sin_addr));
        }
        else
        {
          printf("Error on connect: unsuccessfull\n");
          close (create_socket);
          continue;
        }
    }
    else if (rv == 0)
    {
      printf("Timeout on connect\n");
      close (create_socket);
      continue;
    }
    else
    {
      printf("Error on connect\n");
      close (create_socket);
      continue;
    }

    if (setsockopt(create_socket, SOL_SOCKET, SO_RCVTIMEO, (const char*)&tv, sizeof tv) < 0)
    {
      printf("Error on setsockopt SO_RCVTIMEO");
      exit(EXIT_FAILURE);
    }
    // INNER LOOP: Receive data
    do
    {
      size = recv(create_socket, buffer, BUF-1, 0);
      if( size > 0)
      {
        buffer[size] = '[=14=]';
        printf ("Data received: %s\n", buffer);
      }
      else if (size == -1)
      {
        printf ("Timeout\n");
      }
      else //
      {
        printf("Server offline\n");
        // GO BACK TO OUTER LOOP and reconnect
        break;
      }
    } while (strcmp (buffer, "quit\n") != 0);
    close (create_socket);
  } while (strcmp (buffer, "quit\n") != 0);
  return EXIT_SUCCESS;
}

在 WSL 中,输出是

Socket created
Connect. rv = -1
errno = 111 (ECONNREFUSED)

然后 2 秒内什么都没有 之后

Timeout on connect
Socket created
Connect. rv = -1
errno = 111 (ECONNREFUSED)

然后 2 秒内什么也没有...

VMware 中的输出

Socket created
Connect. rv = -1
errno = 111 (ECONNREFUSED)
Connection with server (127.0.0.1) established 
Timeout
Timeout
Timeout
Timeout

没有超时。

超时的想法是定期尝试连接,但不是越快越好。

当 errno = 111 (ECONNREFUSED) 后面紧跟着 Connection with server (127.0.0.1) established 时,显然有问题。

当 connect 返回 -1 且 errno 不是 EINPROGRESS 时,你不应该使用 select 和 getsockopt(...SO_ERROR...)。根据 https://man7.org/linux/man-pages/man2/connect.2.html,这种用法仅针对 EINPROGRESS 有文档说明。

在真实 Linux 和 WSL 中,您在 connect 失败后获得 errno = 111 (ECONNREFUSED)。我认为 WSL 中的超时是错误的,因为已经报告了错误(连接被拒绝),因此等待结果是没有意义的。但由于未指定行为,它可能取决于实现。

如果你想在下一次连接尝试之前有一段延迟,不应该使用 select,而应该使用例如 sleep,然后重复循环。

我建议这样:

    rv = connect ( create_socket,
                    (struct sockaddr *) &address,
                    sizeof (address));

    printf ("Connect. rv = %i\n", rv);

    if (rv == -1)
    {
      switch (errno)
      {
        case ECONNREFUSED:  printf ("errno = %i (ECONNREFUSED) %s\n", errno, strerror(errno)); break;
        default:  printf ("errno = %i (other) %s\n", errno, strerror(errno)); break;
      }
      if(errno != EINPROGRESS)
      {
        sleep(10); // chose a suitable delay before next connection attempt
        continue;
      }
    }