使用 MSG_ZEROCOPY 发送 udp 消息时,什么会导致返回 SO_EE_CODE_ZEROCOPY_COPIED?

What can cause returning SO_EE_CODE_ZEROCOPY_COPIED when sending udp messages with MSG_ZEROCOPY?

环境

Linux版本:Linux 5.4.0-4-amd64 Debian 5.4.19-1 x86_64 GNU/Linux

网卡分散-聚集:

scatter-gather: on
    tx-scatter-gather: on
    tx-scatter-gather-fraglist: off [fixed]

输出

sock_extended_err 的 ee_code 被设置为 SO_EE_CODE_ZEROCOPY_COPIED。根据 Linux 内核文档,当设备不支持 scatter-gather I/O 时会返回此代码;但如上所示,我的网卡支持并且已经启用了 scatter-gather I/O。

链接的文档用于给出 SO_EE_CODE_ZEROCOPY_COPIED 的官方解释,并说明 Linux 从 5.0 版本起支持 UDP 的 MSG_ZEROCOPY。

那么,关于其他原因有什么想法吗?还是我的代码有误?

代码

#define _GNU_SOURCE
#include <arpa/inet.h>
#include <error.h>
#include <errno.h>
#include <limits.h>
#include <linux/errqueue.h>
#include <linux/if_packet.h>
#include <linux/ipv6.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <poll.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <linux/rds.h>
#ifndef SO_EE_ORIGIN_ZEROCOPY
#define SO_EE_ORIGIN_ZEROCOPY       5
#endif
#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60
#endif
#ifndef SO_EE_CODE_ZEROCOPY_COPIED
#define SO_EE_CODE_ZEROCOPY_COPIED  1
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY    0x4000000
#endif

/* Size in bytes of the payload sent in one datagram.
 * Parenthesized so the macro expands correctly inside larger expressions
 * such as `x / TESTSIZE`.
 */
#define TESTSIZE (16 * 1024)

/* Buffer handed to sendmsg(); filled by do_test() before transmission. */
static char payload[TESTSIZE];
/* Running counters maintained by the send and completion paths. */
static long packets, bytes, completions, expected_completions;
/* Tri-state: -1 = no notification seen yet, 1 = sent zerocopy, 0 = copied. */
static int  zerocopied = -1;
/* First notification id expected in the next completion message. */
static uint32_t next_completion;

/* Set an int-valued socket option on fd.
 *
 * Prints a trace line when the option is SO_ZEROCOPY. If setsockopt() fails
 * the program is terminated through error() with the level, option name and
 * value in the diagnostic.
 */
static void do_setsockopt(int fd, int level, int optname, int val)
{
    int rc;

    if (optname == SO_ZEROCOPY)
        printf("set so_zerocopy\n");

    rc = setsockopt(fd, level, optname, &val, sizeof(val));
    if (rc != 0)
        error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
}

/* Send one message on fd without blocking, optionally with MSG_ZEROCOPY.
 *
 * Returns false when sendmsg() fails with EAGAIN (nothing was sent) and true
 * otherwise. Any other sendmsg() failure terminates the program via error().
 *
 * For a message with a non-zero total iovec length, a successful send bumps
 * the global packets counter, adds the sent byte count to bytes, and, when
 * zerocopy was requested and at least one byte was sent, increments
 * expected_completions.
 */
static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
{
    ssize_t ret;            /* sendmsg() returns ssize_t, not int */
    size_t len = 0;         /* iov_len is size_t; avoid narrowing to int */
    size_t i;
    int flags = MSG_DONTWAIT;

    for (i = 0; i < msg->msg_iovlen; i++)
        len += msg->msg_iov[i].iov_len;

    if (do_zerocopy) {
        printf("set msg_zerocopy\n");
        flags |= MSG_ZEROCOPY;
    }

    ret = sendmsg(fd, msg, flags);
    if (ret == -1) {
        if (errno == EAGAIN)
            return false;
        error(1, errno, "send");
    }

    if (len) {
        packets++;
        bytes += ret;
        if (do_zerocopy && ret)
            expected_completions++;
    }
    return true;
}

/* Open a socket with the requested domain/type/protocol and turn on
 * SO_ZEROCOPY for it. Terminates the program if socket() fails.
 * Returns the new descriptor.
 */
static int do_setup_tx(int domain, int type, int protocol)
{
    int sock;

    if ((sock = socket(domain, type, protocol)) == -1)
        error(1, errno, "socket t");

    do_setsockopt(sock, SOL_SOCKET, SO_ZEROCOPY, 1);

    return sock;
}


/* Read one zerocopy completion notification from the socket error queue.
 *
 * Returns false when recvmsg(MSG_ERRQUEUE) fails with EAGAIN (nothing
 * queued) and true once a notification has been consumed. A truncated,
 * missing, too-short or unexpected control message terminates the program
 * via error().
 *
 * Side effects on file-scope state:
 *   - next_completion is advanced past the notified range,
 *   - completions grows by the number of ids in the range,
 *   - zerocopied records whether SO_EE_CODE_ZEROCOPY_COPIED was clear (1)
 *     or set (0); a change between notifications is reported on stderr.
 */
static bool do_recv_completion(int fd)
{
    struct sock_extended_err *serr;
    struct msghdr msg = {0};
    struct cmsghdr *cm;
    uint32_t hi, lo, range;
    int ret, zerocopy;
    /* The union forces alignment suitable for struct cmsghdr; the usable
     * size stays the 100 bytes of buf.
     */
    union {
        char buf[100];
        struct cmsghdr align;
    } control;

    msg.msg_control = control.buf;
    msg.msg_controllen = sizeof(control.buf);

    ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
    if (ret == -1 && errno == EAGAIN)
        return false;
    if (ret == -1)
        error(1, errno, "recvmsg notification");
    /* recvmsg() succeeded here, so errno is not meaningful: pass 0. */
    if (msg.msg_flags & MSG_CTRUNC)
        error(1, 0, "recvmsg notification: truncated");

    cm = CMSG_FIRSTHDR(&msg);
    if (!cm)
        error(1, 0, "cmsg: no cmsg");
    if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
          (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
          (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
        error(1, 0, "serr: wrong type: %d.%d",
              cm->cmsg_level, cm->cmsg_type);
    /* Make sure the payload is large enough before interpreting it. */
    if (cm->cmsg_len < CMSG_LEN(sizeof(*serr)))
        error(1, 0, "serr: short cmsg: %lu", (unsigned long) cm->cmsg_len);

    serr = (void *) CMSG_DATA(cm);
    if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
        error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
    if (serr->ee_errno != 0)
        error(1, 0, "serr: wrong error code: %u", serr->ee_errno);

    /* ee_info..ee_data is the inclusive range of completed send ids. */
    hi = serr->ee_data;
    lo = serr->ee_info;
    range = hi - lo + 1;

    /* Detect notification gaps. These should not happen often, if at all.
     * Gaps can occur due to drops, reordering and retransmissions.
     */
    if (lo != next_completion)
        fprintf(stderr, "gap: %u..%u does not append to %u\n",
            lo, hi, next_completion);
    next_completion = hi + 1;

    /* Use the same bit test for both the log line and the recorded state. */
    zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
    if (!zerocopy)
        printf("zerocopy is not valid, but why? It is rediculous!\n");
    else
        printf("zerocopy is available\n");

    if (zerocopied == -1) {
        zerocopied = zerocopy;
    } else if (zerocopied != zerocopy) {
        fprintf(stderr, "serr: inconsistent\n");
        zerocopied = zerocopy;
    }

    completions += range;
    return true;
}


/* Send one zerocopy datagram carrying the global payload to a fixed IPv4
 * destination (114.114.114.114:5000), wait for its completion notification,
 * then close the socket and print a summary of the counters to stderr.
 *
 * Any failure while parsing the address, connecting, sending, polling or
 * closing terminates the program via error().
 */
static void do_tx(int domain, int type, int protocol)
{
    struct sockaddr_in serv_addr;
    struct iovec iov = {0};
    struct msghdr msg = {0};
    struct pollfd pfd = {0};
    int fd;

    fd = do_setup_tx(domain, type, protocol);

    /* Zero the address with the byte value 0 (not the character '0'). */
    memset(&serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_port = htons(5000);
    if (inet_pton(AF_INET, "114.114.114.114", &serv_addr.sin_addr) != 1)
        error(1, 0, "inet_pton");
    if (connect(fd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)))
        error(1, errno, "connect");

    iov.iov_base = payload;
    iov.iov_len = sizeof(payload);
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    /* Without a successful send no completion will ever arrive, so do not
     * fall through into the wait loop.
     */
    if (!do_sendmsg(fd, &msg, true))
        error(1, 0, "send: would block");

    /* Sleep in poll() between attempts instead of spinning; a pending
     * error-queue entry is reported as POLLERR, which needs no events bit.
     */
    pfd.fd = fd;
    pfd.events = 0;
    while (!do_recv_completion(fd)) {
        if (poll(&pfd, 1, -1) == -1 && errno != EINTR)
            error(1, errno, "poll");
    }
    sleep(1);

    if (close(fd))
        error(1, errno, "close");
    /* The counters are (signed) long, so print them with %ld. */
    fprintf(stderr, "tx=%ld (%ld B) txc=%ld zc=%c\n",
        packets, bytes, completions,
        zerocopied == 1 ? 'y' : 'n');
}

/* Fill the global payload with a repeating 'a'..'z' pattern, then run a
 * single transmit test on a socket of the given domain/type/protocol.
 */
static void do_test(int domain, int type, int protocol)
{
    int pos = 0;

    while (pos < TESTSIZE) {
        payload[pos] = 'a' + (pos % 26);
        pos++;
    }

    do_tx(domain, type, protocol);
}

/* Entry point: run the zerocopy transmit test once over an IPv4 UDP socket. */
int main(void)
{
    do_test(AF_INET, SOCK_DGRAM, 0);
    return 0;
}

跟踪内核调用栈后,我发现导致该结果的 skb_copy_ubufs 是由 dev_queue_xmit_nit 调用的。这意味着只要系统中有网络分流器(network tap)在使用,MSG_ZEROCOPY 的完成通知就会返回 SO_EE_CODE_ZEROCOPY_COPIED。就我的情况而言,这些分流器是 dhclient 和 lldpd.service;把它们结束之后,该返回码就不再出现了。