使用 MSG_ZEROCOPY 发送 udp 消息时,什么会导致返回 SO_EE_CODE_ZEROCOPY_COPIED?
What can cause SO_EE_CODE_ZEROCOPY_COPIED to be returned when sending UDP messages with MSG_ZEROCOPY?
环境
Linux版本:Linux 5.4.0-4-amd64 Debian 5.4.19-1 x86_64 GNU/Linux
网卡分散-聚集:
scatter-gather: on
tx-scatter-gather: on
tx-scatter-gather-fraglist: off [fixed]
输出
sock_extended_err 的 ee_code 被设置为 SO_EE_CODE_ZEROCOPY_COPIED。根据 Linux 内核文档,当设备不支持 scatter-gather I/O 时会返回此代码,但可以看到我的网卡支持并且已经启用了 scatter-gather I/O。
所链接的文档用于给出 SO_EE_CODE_ZEROCOPY_COPIED 的官方解释,并说明 Linux 从 5.0 版本起支持 UDP 的 MSG_ZEROCOPY。
那么,关于其他原因有什么想法吗?还是我的代码有误?
代码
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <error.h>
#include <errno.h>
#include <limits.h>
#include <linux/errqueue.h>
#include <linux/if_packet.h>
#include <linux/ipv6.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <poll.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <linux/rds.h>
/*
 * Fallback definitions of the MSG_ZEROCOPY-related constants. Each one is
 * only defined here when the headers included above did not already
 * provide it.
 */

/* ee_origin value that do_recv_completion() requires on every notification. */
#ifndef SO_EE_ORIGIN_ZEROCOPY
#define SO_EE_ORIGIN_ZEROCOPY 5
#endif
/* SOL_SOCKET option that do_setup_tx() enables on the sending socket. */
#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60
#endif
/* ee_code bit that do_recv_completion() tests to detect a copied send. */
#ifndef SO_EE_CODE_ZEROCOPY_COPIED
#define SO_EE_CODE_ZEROCOPY_COPIED 1
#endif
/* sendmsg() flag that do_sendmsg() adds when zerocopy is requested. */
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000
#endif
/*
 * Size in bytes of the test payload. The expansion is parenthesized so the
 * macro behaves as a single value inside larger expressions (for example
 * `x / TESTSIZE` or `x % TESTSIZE`).
 */
#define TESTSIZE (16 * 1024)

/* Buffer that is filled with a pattern by do_test() and sent by do_tx(). */
static char payload[TESTSIZE];

/*
 * Run statistics:
 *   packets              - successful non-empty sendmsg() calls
 *   bytes                - total bytes accepted by sendmsg()
 *   completions          - notifications counted by do_recv_completion()
 *   expected_completions - zerocopy sends that accepted at least one byte
 */
static long packets, bytes, completions, expected_completions;

/* -1 until the first notification; afterwards 1 (zerocopy) or 0 (copied). */
static int zerocopied = -1;

/* Lower bound expected for the next notification range (gap detection). */
static uint32_t next_completion;
/*
 * Set an integer-valued socket option and terminate the program if the
 * kernel rejects it.
 *
 * fd      - socket to configure
 * level   - option level passed to setsockopt()
 * optname - option name passed to setsockopt()
 * val     - integer option value
 *
 * A trace line is printed whenever the option is SO_ZEROCOPY. On failure
 * error() reports errno and exits with status 1.
 */
static void do_setsockopt(int fd, int level, int optname, int val)
{
    int rc;

    if (optname == SO_ZEROCOPY)
        printf("set so_zerocopy\n");

    rc = setsockopt(fd, level, optname, &val, sizeof(val));
    if (rc != 0)
        error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
}
/*
 * Send one message without blocking, optionally requesting MSG_ZEROCOPY,
 * and update the global send statistics.
 *
 * fd          - socket to send on
 * msg         - message header describing the iovec(s) to transmit
 * do_zerocopy - when true, MSG_ZEROCOPY is added to the send flags
 *
 * Returns false if the socket would block (EAGAIN), true once the message
 * has been handed to the kernel. Any other sendmsg() failure terminates the
 * program through error().
 */
static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
{
    /* size_t/ssize_t match the iovec and sendmsg() types; int could truncate. */
    size_t len = 0;
    ssize_t ret;
    int flags = MSG_DONTWAIT;
    size_t i;

    for (i = 0; i < msg->msg_iovlen; i++)
        len += msg->msg_iov[i].iov_len;

    if (do_zerocopy) {
        printf("set msg_zerocopy\n");
        flags |= MSG_ZEROCOPY;
    }

    ret = sendmsg(fd, msg, flags);
    if (ret == -1) {
        if (errno == EAGAIN)
            return false;
        error(1, errno, "send");
    }

    /* Only non-empty messages count towards the statistics. */
    if (len) {
        packets++;
        bytes += ret;
        /* A completion is only expected when bytes were actually queued. */
        if (do_zerocopy && ret)
            expected_completions++;
    }

    return true;
}
/*
 * Create the transmit socket and enable SO_ZEROCOPY on it.
 *
 * domain, type, protocol - forwarded unchanged to socket()
 *
 * Returns the new socket descriptor. If socket() fails, error() reports
 * errno and exits with status 1.
 */
static int do_setup_tx(int domain, int type, int protocol)
{
    const int sock = socket(domain, type, protocol);

    if (-1 == sock)
        error(1, errno, "socket t");

    do_setsockopt(sock, SOL_SOCKET, SO_ZEROCOPY, 1);

    return sock;
}
/*
 * Read one zerocopy completion notification from the socket error queue.
 *
 * fd - socket on which MSG_ZEROCOPY sends were issued
 *
 * Returns false when the error queue is currently empty (EAGAIN) and true
 * after one notification has been consumed. The notification's range
 * [ee_info, ee_data] is added to `completions`, `next_completion` is
 * advanced past it, and `zerocopied` records whether the kernel reported
 * the data as copied (SO_EE_CODE_ZEROCOPY_COPIED). Any malformed or
 * unexpected notification terminates the program through error().
 */
static bool do_recv_completion(int fd)
{
    struct sock_extended_err *serr;
    struct msghdr msg = {0};
    struct cmsghdr *cm;
    uint32_t hi, lo, range;
    ssize_t ret;
    int zerocopy;
    /* The union gives the byte buffer the alignment of struct cmsghdr. */
    union {
        char buf[100];
        struct cmsghdr align;
    } control;

    msg.msg_control = control.buf;
    msg.msg_controllen = sizeof(control.buf);

    ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
    if (ret == -1 && errno == EAGAIN)
        return false;
    if (ret == -1)
        error(1, errno, "recvmsg notification");
    /* recvmsg() succeeded here, so errno is stale: report without it. */
    if (msg.msg_flags & MSG_CTRUNC)
        error(1, 0, "recvmsg notification: truncated");

    cm = CMSG_FIRSTHDR(&msg);
    if (!cm)
        error(1, 0, "cmsg: no cmsg");
    if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
          (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
          (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
        error(1, 0, "serr: wrong type: %d.%d",
              cm->cmsg_level, cm->cmsg_type);
    /* Make sure the payload is large enough before interpreting it. */
    if (cm->cmsg_len < CMSG_LEN(sizeof(*serr)))
        error(1, 0, "cmsg: too short");

    serr = (void *) CMSG_DATA(cm);
    if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
        error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
    if (serr->ee_errno != 0)
        error(1, 0, "serr: wrong error code: %u", serr->ee_errno);

    hi = serr->ee_data;
    lo = serr->ee_info;
    range = hi - lo + 1;

    /* Detect notification gaps. These should not happen often, if at all.
     * Gaps can occur due to drops, reordering and retransmissions.
     */
    if (lo != next_completion)
        fprintf(stderr, "gap: %u..%u does not append to %u\n",
                lo, hi, next_completion);
    next_completion = hi + 1;

    /* Use the same bit test for the recorded state and the trace output. */
    zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
    if (!zerocopy)
        printf("zerocopy is not valid, but why? It is rediculous!\n");
    else
        printf("zerocopy is available\n");

    if (zerocopied == -1) {
        zerocopied = zerocopy;
    } else if (zerocopied != zerocopy) {
        fprintf(stderr, "serr: inconsistent\n");
        zerocopied = zerocopy;
    }

    completions += range;
    return true;
}
/*
 * Send the test payload once with MSG_ZEROCOPY, wait for its completion
 * notification and print the resulting statistics to stderr.
 *
 * domain, type, protocol - forwarded to do_setup_tx() to create the socket
 *
 * The destination is the hard-coded IPv4 address 114.114.114.114, port
 * 5000, regardless of `domain`. Every setup or send failure terminates the
 * program through error().
 */
static void do_tx(int domain, int type, int protocol)
{
    struct sockaddr_in serv_addr;
    struct iovec iov = {0};
    struct msghdr msg = {0};
    int fd;

    fd = do_setup_tx(domain, type, protocol);

    /* Zero the address with the integer 0, not the character '0' (0x30). */
    memset(&serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_port = htons(5000);
    if (inet_pton(AF_INET, "114.114.114.114", &serv_addr.sin_addr) != 1)
        error(1, 0, "inet_pton");
    if (connect(fd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)))
        error(1, errno, "connect");

    iov.iov_base = payload;
    iov.iov_len = sizeof(payload);
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    /* Without a successful send no notification will ever arrive. */
    if (!do_sendmsg(fd, &msg, true))
        error(1, 0, "send: socket not ready");

    /*
     * Sleep in poll() until the error queue is signalled instead of
     * spinning on recvmsg(). POLLERR is reported even with events == 0.
     */
    while (!do_recv_completion(fd)) {
        struct pollfd pfd = { .fd = fd, .events = 0 };

        if (poll(&pfd, 1, -1) == -1 && errno != EINTR)
            error(1, errno, "poll");
    }

    sleep(1);

    if (close(fd))
        error(1, errno, "close");

    /* The counters are signed long, hence %ld. */
    fprintf(stderr, "tx=%ld (%ld B) txc=%ld zc=%c\n",
            packets, bytes, completions,
            zerocopied == 1 ? 'y' : 'n');
}
/*
 * Fill the payload buffer with the repeating pattern 'a'..'z' and run a
 * single transmit test.
 *
 * domain, type, protocol - forwarded unchanged to do_tx()
 */
static void do_test(int domain, int type, int protocol)
{
    char *cursor = payload;
    int idx = 0;

    while (idx < TESTSIZE) {
        *cursor++ = 'a' + (idx % 26);
        idx++;
    }

    do_tx(domain, type, protocol);
}
/*
 * Program entry point: run the zerocopy transmit test once over an IPv4
 * datagram socket with the default protocol.
 */
int main()
{
    const int domain = AF_INET;
    const int type = SOCK_DGRAM;

    do_test(domain, type, 0);

    return 0;
}
跟踪内核调用栈后,我发现导致该结果的 skb_copy_ubufs 是由 dev_queue_xmit_nit 调用的。这意味着只要系统中有网络分流器(network tap)在使用,MSG_ZEROCOPY 的通知就会返回 SO_EE_CODE_ZEROCOPY_COPIED。就我而言,它们是 dhclient 和 lldpd.service。把它们杀掉之后,该代码就不再出现了。
环境
Linux版本:Linux 5.4.0-4-amd64 Debian 5.4.19-1 x86_64 GNU/Linux
网卡分散-聚集:
scatter-gather: on
tx-scatter-gather: on
tx-scatter-gather-fraglist: off [fixed]
输出
sock_extended_err 的 ee_code 被设置为 SO_EE_CODE_ZEROCOPY_COPIED。根据 Linux 内核文档,当设备不支持 scatter-gather I/O 时会返回此代码,但可以看到我的网卡支持并且已经启用了 scatter-gather I/O。
所链接的文档用于给出 SO_EE_CODE_ZEROCOPY_COPIED 的官方解释,并说明 Linux 从 5.0 版本起支持 UDP 的 MSG_ZEROCOPY。
那么,关于其他原因有什么想法吗?还是我的代码有误?
代码
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <error.h>
#include <errno.h>
#include <limits.h>
#include <linux/errqueue.h>
#include <linux/if_packet.h>
#include <linux/ipv6.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <poll.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <linux/rds.h>
/*
 * Fallback definitions of the MSG_ZEROCOPY-related constants. Each one is
 * only defined here when the headers included above did not already
 * provide it.
 */

/* ee_origin value that do_recv_completion() requires on every notification. */
#ifndef SO_EE_ORIGIN_ZEROCOPY
#define SO_EE_ORIGIN_ZEROCOPY 5
#endif
/* SOL_SOCKET option that do_setup_tx() enables on the sending socket. */
#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60
#endif
/* ee_code bit that do_recv_completion() tests to detect a copied send. */
#ifndef SO_EE_CODE_ZEROCOPY_COPIED
#define SO_EE_CODE_ZEROCOPY_COPIED 1
#endif
/* sendmsg() flag that do_sendmsg() adds when zerocopy is requested. */
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000
#endif
/*
 * Size in bytes of the test payload. The expansion is parenthesized so the
 * macro behaves as a single value inside larger expressions (for example
 * `x / TESTSIZE` or `x % TESTSIZE`).
 */
#define TESTSIZE (16 * 1024)

/* Buffer that is filled with a pattern by do_test() and sent by do_tx(). */
static char payload[TESTSIZE];

/*
 * Run statistics:
 *   packets              - successful non-empty sendmsg() calls
 *   bytes                - total bytes accepted by sendmsg()
 *   completions          - notifications counted by do_recv_completion()
 *   expected_completions - zerocopy sends that accepted at least one byte
 */
static long packets, bytes, completions, expected_completions;

/* -1 until the first notification; afterwards 1 (zerocopy) or 0 (copied). */
static int zerocopied = -1;

/* Lower bound expected for the next notification range (gap detection). */
static uint32_t next_completion;
/*
 * Set an integer-valued socket option and terminate the program if the
 * kernel rejects it.
 *
 * fd      - socket to configure
 * level   - option level passed to setsockopt()
 * optname - option name passed to setsockopt()
 * val     - integer option value
 *
 * A trace line is printed whenever the option is SO_ZEROCOPY. On failure
 * error() reports errno and exits with status 1.
 */
static void do_setsockopt(int fd, int level, int optname, int val)
{
    int rc;

    if (optname == SO_ZEROCOPY)
        printf("set so_zerocopy\n");

    rc = setsockopt(fd, level, optname, &val, sizeof(val));
    if (rc != 0)
        error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
}
/*
 * Send one message without blocking, optionally requesting MSG_ZEROCOPY,
 * and update the global send statistics.
 *
 * fd          - socket to send on
 * msg         - message header describing the iovec(s) to transmit
 * do_zerocopy - when true, MSG_ZEROCOPY is added to the send flags
 *
 * Returns false if the socket would block (EAGAIN), true once the message
 * has been handed to the kernel. Any other sendmsg() failure terminates the
 * program through error().
 */
static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
{
    /* size_t/ssize_t match the iovec and sendmsg() types; int could truncate. */
    size_t len = 0;
    ssize_t ret;
    int flags = MSG_DONTWAIT;
    size_t i;

    for (i = 0; i < msg->msg_iovlen; i++)
        len += msg->msg_iov[i].iov_len;

    if (do_zerocopy) {
        printf("set msg_zerocopy\n");
        flags |= MSG_ZEROCOPY;
    }

    ret = sendmsg(fd, msg, flags);
    if (ret == -1) {
        if (errno == EAGAIN)
            return false;
        error(1, errno, "send");
    }

    /* Only non-empty messages count towards the statistics. */
    if (len) {
        packets++;
        bytes += ret;
        /* A completion is only expected when bytes were actually queued. */
        if (do_zerocopy && ret)
            expected_completions++;
    }

    return true;
}
/*
 * Create the transmit socket and enable SO_ZEROCOPY on it.
 *
 * domain, type, protocol - forwarded unchanged to socket()
 *
 * Returns the new socket descriptor. If socket() fails, error() reports
 * errno and exits with status 1.
 */
static int do_setup_tx(int domain, int type, int protocol)
{
    const int sock = socket(domain, type, protocol);

    if (-1 == sock)
        error(1, errno, "socket t");

    do_setsockopt(sock, SOL_SOCKET, SO_ZEROCOPY, 1);

    return sock;
}
/*
 * Read one zerocopy completion notification from the socket error queue.
 *
 * fd - socket on which MSG_ZEROCOPY sends were issued
 *
 * Returns false when the error queue is currently empty (EAGAIN) and true
 * after one notification has been consumed. The notification's range
 * [ee_info, ee_data] is added to `completions`, `next_completion` is
 * advanced past it, and `zerocopied` records whether the kernel reported
 * the data as copied (SO_EE_CODE_ZEROCOPY_COPIED). Any malformed or
 * unexpected notification terminates the program through error().
 */
static bool do_recv_completion(int fd)
{
    struct sock_extended_err *serr;
    struct msghdr msg = {0};
    struct cmsghdr *cm;
    uint32_t hi, lo, range;
    ssize_t ret;
    int zerocopy;
    /* The union gives the byte buffer the alignment of struct cmsghdr. */
    union {
        char buf[100];
        struct cmsghdr align;
    } control;

    msg.msg_control = control.buf;
    msg.msg_controllen = sizeof(control.buf);

    ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
    if (ret == -1 && errno == EAGAIN)
        return false;
    if (ret == -1)
        error(1, errno, "recvmsg notification");
    /* recvmsg() succeeded here, so errno is stale: report without it. */
    if (msg.msg_flags & MSG_CTRUNC)
        error(1, 0, "recvmsg notification: truncated");

    cm = CMSG_FIRSTHDR(&msg);
    if (!cm)
        error(1, 0, "cmsg: no cmsg");
    if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
          (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
          (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
        error(1, 0, "serr: wrong type: %d.%d",
              cm->cmsg_level, cm->cmsg_type);
    /* Make sure the payload is large enough before interpreting it. */
    if (cm->cmsg_len < CMSG_LEN(sizeof(*serr)))
        error(1, 0, "cmsg: too short");

    serr = (void *) CMSG_DATA(cm);
    if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
        error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
    if (serr->ee_errno != 0)
        error(1, 0, "serr: wrong error code: %u", serr->ee_errno);

    hi = serr->ee_data;
    lo = serr->ee_info;
    range = hi - lo + 1;

    /* Detect notification gaps. These should not happen often, if at all.
     * Gaps can occur due to drops, reordering and retransmissions.
     */
    if (lo != next_completion)
        fprintf(stderr, "gap: %u..%u does not append to %u\n",
                lo, hi, next_completion);
    next_completion = hi + 1;

    /* Use the same bit test for the recorded state and the trace output. */
    zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
    if (!zerocopy)
        printf("zerocopy is not valid, but why? It is rediculous!\n");
    else
        printf("zerocopy is available\n");

    if (zerocopied == -1) {
        zerocopied = zerocopy;
    } else if (zerocopied != zerocopy) {
        fprintf(stderr, "serr: inconsistent\n");
        zerocopied = zerocopy;
    }

    completions += range;
    return true;
}
/*
 * Send the test payload once with MSG_ZEROCOPY, wait for its completion
 * notification and print the resulting statistics to stderr.
 *
 * domain, type, protocol - forwarded to do_setup_tx() to create the socket
 *
 * The destination is the hard-coded IPv4 address 114.114.114.114, port
 * 5000, regardless of `domain`. Every setup or send failure terminates the
 * program through error().
 */
static void do_tx(int domain, int type, int protocol)
{
    struct sockaddr_in serv_addr;
    struct iovec iov = {0};
    struct msghdr msg = {0};
    int fd;

    fd = do_setup_tx(domain, type, protocol);

    /* Zero the address with the integer 0, not the character '0' (0x30). */
    memset(&serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_port = htons(5000);
    if (inet_pton(AF_INET, "114.114.114.114", &serv_addr.sin_addr) != 1)
        error(1, 0, "inet_pton");
    if (connect(fd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)))
        error(1, errno, "connect");

    iov.iov_base = payload;
    iov.iov_len = sizeof(payload);
    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    /* Without a successful send no notification will ever arrive. */
    if (!do_sendmsg(fd, &msg, true))
        error(1, 0, "send: socket not ready");

    /*
     * Sleep in poll() until the error queue is signalled instead of
     * spinning on recvmsg(). POLLERR is reported even with events == 0.
     */
    while (!do_recv_completion(fd)) {
        struct pollfd pfd = { .fd = fd, .events = 0 };

        if (poll(&pfd, 1, -1) == -1 && errno != EINTR)
            error(1, errno, "poll");
    }

    sleep(1);

    if (close(fd))
        error(1, errno, "close");

    /* The counters are signed long, hence %ld. */
    fprintf(stderr, "tx=%ld (%ld B) txc=%ld zc=%c\n",
            packets, bytes, completions,
            zerocopied == 1 ? 'y' : 'n');
}
/*
 * Fill the payload buffer with the repeating pattern 'a'..'z' and run a
 * single transmit test.
 *
 * domain, type, protocol - forwarded unchanged to do_tx()
 */
static void do_test(int domain, int type, int protocol)
{
    char *cursor = payload;
    int idx = 0;

    while (idx < TESTSIZE) {
        *cursor++ = 'a' + (idx % 26);
        idx++;
    }

    do_tx(domain, type, protocol);
}
/*
 * Program entry point: run the zerocopy transmit test once over an IPv4
 * datagram socket with the default protocol.
 */
int main()
{
    const int domain = AF_INET;
    const int type = SOCK_DGRAM;

    do_test(domain, type, 0);

    return 0;
}
跟踪内核调用栈后,我发现导致该结果的 skb_copy_ubufs 是由 dev_queue_xmit_nit 调用的。这意味着只要系统中有网络分流器(network tap)在使用,MSG_ZEROCOPY 的通知就会返回 SO_EE_CODE_ZEROCOPY_COPIED。就我而言,它们是 dhclient 和 lldpd.service。把它们杀掉之后,该代码就不再出现了。