DPDK 在通过 `rte_eth_tx_burst` 发送数据包时更改 pakcets 内容
DPDK changes pakcets content when sending out packets through `rte_eth_tx_burst`
我用以下代码构造DPDK数据包:
#define PKG_GEN_COUNT 1
#define EIU_HEADER_LEN 42
#define ETHERNET_HEADER_LEN 14
#define IP_DEFTTL 64 /* from RFC 1340. */
#define IP_VERSION 0x40
#define IP_HDRLEN 0x05 /* default IP header length == five 32-bits words. */
#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN)
#define MEGA_JOB_GET 0x2
#define MEGA_JOB_SET 0x3
#define MEGA_END_MARK_LEN 2
#define PROTOCOL_TYPE_LEN 2U
#define KEY_LEN 8
#define VAL_LEN 8
#define PROTOCOL_KEYLEN_LEN 2U
#define PROTOCOL_VALLEN_LEN 4U
#define PROTOCOL_HEADER_LEN 8U
struct rte_mbuf *tx_bufs_pt[PKG_GEN_COUNT];
struct rte_ether_hdr *ethh;
struct rte_ipv4_hdr *ip_hdr;
struct rte_udp_hdr *udph;
for (int i = 0; i < PKG_GEN_COUNT; i++) {
struct rte_mbuf *pkt = (struct rte_mbuf *)rte_pktmbuf_alloc(
(struct rte_mempool *)send_mbuf_pool);
if (pkt == NULL)
rte_exit(EXIT_FAILURE,
"Cannot alloc storage memory in port %" PRIu16 "\n",
port);
pkt->data_len = 1484;
pkt->nb_segs = 1; // nb_segs
pkt->pkt_len = pkt->data_len;
pkt->ol_flags = PKT_TX_IPV4; // ol_flags
pkt->vlan_tci = 0; // vlan_tci
pkt->vlan_tci_outer = 0; // vlan_tci_outer
pkt->l2_len = sizeof(struct rte_ether_hdr);
pkt->l3_len = sizeof(struct rte_ipv4_hdr);
ethh = (struct rte_ether_hdr *)rte_pktmbuf_mtod(pkt, unsigned char *);
ethh->s_addr = S_Addr;
ethh->d_addr = D_Addr;
ethh->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
ip_hdr = (struct rte_ipv4_hdr *)((unsigned char *)ethh +
sizeof(struct rte_ether_hdr));
ip_hdr->version_ihl = IP_VHL_DEF;
ip_hdr->type_of_service = 0;
ip_hdr->fragment_offset = 0;
ip_hdr->time_to_live = IP_DEFTTL;
ip_hdr->next_proto_id = IPPROTO_UDP;
ip_hdr->packet_id = 0;
ip_hdr->total_length = rte_cpu_to_be_16(pktlen);
ip_hdr->src_addr = rte_cpu_to_be_32(IP_SRC_ADDR);
ip_hdr->dst_addr = rte_cpu_to_be_32(IP_DST_ADDR);
ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr);
udph = (struct rte_udp_hdr *)((unsigned char *)ip_hdr +
sizeof(struct rte_ipv4_hdr));
udph->src_port = 123;
udph->dst_port = 123;
udph->dgram_len =
rte_cpu_to_be_16((uint16_t)(pktlen - sizeof(struct rte_ether_hdr) -
sizeof(struct rte_ipv4_hdr)));
tx_bufs_pt[i] = pkt;
}
char *ptr = NULL;
uint64_t set_key = 1;
while (1) {
for (i = 0; i < PKG_GEN_COUNT; i++) {
/* Load request */
ptr = (char *)((char *)rte_pktmbuf_mtod(tx_bufs_pt[i], char *) +
EIU_HEADER_LEN);
tx_pkt_load(ptr, &set_key);
}
int nb_tx = rte_eth_tx_burst(port, queue_id, tx_bufs_pt, PKG_GEN_COUNT);
}
tx_pkt_load
函数填充IP数据包的内容。
static void tx_pkt_load(char *ptr, uint64_t *start_set_key) {
uint64_t k, get_key, set_key = *start_set_key;
for (k = 0; k < number_packet_set[WORKLOAD_ID]; k++) {
*(uint16_t *)ptr = MEGA_JOB_SET;
ptr += sizeof(uint16_t);
*(uint16_t *)ptr = KEY_LEN;
ptr += sizeof(uint16_t);
*(uint32_t *)ptr = VALUE_LEN;
ptr += sizeof(uint32_t);
set_key++;
*(uint64_t *)(ptr) = set_key;
ptr += KEY_LEN;
*(uint64_t *)(ptr) = set_key + 1;
ptr += VALUE_LEN;
*(uint16_t *)ptr = MEGA_JOB_GET;
ptr += sizeof(uint16_t);
*(uint16_t *)ptr = KEY_LEN;
ptr += sizeof(uint16_t);
get_key = set_key;
*(uint64_t *)(ptr) = get_key;
ptr += KEY_LEN;
}
*start_set_key = set_key;
/* pkt ending mark */
*(uint16_t *)ptr = 0xFFFF;
}
在调用 rte_eth_tx_burst
之前,我使用 show_pkt
函数转储 IP 数据包的内容。
void show_pkt(struct rte_mbuf *pkt) {
int pktlen = pkt->data_len - EIU_HEADER_LEN;
uint8_t *ptr = (uint8_t *)((uint8_t *)rte_pktmbuf_mtod(pkt, uint8_t *) +
EIU_HEADER_LEN);
while (*(uint16_t *)ptr != 0xFFFF) {
uint32_t key_len = *(uint16_t *)(ptr + PROTOCOL_TYPE_LEN);
if (*(uint16_t *)ptr == MEGA_JOB_GET) {
fprintf(
fp[sched_getcpu()], "GET\t%lu\n",
*(uint64_t *)(ptr + PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN));
ptr += PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN + key_len;
} else if (*(uint16_t *)ptr == MEGA_JOB_SET) {
uint32_t val_len =
*(uint16_t *)(ptr + PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN);
fprintf(fp[sched_getcpu()], "SET\t%lu\t%lu\n",
*(uint64_t *)(ptr + PROTOCOL_HEADER_LEN),
*(uint64_t *)(ptr + PROTOCOL_HEADER_LEN + key_len));
ptr += PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN +
PROTOCOL_VALLEN_LEN + key_len + val_len;
}
}
fprintf(fp[sched_getcpu()], "END_MARK: %04x \n", *(uint16_t *)ptr);
fprintf(fp[sched_getcpu()], "\n");
fflush(fp[sched_getcpu()]);
}
生成的文件显示了预期的数据包内容。每个 GET
都有与最后一个 SET
的第一个参数相同的参数,并且 GET
的参数应该是递增的。 SET
的第二个参数等于它的第一个参数加一,SET
的参数也应该分别是递增的。
SET 82 83
GET 82
SET 83 84
GET 83
SET 84 85
GET 84
SET 85 86
GET 85
SET 86 87
GET 86
SET 87 88
GET 87
SET 88 89
GET 88
SET 89 90
GET 89
SET 90 91
GET 90
SET 91 92
GET 91
SET 92 93
GET 92
SET 93 94
GET 93
SET 94 95
GET 94
SET 95 96
GET 95
SET 96 97
GET 96
SET 97 98
GET 97
SET 98 99
GET 98
SET 99 100
GET 99
SET 100 101
GET 100
SET 101 102
GET 101
SET 102 103
GET 102
SET 103 104
GET 103
SET 104 105
GET 104
SET 105 106
GET 105
SET 106 107
GET 106
SET 107 108
GET 107
SET 108 109
GET 108
SET 109 110
GET 109
SET 110 111
GET 110
SET 111 112
GET 111
SET 112 113
GET 112
SET 113 114
GET 113
SET 114 115
GET 114
SET 115 116
GET 115
SET 116 117
GET 116
SET 117 118
GET 117
SET 118 119
GET 118
SET 119 120
GET 119
SET 120 121
GET 120
SET 121 122
GET 121
END_MARK: ffff
但是,当我使用tcpdump
在目标机器上捕获接收到的数据包时,捕获的数据包不包含预期的内容。我还尝试使用 rte_eth_rx_burst
通过相同的函数 show_pkt
接收数据包和转储数据包的内容。它显示与以下相同的结果。太奇怪了。
SET 82 83
GET 82
SET 83 84
GET 83
SET 84 85
GET 84
SET 85 86
GET 85
SET 86 87
GET 86
SET 87 88
GET 87
SET 88 89
GET 88
SET 89 90
GET 89
SET 90 91
GET 90
SET 91 92
GET 91
SET 92 93
GET 92
SET 93 94
GET 93
SET 94 95
GET 94
SET 95 96
GET 95
SET 96 97
GET 96
SET 97 98
GET 97
SET 98 99
GET 98
SET 99 100
GET 99
SET 100 101
GET 100
SET 101 102
GET 101
SET 102 103
GET 102
SET 103 104
GET 103
SET 104 105
GET 104
SET 105 106
GET 105
SET 106 107
GET 106
SET 107 108
GET 107
SET 108 109
GET 108
SET 109 110
GET 109
SET 110 111
GET 110
SET 111 112
GET 111
SET 112 113
GET 112
SET 73 74
GET 73
SET 74 75
GET 74
SET 75 76
GET 75
SET 76 77
GET 76
SET 77 78
GET 77
SET 78 79
GET 78
SET 79 80
GET 79
SET 80 81
GET 80
SET 81 82
GET 81
END_MARK: ffff
[更新]
tcpdump
捕获的packets dumped through rte_pktmbuf_dump
contain excepted content. And the packets是奇数。
数据包的内容具有以下模式。
uint16_t (0x03)
uint16_t (0x08)
uint32_t (0x08)
uint64_t (x)
uint64_t (x + 1)
uint16_t (0x02)
uint16_t (0x8)
uint64_t (x)
x
应该在所有数据包中单调递增。 tcpdump
捕获的第二个数据包不符合这个规律。开始的 x
是 82
并且在包的末尾,x
是 81
.
[更新]
rte_pktmbuf_dump
转储的第二个数据包的一部分:
00000030: 00 00 2A 00 00 00 00 00 00 00 2B 00 00 00 00 00
...
000005C0: 08 00 51 00 00 00 00 00 00 00 FF FF
通过tcpdump
捕获的第二个数据包的一部分:
0x0020: 0800 0000 5200 0000 0000 0000 5300 0000
...
0x05b0: 0200 0800 5100 0000 0000 0000 ffff
与rte_pktmbuf_dump
转储的数据包的第0x32字节相比,tcpdump
捕获的数据包的第0x24字节应该是2a
。因为两个数据包的最后12个字节相同,这意味着两个数据包应该相同。
DPDK API 具有给定的选项和逻辑,不会在 NIC 传输之前修改数据包内容。为了确保相同,我用 tcpdump 测试了逻辑以捕获 Linux 端的数据包。
注意:由于缺少确切的代码或片段,已编辑代码以满足要求。我能够毫无问题地发送和接收数据包。
- DPDK 测试应用命令:
sudo LD_LIBRARY_PATH=[path to shared dpdk library] ./a.out --no-pci --vdev=net_tap0 -l 10 -- -p 0x1
- tcpdump 命令:
sudo tcpdump -exxxi dtap0 -Q in
- 代码:https://paste.ubuntu.com/p/zHP5q89yMz/
pktmbuf_dump:
01 02 03 04 05 06 01 02 03 04 05 06 08 00 45 00
05 A2 00 00 00 00 40 11 5B 2E 01 02 03 04 0A 0B
0C 0D 7B 00 7B 00 64 00 00 00 03 00 02 00 02 00
00 00 02 00 03 00 02 00 02 00 02 00 FF FF 00 00
tcpdump:
0x0000: 0102 0304 0506 0102 0304 0506 0800 4500
0x0010: 05a2 0000 0000 4011 5b2e 0102 0304 0a0b
0x0020: 0c0d 7b00 7b00 6400 0000 0300 0200 0200
0x0030: 0000 0200 0300 0200 0200 0200 ffff 0000
我用以下代码构造DPDK数据包:
#define PKG_GEN_COUNT 1
#define EIU_HEADER_LEN 42
#define ETHERNET_HEADER_LEN 14
#define IP_DEFTTL 64 /* from RFC 1340. */
#define IP_VERSION 0x40
#define IP_HDRLEN 0x05 /* default IP header length == five 32-bits words. */
#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN)
#define MEGA_JOB_GET 0x2
#define MEGA_JOB_SET 0x3
#define MEGA_END_MARK_LEN 2
#define PROTOCOL_TYPE_LEN 2U
#define KEY_LEN 8
#define VAL_LEN 8
#define PROTOCOL_KEYLEN_LEN 2U
#define PROTOCOL_VALLEN_LEN 4U
#define PROTOCOL_HEADER_LEN 8U
struct rte_mbuf *tx_bufs_pt[PKG_GEN_COUNT];
struct rte_ether_hdr *ethh;
struct rte_ipv4_hdr *ip_hdr;
struct rte_udp_hdr *udph;
for (int i = 0; i < PKG_GEN_COUNT; i++) {
struct rte_mbuf *pkt = (struct rte_mbuf *)rte_pktmbuf_alloc(
(struct rte_mempool *)send_mbuf_pool);
if (pkt == NULL)
rte_exit(EXIT_FAILURE,
"Cannot alloc storage memory in port %" PRIu16 "\n",
port);
pkt->data_len = 1484;
pkt->nb_segs = 1; // nb_segs
pkt->pkt_len = pkt->data_len;
pkt->ol_flags = PKT_TX_IPV4; // ol_flags
pkt->vlan_tci = 0; // vlan_tci
pkt->vlan_tci_outer = 0; // vlan_tci_outer
pkt->l2_len = sizeof(struct rte_ether_hdr);
pkt->l3_len = sizeof(struct rte_ipv4_hdr);
ethh = (struct rte_ether_hdr *)rte_pktmbuf_mtod(pkt, unsigned char *);
ethh->s_addr = S_Addr;
ethh->d_addr = D_Addr;
ethh->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
ip_hdr = (struct rte_ipv4_hdr *)((unsigned char *)ethh +
sizeof(struct rte_ether_hdr));
ip_hdr->version_ihl = IP_VHL_DEF;
ip_hdr->type_of_service = 0;
ip_hdr->fragment_offset = 0;
ip_hdr->time_to_live = IP_DEFTTL;
ip_hdr->next_proto_id = IPPROTO_UDP;
ip_hdr->packet_id = 0;
ip_hdr->total_length = rte_cpu_to_be_16(pktlen);
ip_hdr->src_addr = rte_cpu_to_be_32(IP_SRC_ADDR);
ip_hdr->dst_addr = rte_cpu_to_be_32(IP_DST_ADDR);
ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr);
udph = (struct rte_udp_hdr *)((unsigned char *)ip_hdr +
sizeof(struct rte_ipv4_hdr));
udph->src_port = 123;
udph->dst_port = 123;
udph->dgram_len =
rte_cpu_to_be_16((uint16_t)(pktlen - sizeof(struct rte_ether_hdr) -
sizeof(struct rte_ipv4_hdr)));
tx_bufs_pt[i] = pkt;
}
char *ptr = NULL;
uint64_t set_key = 1;
while (1) {
for (i = 0; i < PKG_GEN_COUNT; i++) {
/* Load request */
ptr = (char *)((char *)rte_pktmbuf_mtod(tx_bufs_pt[i], char *) +
EIU_HEADER_LEN);
tx_pkt_load(ptr, &set_key);
}
int nb_tx = rte_eth_tx_burst(port, queue_id, tx_bufs_pt, PKG_GEN_COUNT);
}
tx_pkt_load
函数填充IP数据包的内容。
static void tx_pkt_load(char *ptr, uint64_t *start_set_key) {
uint64_t k, get_key, set_key = *start_set_key;
for (k = 0; k < number_packet_set[WORKLOAD_ID]; k++) {
*(uint16_t *)ptr = MEGA_JOB_SET;
ptr += sizeof(uint16_t);
*(uint16_t *)ptr = KEY_LEN;
ptr += sizeof(uint16_t);
*(uint32_t *)ptr = VALUE_LEN;
ptr += sizeof(uint32_t);
set_key++;
*(uint64_t *)(ptr) = set_key;
ptr += KEY_LEN;
*(uint64_t *)(ptr) = set_key + 1;
ptr += VALUE_LEN;
*(uint16_t *)ptr = MEGA_JOB_GET;
ptr += sizeof(uint16_t);
*(uint16_t *)ptr = KEY_LEN;
ptr += sizeof(uint16_t);
get_key = set_key;
*(uint64_t *)(ptr) = get_key;
ptr += KEY_LEN;
}
*start_set_key = set_key;
/* pkt ending mark */
*(uint16_t *)ptr = 0xFFFF;
}
在调用 rte_eth_tx_burst
之前,我使用 show_pkt
函数转储 IP 数据包的内容。
void show_pkt(struct rte_mbuf *pkt) {
int pktlen = pkt->data_len - EIU_HEADER_LEN;
uint8_t *ptr = (uint8_t *)((uint8_t *)rte_pktmbuf_mtod(pkt, uint8_t *) +
EIU_HEADER_LEN);
while (*(uint16_t *)ptr != 0xFFFF) {
uint32_t key_len = *(uint16_t *)(ptr + PROTOCOL_TYPE_LEN);
if (*(uint16_t *)ptr == MEGA_JOB_GET) {
fprintf(
fp[sched_getcpu()], "GET\t%lu\n",
*(uint64_t *)(ptr + PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN));
ptr += PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN + key_len;
} else if (*(uint16_t *)ptr == MEGA_JOB_SET) {
uint32_t val_len =
*(uint16_t *)(ptr + PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN);
fprintf(fp[sched_getcpu()], "SET\t%lu\t%lu\n",
*(uint64_t *)(ptr + PROTOCOL_HEADER_LEN),
*(uint64_t *)(ptr + PROTOCOL_HEADER_LEN + key_len));
ptr += PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN +
PROTOCOL_VALLEN_LEN + key_len + val_len;
}
}
fprintf(fp[sched_getcpu()], "END_MARK: %04x \n", *(uint16_t *)ptr);
fprintf(fp[sched_getcpu()], "\n");
fflush(fp[sched_getcpu()]);
}
生成的文件显示了预期的数据包内容。每个 GET
都有与最后一个 SET
的第一个参数相同的参数,并且 GET
的参数应该是递增的。 SET
的第二个参数等于它的第一个参数加一,SET
的参数也应该分别是递增的。
SET 82 83
GET 82
SET 83 84
GET 83
SET 84 85
GET 84
SET 85 86
GET 85
SET 86 87
GET 86
SET 87 88
GET 87
SET 88 89
GET 88
SET 89 90
GET 89
SET 90 91
GET 90
SET 91 92
GET 91
SET 92 93
GET 92
SET 93 94
GET 93
SET 94 95
GET 94
SET 95 96
GET 95
SET 96 97
GET 96
SET 97 98
GET 97
SET 98 99
GET 98
SET 99 100
GET 99
SET 100 101
GET 100
SET 101 102
GET 101
SET 102 103
GET 102
SET 103 104
GET 103
SET 104 105
GET 104
SET 105 106
GET 105
SET 106 107
GET 106
SET 107 108
GET 107
SET 108 109
GET 108
SET 109 110
GET 109
SET 110 111
GET 110
SET 111 112
GET 111
SET 112 113
GET 112
SET 113 114
GET 113
SET 114 115
GET 114
SET 115 116
GET 115
SET 116 117
GET 116
SET 117 118
GET 117
SET 118 119
GET 118
SET 119 120
GET 119
SET 120 121
GET 120
SET 121 122
GET 121
END_MARK: ffff
但是,当我使用tcpdump
在目标机器上捕获接收到的数据包时,捕获的数据包不包含预期的内容。我还尝试使用 rte_eth_rx_burst
通过相同的函数 show_pkt
接收数据包和转储数据包的内容。它显示与以下相同的结果。太奇怪了。
SET 82 83
GET 82
SET 83 84
GET 83
SET 84 85
GET 84
SET 85 86
GET 85
SET 86 87
GET 86
SET 87 88
GET 87
SET 88 89
GET 88
SET 89 90
GET 89
SET 90 91
GET 90
SET 91 92
GET 91
SET 92 93
GET 92
SET 93 94
GET 93
SET 94 95
GET 94
SET 95 96
GET 95
SET 96 97
GET 96
SET 97 98
GET 97
SET 98 99
GET 98
SET 99 100
GET 99
SET 100 101
GET 100
SET 101 102
GET 101
SET 102 103
GET 102
SET 103 104
GET 103
SET 104 105
GET 104
SET 105 106
GET 105
SET 106 107
GET 106
SET 107 108
GET 107
SET 108 109
GET 108
SET 109 110
GET 109
SET 110 111
GET 110
SET 111 112
GET 111
SET 112 113
GET 112
SET 73 74
GET 73
SET 74 75
GET 74
SET 75 76
GET 75
SET 76 77
GET 76
SET 77 78
GET 77
SET 78 79
GET 78
SET 79 80
GET 79
SET 80 81
GET 80
SET 81 82
GET 81
END_MARK: ffff
[更新]
tcpdump
捕获的packets dumped through rte_pktmbuf_dump
contain excepted content. And the packets是奇数。
数据包的内容具有以下模式。
uint16_t (0x03)
uint16_t (0x08)
uint32_t (0x08)
uint64_t (x)
uint64_t (x + 1)
uint16_t (0x02)
uint16_t (0x8)
uint64_t (x)
x
应该在所有数据包中单调递增。 tcpdump
捕获的第二个数据包不符合这个规律。开始的 x
是 82
并且在包的末尾,x
是 81
.
[更新]
rte_pktmbuf_dump
转储的第二个数据包的一部分:
00000030: 00 00 2A 00 00 00 00 00 00 00 2B 00 00 00 00 00
...
000005C0: 08 00 51 00 00 00 00 00 00 00 FF FF
通过tcpdump
捕获的第二个数据包的一部分:
0x0020: 0800 0000 5200 0000 0000 0000 5300 0000
...
0x05b0: 0200 0800 5100 0000 0000 0000 ffff
与rte_pktmbuf_dump
转储的数据包的第0x32字节相比,tcpdump
捕获的数据包的第0x24字节应该是2a
。因为两个数据包的最后12个字节相同,这意味着两个数据包应该相同。
DPDK API 具有给定的选项和逻辑,不会在 NIC 传输之前修改数据包内容。为了确保相同,我用 tcpdump 测试了逻辑以捕获 Linux 端的数据包。
注意:由于缺少确切的代码或片段,已编辑代码以满足要求。我能够毫无问题地发送和接收数据包。
- DPDK 测试应用命令:
sudo LD_LIBRARY_PATH=[path to shared dpdk library] ./a.out --no-pci --vdev=net_tap0 -l 10 -- -p 0x1
- tcpdump 命令:
sudo tcpdump -exxxi dtap0 -Q in
- 代码:https://paste.ubuntu.com/p/zHP5q89yMz/
pktmbuf_dump:
01 02 03 04 05 06 01 02 03 04 05 06 08 00 45 00
05 A2 00 00 00 00 40 11 5B 2E 01 02 03 04 0A 0B
0C 0D 7B 00 7B 00 64 00 00 00 03 00 02 00 02 00
00 00 02 00 03 00 02 00 02 00 02 00 FF FF 00 00
tcpdump:
0x0000: 0102 0304 0506 0102 0304 0506 0800 4500
0x0010: 05a2 0000 0000 4011 5b2e 0102 0304 0a0b
0x0020: 0c0d 7b00 7b00 6400 0000 0300 0200 0200
0x0030: 0000 0200 0300 0200 0200 0200 ffff 0000