DPDK 在通过 `rte_eth_tx_burst` 发送数据包时更改 pakcets 内容

DPDK changes pakcets content when sending out packets through `rte_eth_tx_burst`

我用以下代码构造DPDK数据包:

#define PKG_GEN_COUNT 1
#define EIU_HEADER_LEN 42
#define ETHERNET_HEADER_LEN 14
#define IP_DEFTTL 64 /* from RFC 1340. */
#define IP_VERSION 0x40
#define IP_HDRLEN 0x05 /* default IP header length == five 32-bits words. */
#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN)

#define MEGA_JOB_GET 0x2
#define MEGA_JOB_SET 0x3
#define MEGA_END_MARK_LEN 2
#define PROTOCOL_TYPE_LEN 2U
#define KEY_LEN 8
#define VAL_LEN 8
#define PROTOCOL_KEYLEN_LEN 2U
#define PROTOCOL_VALLEN_LEN 4U
#define PROTOCOL_HEADER_LEN 8U

struct rte_mbuf *tx_bufs_pt[PKG_GEN_COUNT];
struct rte_ether_hdr *ethh;
struct rte_ipv4_hdr *ip_hdr;
struct rte_udp_hdr *udph;
for (int i = 0; i < PKG_GEN_COUNT; i++) {
    struct rte_mbuf *pkt = (struct rte_mbuf *)rte_pktmbuf_alloc(
        (struct rte_mempool *)send_mbuf_pool);
    if (pkt == NULL)
        rte_exit(EXIT_FAILURE,
                    "Cannot alloc storage memory in  port %" PRIu16 "\n",
                    port);
    pkt->data_len = 1484;
    pkt->nb_segs = 1;  // nb_segs
    pkt->pkt_len = pkt->data_len;
    pkt->ol_flags = PKT_TX_IPV4;  // ol_flags
    pkt->vlan_tci = 0;            // vlan_tci
    pkt->vlan_tci_outer = 0;      // vlan_tci_outer
    pkt->l2_len = sizeof(struct rte_ether_hdr);
    pkt->l3_len = sizeof(struct rte_ipv4_hdr);

    ethh = (struct rte_ether_hdr *)rte_pktmbuf_mtod(pkt, unsigned char *);
    ethh->s_addr = S_Addr;
    ethh->d_addr = D_Addr;
    ethh->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);

    ip_hdr = (struct rte_ipv4_hdr *)((unsigned char *)ethh +
                                        sizeof(struct rte_ether_hdr));
    ip_hdr->version_ihl = IP_VHL_DEF;
    ip_hdr->type_of_service = 0;
    ip_hdr->fragment_offset = 0;
    ip_hdr->time_to_live = IP_DEFTTL;
    ip_hdr->next_proto_id = IPPROTO_UDP;
    ip_hdr->packet_id = 0;
    ip_hdr->total_length = rte_cpu_to_be_16(pktlen);
    ip_hdr->src_addr = rte_cpu_to_be_32(IP_SRC_ADDR);
    ip_hdr->dst_addr = rte_cpu_to_be_32(IP_DST_ADDR);
    ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr);

    udph = (struct rte_udp_hdr *)((unsigned char *)ip_hdr +
                                    sizeof(struct rte_ipv4_hdr));
    udph->src_port = 123;
    udph->dst_port = 123;
    udph->dgram_len =
        rte_cpu_to_be_16((uint16_t)(pktlen - sizeof(struct rte_ether_hdr) -
                                    sizeof(struct rte_ipv4_hdr)));
    tx_bufs_pt[i] = pkt;
}
char *ptr = NULL;
uint64_t set_key = 1;
while (1) {
    for (i = 0; i < PKG_GEN_COUNT; i++) {
        /* Load request */
        ptr = (char *)((char *)rte_pktmbuf_mtod(tx_bufs_pt[i], char *) +
                        EIU_HEADER_LEN);
        tx_pkt_load(ptr, &set_key);
    }
    int nb_tx = rte_eth_tx_burst(port, queue_id, tx_bufs_pt, PKG_GEN_COUNT);
}

tx_pkt_load函数填充IP数据包的内容。

static void tx_pkt_load(char *ptr, uint64_t *start_set_key) {
    uint64_t k, get_key, set_key = *start_set_key;
    for (k = 0; k < number_packet_set[WORKLOAD_ID]; k++) {
        *(uint16_t *)ptr = MEGA_JOB_SET;
        ptr += sizeof(uint16_t);
        *(uint16_t *)ptr = KEY_LEN;
        ptr += sizeof(uint16_t);
        *(uint32_t *)ptr = VALUE_LEN;
        ptr += sizeof(uint32_t);

        set_key++;

        *(uint64_t *)(ptr) = set_key;
        ptr += KEY_LEN;
        *(uint64_t *)(ptr) = set_key + 1;
        ptr += VALUE_LEN;

        *(uint16_t *)ptr = MEGA_JOB_GET;
        ptr += sizeof(uint16_t);
        *(uint16_t *)ptr = KEY_LEN;
        ptr += sizeof(uint16_t);

        get_key = set_key;

        *(uint64_t *)(ptr) = get_key;
        ptr += KEY_LEN;
    }
    *start_set_key = set_key;
    /* pkt ending mark */
    *(uint16_t *)ptr = 0xFFFF;
}

在调用 rte_eth_tx_burst 之前,我使用 show_pkt 函数转储 IP 数据包的内容。

void show_pkt(struct rte_mbuf *pkt) {
    int pktlen = pkt->data_len - EIU_HEADER_LEN;
    uint8_t *ptr = (uint8_t *)((uint8_t *)rte_pktmbuf_mtod(pkt, uint8_t *) +
                               EIU_HEADER_LEN);
    while (*(uint16_t *)ptr != 0xFFFF) {
        uint32_t key_len = *(uint16_t *)(ptr + PROTOCOL_TYPE_LEN);
        if (*(uint16_t *)ptr == MEGA_JOB_GET) {
            fprintf(
                fp[sched_getcpu()], "GET\t%lu\n",
                *(uint64_t *)(ptr + PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN));
            ptr += PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN + key_len;
        } else if (*(uint16_t *)ptr == MEGA_JOB_SET) {
            uint32_t val_len =
                *(uint16_t *)(ptr + PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN);
            fprintf(fp[sched_getcpu()], "SET\t%lu\t%lu\n",
                    *(uint64_t *)(ptr + PROTOCOL_HEADER_LEN),
                    *(uint64_t *)(ptr + PROTOCOL_HEADER_LEN + key_len));
            ptr += PROTOCOL_TYPE_LEN + PROTOCOL_KEYLEN_LEN +
                   PROTOCOL_VALLEN_LEN + key_len + val_len;
        }
    }
    fprintf(fp[sched_getcpu()], "END_MARK: %04x \n", *(uint16_t *)ptr);
    fprintf(fp[sched_getcpu()], "\n");
    fflush(fp[sched_getcpu()]);
}

生成的文件显示了预期的数据包内容。每个 GET 都有与最后一个 SET 的第一个参数相同的参数,并且 GET 的参数应该是递增的。 SET的第二个参数等于它的第一个参数加一,SET的参数也应该分别是递增的。

SET     82      83
GET     82
SET     83      84
GET     83
SET     84      85
GET     84
SET     85      86
GET     85
SET     86      87
GET     86
SET     87      88
GET     87
SET     88      89
GET     88
SET     89      90
GET     89
SET     90      91
GET     90
SET     91      92
GET     91
SET     92      93
GET     92
SET     93      94
GET     93
SET     94      95
GET     94
SET     95      96
GET     95
SET     96      97
GET     96
SET     97      98
GET     97
SET     98      99
GET     98
SET     99      100
GET     99
SET     100     101
GET     100
SET     101     102
GET     101
SET     102     103
GET     102
SET     103     104
GET     103
SET     104     105
GET     104
SET     105     106
GET     105
SET     106     107
GET     106
SET     107     108
GET     107
SET     108     109
GET     108
SET     109     110
GET     109
SET     110     111
GET     110
SET     111     112
GET     111
SET     112     113
GET     112
SET     113     114
GET     113
SET     114     115
GET     114
SET     115     116
GET     115
SET     116     117
GET     116
SET     117     118
GET     117
SET     118     119
GET     118
SET     119     120
GET     119
SET     120     121
GET     120
SET     121     122
GET     121
END_MARK: ffff

但是,当我使用tcpdump在目标机器上捕获接收到的数据包时,捕获的数据包不包含预期的内容。我还尝试使用 rte_eth_rx_burst 通过相同的函数 show_pkt 接收数据包和转储数据包的内容。它显示与以下相同的结果。太奇怪了。

SET     82      83
GET     82
SET     83      84
GET     83
SET     84      85
GET     84
SET     85      86
GET     85
SET     86      87
GET     86
SET     87      88
GET     87
SET     88      89
GET     88
SET     89      90
GET     89
SET     90      91
GET     90
SET     91      92
GET     91
SET     92      93
GET     92
SET     93      94
GET     93
SET     94      95
GET     94
SET     95      96
GET     95
SET     96      97
GET     96
SET     97      98
GET     97
SET     98      99
GET     98
SET     99      100
GET     99
SET     100     101
GET     100
SET     101     102
GET     101
SET     102     103
GET     102
SET     103     104
GET     103
SET     104     105
GET     104
SET     105     106
GET     105
SET     106     107
GET     106
SET     107     108
GET     107
SET     108     109
GET     108
SET     109     110
GET     109
SET     110     111
GET     110
SET     111     112
GET     111
SET     112     113
GET     112
SET     73      74
GET     73
SET     74      75
GET     74
SET     75      76
GET     75
SET     76      77
GET     76
SET     77      78
GET     77
SET     78      79
GET     78
SET     79      80
GET     79
SET     80      81
GET     80
SET     81      82
GET     81
END_MARK: ffff

[更新] tcpdump捕获的packets dumped through rte_pktmbuf_dump contain excepted content. And the packets是奇数。 数据包的内容具有以下模式。

uint16_t (0x03)
uint16_t (0x08)
uint32_t (0x08)
uint64_t (x)
uint64_t (x + 1)
uint16_t (0x02)
uint16_t (0x8)
uint64_t (x)

x 应该在所有数据包中单调递增。 tcpdump捕获的第二个数据包不符合这个规律。开始的 x82 并且在包的末尾,x81.

[更新]

rte_pktmbuf_dump转储的第二个数据包的一部分:

00000030: 00 00 2A 00 00 00 00 00 00 00 2B 00 00 00 00 00
...
000005C0: 08 00 51 00 00 00 00 00 00 00 FF FF

通过tcpdump捕获的第二个数据包的一部分:

0x0020:  0800 0000 5200 0000 0000 0000 5300 0000
...
0x05b0:  0200 0800 5100 0000 0000 0000 ffff 

rte_pktmbuf_dump转储的数据包的第0x32字节相比,tcpdump捕获的数据包的第0x24字节应该是2a。因为两个数据包的最后12个字节相同,这意味着两个数据包应该相同。

DPDK API 具有给定的选项和逻辑,不会在 NIC 传输之前修改数据包内容。为了确保相同,我用 tcpdump 测试了逻辑以捕获 Linux 端的数据包。

注意:由于缺少确切的代码或片段,已编辑代码以满足要求。我能够毫无问题地发送和接收数据包。

  1. DPDK 测试应用命令: sudo LD_LIBRARY_PATH=[path to shared dpdk library] ./a.out --no-pci --vdev=net_tap0 -l 10 -- -p 0x1
  2. tcpdump 命令:sudo tcpdump -exxxi dtap0 -Q in
  3. 代码:https://paste.ubuntu.com/p/zHP5q89yMz/

pktmbuf_dump:

01 02 03 04 05 06 01 02 03 04 05 06 08 00 45 00
05 A2 00 00 00 00 40 11 5B 2E 01 02 03 04 0A 0B
0C 0D 7B 00 7B 00 64 00 00 00 03 00 02 00 02 00
00 00 02 00 03 00 02 00 02 00 02 00 FF FF 00 00

tcpdump:

        0x0000:  0102 0304 0506 0102 0304 0506 0800 4500
        0x0010:  05a2 0000 0000 4011 5b2e 0102 0304 0a0b
        0x0020:  0c0d 7b00 7b00 6400 0000 0300 0200 0200
        0x0030:  0000 0200 0300 0200 0200 0200 ffff 0000