启用 RSS 的 DPDK 中只有第一个 q_ipackets 统计值返回非零数字

Only the first q_ipackets statistics return non-zero number in DPDK with RSS enabled

平台是 Ubuntu 20.04,网卡为 Intel 82599,使用 DPDK 20.08。下面是主要的演示代码。尽管每个 dummy 线程(dummy_thread)都能通过 rte_eth_rx_burst 收到数据包,但只有 q_ipackets[0] 返回非零值,并且它等于 ipackets。这是网卡不支持,还是配置错误导致的?

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <string.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>
#include <unistd.h>
#include <signal.h>
#include <pthread.h>


#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_string_fns.h>
#include <rte_acl.h>
#include <rte_ring.h>
#include <rte_ethdev.h>
#include <rte_hash.h>
#include <rte_rwlock.h>
#include <rte_flow.h>
#include <rte_ring_elem.h>
#include <rte_bpf.h>
#include <rte_member.h>

#include "include/data_structure.h"
#include "include/utils.h"

/* Port ids; NOTE(review): neither macro is referenced in the code shown here. */
#define DEFAULT_RX_PORT 0
#define DEFAULT_TX_PORT 1
/* Requested RX/TX descriptor ring sizes (may be adjusted by the driver). */
#define RTE_TEST_RX_DESC_DEFAULT 4096
#define RTE_TEST_TX_DESC_DEFAULT 4096
/* Number of mbufs in the shared packet pool. */
#define NB_MBUF 1048576
/* Cache size argument passed to rte_pktmbuf_pool_create(). */
#define MEMPOOL_CACHE_SIZE 256
/* Maximum number of packets requested per rte_eth_rx_burst() call. */
#define BURST_SIZE 32

/*
 * Template port configuration: multi-queue receive in RSS mode, no
 * multi-queue mode on transmit. main() copies this into a local struct
 * before passing it to rte_eth_dev_configure().
 */
static struct rte_eth_conf port_conf = {
    .rxmode = {
        /* Distribute received packets over the RX queues via RSS. */
        .mq_mode    = ETH_MQ_RX_RSS,
        .max_rx_pkt_len = RTE_ETHER_MAX_LEN,
    },
    .rx_adv_conf = {
        .rss_conf = {
            /* NOTE(review): NULL presumably selects the driver's default hash key - confirm. */
            .rss_key = NULL,
            /* Requested hash types: IPv4, non-fragmented IPv4 TCP and UDP. */
            .rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP,
        },
    },
    .txmode = {
        .mq_mode = ETH_MQ_TX_NONE,
    },
};


/*
 * Per-queue worker: polls one RX queue forever and discards whatever
 * arrives.
 *
 * args points to a struct thread_args whose portid/queueid fields select
 * the queue to poll. The function never returns.
 */
static int
dummy_thread(void *args)
{
    struct thread_args *ctx = (struct thread_args *)args;
    struct rte_mbuf *pkts[BURST_SIZE];
    uint32_t remaining = 0;

    printf("Entering thread...\n");
    printf("Port %u queue %u\n", ctx->portid, ctx->queueid);

    for (;;) {
        remaining = rte_eth_rx_burst(ctx->portid, ctx->queueid, pkts, BURST_SIZE);

        /* Nothing received on this poll: try again. */
        if (unlikely(remaining == 0))
            continue;

        /* Release the burst from its last packet down to index 0. */
        while (remaining > 0) {
            remaining--;
            rte_pktmbuf_free(pkts[remaining]);
        }
    }
}



int 
main(int argc, char **argv)
{
    int ret;
    ret = rte_eal_init(argc, argv);
    if(ret < 0)
        rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
    argc -= ret;
    argv += ret;

    unsigned nb_ports = rte_eth_dev_count_avail();
    uint32_t nb_lcores = rte_lcore_count();
    uint16_t portid;
    uint8_t nb_rx_queue, nb_tx_queue;
    struct rte_eth_dev_info dev_info;
    struct rte_eth_conf local_port_conf = port_conf;
    portid = 0;
    ret = rte_eth_dev_info_get(portid, &dev_info);

    nb_rx_queue = 5;
    nb_tx_queue = 0;
    ret = rte_eth_dev_configure(portid, nb_rx_queue, nb_tx_queue, &local_port_conf);
    if (ret < 0)
        rte_exit(EXIT_FAILURE,
            "Cannot configure device: err=%d, port=%d\n",
            ret, portid);
    else
        printf("Configuring finished...\n");
    
    uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
    uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
    ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
    if (ret < 0)
        rte_exit(EXIT_FAILURE,
            "rte_eth_dev_adjust_nb_rx_tx_desc: err=%d, port=%d\n",
            ret, portid);
    
    struct rte_mempool *pktmbuf_pool;
    pktmbuf_pool = rte_pktmbuf_pool_create("global_pktmbuf_pool", NB_MBUF,
        MEMPOOL_CACHE_SIZE, 0,
        RTE_MBUF_DEFAULT_BUF_SIZE,
        rte_socket_id());
    if (pktmbuf_pool == NULL)
        rte_exit(EXIT_FAILURE,
                    "Cannot init mbuf pool on socket %d\n",
                    rte_socket_id());
    else
        printf("Allocated Succeeded...\n");

    for (uint8_t queue = 0; queue < nb_rx_queue; queue ++){
        struct rte_eth_rxconf rxq_conf;

        ret = rte_eth_dev_info_get(portid, &dev_info);
        if (ret != 0)
            rte_exit(EXIT_FAILURE,
                "Error during getting device (port %u) info: %s\n",
                portid, strerror(-ret));

        rxq_conf = dev_info.default_rxconf;
        rxq_conf.offloads = port_conf.rxmode.offloads;
        ret = rte_eth_rx_queue_setup(portid, queue, nb_rxd, rte_socket_id(), &rxq_conf, pktmbuf_pool);
        if (ret < 0)
            rte_exit(EXIT_FAILURE,
                "rte_eth_rx_queue_setup: err=%d,"
                "port=%d\n", ret, portid);
    }

    struct thread_args *args = NULL;
    for (uint8_t queue = 0; queue < nb_rx_queue; queue ++){
        args = (struct thread_args*)malloc(sizeof(struct thread_args));
        args->logic_no = queue;
        args->portid = portid;
        args->queueid = queue;
        // rte_eal_remote_launch(thread, (void*)args, queue + 1);
        rte_eal_remote_launch(dummy_thread, (void*)args, queue + 1);
    }

    ret = rte_eth_promiscuous_enable(portid);
    if (ret != 0)
        rte_exit(EXIT_FAILURE,
            "rte_eth_promiscuous_enable: err=%s, port=%u\n",
            rte_strerror(-ret), portid);
    else
        printf("Enabled Promiscuous Mode...\n");
    
    if(ret == 0){
        printf("Succeeded...\n");
    }else{
        printf("failed...\n");
    }

    ret = rte_eth_dev_start(portid);
    if (ret < 0)
        rte_exit(EXIT_FAILURE,
            "rte_eth_dev_start: err=%d, port=%d\n",
            ret, portid);
    else
        printf("Starting port succeeded...\n");
    
    struct rte_eth_stats stats;
    while(true){
        sleep(2);
        rte_eth_stats_get(portid, &stats);
        printf("***************************************\n");
        for( unsigned long i = 0; i < nb_rx_queue; i++ ){
            printf("hardware port 0 queue %lu queue length: %d\n", i, rte_eth_rx_queue_count(0, i));
        }
        for( unsigned long i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++ ){
            printf("hardware port 0 queue %lu queue received: %lu\n", i, stats.q_ipackets[i]);
        }
        printf("%lu\n", stats.ipackets);
        printf("%lu\n", stats.imissed);
    }

    rte_eal_mp_wait_lcore();
}

PS:使用的流量由 MoonGen 生成,只包含 UDP 包,其 L3/L4 信息如下:

源 IP(sip):10.1.0.10 目的 IP(dip):10.2.0.10 - 10.3.0.10 源端口(sport):1234 目的端口(dport):319

ethtool 统计数据是

NIC statistics:
     rx_packets: 17029206
     tx_packets: 18
     rx_bytes: 5108761800
     tx_bytes: 1476
     rx_pkts_nic: 17029206
     tx_pkts_nic: 18
     rx_bytes_nic: 5178531776
     tx_bytes_nic: 1548
     lsc_int: 11
     tx_busy: 0
     non_eop_descs: 0
     rx_errors: 0
     tx_errors: 0
     rx_dropped: 0
     tx_dropped: 0
     multicast: 0
     broadcast: 0
     rx_no_buffer_count: 0
     collisions: 0
     rx_over_errors: 0
     rx_crc_errors: 0
     rx_frame_errors: 0
     hw_rsc_aggregated: 0
     hw_rsc_flushed: 0
     fdir_match: 0
     fdir_miss: 17045796
     fdir_overflow: 0
     rx_fifo_errors: 0
     rx_missed_errors: 5438
     tx_aborted_errors: 0
     tx_carrier_errors: 0
     tx_fifo_errors: 0
     tx_heartbeat_errors: 0
     tx_timeout_count: 0
     tx_restart_queue: 0
     rx_length_errors: 0
     rx_long_length_errors: 0
     rx_short_length_errors: 0
     tx_flow_control_xon: 2
     rx_flow_control_xon: 0
     tx_flow_control_xoff: 4
     rx_flow_control_xoff: 0
     rx_csum_offload_errors: 17028934
     alloc_rx_page: 32287
     alloc_rx_page_failed: 0
     alloc_rx_buff_failed: 0
     rx_no_dma_resources: 0
     os2bmc_rx_by_bmc: 0
     os2bmc_tx_by_bmc: 0
     os2bmc_tx_by_host: 0
     os2bmc_rx_by_host: 0
     tx_hwtstamp_timeouts: 0
     tx_hwtstamp_skipped: 0
     rx_hwtstamp_cleared: 0
     tx_ipsec: 0
     rx_ipsec: 0
     fcoe_bad_fccrc: 0
     rx_fcoe_dropped: 0
     rx_fcoe_packets: 0
     rx_fcoe_dwords: 0
     fcoe_noddp: 0
     fcoe_noddp_ext_buff: 0
     tx_fcoe_packets: 0
     tx_fcoe_dwords: 0
     tx_queue_0_packets: 0
     tx_queue_0_bytes: 0
     tx_queue_1_packets: 12
     tx_queue_1_bytes: 936
     tx_queue_2_packets: 0
     tx_queue_2_bytes: 0
     tx_queue_3_packets: 0
     tx_queue_3_bytes: 0
     tx_queue_4_packets: 0
     tx_queue_4_bytes: 0
     tx_queue_5_packets: 0
     tx_queue_5_bytes: 0
     tx_queue_6_packets: 0
     tx_queue_6_bytes: 0
     tx_queue_7_packets: 0
     tx_queue_7_bytes: 0
     tx_queue_8_packets: 0
     tx_queue_8_bytes: 0
     tx_queue_9_packets: 0
     tx_queue_9_bytes: 0
     tx_queue_10_packets: 0
     tx_queue_10_bytes: 0
     tx_queue_11_packets: 0
     tx_queue_11_bytes: 0
     tx_queue_12_packets: 0
     tx_queue_12_bytes: 0
     tx_queue_13_packets: 0
     tx_queue_13_bytes: 0
     tx_queue_14_packets: 0
     tx_queue_14_bytes: 0
     tx_queue_15_packets: 0
     tx_queue_15_bytes: 0
     tx_queue_16_packets: 0
     tx_queue_16_bytes: 0
     tx_queue_17_packets: 0
     tx_queue_17_bytes: 0
     tx_queue_18_packets: 0
     tx_queue_18_bytes: 0
     tx_queue_19_packets: 0
     tx_queue_19_bytes: 0
     tx_queue_20_packets: 0
     tx_queue_20_bytes: 0
     tx_queue_21_packets: 0
     tx_queue_21_bytes: 0
     tx_queue_22_packets: 0
     tx_queue_22_bytes: 0
     tx_queue_23_packets: 0
     tx_queue_23_bytes: 0
     tx_queue_24_packets: 0
     tx_queue_24_bytes: 0
     tx_queue_25_packets: 0
     tx_queue_25_bytes: 0
     tx_queue_26_packets: 0
     tx_queue_26_bytes: 0
     tx_queue_27_packets: 0
     tx_queue_27_bytes: 0
     tx_queue_28_packets: 0
     tx_queue_28_bytes: 0
     tx_queue_29_packets: 0
     tx_queue_29_bytes: 0
     tx_queue_30_packets: 0
     tx_queue_30_bytes: 0
     tx_queue_31_packets: 0
     tx_queue_31_bytes: 0
     tx_queue_32_packets: 0
     tx_queue_32_bytes: 0
     tx_queue_33_packets: 0
     tx_queue_33_bytes: 0
     tx_queue_34_packets: 0
     tx_queue_34_bytes: 0
     tx_queue_35_packets: 0
     tx_queue_35_bytes: 0
     tx_queue_36_packets: 0
     tx_queue_36_bytes: 0
     tx_queue_37_packets: 0
     tx_queue_37_bytes: 0
     tx_queue_38_packets: 0
     tx_queue_38_bytes: 0
     tx_queue_39_packets: 0
     tx_queue_39_bytes: 0
     tx_queue_40_packets: 0
     tx_queue_40_bytes: 0
     tx_queue_41_packets: 0
     tx_queue_41_bytes: 0
     tx_queue_42_packets: 0
     tx_queue_42_bytes: 0
     tx_queue_43_packets: 0
     tx_queue_43_bytes: 0
     tx_queue_44_packets: 0
     tx_queue_44_bytes: 0
     tx_queue_45_packets: 0
     tx_queue_45_bytes: 0
     tx_queue_46_packets: 0
     tx_queue_46_bytes: 0
     tx_queue_47_packets: 0
     tx_queue_47_bytes: 0
     tx_queue_48_packets: 0
     tx_queue_48_bytes: 0
     tx_queue_49_packets: 0
     tx_queue_49_bytes: 0
     tx_queue_50_packets: 0
     tx_queue_50_bytes: 0
     tx_queue_51_packets: 0
     tx_queue_51_bytes: 0
     tx_queue_52_packets: 0
     tx_queue_52_bytes: 0
     tx_queue_53_packets: 0
     tx_queue_53_bytes: 0
     tx_queue_54_packets: 0
     tx_queue_54_bytes: 0
     tx_queue_55_packets: 0
     tx_queue_55_bytes: 0
     tx_queue_56_packets: 0
     tx_queue_56_bytes: 0
     tx_queue_57_packets: 0
     tx_queue_57_bytes: 0
     tx_queue_58_packets: 0
     tx_queue_58_bytes: 0
     tx_queue_59_packets: 0
     tx_queue_59_bytes: 0
     tx_queue_60_packets: 0
     tx_queue_60_bytes: 0
     tx_queue_61_packets: 6
     tx_queue_61_bytes: 540
     tx_queue_62_packets: 0
     tx_queue_62_bytes: 0
     tx_queue_63_packets: 0
     tx_queue_63_bytes: 0
     rx_queue_0_packets: 1064316
     rx_queue_0_bytes: 319294800
     rx_queue_1_packets: 1064318
     rx_queue_1_bytes: 319295400
     rx_queue_2_packets: 1064328
     rx_queue_2_bytes: 319298400
     rx_queue_3_packets: 1064329
     rx_queue_3_bytes: 319298700
     rx_queue_4_packets: 1064326
     rx_queue_4_bytes: 319297800
     rx_queue_5_packets: 1064328
     rx_queue_5_bytes: 319298400
     rx_queue_6_packets: 1064330
     rx_queue_6_bytes: 319299000
     rx_queue_7_packets: 1064327
     rx_queue_7_bytes: 319298100
     rx_queue_8_packets: 1064316
     rx_queue_8_bytes: 319294800
     rx_queue_9_packets: 1064317
     rx_queue_9_bytes: 319295100
     rx_queue_10_packets: 1064329
     rx_queue_10_bytes: 319298700
     rx_queue_11_packets: 1064331
     rx_queue_11_bytes: 319299300
     rx_queue_12_packets: 1064325
     rx_queue_12_bytes: 319297500
     rx_queue_13_packets: 1064325
     rx_queue_13_bytes: 319297500
     rx_queue_14_packets: 1064331
     rx_queue_14_bytes: 319299300
     rx_queue_15_packets: 1064330
     rx_queue_15_bytes: 319299000
     rx_queue_16_packets: 0
     rx_queue_16_bytes: 0
     rx_queue_17_packets: 0
     rx_queue_17_bytes: 0
     rx_queue_18_packets: 0
     rx_queue_18_bytes: 0
     rx_queue_19_packets: 0
     rx_queue_19_bytes: 0
     rx_queue_20_packets: 0
     rx_queue_20_bytes: 0
     rx_queue_21_packets: 0
     rx_queue_21_bytes: 0
     rx_queue_22_packets: 0
     rx_queue_22_bytes: 0
     rx_queue_23_packets: 0
     rx_queue_23_bytes: 0
     rx_queue_24_packets: 0
     rx_queue_24_bytes: 0
     rx_queue_25_packets: 0
     rx_queue_25_bytes: 0
     rx_queue_26_packets: 0
     rx_queue_26_bytes: 0
     rx_queue_27_packets: 0
     rx_queue_27_bytes: 0
     rx_queue_28_packets: 0
     rx_queue_28_bytes: 0
     rx_queue_29_packets: 0
     rx_queue_29_bytes: 0
     rx_queue_30_packets: 0
     rx_queue_30_bytes: 0
     rx_queue_31_packets: 0
     rx_queue_31_bytes: 0
     rx_queue_32_packets: 0
     rx_queue_32_bytes: 0
     rx_queue_33_packets: 0
     rx_queue_33_bytes: 0
     rx_queue_34_packets: 0
     rx_queue_34_bytes: 0
     rx_queue_35_packets: 0
     rx_queue_35_bytes: 0
     rx_queue_36_packets: 0
     rx_queue_36_bytes: 0
     rx_queue_37_packets: 0
     rx_queue_37_bytes: 0
     rx_queue_38_packets: 0
     rx_queue_38_bytes: 0
     rx_queue_39_packets: 0
     rx_queue_39_bytes: 0
     rx_queue_40_packets: 0
     rx_queue_40_bytes: 0
     rx_queue_41_packets: 0
     rx_queue_41_bytes: 0
     rx_queue_42_packets: 0
     rx_queue_42_bytes: 0
     rx_queue_43_packets: 0
     rx_queue_43_bytes: 0
     rx_queue_44_packets: 0
     rx_queue_44_bytes: 0
     rx_queue_45_packets: 0
     rx_queue_45_bytes: 0
     rx_queue_46_packets: 0
     rx_queue_46_bytes: 0
     rx_queue_47_packets: 0
     rx_queue_47_bytes: 0
     rx_queue_48_packets: 0
     rx_queue_48_bytes: 0
     rx_queue_49_packets: 0
     rx_queue_49_bytes: 0
     rx_queue_50_packets: 0
     rx_queue_50_bytes: 0
     rx_queue_51_packets: 0
     rx_queue_51_bytes: 0
     rx_queue_52_packets: 0
     rx_queue_52_bytes: 0
     rx_queue_53_packets: 0
     rx_queue_53_bytes: 0
     rx_queue_54_packets: 0
     rx_queue_54_bytes: 0
     rx_queue_55_packets: 0
     rx_queue_55_bytes: 0
     rx_queue_56_packets: 0
     rx_queue_56_bytes: 0
     rx_queue_57_packets: 0
     rx_queue_57_bytes: 0
     rx_queue_58_packets: 0
     rx_queue_58_bytes: 0
     rx_queue_59_packets: 0
     rx_queue_59_bytes: 0
     rx_queue_60_packets: 0
     rx_queue_60_bytes: 0
     rx_queue_61_packets: 0
     rx_queue_61_bytes: 0
     rx_queue_62_packets: 0
     rx_queue_62_bytes: 0
     rx_queue_63_packets: 0
     rx_queue_63_bytes: 0
     tx_pb_0_pxon: 0
     tx_pb_0_pxoff: 0
     tx_pb_1_pxon: 0
     tx_pb_1_pxoff: 0
     tx_pb_2_pxon: 0
     tx_pb_2_pxoff: 0
     tx_pb_3_pxon: 0
     tx_pb_3_pxoff: 0
     tx_pb_4_pxon: 0
     tx_pb_4_pxoff: 0
     tx_pb_5_pxon: 0
     tx_pb_5_pxoff: 0
     tx_pb_6_pxon: 0
     tx_pb_6_pxoff: 0
     tx_pb_7_pxon: 0
     tx_pb_7_pxoff: 0
     rx_pb_0_pxon: 0
     rx_pb_0_pxoff: 0
     rx_pb_1_pxon: 0
     rx_pb_1_pxoff: 0
     rx_pb_2_pxon: 0
     rx_pb_2_pxoff: 0
     rx_pb_3_pxon: 0
     rx_pb_3_pxoff: 0
     rx_pb_4_pxon: 0
     rx_pb_4_pxoff: 0
     rx_pb_5_pxon: 0
     rx_pb_5_pxoff: 0
     rx_pb_6_pxon: 0
     rx_pb_6_pxoff: 0
     rx_pb_7_pxon: 0
     rx_pb_7_pxoff: 0

[基于评论更新和代码片段共享的 EDIT-1]

Intel 82599 网卡在 DPDK 下支持使用多个 RX 队列接收、多个 TX 队列发送。统计数据有两种类型:基于 PMD 的 rte_eth_stats_get,以及基于硬件寄存器的 rte_eth_xstats_get。

使用 DPDK 的 rte_eth_stats_get 统计数据时,PMD 会在每次调用 rte_eth_rx_burst 时更新 RX 统计数据。因此,需要定期调用 rte_eth_rx_burst,或在轮询循环中持续运行它。

解决方案:在调用 rte_eth_dev_configure 之前修改启用 RSS 的配置代码,使流量能够分布到多个 RX 队列中。将当前的 rte_eth_dev_configure 调用替换为以下代码:

struct rte_eth_conf local_port_conf = port_conf;
local_port_conf.rx_adv_conf.rss_conf.rss_hf &= dev_info.flow_type_rss_offloads;
if (local_port_conf.rx_adv_conf.rss_conf.rss_hf != port_conf_default.rx_adv_conf.rss_conf.rss_hf)
{
        printf("Port %u modified RSS hash function based on hardware support,"
                "requested:%#"PRIx64" configured:%#"PRIx64"\n",
                port,
                port_conf.rx_adv_conf.rss_conf.rss_hf,
                local_port_conf.rx_adv_conf.rss_conf.rss_hf);
}
retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &local_port_conf);

注意:我已经在英特尔 FVL 和 CVL 网卡上验证过同样的做法;如有更新,请通过评论告知。