为什么我的非阻塞原始套接字程序运行得这么慢?

Why is my non blocking raw sockets program running so slowly?

我有一个程序,使用 PF_PACKET 原始套接字向一组 Web 服务器发送 TCP SYN 数据包。该程序读入一个文件,文件的每一行是一个 Web 服务器的 IPv4 地址。这个程序是我尝试以高性能方式连接大量服务器的第一步。但是,目前该程序每秒只能发送大约 10 个数据包(10 packets/second),尽管它使用的是非阻塞套接字。它本应快好几个数量级。有人知道它为什么运行得这么慢吗?

我在下面附上了完整的代码清单。警告:代码很长,因为获取网关路由器的 IP 和 MAC 地址需要大量代码。好消息是您可以跳过 main 之前的所有函数,因为它们只负责获取路由器的 IP 和 MAC 地址以及本机 IP 地址这些必要的准备工作。无论如何,代码如下:

#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <net/ethernet.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <netinet/tcp.h>    //Provides declarations for tcp header
#include <netinet/ip.h> //Provides declarations for ip header
#include <netinet/ether.h>
#include <ifaddrs.h>
#include <asm/types.h>
#include <linux/if_ether.h>
//#include <linux/if_arp.h>
#include <arpa/inet.h>  //htons etc
#include <time.h>
#include <linux/rtnetlink.h>
#include <sys/resource.h>

/* EtherType compared against the received frame's h_proto in read_arp(). */
#define PROTO_ARP 0x0806
/* Byte offset from the start of a frame buffer to the ARP payload
 * (used for arp_resp and in send_arp()). */
#define ETH2_HEADER_LEN 14
/* Value written to the ARP hardware_type field in send_arp(). */
#define HW_TYPE 1
/* Number of bytes copied for a MAC address. */
#define MAC_LENGTH 6
/* Number of bytes in an IPv4 address as stored in struct arp_header. */
#define IPV4_LENGTH 4
/* ARP opcode written by send_arp(). */
#define ARP_REQUEST 0x01
/* ARP opcode that read_arp() accepts. */
#define ARP_REPLY 0x02
/* Size of the ARP send/receive frame buffers. */
#define BUF_SIZE 60
/* NOTE(review): not referenced anywhere in the code shown. */
#define MAX_CONNECTIONS 10000

/*
 * printf-style logging helpers; each prints the formatted message followed
 * by a newline on stdout.
 *
 * The two printf calls are wrapped in do { ... } while (0) so that an
 * invocation behaves as a single statement: without the wrapper,
 * "if (c) err(...);" would only guard the first printf and an "else" after
 * it would not compile.
 */
#define debug(x...) do { printf(x); printf("\n"); } while (0)
#define info(x...) do { printf(x); printf("\n"); } while (0)
#define warn(x...) do { printf(x); printf("\n"); } while (0)
#define err(x...) do { printf(x); printf("\n"); } while (0)

/* Interface name used for the packet socket; main() points this at ifname. */
static char * str_devname= NULL;
/* Value passed to the PACKET_LOSS socket option in main(). */
static int mode_loss     = 0;
/* Only referenced from a commented-out line in main(). */
static int c_packet_sz   = 150;
/* Used by main() as both tp_block_size and tp_frame_size of the TX ring. */
static int c_buffer_sz   = 1024*8;
/* Used by main() as both tp_block_nr and tp_frame_nr of the TX ring. */
static int c_buffer_nb   = 1024;
/* If non-zero, main() applies it with SO_SNDBUF. */
static int c_sndbuf_sz   = 0;
/* NOTE(review): the next four are not referenced in the code shown. */
static int c_send_mask   = 127;
static int c_error       = 0;
static int c_mtu         = 0;
static int mode_thread   = 0;

/* Packet socket created by main(). */
volatile int fd_socket;
/* Offset of the frame data inside a TX-ring slot; computed in main(). */
volatile int data_offset = 0;
/* Start of the mmap()ed TX ring. */
volatile struct tpacket_hdr * ps_header_start;
/* Passed as the destination address to sendto() in main(); never assigned,
 * so it is always NULL in the code shown. */
volatile struct sockaddr_ll *ps_sockaddr = NULL;
/* NOTE(review): not referenced in the code shown. */
volatile int shutdown_flag = 0;
/* Set by main() when the input file has no more addresses. */
int done = 0;
/* TX ring geometry handed to PACKET_TX_RING. */
struct tpacket_req s_packet_req;
/* Receive buffer filled by read_arp(). */
unsigned char buffer[BUF_SIZE];
/* View of the ARP payload inside buffer; main() later reads sender_mac from
 * it to use as the destination MAC of outgoing frames. */
struct arp_header *arp_resp = (struct arp_header *) (buffer + ETH2_HEADER_LEN);
/* Interface name and gateway address text stored by print_route(). */
char ifname[512];
char ip[512];

/*
 * 96 bit (12 byte) pseudo header that is prepended to the TCP header when
 * the TCP checksum is computed. main() fills it in and copies it, followed
 * by the TCP header, into a scratch buffer that is passed to checksum2().
 */
struct pseudo_header
{
    u_int32_t source_address;   /* set from inet_addr(source_ip) in main() */
    u_int32_t dest_address;     /* set to the target server address */
    u_int8_t placeholder;       /* always written as 0 */
    u_int8_t protocol;          /* set to IPPROTO_TCP */
    u_int16_t tcp_length;       /* htons(sizeof(struct tcphdr)) in main() */
};


/*
 * ARP packet body as it is laid over a frame buffer at offset
 * ETH2_HEADER_LEN. send_arp() fills one in for the request; read_arp()
 * reads the reply through the global arp_resp pointer.
 * The 16-bit fields are written with htons() and read with ntohs().
 */
struct arp_header {
    unsigned short hardware_type;           /* send_arp() writes HW_TYPE */
    unsigned short protocol_type;           /* send_arp() writes ETH_P_IP */
    unsigned char hardware_len;             /* send_arp() writes MAC_LENGTH */
    unsigned char protocol_len;             /* send_arp() writes IPV4_LENGTH */
    unsigned short opcode;                  /* ARP_REQUEST or ARP_REPLY */
    unsigned char sender_mac[MAC_LENGTH];
    unsigned char sender_ip[IPV4_LENGTH];
    unsigned char target_mac[MAC_LENGTH];
    unsigned char target_ip[IPV4_LENGTH];
};

/*
 * Receive one message from fd with recvmsg(), retrying for as long as the
 * call fails with EINTR or EAGAIN.
 *
 * Returns the byte count reported by recvmsg() on success, -errno when the
 * call fails for any other reason, or -ENODATA when recvmsg() returns 0.
 */
int rtnl_receive(int fd, struct msghdr *msg, int flags)
{
    int len;

    do {
        len = recvmsg(fd, msg, flags);
    } while (len < 0 && (errno == EINTR || errno == EAGAIN));

    if (len < 0) {
        /* perror() is allowed to change errno, so capture it first. */
        int saved_errno = errno;

        perror("Netlink receive failed");
        return -saved_errno;
    }

    if (len == 0) {
        /* A zero return does not set errno, so perror() would append an
         * unrelated error string here. */
        fprintf(stderr, "EOF on netlink\n");
        return -ENODATA;
    }

    return len;
}

/*
 * Receive one netlink datagram into a freshly malloc()ed buffer.
 *
 * The message is first received with an empty iovec and
 * MSG_PEEK | MSG_TRUNC; the value returned by that call is used as the
 * allocation size for the second, real receive.
 *
 * On success the buffer is stored in *answer (the caller frees it) and the
 * received length is returned. On failure a negative value is returned and
 * *answer is left untouched.
 */
static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
{
    struct iovec *vec = msg->msg_iov;
    char *storage;
    int nbytes;

    /* Sizing pass: nothing is copied because the iovec is empty. */
    vec->iov_len = 0;
    vec->iov_base = NULL;
    nbytes = rtnl_receive(fd, msg, MSG_PEEK | MSG_TRUNC);
    if (nbytes < 0)
        return nbytes;

    storage = malloc(nbytes);
    if (storage == NULL) {
        perror("malloc failed");
        return -ENOMEM;
    }

    /* Real receive into the buffer that was just sized. */
    vec->iov_base = storage;
    vec->iov_len = nbytes;
    nbytes = rtnl_receive(fd, msg, 0);
    if (nbytes < 0) {
        free(storage);
        return nbytes;
    }

    *answer = storage;
    return nbytes;
}

/*
 * Index a run of route attributes by type.
 *
 * tb must have room for max + 1 entries; all of them are cleared first.
 * Each attribute in the len bytes starting at rta whose type is <= max is
 * stored at tb[type] (a later attribute of the same type replaces an
 * earlier one). Attributes with a larger type are skipped.
 */
void parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
    struct rtattr *cur;

    memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

    /* RTA_NEXT also decrements len by the aligned attribute size. */
    for (cur = rta; RTA_OK(cur, len); cur = RTA_NEXT(cur, len)) {
        if (cur->rta_type > max)
            continue;
        tb[cur->rta_type] = cur;
    }
}

/*
 * Return the routing table id of a route message: the value of the
 * RTA_TABLE attribute when tb has one, otherwise the rtm_table field of r.
 */
static inline int rtm_get_table(struct rtmsg *r, struct rtattr **tb)
{
    struct rtattr *attr = tb[RTA_TABLE];
    __u32 table = attr ? *(__u32 *)RTA_DATA(attr) : r->rtm_table;

    return table;
}

/*
 * Print one route from a netlink route message and remember the gateway.
 *
 * Only IPv4 routes from the main routing table are handled; of the routes
 * that carry a destination, only /24 and /16 prefixes are printed.
 *
 * Side effects: when the route has a gateway, its dotted-quad text is
 * stored in the global `ip`; when it has both a gateway and an output
 * interface whose name can be resolved, the interface name is stored in
 * the global `ifname`.
 */
void print_route(struct nlmsghdr* nl_header_answer)
{
    struct rtmsg* r = NLMSG_DATA(nl_header_answer);
    int len = nl_header_answer->nlmsg_len;
    struct rtattr* tb[RTA_MAX+1];
    int table;
    char buf[256];
    char if_nam_buf[IF_NAMESIZE];
    const char *gateway = NULL;

    len -= NLMSG_LENGTH(sizeof(*r));

    if (len < 0) {
        /* Not a failed library call, so errno carries no information. */
        fprintf(stderr, "Wrong message length\n");
        return;
    }

    parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len);

    table = rtm_get_table(r, tb);

    /* Skip anything that is not IPv4 *or* not in the main table. (With
     * "&&" a route was only skipped when both conditions held, so IPv4
     * routes from other tables could overwrite the gateway.) */
    if (r->rtm_family != AF_INET || table != RT_TABLE_MAIN) {
        return;
    }

    if (tb[RTA_DST]) {
        if ((r->rtm_dst_len != 24) && (r->rtm_dst_len != 16)) {
            return;
        }

        printf("%s/%u ", inet_ntop(r->rtm_family, RTA_DATA(tb[RTA_DST]), buf, sizeof(buf)), r->rtm_dst_len);

    } else if (r->rtm_dst_len) {
        printf("0/%u ", r->rtm_dst_len);
    } else {
        printf("default ");
    }

    if (tb[RTA_GATEWAY]) {
        /* Convert once; never hand a NULL result to a string function. */
        gateway = inet_ntop(r->rtm_family, RTA_DATA(tb[RTA_GATEWAY]), buf, sizeof(buf));
        if (gateway) {
            printf("via %s", gateway);
            snprintf(ip, sizeof(ip), "%s", gateway);
        }
    }

    if (tb[RTA_OIF]) {
        int ifidx = *(__u32 *)RTA_DATA(tb[RTA_OIF]);
        /* if_indextoname() returns NULL when the index has no name. */
        const char *dev = if_indextoname(ifidx, if_nam_buf);

        if (dev) {
            printf(" dev %s", dev);
            if (gateway) {
                snprintf(ifname, sizeof(ifname), "%s", dev);
            }
        }
    }

    if (tb[RTA_SRC]) {
        printf("src %s", inet_ntop(r->rtm_family, RTA_DATA(tb[RTA_SRC]), buf, sizeof(buf)));
    }

    printf("\n");
}

/*
 * Create a NETLINK_ROUTE raw socket and bind it with nl_pid set to this
 * process id.
 *
 * Returns the socket descriptor, or -1 after printing a message if either
 * socket() or bind() fails (the socket is closed in the latter case).
 */
int open_netlink()
{
    struct sockaddr_nl local;
    int fd;

    fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
    if (fd < 0) {
        perror("Failed to open netlink socket");
        return -1;
    }

    memset(&local, 0, sizeof(local));
    local.nl_pid = getpid();
    local.nl_family = AF_NETLINK;

    if (bind(fd, (struct sockaddr *)&local, sizeof(local)) >= 0) {
        return fd;
    }

    perror("Failed to bind to netlink socket");
    close(fd);
    return -1;
}

/*
 * Send an RTM_GETROUTE dump request for AF_INET on the netlink socket.
 *
 * Returns the result of send(): the number of bytes sent, or -1 on error.
 */
int do_route_dump_requst(int sock)
{
    struct {
        struct nlmsghdr nlh;
        struct rtmsg rtm;
    } nl_request;

    /* Zero the whole request first: only a handful of fields are assigned
     * below, and every other byte (nlmsg_pid, the remaining rtmsg fields,
     * padding) would otherwise go to the kernel as uninitialised stack
     * contents. */
    memset(&nl_request, 0, sizeof(nl_request));

    nl_request.nlh.nlmsg_type = RTM_GETROUTE;
    nl_request.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
    nl_request.nlh.nlmsg_len = sizeof(nl_request);
    nl_request.nlh.nlmsg_seq = time(NULL);
    nl_request.rtm.rtm_family = AF_INET;

    return send(sock, &nl_request, sizeof(nl_request), 0);
}

/*
 * Receive one netlink datagram from sock and pass every route message in
 * it to print_route().
 *
 * Returns the received length on success, or a negative value if the
 * receive fails, the dump was interrupted, or the kernel reported an error.
 *
 * NOTE(review): only a single datagram is read. If the dump is split across
 * several datagrams the remaining parts are left unread on the socket.
 */
int get_route_dump_response(int sock)
{
    struct sockaddr_nl nladdr;
    struct iovec iov;
    struct msghdr msg = {
        .msg_name = &nladdr,
        .msg_namelen = sizeof(nladdr),
        .msg_iov = &iov,
        .msg_iovlen = 1,
    };

    char *buf = NULL;
    struct nlmsghdr *h;
    int msglen;

    int status = rtnl_recvmsg(sock, &msg, &buf);

    /* On failure rtnl_recvmsg() does not set buf, so there is nothing to
     * parse and nothing to free. */
    if (status < 0) {
        return status;
    }

    printf("Main routing table IPv4\n");

    /* The sender address belongs to the datagram, not to the individual
     * messages, so it is checked once. (Checking it inside the loop with
     * "continue" never advanced h and spun forever.) */
    if (nladdr.nl_pid != 0) {
        free(buf);
        return status;
    }

    msglen = status;
    for (h = (struct nlmsghdr *)buf; NLMSG_OK(h, msglen); h = NLMSG_NEXT(h, msglen)) {
        if (h->nlmsg_flags & NLM_F_DUMP_INTR) {
            fprintf(stderr, "Dump was interrupted\n");
            free(buf);
            return -1;
        }

        if (h->nlmsg_type == NLMSG_ERROR) {
            /* The payload is a struct nlmsgerr whose error field holds a
             * negated errno value; errno itself is not set. Stop here
             * instead of continuing to walk a buffer that was freed. */
            if (h->nlmsg_len >= NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
                struct nlmsgerr *e = NLMSG_DATA(h);

                fprintf(stderr, "netlink reported error: %s\n", strerror(-e->error));
            } else {
                fprintf(stderr, "netlink reported error\n");
            }
            free(buf);
            return -1;
        }

        /* End-of-dump marker: it is not a route message. */
        if (h->nlmsg_type == NLMSG_DONE) {
            break;
        }

        print_route(h);
    }

    free(buf);

    return status;
}


/*
 * Extract the IPv4 address from a struct sockaddr.
 *
 * The address is copied to *ip unchanged, i.e. in the byte order in which
 * it is stored in sin_addr.s_addr.
 * Returns 0 on success, or 1 (after logging) when addr is not AF_INET.
 */
int int_ip4(struct sockaddr *addr, uint32_t *ip)
{
    struct sockaddr_in *in4;

    if (addr->sa_family != AF_INET) {
        err("Not AF_INET");
        return 1;
    }

    in4 = (struct sockaddr_in *) addr;
    *ip = in4->sin_addr.s_addr;
    return 0;
}

/*
 * Write the dotted-quad text of the IPv4 address in addr to out.
 *
 * out must be large enough for the text and its terminator; no length is
 * checked here.
 * Returns 0 on success, -1 if addr is not AF_INET, -2 if inet_ntoa()
 * yields NULL.
 */
int format_ip4(struct sockaddr *addr, char *out)
{
    const char *text;

    if (addr->sa_family != AF_INET) {
        return -1;
    }

    text = inet_ntoa(((struct sockaddr_in *) addr)->sin_addr);
    if (text == NULL) {
        return -2;
    }

    strcpy(out, text);
    return 0;
}

/*
 * Writes interface IPv4 address as network byte order to ip.
 * Returns 0 on success.
 */
int get_if_ip4(int fd, const char *ifname, uint32_t *ip) {
    int err = -1;
    struct ifreq ifr;
    memset(&ifr, 0, sizeof(struct ifreq));
    if (strlen(ifname) > (IFNAMSIZ - 1)) {
        err("Too long interface name");
        goto out;
    }

    strcpy(ifr.ifr_name, ifname);
    if (ioctl(fd, SIOCGIFADDR, &ifr) == -1) {
        perror("SIOCGIFADDR");
        goto out;
    }

    if (int_ip4(&ifr.ifr_addr, ip)) {
        goto out;
    }
    err = 0;
out:
    return err;
}

/*
 * Broadcast an ARP who-has request for dst_ip on interface ifindex.
 *
 * src_mac (MAC_LENGTH bytes) and src_ip are used as the sender addresses in
 * both the Ethernet and ARP headers. src_ip and dst_ip are copied into the
 * packet byte-for-byte. 42 bytes are sent: the 14-byte Ethernet header
 * followed by the ARP body.
 *
 * Returns 0 on success, -1 if sendto() fails.
 */
int send_arp(int fd, int ifindex, const unsigned char *src_mac, uint32_t src_ip, uint32_t dst_ip)
{
    unsigned char frame[BUF_SIZE];
    struct ethhdr *eth = (struct ethhdr *) frame;
    struct arp_header *arp = (struct arp_header *) (frame + ETH2_HEADER_LEN);
    struct sockaddr_ll dest;

    memset(frame, 0, sizeof(frame));

    /* Link-layer address handed to sendto(). */
    dest.sll_family = AF_PACKET;
    dest.sll_protocol = htons(ETH_P_ARP);
    dest.sll_ifindex = ifindex;
    dest.sll_hatype = htons(ARPHRD_ETHER);
    dest.sll_pkttype = (PACKET_BROADCAST);
    dest.sll_halen = MAC_LENGTH;
    memcpy(dest.sll_addr, src_mac, MAC_LENGTH);
    dest.sll_addr[6] = 0x00;
    dest.sll_addr[7] = 0x00;

    /* Ethernet header: broadcast destination, our MAC as source. */
    memset(eth->h_dest, 0xff, MAC_LENGTH);
    memcpy(eth->h_source, src_mac, MAC_LENGTH);
    eth->h_proto = htons(ETH_P_ARP);

    /* ARP request body; the target MAC is the unknown, so it stays zero. */
    arp->hardware_type = htons(HW_TYPE);
    arp->protocol_type = htons(ETH_P_IP);
    arp->hardware_len = MAC_LENGTH;
    arp->protocol_len = IPV4_LENGTH;
    arp->opcode = htons(ARP_REQUEST);
    memcpy(arp->sender_mac, src_mac, MAC_LENGTH);
    memset(arp->target_mac, 0x00, MAC_LENGTH);

    debug("Copy IP address to arp_req");
    memcpy(arp->sender_ip, &src_ip, sizeof(uint32_t));
    memcpy(arp->target_ip, &dst_ip, sizeof(uint32_t));

    if (sendto(fd, frame, 42, 0, (struct sockaddr *) &dest, sizeof(dest)) == -1) {
        perror("sendto():");
        return -1;
    }

    return 0;
}

/*
 * Look up information about interface ifname:
 *   *ifindex - interface index (SIOCGIFINDEX)
 *   mac      - MAC_LENGTH bytes of hardware address (SIOCGIFHWADDR)
 *   *ip      - IPv4 address, via get_if_ip4()
 *
 * A temporary AF_PACKET socket is opened for the ioctls and closed before
 * returning. Returns 0 on success, -1 on any failure; outputs written
 * before the failing step keep their new values.
 */
int get_if_info(const char *ifname, uint32_t *ip, char *mac, int *ifindex)
{
    debug("get_if_info for %s", ifname);
    int ret = -1;
    struct ifreq ifr;
    int sd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ARP));
    /* socket() reports failure with -1; descriptor 0 is a valid result. */
    if (sd < 0) {
        perror("socket()");
        goto out;
    }
    if (strlen(ifname) > (IFNAMSIZ - 1)) {
        printf("Too long interface name, MAX=%i\n", IFNAMSIZ - 1);
        goto out;
    }

    /* Start from a zeroed request so no stack garbage reaches the kernel. */
    memset(&ifr, 0, sizeof(ifr));
    strcpy(ifr.ifr_name, ifname);

    //Get interface index using name
    if (ioctl(sd, SIOCGIFINDEX, &ifr) == -1) {
        perror("SIOCGIFINDEX");
        goto out;
    }
    *ifindex = ifr.ifr_ifindex;
    printf("interface index is %d\n", *ifindex);

    //Get MAC address of the interface
    if (ioctl(sd, SIOCGIFHWADDR, &ifr) == -1) {
        perror("SIOCGIFHWADDR");
        goto out;
    }

    //Copy mac address to output
    memcpy(mac, ifr.ifr_hwaddr.sa_data, MAC_LENGTH);

    if (get_if_ip4(sd, ifname, ip)) {
        goto out;
    }
    debug("get_if_info OK");

    ret = 0;
out:
    if (sd >= 0) {
        debug("Clean up temporary socket");
        close(sd);
    }
    return ret;
}

/*
 * Create an AF_PACKET raw socket for ETH_P_ARP and bind it to interface
 * ifindex.
 *
 * On success returns 0 and stores the descriptor in *fd.
 * On failure returns -1 and leaves *fd at -1: if the socket had been
 * created it is closed here, so the caller must not close it again.
 */
int bind_arp(int ifindex, int *fd)
{
    struct sockaddr_ll sll;

    debug("bind_arp: ifindex=%i", ifindex);

    // Submit request for a raw socket descriptor.
    *fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ARP));
    /* socket() reports failure with -1; descriptor 0 is a valid result. */
    if (*fd < 0) {
        perror("socket()");
        return -1;
    }

    debug("Binding to ifindex %i", ifindex);
    memset(&sll, 0, sizeof(struct sockaddr_ll));
    sll.sll_family = AF_PACKET;
    sll.sll_ifindex = ifindex;
    if (bind(*fd, (struct sockaddr*) &sll, sizeof(struct sockaddr_ll)) < 0) {
        perror("bind");
        debug("Cleanup socket");
        close(*fd);
        /* Invalidate the output so a caller cannot close a descriptor
         * number that may since have been reused. */
        *fd = -1;
        return -1;
    }

    return 0;
}

/*
 * Reads a single ARP reply from fd.
 * Return 0 on success.
 */
int read_arp(int fd)
{
    debug("read_arp");
    int ret = -1;
    ssize_t length = recvfrom(fd, buffer, BUF_SIZE, 0, NULL, NULL);
    int index;
    if (length == -1) {
        perror("recvfrom()");
        goto out;
    }
    struct ethhdr *rcv_resp = (struct ethhdr *) buffer;
    if (ntohs(rcv_resp->h_proto) != PROTO_ARP) {
        debug("Not an ARP packet");
        goto out;
    }
    if (ntohs(arp_resp->opcode) != ARP_REPLY) {
        debug("Not an ARP reply");
        goto out;
    }
    debug("received ARP len=%ld", length);
    struct in_addr sender_a;
    memset(&sender_a, 0, sizeof(struct in_addr));
    memcpy(&sender_a.s_addr, arp_resp->sender_ip, sizeof(uint32_t));
    debug("Sender IP: %s", inet_ntoa(sender_a));

    debug("Sender MAC: %02X:%02X:%02X:%02X:%02X:%02X",
          arp_resp->sender_mac[0],
          arp_resp->sender_mac[1],
          arp_resp->sender_mac[2],
          arp_resp->sender_mac[3],
          arp_resp->sender_mac[4],
          arp_resp->sender_mac[5]);

    ret = 0;

out:
    return ret;
}

/*
 * Send an ARP who-has request for IPv4 address <ip> on interface <ifname>
 * and wait until read_arp() reports an ARP reply.
 *
 * Returns 0 once a reply has been read, 1 if <ip> is not a usable address,
 * and -1 if looking up the interface, binding, or sending fails.
 *
 * NOTE(review): the wait loop has no timeout; it only ends when read_arp()
 * returns 0.
 */
int test_arping(const char *ifname, const char *ip) {
    int ret = -1;
    /* -1 marks "no socket yet" so the cleanup below is safe on every path. */
    int arp_fd = -1;
    uint32_t src;
    int ifindex;
    char mac[MAC_LENGTH];

    uint32_t dst = inet_addr(ip);
    if (dst == 0 || dst == 0xffffffff) {
        /* The address being validated is the ARP target. */
        printf("Invalid destination IP\n");
        return 1;
    }

    if (get_if_info(ifname, &src, mac, &ifindex)) {
        err("get_if_info failed, interface %s not found or no IP set?", ifname);
        goto out;
    }

    if (bind_arp(ifindex, &arp_fd)) {
        err("Failed to bind_arp()");
        /* bind_arp() closes its own socket when it fails. */
        arp_fd = -1;
        goto out;
    }

    if (send_arp(arp_fd, ifindex, (const unsigned char *) mac, src, dst)) {
        err("Failed to send_arp");
        goto out;
    }

    while(1) {
        int r = read_arp(arp_fd);
        if (r == 0) {
            info("Got reply, break out");
            break;
        }
    }

    ret = 0;
out:
    if (arp_fd >= 0) {
        close(arp_fd);
    }
    return ret;
}

/*
 * Compute the 16-bit one's-complement Internet checksum over size bytes
 * starting at buf.
 *
 * The data is summed in native-endian words, so the returned value is
 * meant to be stored directly into the header field (as main() does); no
 * htons() is applied. An odd trailing byte is treated as if it were
 * followed by a zero byte.
 *
 * buf may have any alignment, and exactly size bytes are read.
 */
unsigned short checksum2(const char *buf, unsigned size)
{
    const unsigned char *p = (const unsigned char *) buf;
    uint64_t sum = 0;
    uint32_t lo, hi;
    uint16_t a, b;

    /* Main loop - 8 bytes at a time. memcpy keeps the loads valid for
     * unaligned input and avoids type-punned pointer reads. */
    while (size >= sizeof(uint64_t)) {
        uint64_t w;

        memcpy(&w, p, sizeof(w));
        sum += w;
        if (sum < w) sum++;     /* end-around carry */
        p += sizeof(w);
        size -= sizeof(w);
    }

    /* Tail of fewer than 8 bytes: bits 2, 1 and 0 of size say whether a
     * 4-, 2- and 1-byte piece remain. */
    if (size & 4) {
        uint32_t w;

        memcpy(&w, p, sizeof(w));
        sum += w;
        if (sum < w) sum++;
        p += sizeof(w);
    }

    if (size & 2) {
        uint16_t w;

        memcpy(&w, p, sizeof(w));
        sum += w;
        if (sum < w) sum++;
        p += sizeof(w);
    }

    /* Only bit 0 decides whether a last byte exists. Testing "size != 0"
     * here would read one byte past the buffer whenever the tail was 2, 4
     * or 6 bytes (for example a 20-byte IP header). */
    if (size & 1) {
        uint16_t w = 0;

        memcpy(&w, p, 1);       /* byte followed by zero padding */
        sum += w;
        if (sum < w) sum++;
    }

    /* Fold down to 16 bits */
    lo = (uint32_t) sum;
    hi = (uint32_t) (sum >> 32);
    lo += hi;
    if (lo < hi) lo++;
    a = (uint16_t) lo;
    b = (uint16_t) (lo >> 16);
    a = (uint16_t) (a + b);
    if (a < b) a++;

    return (unsigned short) ~a;
}

/*
 * Program entry point.
 *
 * Usage: <prog> <INPUT_FILE>. The file is read one whitespace-separated
 * token at a time; each token is passed to inet_addr() and used as the
 * destination of one TCP SYN frame to port 80.
 *
 * Outline:
 *   1. Raise the RLIMIT_NOFILE soft limit to the hard limit.
 *   2. Dump the routing table over netlink; print_route() stores the
 *      gateway address and interface name in the globals ip / ifname.
 *   3. ARP for the gateway (test_arping); the reply stays in the global
 *      buffer and its sender MAC is used as the destination MAC below.
 *   4. Read the local MAC and IPv4 address of the interface.
 *   5. For every address in the file: create a packet socket, configure a
 *      TX ring, mmap it, build one frame in the first slot, send it, then
 *      unmap and close again.
 *
 * NOTE(review): step 5 repeats the complete socket / setsockopt / mmap /
 * munmap / close sequence for every single packet.
 * NOTE(review): returns 1 after the input is exhausted.
 */
int main( int argc, char ** argv )
{
    uint32_t size;
    size_t len;
    struct sockaddr_ll my_addr, peer_addr;
    int i_ifindex;
    int ec;
    struct ifreq s_ifr; /* points to one interface returned from ioctl */
    int tmp;
    FILE * fp;
    char server[254];
    int count = 0;
    int first_time = 1;
    int z;
    int first_mmap = 1;
    
    #define HWADDR_len 6
    #define IP_len 4
    int s,s2,i;
    struct ifreq ifr,ifr2;
    int ret = -1;
    struct rlimit lim;
    
    if (argc != 2) {
        printf("Usage: %s <INPUT_FILE>\n", argv[0]);
        return 1;
    }

    /* Raise the soft open-file limit to the hard limit. */
    getrlimit(RLIMIT_NOFILE, &lim);
    printf("Soft: %d Hard: %d\n", (int)lim.rlim_cur, (int)lim.rlim_max);
    lim.rlim_cur = lim.rlim_max;
    
    
    if (setrlimit(RLIMIT_NOFILE, &lim) == -1) {
        printf("rlimit failed\n");
        return -1;
    }
    getrlimit(RLIMIT_NOFILE, &lim);
    printf("New Soft: %d New Hard: %d\n", (int)lim.rlim_cur, (int)lim.rlim_max);
    
    /* NOTE(review): the return value of open_netlink() is not checked. */
    int nl_sock = open_netlink();

    if (do_route_dump_requst(nl_sock) < 0) {
        perror("Failed to perfom request");
        close(nl_sock);
        return -1;
    }

    /* Fills the globals ifname and ip as a side effect of print_route(). */
    get_route_dump_response(nl_sock);

    close (nl_sock);
    
    /* Resolve the gateway MAC; the result is read later through arp_resp.
     * NOTE(review): the return value is ignored. */
    test_arping(ifname, ip);

    
    /* Fetch the interface's own MAC (ifr) and IPv4 address (ifr2).
     * NOTE(review): none of these calls are checked, and s2 is never
     * closed. */
    s = socket(AF_INET, SOCK_DGRAM, 0);
    s2 = socket(AF_INET, SOCK_DGRAM, 0);
    strcpy(ifr.ifr_name, ifname);
    strcpy(ifr2.ifr_name, ifname);
    ioctl(s, SIOCGIFHWADDR, &ifr);
    ioctl(s2, SIOCGIFADDR, &ifr2);
    struct sockaddr_in* ipaddr = (struct sockaddr_in*)&ifr2.ifr_addr;
    close(s);

    /* NOTE(review): fp is never passed to fclose(). */
    fp = fopen(argv[1], "r");
    if (!fp)
        exit(EXIT_FAILURE);


    /* One iteration per address read from the input file. */
    while (!done)
    {   
        fd_socket = socket(PF_PACKET, SOCK_RAW|SOCK_NONBLOCK, htons(ETH_P_ALL));
        if(fd_socket == -1)
        {
            perror("socket");
            return EXIT_FAILURE;
        }
    
        /* clear structure */
        memset(&my_addr, 0, sizeof(struct sockaddr_ll));
        my_addr.sll_family = PF_PACKET;
        my_addr.sll_protocol = htons(ETH_P_ALL);
    
        str_devname = ifname;
        //strcpy (str_devname, ifname);
        
        /* initialize interface struct */
        strncpy (s_ifr.ifr_name, str_devname, sizeof(s_ifr.ifr_name));
    
        /* Look up the interface index for the device name */
        ec = ioctl(fd_socket, SIOCGIFINDEX, &s_ifr);
        if(ec == -1)
        {
            perror("iotcl");
            return EXIT_FAILURE;
        }
    
        /* update with interface index */
        i_ifindex = s_ifr.ifr_ifindex;
    
        s_ifr.ifr_mtu = 7200;
        /* update the mtu through ioctl
         * NOTE(review): this changes the interface MTU to 7200 on every
         * iteration. */
        ec = ioctl(fd_socket, SIOCSIFMTU, &s_ifr);
        if(ec == -1)
        {
            perror("iotcl");
            return EXIT_FAILURE;
        }
    
        /* set sockaddr info (this overwrites the values assigned above)
         * NOTE(review): sll_protocol is assigned without htons() here,
         * unlike the earlier assignment. */
        memset(&my_addr, 0, sizeof(struct sockaddr_ll));
        my_addr.sll_family = AF_PACKET;
        my_addr.sll_protocol = ETH_P_ALL;
        my_addr.sll_ifindex = i_ifindex;
    
        /* bind the socket to the interface */
        if (bind(fd_socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll)) == -1)
        {
            perror("bind");
            return EXIT_FAILURE;
        }
    
        /* prepare Tx ring request: block size equals frame size, so each
         * block holds one frame */
        s_packet_req.tp_block_size = c_buffer_sz;
        s_packet_req.tp_frame_size = c_buffer_sz;
        s_packet_req.tp_block_nr = c_buffer_nb;
        s_packet_req.tp_frame_nr = c_buffer_nb;
    
        /* calculate memory to mmap in the kernel */
        size = s_packet_req.tp_block_size * s_packet_req.tp_block_nr;
    
        /* set packet loss option */
        tmp = mode_loss;
        if (setsockopt(fd_socket, SOL_PACKET, PACKET_LOSS, (char *)&tmp, sizeof(tmp))<0)
        {
            perror("setsockopt: PACKET_LOSS");
            return EXIT_FAILURE;
        }
        
        /* send TX ring request */
        if (setsockopt(fd_socket, SOL_PACKET, PACKET_TX_RING, (char *)&s_packet_req, sizeof(s_packet_req))<0)
        {
            perror("setsockopt: PACKET_TX_RING");
            return EXIT_FAILURE;
        }
    
        /* change send buffer size (skipped while c_sndbuf_sz is 0) */
        if(c_sndbuf_sz) {
            printf("send buff size = %d\n", c_sndbuf_sz);
            if (setsockopt(fd_socket, SOL_SOCKET, SO_SNDBUF, &c_sndbuf_sz, sizeof(c_sndbuf_sz))< 0)
            {
                perror("getsockopt: SO_SNDBUF");
                return EXIT_FAILURE;
            }
        }
    
        /* get data offset */
        data_offset = TPACKET_HDRLEN - sizeof(struct sockaddr_ll);
    
        /* mmap Tx ring buffers memory */
        ps_header_start = mmap(0, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd_socket, 0);
        if (ps_header_start == (void*)-1)
        {
            perror("mmap");
            return EXIT_FAILURE;
        }
    
    
        int i,j;
        /* i_index is re-created as 0 on every iteration, so the frame
         * selected below is always the first slot of the ring. */
        int i_index = 0;
        char * data;
        int first_loop = 1;
        struct tpacket_hdr * ps_header;
        int ec_send = 0;
 
        int i_index_start = i_index;
        
        ps_header = ((struct tpacket_hdr *)((void *)ps_header_start + (c_buffer_sz*i_index)));
        data = ((void*) ps_header) + data_offset;
        //Datagram to represent the packet
        char datagram[4096] , source_ip[32] , *pseudogram;
    
        //zero out the packet buffer
        memset (datagram, 0, 4096);
    
        //Ethernet header
        struct ether_header *eh = (struct ether_header *) datagram;
        
        //IP header
        struct iphdr *iph = (struct iphdr *) (datagram + sizeof (struct ether_header));
    
        //TCP header
        struct tcphdr *tcph = (struct tcphdr *) (datagram + sizeof (struct ether_header) + sizeof (struct ip));
        struct sockaddr_in sin;
        struct pseudo_header psh;
    
        //some address resolution
        strcpy(source_ip , inet_ntoa(ipaddr->sin_addr));
        sin.sin_family = AF_INET;
        sin.sin_port = htons(80);
        /* Next destination from the input file; stop when none is left.
         * NOTE(review): this break skips the munmap()/close() at the end
         * of the loop body, so the last socket and mapping are left to
         * process exit. */
        if (fscanf(fp, "%253s", server) == 1)
            sin.sin_addr.s_addr = inet_addr (server);   
        else
        {
            done = 1;
            break;
        }
                        
        //Fill in the Ethernet Header: destination is the MAC from the ARP reply
        eh->ether_dhost[0] = arp_resp->sender_mac[0];
        eh->ether_dhost[1] = arp_resp->sender_mac[1];
        eh->ether_dhost[2] = arp_resp->sender_mac[2];
        eh->ether_dhost[3] = arp_resp->sender_mac[3];
        eh->ether_dhost[4] = arp_resp->sender_mac[4];
        eh->ether_dhost[5] = arp_resp->sender_mac[5];

        memcpy(eh->ether_shost, ifr.ifr_hwaddr.sa_data, HWADDR_len);
        eh->ether_type = htons(0x0800);
                        
        //Fill in the IP Header
        iph->ihl = 5;
        iph->version = 4;
        iph->tos = 0;
        iph->tot_len = htons(sizeof (struct iphdr) + sizeof (struct tcphdr));
        iph->id = htons (54321);    //Id of this packet
        iph->frag_off = 0;
        iph->ttl = 255;
        iph->protocol = IPPROTO_TCP;
        iph->check = 0;     //Set to 0 before calculating checksum
        iph->saddr = inet_addr ( source_ip );
        iph->daddr = sin.sin_addr.s_addr;
    
        //Ip checksum over the IP header only
        iph->check = checksum2 (datagram + sizeof (struct ether_header), sizeof (struct iphdr));
    
        //TCP Header
        tcph->source = htons (1234);
        tcph->dest = htons (80);
        tcph->seq = 0;
        tcph->ack_seq = 0;
        tcph->doff = 5; //tcp header size
        tcph->fin=0;
        tcph->syn=1;
        tcph->rst=0;
        tcph->psh=0;
        tcph->ack=0;
        tcph->urg=0;
        tcph->window = htons (5840);    // maximum allowed window size 
        tcph->check = 0;    //leave checksum 0 now, filled later by pseudo header
        tcph->urg_ptr = 0;

        //Now the TCP checksum
        psh.source_address = inet_addr( source_ip );
        psh.dest_address = sin.sin_addr.s_addr;
        psh.placeholder = 0;
        psh.protocol = IPPROTO_TCP;
        psh.tcp_length = htons(sizeof(struct tcphdr));
    
        /* Scratch buffer: pseudo header followed by the TCP header.
         * NOTE(review): the malloc() result is not checked. */
        int psize = sizeof(struct pseudo_header) + sizeof(struct tcphdr);
        pseudogram = malloc(psize);
    
        memcpy(pseudogram , (char*) &psh , sizeof (struct pseudo_header));
        memcpy(pseudogram + sizeof(struct pseudo_header) , tcph , sizeof(struct tcphdr));
    
        tcph->check = checksum2(pseudogram , psize);
                        
        /* Copy the finished frame into the ring slot. */
        memcpy(data, datagram, (sizeof(struct ether_header) + sizeof(struct iphdr) + sizeof(struct tcphdr)));
        free(pseudogram);
        len = sizeof(struct ether_header) + sizeof(struct iphdr) + sizeof(struct tcphdr);
        
        i_index ++;
        if(i_index >= c_buffer_nb)
        {
            i_index = 0;
            first_loop = 0;
        }
 
        /* update packet len */
        //ps_header->tp_len = c_packet_sz;
        ps_header->tp_len = len;
        /* set header flag to USER (trigs xmit)*/
        ps_header->tp_status = TP_STATUS_SEND_REQUEST;
        
        //int ec_send;
        static int total=0;
        //int blocking = 1;
        
        /* send all buffers with TP_STATUS_SEND_REQUEST */
        /* ps_sockaddr is NULL here; see its definition. */
        //ec_send = sendto(fd_socket,NULL,0,(blocking? 0 : MSG_DONTWAIT),(struct sockaddr *) ps_sockaddr,sizeof(struct sockaddr_ll));
        ec_send = sendto(fd_socket,NULL,len,MSG_DONTWAIT,(struct sockaddr *) ps_sockaddr,sizeof(struct sockaddr_ll));
        
        if(ec_send < 0) {
            perror("sendto");
        }
        else if ( ec_send == 0 ) {
            /* nothing to do => schedule : useful if no SMP */
            printf("Sleeping\n");
            usleep(0);
        }
        else {
            total += ec_send/(len);
            printf("send %d packets (+%d bytes)\n",total, ec_send);
            fflush(0);
        }
        //ps_header_start = mmap(0, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd_socket, 0);
        /* Tear the ring and socket down again before the next address. */
        if (munmap(ps_header_start, size) == -1)
        {
            perror("munmap");
            exit(EXIT_FAILURE);
        }       
    
        close(fd_socket);
    }
    return 1;
}

下面是发送 5,000 多个数据包时 strace -c 的输出:

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 48.11    3.962165         395     10012           setsockopt
 16.69    1.374748         274      5014           mmap
 14.85    1.222565         244      5007           munmap
 10.91    0.898695         179      5016           close
  3.15    0.259055          25     10022           ioctl
  2.04    0.167613          33      5016           socket
  1.70    0.139623          27      5008           sendto
  1.41    0.116430          23      5025           write
  1.14    0.093826          18      5008           bind
  0.01    0.000505          26        19           read
  0.00    0.000000           0         4           mprotect
  0.00    0.000000           0         3           brk
  0.00    0.000000           0         4           pread64
  0.00    0.000000           0         3         1 access
  0.00    0.000000           0         1           getpid
  0.00    0.000000           0         1           recvfrom
  0.00    0.000000           0         2           recvmsg
  0.00    0.000000           0         1           execve
  0.00    0.000000           0         2         1 arch_prctl
  0.00    0.000000           0         1           set_tid_address
  0.00    0.000000           0         3           openat
  0.00    0.000000           0         4           newfstatat
  0.00    0.000000           0         1           set_robust_list
  0.00    0.000000           0         4           prlimit64
  0.00    0.000000           0         1           getrandom
------ ----------- ----------- --------- --------- ----------------
100.00    8.235225         149     55182         2 total

如果我对代码的理解没错,您对每个 IP 地址都重复做了大量本不需要重做的工作。每次执行主循环时,您都在:

  • 正在创建一个新的数据包套接字
  • 绑定它
  • 正在设置 tx 数据包环形缓冲区
  • 映射它
  • 发送单个数据包
  • 取消映射
  • 关闭套接字

系统为一个数据包做的工作量很大。

您应该只在一开始创建一个数据包套接字,只设置一次 tx 缓冲区并 mmap 一次,然后让它一直保持打开状态,直到程序结束。您可以通过该接口发送任意数量的数据包,而无需反复关闭/重新打开(closing/re-opening)套接字。

这就是为什么耗时最多的系统调用是 setsockopt、mmap、munmap 等。所有这些操作在内核中的开销都很大。

此外,PACKET_TX_RING 的要点在于:您可以设置一个大缓冲区,并在缓冲区内一个接一个地构建数据包,而无需为每个数据包进行一次 send 系统调用。通过设置数据包 header 的 tp_status 字段,您告诉内核该帧已准备好发送。然后,您将环形缓冲区内的指针前进到下一个可用槽位并构建另一个数据包。当您没有更多的数据包要构建时(或者缓冲区的可用空间已被填满,即已经环绕到最旧的、仍在发送中(still-in-flight)的帧),再进行一次 send/sendto 调用,告诉内核去查看您的缓冲区并(开始)发送所有这些数据包。

然后您可以开始构建更多数据包(通过 tp_status 字段小心确保它们未被内核使用)。

话虽如此,如果这是我在做的项目,我会大幅简化——至少在第一阶段:创建一个数据包套接字,将其绑定到接口,一次构建一个数据包,然后每帧调用一次 send(即不使用 PACKET_TX_RING)。只有当性能要求严格到必须发送得更快时,我才会费心去设置和使用环形缓冲区。我怀疑您并不需要它。去掉多余的 setsockopt、mmap 调用之后,程序应该会快很多。

最后,non-blocking 套接字只有在您等待期间还有其他事情可做时才有用。在这种情况下,如果您把套接字设置为 non-blocking,而某次调用因为“本来会阻塞”而无法发送数据包,那么 send 调用就会失败;如果您对此不做任何处理(比如把数据包放入某个队列,稍后重试),该数据包就会丢失。在这个程序中,我看不出使用 non-blocking 套接字有任何好处。如果套接字阻塞,那是因为设备的传输队列(queue)已满。此时继续生成要发送的数据包没有意义,因为这些数据包同样发不出去。在队列排空之前,直接在该处阻塞要简单得多。