为什么我在使用 nl_recvmsgs 时收到 Netlink ERRORMSG?

Why do I receive a Netlink ERRORMSG when using nl_recvmsgs?

我正在尝试使用 nl_recvmsgs 作为阻塞函数来接收来自内核模块的 Netlink 消息。在我的示例中，客户端先向内核发送一条消息，然后调用 nl_recvmsgs_report()（等价于 nl_recvmsgs）。随后内核模块发送一条回复消息，客户端成功接收到了该消息。

现在我希望客户端之后继续监听更多消息，于是再次调用 nl_recvmsgs_report()。内核并没有发送第二条消息，但客户端却莫名其妙地收到了一条 ERRORMSG。这会导致客户端发生 SEGFAULT，因为它试图把这条 ERRORMSG 当作普通消息（而不是 ERRORMSG）来解析。

如果我检查消息类型是否为 2 并跳过消息解析,则 nl_recvmsgs_report() 的第三次调用完全没问题。

有人知道为什么客户端会收到这个 ERRORMSG 吗?


可以看看我的 GitHub 分支：依次执行 make、sudo insmod nlk.ko、./nlclient 即可。这里我只复制了相关部分。

客户端代码

nlclient.c main()发送接收部分:

  // Excerpt from main(): send one generic netlink request, then call
  // nl_recvmsgs_report() three times using the same callback set.
  // NOTE(review): no return value in this excerpt is checked before use
  // (socket/message allocation, connect, family resolve, attribute puts, send).

  // setup netlink socket
  sk = nl_socket_alloc();
  nl_socket_disable_seq_check(sk);  // disable sequence number check
  genl_connect(sk);

  // id is passed as the family argument of genlmsg_put() below
  int id = genl_ctrl_resolve(sk, DEMO_FAMILY_NAME);

  struct nl_msg * msg;


  // create a message
  msg = nlmsg_alloc();
  genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, id, 0,    // hdrlen
                        0,  // flags: none are set explicitly by this call
                        DEMO_CMD,   // numeric command identifier
                        DEMO_VERSION    // interface version
                       );

  // payload: one string attribute and one 16-bit attribute
  nla_put_string(msg, DEMO_ATTR1_STRING, "hola");
  nla_put_u16(msg, DEMO_ATTR2_UINT16, 0xf1);

  // send it
  // NOTE(review): the [ORIGINAL MESSAGE] echoed in the second dump of the
  // console output carries flags REQUEST,ACK although 0 is passed above, so
  // the flags are presumably added on the send path -- confirm in libnl docs.
  nl_send_auto(sk, msg);

  // handle reply
  struct nl_cb * cb = NULL;
  cb = nl_cb_alloc(NL_CB_CUSTOM);

  //nl_cb_set_all(cb, NL_CB_DEBUG, NULL, NULL);
  // route every callback type to cb_handler, with &cbarg as its argument
  // (cbarg is declared outside this excerpt)
  nl_cb_set_all(cb, NL_CB_CUSTOM, cb_handler, &cbarg);
  // the error callback uses the NL_CB_DEBUG kind instead of cb_handler
  nl_cb_err(cb, NL_CB_DEBUG, NULL, NULL);

  // first receive: in the recorded run this delivers the kernel module's reply
  int nrecv = nl_recvmsgs_report(sk, cb);

  printf("cbarg %d nrecv %d\n", cbarg, nrecv);

  // second receive: in the recorded run this delivers a type 2 <ERROR>
  // message with .error = 0, not a second message from the kernel module
  printf("First test if it blocks here for incoming messages:\n");
  nrecv = nl_recvmsgs_report(sk, cb);

  printf("cbarg %d nrecv %d\n", cbarg, nrecv);

  // third receive: the recorded output ends after the banner below
  printf("Second test if it blocks here for incoming messages:\n");
  nrecv = nl_recvmsgs_report(sk, cb);

  printf("cbarg %d nrecv %d\n", cbarg, nrecv);

nlclient.c cb_handler() 解析头和消息

  // Excerpt from cb_handler(): dump the received message, then read it in
  // two different ways (policy-based parse, then a manual attribute walk).
  struct nlmsghdr * hdr = nlmsg_hdr(msg);

  // the netlink payload is treated as a generic netlink header for every
  // message, regardless of hdr->nlmsg_type
  struct genlmsghdr * gnlh = nlmsg_data(hdr);

  nl_msg_dump(msg, stderr);

  // type 2 is the value that nl_msg_dump labels <ERROR> in the console output
  if (hdr->nlmsg_type == 2) {
    printf("hdr->nlmsg_type is ERROR. Skipping message parsing!\n");    
  } else {

    // prints "OK" when genlmsg_validate returns 0, "ERROR" otherwise
    int valid =
      genlmsg_validate(hdr, 0, DEMO_ATTR_MAX, demo_gnl_policy);
    printf("valid %d %s\n", valid, valid ? "ERROR" : "OK");

    // one way
    struct nlattr * attrs[DEMO_ATTR_MAX + 1];

    if (genlmsg_parse(hdr, 0, attrs, DEMO_ATTR_MAX, demo_gnl_policy) < 0)
      {
        printf("genlsmg_parse ERROR\n");
      }

    else
      {
        printf("genlsmg_parse OK\n");

        // NOTE(review): the three attrs[] entries are used without a NULL
        // check, so all three attributes must be present in the message
        printf("attr1 %s\n", nla_get_string(attrs[DEMO_ATTR1_STRING]));
        printf("attr2 %x\n", nla_get_u16(attrs[DEMO_ATTR2_UINT16]));
        struct attr_custom * cp = (struct attr_custom *) nla_data(attrs[DEMO_ATTR3_CUSTOM]);
        printf("attr3 %d %ld %f %lf\n", cp->a, cp->b, cp->c,cp->d);

      }
    }
  // another way
  // NOTE(review): everything from here on is outside the if/else above, so
  // it also runs for type-2 messages; the recorded output shows
  // "gnlh->cmd 0" printed right after the "Skipping message parsing!" line.
  printf("gnlh->cmd %d\n", gnlh->cmd);  //--- DEMO_CMD_ECHO

  int remaining = genlmsg_attrlen(gnlh, 0);
  struct nlattr * attr = genlmsg_attrdata(gnlh, 0);

  // walk the attribute stream, printing the byte count left and each
  // attribute's address
  while (nla_ok(attr, remaining))
    {
      printf("remaining %d\n", remaining);
      printf("attr @ %p\n", attr); // nla_get_string(attr)
      attr = nla_next(attr, &remaining);
    }

内核代码

nlkernel.c demo_cmd() 发送到客户端部分:

/* Excerpt from demo_cmd(): build a reply message and unicast it to the
 * port the request came from (info->snd_portid).
 * NOTE(review): the `out` label is outside this excerpt, so whether skb is
 * released on the error paths below cannot be told from here. */
/* send message back */
    /* allocate some memory, since the size is not yet known use NLMSG_GOODSIZE */
    skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
    if (skb == NULL) {
        goto out;
    }

    /* create the message */
    /* the reply's sequence number is the request's sequence number plus one */
    msg_head =
        genlmsg_put(skb, 0, info->snd_seq + 1, &demo_gnl_family, 0,
            DEMO_CMD);

    if (msg_head == NULL) {
        rc = -ENOMEM;
        goto out;
    }

    /* Fill in the three attributes. The return codes are OR-ed together, so
     * rc only tells zero from non-zero afterwards, not which call failed.
     * NOTE(review): rc's initial value is set outside this excerpt. */
    rc |= nla_put_string(skb, DEMO_ATTR1_STRING,"world");
    rc |= nla_put_u16(skb, DEMO_ATTR2_UINT16, 0x1f);
    cp.a = 1;
    cp.b = 2;
    cp.c = 3.0;
    cp.d = 4.0;
    rc |= nla_put(skb, DEMO_ATTR3_CUSTOM, sizeof(struct attr_custom), &cp);

    if (rc != 0) {
        goto out;
    }

    /* finalize the message */
    genlmsg_end(skb, msg_head);

    /* send the message back */
    rc = genlmsg_unicast(&init_net, skb, info->snd_portid);

    if (rc != 0) {
        goto out;
    }

    return 0;

输出

nlclient 控制台输出

./nlclient 
--------------------------   BEGIN NETLINK MESSAGE ---------------------------
  [NETLINK HEADER] 16 octets
    .nlmsg_len = 76
    .type = 27 <0x1b>
    .flags = 0 <>
    .seq = 1458476257
    .port = 0
  [GENERIC NETLINK HEADER] 4 octets
    .cmd = 1
    .version = 1
    .unused = 0
  [PAYLOAD] 56 octets
    0a 00 01 00 77 6f 72 6c 64 00 00 00 06 00 02 00 ....world.......
    1f 00 00 00 24 00 03 00 01 00 00 00 ff ff ff ff ....$...........
    02 00 00 00 00 00 00 00 00 00 40 40 04 88 ff ff ..........@@....
    00 00 00 00 00 00 10 40                         .......@
---------------------------  END NETLINK MESSAGE   ---------------------------
valid 0 OK
genlsmg_parse OK
attr1 world
attr2 1f
attr3 1 2 3.000000 4.000000
gnlh->cmd 1
remaining 56
attr @ 0x10df344
remaining 44
attr @ 0x10df350
remaining 36
attr @ 0x10df358
cbarg 123 nrecv 1
First test if it blocks here for incoming messages:
--------------------------   BEGIN NETLINK MESSAGE ---------------------------
  [NETLINK HEADER] 16 octets
    .nlmsg_len = 36
    .type = 2 <ERROR>
    .flags = 0 <>
    .seq = 1458476256
    .port = -1061151077
  [ERRORMSG] 20 octets
    .error = 0 "Success"
  [ORIGINAL MESSAGE] 16 octets
    .nlmsg_len = 16
    .type = 27 <0x1b>
    .flags = 5 <REQUEST,ACK>
    .seq = 1458476256
    .port = -1061151077
---------------------------  END NETLINK MESSAGE   ---------------------------
hdr->nlmsg_type is ERROR. Skipping message parsing!
gnlh->cmd 0
cbarg 123 nrecv 1
Second test if it blocks here for incoming messages:

内核系统日志

kernel: [ 4694.318428] got demo_cmd
kernel: [ 4694.318430] attr1: hola
kernel: [ 4694.318431] attr2: f1

抱歉拖了这么久。

输出有些误导。那不是错误消息，而是一个自动 ACK。Netlink 将 ACK 定义为错误码为 0 的“错误”消息。

（在 C 语言的惯例中，0 通常表示成功。）

由于您的内核模块会自己构造并发送回复，您可能根本不需要 ACK。您可以通过添加对 nl_socket_disable_auto_ack() 的调用来阻止客户端请求 ACK。

我会把这个调用放在禁用序列号检查的那一行旁边，因为两者性质有些类似：

// Socket setup with automatic ACK requests turned off.
sk = nl_socket_alloc();
nl_socket_disable_seq_check(sk);
// stop the client from requesting an ACK for the messages it sends
nl_socket_disable_auto_ack(sk);
genl_connect(sk);