分配给结构字段时无符号短更改值

Question

我有一个函数可以解析 20 字节长的 IP header 缓冲区：

void parseIp(struct ipHeader *ip, const void *buffer)
{
    uint8_t* b = buffer;
   // memcpy(b,buffer,20);
    ip->version = (b[0] & 0xf0) >> 4;
    ip->ihl = (b[0] & 0x0f);
    ip->dscp = (b[1] & 0xfC)>>2;
    ip->ecn = (b[1] & 0x3);

    unsigned short l = (b[2] << 8) | b[3];

    printf("%d\n",l);
    ip->length = l;
    ip->identification = (b[4] << 0xFF) | b[5];
}

构造 ipHeader:

struct ipHeader {
    int version;
    int ihl;
    int dscp;
    int ecn;
    unsigned short length;
    unsigned short identification;
    int flags;
    int fragment_offset;
    int time_to_live;
    int protocol;
    unsigned short header_checksum;
    unsigned char source_ip[4];
    unsigned char destination_ip[4];
};

现在代码将 l 打印为 467，这是正确的，但由于此 l 已分配给结构字段长度，因此它变为 54017。我完全不明白发生了什么。我添加了变量 l 以确保不会发生溢出或类型转换错误，但它仍然会发生变化。

这是学校作业的一部分，所以我无法更改结构。

编辑完整代码：

#include <stdio.h>
#include <arpa/inet.h>
#include "ipheader.h"


/* Parses the given buffer into an IP header structure.
 * 
 * Parameters:
 * ip: pointer to the IP header structure that will be filled based
 *      on the data in the buffer
 * buffer: buffer of 20 bytes that contain the IP header. */
    void parseIp(struct ipHeader *ip, const void *buffer)
    {
        uint8_t* b = buffer;
       // memcpy(b,buffer,20);
        ip->version = (b[0] & 0xf0) >> 4;
        ip->ihl = (b[0] & 0x0f);
        ip->dscp = (b[1] & 0xfC)>>2;
        ip->ecn = (b[1] & 0x3);

        unsigned short l = (b[2] << 8) | b[3];

        printf("%d\n",l);
        ip->length = l;
        ip->identification = (b[4] << 8) | b[5];
    }


/* Builds a 20-byte byte stream based on the given IP header structure
 * 
 * Parameters:
 * buffer: pointer to the 20-byte buffer to which the header is constructed
 * ip: IP header structure that will be packed to the buffer */
void sendIp(void *buffer, const struct ipHeader *ip)
{
}


/* Prints the given IP header structure */
void printIp(const struct ipHeader *ip)
{
    /* Note: ntohs below is for converting numbers from network byte order
     to host byte order. You can ignore them for now
     To be discussed further in Network Programming course... */
    printf("version: %d   ihl: %d   dscp: %d   ecn: %d\n",
            ip->version, ip->ihl, ip->dscp, ip->ecn);
    printf("length: %d   id: %d   flags: %d   offset: %d\n",
            ntohs(ip->length), ntohs(ip->identification), ip->flags, ip->fragment_offset);
    printf("time to live: %d   protocol: %d   checksum: 0x%04x\n",
            ip->time_to_live, ip->protocol, ntohs(ip->header_checksum));
    printf("source ip: %d.%d.%d.%d\n", ip->source_ip[0], ip->source_ip[1],
            ip->source_ip[2], ip->source_ip[3]);
    printf("destination ip: %d.%d.%d.%d\n", ip->destination_ip[0],
            ip->destination_ip[1],
            ip->destination_ip[2], ip->destination_ip[3]);    
}

/* Shows hexdump of given data buffer */
void hexdump(const void *buffer, unsigned int length)
{
    const unsigned char *cbuf = buffer;
    unsigned int i;
    for (i = 0; i < length; ) {
        printf("%02x ", cbuf[i]);
        i++;
        if (!(i % 8))
            printf("\n");
    }
}

struct ipHeader {
    int version;
    int ihl;
    int dscp;
    int ecn;
    unsigned short length;
    unsigned short identification;
    int flags;
    int fragment_offset;
    int time_to_live;
    int protocol;
    unsigned short header_checksum;
    unsigned char source_ip[4];
    unsigned char destination_ip[4];
};

void parseIp(struct ipHeader *ip, const void *buffer);
void sendIp(void *buffer, const struct ipHeader *ip);

void printIp(const struct ipHeader *ip);
void hexdump(const void *buffer, unsigned int length);


#include <arpa/inet.h>
#include "ipheader.h"

int main()
{
    /* Feel free to modify this function to test different things */

    unsigned char bytes[] = {
        0x45, 0x00, 0x01, 0xd3, 0xda, 0x8d, 0x40, 0x00,
        0x40, 0x06, 0x8c, 0xd5, 0xc0, 0xa8, 0x01, 0x46,
        0x6c, 0xa0, 0xa3, 0x33 };

    struct ipHeader ip;

    parseIp(&ip, bytes);
    printIp(&ip);

    struct ipHeader ipfields = {
        4, // version
        28, // ihl
        4, // dscp
        0, // ecn
        htons(1500), // length
        htons(1234), // id
        1, // flags
        1024, // offset
        15, // time_to_live
        33, // protocol
        htons(0x1234), // checksum (invalid)
        {1, 2, 3, 4}, // source IP
        {5, 6, 7, 8} // destination IP
    };
    unsigned char sendbuf[20];

    sendIp(sendbuf, &ipfields);
    hexdump(sendbuf, sizeof(sendbuf));
}

Answer 1

不确定您是否了解字节序（大 endian/Little 字节序）参考：https://en.wikipedia.org/wiki/Endianness

基本上 Little-endian 格式颠倒顺序并将最低有效字节存储在较低的内存地址，而最高有效字节存储在最高内存地址。

因此，当您分配 I (467 = 0x1d3) 时，它以小端格式存储，具体取决于您的机器字节序，即 (0xd301 = 54017)。

因此，如果您希望分配正确的值，请使用 htons。

Answer 2

正如其他人所提到的，看起来您可能遇到了一些移动问题，因为您正在移动超出数据类型的宽度，这会导致 ..? （我不知道，似乎有关于这是否是定义行为的争论）。幸运的是，在我的机器上，您的代码结果为 467，这是正确的。但是，转换取消引用明确定义了您想要的内容。

 unsigned short l = (((unsigned short)b[2]) << 8) | ((unsigned short)b[3]);

此外，您需要担心字节顺序（您确实如此），因为网络 header 代码应该始终是大端，如果我不这样做的话，我个人不会担心移位不得不。对于 header 中多字节且落在字节边界上的部分，我会这样做：

 ip->length = ntohs(((unsigned short*)b)[1]);
 ip->identification = ntohs(((unsigned short*)b)[2]);
 ip->header_checksum = ntohs(((unsigned short*)b[5]);
 /*
 unsigned int sourceIpAddr = ntohl(((unsigned int*)b)[3]);
 unsigned int destIpAddr = ntohl(((unsigned int*)b[4]);
 Not sure what endianess you want for the source and destination IPs since those are just byte arrays
 */

请注意，当您将 b 转换为不同的指针类型时，索引会发生变化。

如果我可以完全控制结构，我会使用位字段创建整个结构，那么你根本不必担心移位，但你说结构是为你定义的。

Answer 3

对于给定的输入：

unsigned char bytes[] = { 0x45, 0x00, 0x01, 0xd3, 0xda,

然后是代码：

unsigned short l = (b[2] << 8) | b[3];

生成值为 467 的 l。

你在问题中说，"as this l is assigned to the struct field length it changes to 54017. "。然而，事实并非如此。如果您在现有 ip->length = l;:

之后立即添加一行

printf("%d\n", ip->length);

您仍然会看到 467。

我猜你提到的问题是你的 printIp 函数打印 54017。这是因为那个函数不打印 ip->length。它打印 ntohs(ip->length)。 ntohs 宏将值从 567 更改为 54017。

要解决此问题，请将 printIp 函数更改为打印 ip->length，而不是 ntohs(ip->length)。

也从该函数中删除其他 ntohs 调用，并从 ipfields 的定义中删除 htons。整数应该以 host 顺序（即本机顺序）存储在 struct ipHeader 中，并存储在 network 顺序中（即大端）在 unsigned char 缓冲区中时。

可移植性注释 1： 从技术上讲，您应该在这两个 printf 语句中使用 %hu 作为格式说明符，因为参数类型是 unsigned short.

可移植性注释 2： l == 467 无论 int 大小如何，与迄今为止某些 comments/answers 中的建议相反.但是要在 16 位 int 系统上运行时支持大于 0x7F 的 b[2] 值，您应该编写 ((unsigned)b[2] << 8) | b[3]。

可移植性注意事项 3：最好使用 uint16_t 而不是 unsigned short，因为现在有 32 位的系统 unsigned short。如果这样做，printf 格式说明符是 "%"PRI16u，您可能需要 #include <inttypes.h>

分配给结构字段时无符号短更改值

Unsigned short changes value when assigning to struct field

c

struct

short