跨字边界的 MinGW64 位域访问是错误的

MinGW64 bit field access across word-boundaries is wrong

我正在研究不同的编译器如何处理结构体位域成员的未对齐访问,以及对跨越基本类型边界的成员的访问,我认为 MinGW64 存在 bug。我的测试程序是:

#include <stdint.h>
#include <stdio.h>

/* Structure for testing bit-field element access.

The crux is the ISO C99 6.7.2.1p10 item:

An implementation may allocate any addressable storage unit large enough to hold a bit-field.
If enough space remains, a bit-field that immediately follows another bit-field in a
structure shall be packed into adjacent bits of the same unit. If insufficient space remains,
whether a bit-field that does not fit is put into the next unit or overlaps adjacent units is
implementation-defined. The order of allocation of bit-fields within a unit (high-order to
low-order or low-order to high-order) is implementation-defined. The alignment of the
addressable storage unit is unspecified.

The "word N" and "bits H-L" annotations below describe the layout the author
expects: fields allocated back to back starting at the low-order bits, with a
field allowed to straddle a 32-bit word boundary.  As the quote says, this is
implementation-defined, so a given compiler/ABI may place the fields
differently; the annotations are an expectation, not a guarantee.
*/

typedef struct _my_struct
{
    /* word 0 */
    uint32_t    first           :32;    /**< A whole word element   */
    /* word 1: four byte-sized fields */
    uint32_t    second          :8;     /**< bits 7-0               */
    uint32_t    third           :8;     /**< bits 15-8              */
    uint32_t    fourth          :8;     /**< bits 23-16             */
    uint32_t    fifth           :8;     /**< bits 31-24             */
    /* word 2: two half-word fields */
    uint32_t    sixth           :16;    /**< bits 15-0              */
    uint32_t    seventh         :16;    /**< bits 31-16             */
    /* word 3 */
    uint32_t    eigth           :24;    /**< bits 23-0              */
    uint32_t    ninth           :8;     /**< bits 31-24             */
    /* word 4 */
    uint32_t    tenth           :8;     /**< bits 7-0               */
    uint32_t    eleventh        :24;    /**< bits 31-8              */
    /* word 5: the 16-bit field is not 16-bit aligned */
    uint32_t    twelfth         :8;     /**< bits 7-0               */
    uint32_t    thirteeneth     :16;    /**< bits 23-8              */
    uint32_t    fourteenth      :8;     /**< bits 31-24             */
    /* words 6 & 7: "seventeenth" is expected to straddle the boundary */
    uint32_t    fifteenth       :16;    /**< word 6, bits 15-0      */
    uint32_t    sixteenth       :8;     /**< word 6, bits 23-16     */
    uint32_t    seventeenth     :16;    /**< word 6 bits 31-24 & word 7 bits 7-0 */
    /* word 7 */
    uint32_t    eighteenth      :24;    /**< bits 31-8              */
    /* word 8 */
    uint32_t    nineteenth      :32;    /**< bits 31-0              */
    /* words 9 & 10: "twenty_first" is expected to straddle the boundary */
    uint32_t    twentieth       :16;    /**< word 9, bits 15-0      */
    uint32_t    twenty_first    :32;    /**< word 9 bits 31-16 & word 10 bits 15-0 */
    uint32_t    twenty_second   :16;    /**< word 10, bits 31-16    */
    /* word 11 */
    uint32_t    twenty_third    :32;    /**< bits 31-0              */
} __attribute__((packed)) my_struct;


/* Raw input words that main() reads through a my_struct pointer.  All sixteen
   words are different, so a value printed by the program can be traced back
   to the word it was read from. */
uint32_t buf[16] = {
    0x11223344, 0x55667788,     /* words 0  - 1  */
    0x99AABBCC, 0x01020304,     /* words 2  - 3  */
    0x05060708, 0x090A0B0C,     /* words 4  - 5  */
    0x0D0E0F10, 0x12131415,     /* words 6  - 7  */
    0x16171819, 0x20212324,     /* words 8  - 9  */
    0x25262728, 0x29303132,     /* words 10 - 11 */
    0x34353637, 0x35363738,     /* words 12 - 13 */
    0x39404142, 0x43454647      /* words 14 - 15 */
};

/* Output area for the values extracted by main(); zero-initialized as a
   file-scope object. */
uint32_t data[64];

int main(void)
{
    my_struct *p;

    p = (my_struct*) buf;

    data[0] = 0;
    data[1] = p->first;
    data[2] = p->second;
    data[3] = p->third;
    data[4] = p->fourth;
    data[5] = p->fifth;
    data[6] = p->sixth;
    data[7] = p->seventh;
    data[8] = p->eigth;
    data[9] = p->ninth;
    data[10] = p->tenth;
    data[11] = p->eleventh;
    data[12] = p->twelfth;
    data[13] = p->thirteeneth;
    data[14] = p->fourteenth;
    data[15] = p->fifteenth;
    data[16] = p->sixteenth;
    data[17] = p->seventeenth;
    data[18] = p->eighteenth;
    data[19] = p->nineteenth;
    data[20] = p->twentieth;
    data[21] = p->twenty_first;
    data[22] = p->twenty_second;
    data[23] = p->twenty_third;

    if( p->fifth == 0x55 )
    {
        data[0] = 0xCAFECAFE;
    }
    else
    {
        data[0] = 0xDEADBEEF;
    }

    int i;
    for (i = 0; i < 24; ++i) {
        printf("data[%d] = 0x%0x\n", i, data[i]);
    }
    return data[0];
}

我找到的结果是:

| Data Member | Type    | GCC Cortex M3  | GCC mingw64   | GCC Linux     | GCC Cygwin    |
|:------------|:-------:|:---------------|:--------------|:--------------|:--------------|
| data[0]     | uint32_t| 0x0            | 0xcafecafe    | 0xcafecafe    | 0xcafecafe    |
| data[1]     | uint32_t| 0x11223344     | 0x11223344    | 0x11223344    | 0x11223344    |
| data[2]     | uint32_t| 0x88           | 0x88          | 0x88          | 0x88          |
| data[3]     | uint32_t| 0x77           | 0x77          | 0x77          | 0x77          |
| data[4]     | uint32_t| 0x66           | 0x66          | 0x66          | 0x66          |
| data[5]     | uint32_t| 0x55           | 0x55          | 0x55          | 0x55          |
| data[6]     | uint32_t| 0xbbcc         | 0xbbcc        | 0xbbcc        | 0xbbcc        |
| data[7]     | uint32_t| 0x99aa         | 0x99aa        | 0x99aa        | 0x99aa        |
| data[8]     | uint32_t| 0x20304        | 0x20304       | 0x20304       | 0x20304       |
| data[9]     | uint32_t| 0x1            | 0x1           | 0x1           | 0x1           |
| data[10]    | uint32_t| 0x8            | 0x8           | 0x8           | 0x8           |
| data[11]    | uint32_t| 0x50607        | 0x50607       | 0x50607       | 0x50607       |
| data[12]    | uint32_t| 0xc            | 0xc           | 0xc           | 0xc           |
| data[13]    | uint32_t| 0xa0b          | 0xa0b         | 0xa0b         | 0xa0b         |
| data[14]    | uint32_t| 0x9            | 0x9           | 0x9           | 0x9           |
| data[15]    | uint32_t| 0xf10          | 0xf10         | 0xf10         | 0xf10         |
| data[16]    | uint32_t| 0xe            | 0xe           | 0xe           | 0xe           |
| data[17]    | uint32_t| 0x150d         | 0x1415        | 0x150d        | 0x150d        |
| data[18]    | uint32_t| 0x121314       | 0x171819      | 0x121314      | 0x121314      |
| data[19]    | uint32_t| 0x16171819     | 0x20212324    | 0x16171819    | 0x16171819    |
| data[20]    | uint32_t| 0x2324         | 0x2728        | 0x2324        | 0x2324        |
| data[21]    | uint32_t| 0x27282021     | 0x29303132    | 0x27282021    | 0x27282021    |
| data[22]    | uint32_t| 0x2526         | 0x3637        | 0x2526        | 0x2526        |
| data[23]    | uint32_t| 0x29303132     | 0x35363738    | 0x29303132    | 0x29303132    |

GCC Cortex M3 is
arm-none-eabi-gcc (GNU MCU Eclipse ARM Embedded GCC, 32-bit) 8.2.1 20181213 (release) [gcc-8-branch revision 267074]

GCC Mingw is
gcc.exe (i686-posix-dwarf-rev0, Built by MinGW-W64 project) 8.1.0

GCC Linux is
gcc (GCC) 4.4.7 20120313 (Red Hat 4.4.7-23)

GCC Cygwin is
gcc (GCC) 7.4.0

所有 GCC 版本似乎都能正确处理未对齐的访问(如 my_struct.thirteeneth)。

问题不在于跨越字边界的成员(my_struct.seventeenth)结果不同,因为上面引用的 C99 标准明确指出该行为是实现定义的。问题在于之后的所有访问(data[17] 及之后)显然都是不正确的,即使对于对齐的成员(my_struct.nineteenth 和 my_struct.twenty_third)也是如此。这是怎么回事?这是一个 bug,还是说这些值是有效的?

像 GCC 这样广泛使用的编译器出现错误的可能性不是零,而是非常小。很可能是 PEBKAS。 ;-)

无论如何,我已经用“gcc(x86_64-posix-seh-rev0,由 MinGW-W64 项目构建)8.1.0”编译了你的程序,并在“mingw64”栏中得到了与你相同的结果。

仔细观察会发现编译器将位域对齐在 32 位边界上,这恰好是 int 的宽度。这完全符合标准 C17 第 6.7.2.1 章,其中指出“跨接”(用附件 J.3.9 的话来说)是实现定义的。

其他 GCC 变体不对齐位字段并支持跨越 32 位边界。

这显然不是 bug,这些值是有效的。也许值得研究一下原因,或者提交一个功能请求。

编辑:

澄清一下,下面是按对齐方式排列后的布局。元素 seventeenth 及其之后的元素并没有问题:

/* Layout when a bit-field that does not fit in the space left in the
   current 32-bit word is placed at the start of the next word. */
/* 0x11223344: word 0 */
uint32_t    first           :32;
/* 0x55667788: word 1 */
uint32_t    second          :8;
uint32_t    third           :8;
uint32_t    fourth          :8;
uint32_t    fifth           :8;
/* 0x99AABBCC: word 2 */
uint32_t    sixth           :16;
uint32_t    seventh         :16;
/* 0x01020304: word 3 */
uint32_t    eigth           :24;
uint32_t    ninth           :8;
/* 0x05060708: word 4 */
uint32_t    tenth           :8;
uint32_t    eleventh        :24;
/* 0x090A0B0C: word 5 */
uint32_t    twelfth         :8;
uint32_t    thirteeneth     :16;
uint32_t    fourteenth      :8;
/* 0x0D0E0F10: word 6 */
uint32_t    fifteenth       :16;
uint32_t    sixteenth       :8;
/* 0x12131415: word 7, because "seventeenth" does not fit in the space left */
uint32_t    seventeenth     :16;
/* 0x16171819: word 8, because "eighteenth" does not fit in the space left */
uint32_t    eighteenth      :24;
/* 0x20212324: word 9, because "nineteenth" does not fit in the space left */
uint32_t    nineteenth      :32;
/* 0x25262728: word 10 */
uint32_t    twentieth       :16;
/* 0x29303132: word 11, because "twenty_first" does not fit in the space left */
uint32_t    twenty_first    :32;
/* 0x34353637: word 12 */
uint32_t    twenty_second   :16;
/* 0x35363738: word 13, because "twenty_third" does not fit in the space left */
uint32_t    twenty_third    :32;

您完全不能以任何方式依赖位域在结构体中的排列方式。

Per 6.7.2.1 Structure and union specifiers, paragraph 11 of the C11 standard(加粗我的):

An implementation may allocate any addressable storage unit large enough to hold a bit-field. If enough space remains, a bit-field that immediately follows another bit-field in a structure shall be packed into adjacent bits of the same unit. If insufficient space remains, whether a bit-field that does not fit is put into the next unit or overlaps adjacent units is implementation-defined. The order of allocation of bit-fields within a unit (high-order to low-order or low-order to high-order) is implementation-defined. The alignment of the addressable storage unit is unspecified.

你自己甚至引用了这段话。鉴于此,实现对位域采用的任何布局方式都谈不上 "incorrect"。

所以你不能依赖位域容器的大小。

您不能依赖位域是否跨单位。

您不能依赖单元内位域的顺序。

然而,您的问题假设以上这些都可以依赖:看到符合预期的结果时就称之为 "correct",而把不符合预期的位域布局称为 "clearly incorrect"。

不是"clearly incorrect"。

如果您需要知道某个位在结构中的什么位置,您根本无法移植地使用位域。

事实上,您在这个问题上的所有努力都是一个完美的案例研究,说明了为什么您不能依赖位域。

它没有 bug,它是按照 Windows ABI 来放置位域的。

根据 GCC 文档:

If packed is used on a structure, or if bit-fields are used, it may be that the Microsoft ABI lays out the structure differently than the way GCC normally does.

用 -mno-ms-bitfields 编译 mingw64 版本即可消除这一差异;或者用 -mms-bitfields 编译所有其他版本,使其结构体布局与 mingw 相同。