使用 uint32_t 存储四个单独的 uint8_t 值
Use a uint32_t to store four separate uint8_t values
我想使用一个 uint32_t 来存储 4 个独立的 uint8_t 值,并且能够单独读/写其中的每一个值。
用以下任一方式来设置各个 8 位字段的值是否安全?如果安全,哪一种更好(更快、可移植性更好)?我不一定要一次设置全部四个值,这里只是说明如何在任意时刻设置其中某一个 8 位值。
uint32_t x = ...;
选项 1:
((uint8_t *)(&x))[0] = a;
((uint8_t *)(&x))[1] = b;
((uint8_t *)(&x))[2] = c;
((uint8_t *)(&x))[3] = d;
选项 2:
x = (x & 0xFFFFFF00) | (uint32_t) a;
x = (x & 0xFFFF00FF) | (uint32_t) b << 8;
x = (x & 0xFF00FFFF) | (uint32_t) c << 16;
x = (x & 0x00FFFFFF) | (uint32_t) d << 24;
您最初的版本中,选项 2 的写法是正确的(尽管有些迂回),即
// a, b, c, and d are initialized and of type uint8_t
uint32_t x = ...;
x = (x & 0xFFFFFF00) | (uint32_t) a;
x = (x & 0xFFFF00FF) | (uint32_t) b << 8;
x = (x & 0xFF00FFFF) | (uint32_t) c << 16;
x = (x & 0x00FFFFFF) | (uint32_t) d << 24;
选项 2 的修订是错误的:
uint32_t x = ...;
x |= (uint32_t) a;
x |= (uint32_t) b << 8;
x |= (uint32_t) c << 16;
x |= (uint32_t) d << 24;
即使 x 已经被初始化,这种写法仍然是错误的:它并没有“设置”对应的 8 位字段,而只是把新值按位或(OR)到原有的位上,原来已经为 1 的位不会被清除。
正确的做法是
// a, b, c, and d are initialized and of type uint8_t
uint32_t x = (uint32_t) a;
x |= (uint32_t) b << 8;
x |= (uint32_t) c << 16;
x |= (uint32_t) d << 24;
或者更简洁
// a, b, c, and d are initialized and of type uint8_t
uint32_t x =
(uint32_t) a
| (uint32_t) b << 8
| (uint32_t) c << 16
| (uint32_t) d << 24;
选项 1 的问题在于它假定 uint32_t 的字节顺序是最低有效字节(LSB)在前,即小端序,因此不是可移植的解决方案。
在您对问题作出澄清之后可以确定:您最初的版本(本回答中的第一个代码块)才是正确的做法。它保持其余 24 位不变,同时把指定的 8 位字段设置为右侧(RHS)的 uint8_t 值。
通用函数(pos 取 0、8、16 或 24):
/*
 * Replace one 8-bit field of a 32-bit word.
 *
 * val: the word to modify (passed by value; the updated word is returned).
 * a:   the byte to store in the field.
 * pos: bit offset of the field's least-significant bit; 0, 8, 16 or 24
 *      select the four byte-aligned fields.
 *
 * Returns val with the 8 bits starting at bit pos replaced by a; bits that
 * would land above bit 31 are discarded. Shifting by a negative count or by
 * 32 or more is undefined behavior in C, so for such a pos the word is
 * returned unchanged instead of performing the shift.
 */
uint32_t setb(uint32_t val, uint8_t a, int pos)
{
    if (pos < 0 || pos > 31)
        return val;

    /* Clear the target field first, then OR the new byte into the gap. */
    val &= ~(0xffUL << pos);
    val |= (uint32_t)a << pos;
    return val;
}
如果您不喜欢使用移位运算:
/*
 * Pack four bytes into one 32-bit word without using shift operators.
 * Each byte is scaled by the power of 256 that matches its position:
 * a ends up in bits 0-7, b in bits 8-15, c in bits 16-23, d in bits 24-31.
 */
uint32_t f(uint8_t a, uint8_t b,uint8_t c,uint8_t d) {
    unsigned long packed = a;

    packed += 0x100UL * b;
    packed += 0x10000UL * c;
    packed += 0x1000000UL * d;
    return packed;
}
任何优秀的优化编译器都会生成非常高效的代码:
gcc ARM
@ f: listing given as gcc's ARM output for the C function f(a, b, c, d).
@ Adds r2 << 16, r3 << 24 and r1 << 8 to r0 and leaves the total in r0.
@ NOTE(review): presumably AAPCS argument passing (r0=a, r1=b, r2=c, r3=d,
@ result in r0) — confirm against the build that produced this listing.
f:
@ r0 = r0 + (r2 << 16)
add r0, r0, r2, lsl #16
@ r0 = r0 + (r3 << 24)
add r0, r0, r3, lsl #24
@ r0 = r0 + (r1 << 8)
add r0, r0, r1, lsl #8
@ branch to the address held in lr
bx lr
clang x86
# f: listing given as clang's x86 output for the C function f(a, b, c, d).
# Builds eax as rdi + (esi << 8), then ORs in edx << 16 and ecx << 24.
# NOTE(review): presumably System V AMD64 argument passing (edi=a, esi=b,
# edx=c, ecx=d, result in eax) — confirm against the build that produced it.
f: # @f
# esi <<= 8
shl esi, 8
# eax = low 32 bits of (rsi + rdi); lea does the addition
lea eax, [rsi + rdi]
# edx <<= 16, then merge into eax
shl edx, 16
or eax, edx
# ecx <<= 24, then merge into eax
shl ecx, 24
or eax, ecx
ret
只有在非常小的微控制器上,我才会推荐使用联合体(union)的方式:
/*
 * Pack four bytes into a 32-bit word by overlaying a 4-byte array on it.
 * a, b, c and d are stored at increasing addresses of the word, so which
 * bits of the returned value each byte occupies depends on the target's
 * byte order.
 */
uint32_t g(uint8_t a, uint8_t b,uint8_t c,uint8_t d)
{
    union
    {
        uint32_t v32;
        uint8_t v8[4];
    } overlay;

    /* Fill the word byte by byte, lowest address first. */
    overlay.v8[0] = a;
    overlay.v8[1] = b;
    overlay.v8[2] = c;
    overlay.v8[3] = d;

    /* Read the same storage back as one 32-bit value. */
    return overlay.v32;
}
因为在这类平台上,编译器更容易对它进行优化(下面依次列出 f 与 g 在这类目标上的编译结果):
; Symbol alias: __zero_reg__ stands for register number 1 (r1).
; NOTE(review): the adc instructions below only propagate the carry if r1
; holds zero, as the name suggests — confirm for this toolchain.
__zero_reg__ = 1
; f: listing presented as the compiled form of the arithmetic C function f.
; What the instructions do: form the 32-bit sum r24 + r22 in r27:r24, shift
; it left once per count in r20 + 8, then once per count in r18 + 16, and
; finally return r24 in r25 with r22..r24 cleared.
; NOTE(review): the shift counts come from registers rather than being the
; constants 8/16/24, and only one result byte can be non-zero. That does not
; look like a plain packing of four bytes — check which source expression
; this listing was actually built from.
f:
; r16/r17 are used as scratch below, so they are saved here
push r16
push r17
; zero the upper three bytes of the 32-bit accumulator r27:r24
ldi r25,lo8(0)
ldi r26,lo8(0)
ldi r27,hi8(0)
; r27:r24 = r24 + r22, carry rippled through the upper bytes
add r24,r22
adc r25,__zero_reg__
adc r26,__zero_reg__
adc r27,__zero_reg__
; r21:r20 = r20 + 8 (done by subtracting -8 with borrow)
ldi r21,lo8(0)
subi r20,lo8(-(8))
sbci r21,hi8(-(8))
; enter the loop at its counter test
rjmp 2f
; loop body: shift r27:r24 left by one bit
1: lsl r24
rol r25
rol r26
rol r27
; decrement the counter; repeat while the result is not negative
2: dec r20
brpl 1b
; r19:r18 = r18 + 16 (same subtract-a-negative pattern)
ldi r19,lo8(0)
subi r18,lo8(-(16))
sbci r19,hi8(-(16))
rjmp 2f
; second loop: again shift r27:r24 left by one bit per iteration
1: lsl r24
rol r25
rol r26
rol r27
2: dec r18
brpl 1b
; r19:r16 = r24 placed in the top byte, lower three bytes cleared
mov r19,r24
clr r18
clr r17
clr r16
; copy r19:r16 into r25:r22
mov r22,r16
mov r23,r17
mov r24,r18
mov r25,r19
; restore the saved registers and return
pop r17
pop r16
ret
; g: listing presented as the compiled form of the union-based C function g.
; Consists only of register moves: r22 <- r24, r23 <- old r22 (via r17),
; r24 <- r20, r25 <- r18.
; NOTE(review): presumably the avr-gcc ABI (a=r24, b=r22, c=r20, d=r18,
; 32-bit result in r25:r22) — confirm against the toolchain used.
g:
; r17 is used as scratch below; r16 is saved and restored but not used here
push r16
push r17
; r25 = r18
mov r25,r18
; keep the old r22 in r17 because the next move overwrites r22
mov r17,r22
; r22 = r24
mov r22,r24
; r23 = old r22
mov r23,r17
; r24 = r20
mov r24,r20
; restore the saved registers and return
pop r17
pop r16
ret
我想使用一个 uint32_t 来存储 4 个独立的 uint8_t 值,并且能够单独读/写其中的每一个值。
用以下任一方式来设置各个 8 位字段的值是否安全?如果安全,哪一种更好(更快、可移植性更好)?我不一定要一次设置全部四个值,这里只是说明如何在任意时刻设置其中某一个 8 位值。
uint32_t x = ...;
选项 1:
((uint8_t *)(&x))[0] = a;
((uint8_t *)(&x))[1] = b;
((uint8_t *)(&x))[2] = c;
((uint8_t *)(&x))[3] = d;
选项 2:
x = (x & 0xFFFFFF00) | (uint32_t) a;
x = (x & 0xFFFF00FF) | (uint32_t) b << 8;
x = (x & 0xFF00FFFF) | (uint32_t) c << 16;
x = (x & 0x00FFFFFF) | (uint32_t) d << 24;
您最初的版本中,选项 2 的写法是正确的(尽管有些迂回),即
// a, b, c, and d are initialized and of type uint8_t
uint32_t x = ...;
x = (x & 0xFFFFFF00) | (uint32_t) a;
x = (x & 0xFFFF00FF) | (uint32_t) b << 8;
x = (x & 0xFF00FFFF) | (uint32_t) c << 16;
x = (x & 0x00FFFFFF) | (uint32_t) d << 24;
选项 2 的修订是错误的:
uint32_t x = ...;
x |= (uint32_t) a;
x |= (uint32_t) b << 8;
x |= (uint32_t) c << 16;
x |= (uint32_t) d << 24;
即使 x 已经被初始化,这种写法仍然是错误的:它并没有“设置”对应的 8 位字段,而只是把新值按位或(OR)到原有的位上,原来已经为 1 的位不会被清除。
正确的做法是
// a, b, c, and d are initialized and of type uint8_t
uint32_t x = (uint32_t) a;
x |= (uint32_t) b << 8;
x |= (uint32_t) c << 16;
x |= (uint32_t) d << 24;
或者更简洁
// a, b, c, and d are initialized and of type uint8_t
uint32_t x =
(uint32_t) a
| (uint32_t) b << 8
| (uint32_t) c << 16
| (uint32_t) d << 24;
选项 1 的问题在于它假定 uint32_t 的字节顺序是最低有效字节(LSB)在前,即小端序,因此不是可移植的解决方案。
在您对问题作出澄清之后可以确定:您最初的版本(本回答中的第一个代码块)才是正确的做法。它保持其余 24 位不变,同时把指定的 8 位字段设置为右侧(RHS)的 uint8_t 值。
通用函数(pos 取 0、8、16 或 24):
/*
 * Replace one 8-bit field of a 32-bit word.
 *
 * val: the word to modify (passed by value; the updated word is returned).
 * a:   the byte to store in the field.
 * pos: bit offset of the field's least-significant bit; 0, 8, 16 or 24
 *      select the four byte-aligned fields.
 *
 * Returns val with the 8 bits starting at bit pos replaced by a; bits that
 * would land above bit 31 are discarded. Shifting by a negative count or by
 * 32 or more is undefined behavior in C, so for such a pos the word is
 * returned unchanged instead of performing the shift.
 */
uint32_t setb(uint32_t val, uint8_t a, int pos)
{
    if (pos < 0 || pos > 31)
        return val;

    /* Clear the target field first, then OR the new byte into the gap. */
    val &= ~(0xffUL << pos);
    val |= (uint32_t)a << pos;
    return val;
}
如果您不喜欢使用移位运算:
/*
 * Pack four bytes into one 32-bit word without using shift operators.
 * Each byte is scaled by the power of 256 that matches its position:
 * a ends up in bits 0-7, b in bits 8-15, c in bits 16-23, d in bits 24-31.
 */
uint32_t f(uint8_t a, uint8_t b,uint8_t c,uint8_t d) {
    unsigned long packed = a;

    packed += 0x100UL * b;
    packed += 0x10000UL * c;
    packed += 0x1000000UL * d;
    return packed;
}
任何优秀的优化编译器都会生成非常高效的代码:
gcc ARM
@ f: listing given as gcc's ARM output for the C function f(a, b, c, d).
@ Adds r2 << 16, r3 << 24 and r1 << 8 to r0 and leaves the total in r0.
@ NOTE(review): presumably AAPCS argument passing (r0=a, r1=b, r2=c, r3=d,
@ result in r0) — confirm against the build that produced this listing.
f:
@ r0 = r0 + (r2 << 16)
add r0, r0, r2, lsl #16
@ r0 = r0 + (r3 << 24)
add r0, r0, r3, lsl #24
@ r0 = r0 + (r1 << 8)
add r0, r0, r1, lsl #8
@ branch to the address held in lr
bx lr
clang x86
# f: listing given as clang's x86 output for the C function f(a, b, c, d).
# Builds eax as rdi + (esi << 8), then ORs in edx << 16 and ecx << 24.
# NOTE(review): presumably System V AMD64 argument passing (edi=a, esi=b,
# edx=c, ecx=d, result in eax) — confirm against the build that produced it.
f: # @f
# esi <<= 8
shl esi, 8
# eax = low 32 bits of (rsi + rdi); lea does the addition
lea eax, [rsi + rdi]
# edx <<= 16, then merge into eax
shl edx, 16
or eax, edx
# ecx <<= 24, then merge into eax
shl ecx, 24
or eax, ecx
ret
只有在非常小的微控制器上,我才会推荐使用联合体(union)的方式:
/*
 * Pack four bytes into a 32-bit word by overlaying a 4-byte array on it.
 * a, b, c and d are stored at increasing addresses of the word, so which
 * bits of the returned value each byte occupies depends on the target's
 * byte order.
 */
uint32_t g(uint8_t a, uint8_t b,uint8_t c,uint8_t d)
{
    union
    {
        uint32_t v32;
        uint8_t v8[4];
    } overlay;

    /* Fill the word byte by byte, lowest address first. */
    overlay.v8[0] = a;
    overlay.v8[1] = b;
    overlay.v8[2] = c;
    overlay.v8[3] = d;

    /* Read the same storage back as one 32-bit value. */
    return overlay.v32;
}
因为在这类平台上,编译器更容易对它进行优化(下面依次列出 f 与 g 在这类目标上的编译结果):
; Symbol alias: __zero_reg__ stands for register number 1 (r1).
; NOTE(review): the adc instructions below only propagate the carry if r1
; holds zero, as the name suggests — confirm for this toolchain.
__zero_reg__ = 1
; f: listing presented as the compiled form of the arithmetic C function f.
; What the instructions do: form the 32-bit sum r24 + r22 in r27:r24, shift
; it left once per count in r20 + 8, then once per count in r18 + 16, and
; finally return r24 in r25 with r22..r24 cleared.
; NOTE(review): the shift counts come from registers rather than being the
; constants 8/16/24, and only one result byte can be non-zero. That does not
; look like a plain packing of four bytes — check which source expression
; this listing was actually built from.
f:
; r16/r17 are used as scratch below, so they are saved here
push r16
push r17
; zero the upper three bytes of the 32-bit accumulator r27:r24
ldi r25,lo8(0)
ldi r26,lo8(0)
ldi r27,hi8(0)
; r27:r24 = r24 + r22, carry rippled through the upper bytes
add r24,r22
adc r25,__zero_reg__
adc r26,__zero_reg__
adc r27,__zero_reg__
; r21:r20 = r20 + 8 (done by subtracting -8 with borrow)
ldi r21,lo8(0)
subi r20,lo8(-(8))
sbci r21,hi8(-(8))
; enter the loop at its counter test
rjmp 2f
; loop body: shift r27:r24 left by one bit
1: lsl r24
rol r25
rol r26
rol r27
; decrement the counter; repeat while the result is not negative
2: dec r20
brpl 1b
; r19:r18 = r18 + 16 (same subtract-a-negative pattern)
ldi r19,lo8(0)
subi r18,lo8(-(16))
sbci r19,hi8(-(16))
rjmp 2f
; second loop: again shift r27:r24 left by one bit per iteration
1: lsl r24
rol r25
rol r26
rol r27
2: dec r18
brpl 1b
; r19:r16 = r24 placed in the top byte, lower three bytes cleared
mov r19,r24
clr r18
clr r17
clr r16
; copy r19:r16 into r25:r22
mov r22,r16
mov r23,r17
mov r24,r18
mov r25,r19
; restore the saved registers and return
pop r17
pop r16
ret
; g: listing presented as the compiled form of the union-based C function g.
; Consists only of register moves: r22 <- r24, r23 <- old r22 (via r17),
; r24 <- r20, r25 <- r18.
; NOTE(review): presumably the avr-gcc ABI (a=r24, b=r22, c=r20, d=r18,
; 32-bit result in r25:r22) — confirm against the toolchain used.
g:
; r17 is used as scratch below; r16 is saved and restored but not used here
push r16
push r17
; r25 = r18
mov r25,r18
; keep the old r22 in r17 because the next move overwrites r22
mov r17,r22
; r22 = r24
mov r22,r24
; r23 = old r22
mov r23,r17
; r24 = r20
mov r24,r20
; restore the saved registers and return
pop r17
pop r16
ret