我怎样才能让 GCC 将这个位移指令优化为一个移动?
How can I get GCC to optimize this bit-shifting instruction into a move?
我正在尝试使用以下代码在软件中模拟 16 位半浮点数:
/*
 * Software model of a 16-bit half-precision float, one bit-field per part.
 * The widths add up to 16 bits. How bit-fields are laid out inside their
 * storage unit is implementation-defined, so whether this struct matches a
 * raw 16-bit pattern depends on the target ABI.
 */
typedef struct half
{
    unsigned short mantissa : 10;   /* declared first */
    unsigned short exponent : 5;
    unsigned short sign     : 1;    /* declared last */
} half;
/* Pack the three fields of `h` into one 16-bit value:
 * mantissa in bits 0-9, exponent in bits 10-14, sign in bit 15. */
unsigned short from_half(half h)
{
/* Each bit-field operand is promoted to int before the shifts and ORs;
 * the int result is converted back to unsigned short by the return. */
return h.mantissa | h.exponent << 10 | h.sign << 15;
}
/* Split the 16-bit pattern `s` into the fields of a `half`. */
half to_half(unsigned short s)
{
/* Initializers follow declaration order: mantissa, exponent, sign.
 * Converting to an unsigned bit-field keeps only the low bits that fit,
 * so no explicit masks are written here. */
half result = { s, s >> 10, s >> 15 };
return result;
}
我设置它以便它可以很容易地优化成一个移动指令,但是你瞧,在 from_half
中,GCC 无论如何都会进行位移(即使在 -O3
):
# from_half as emitted by GCC: the 16-bit argument is read from edi and the
# result is left in eax. Each field is isolated and then OR-ed back into the
# position it already occupied.
from_half:
mov edx, edi        # edx = copy of h (used for the sign)
mov eax, edi        # eax = copy of h (used for the exponent)
and di, 1023        # 1023 = 0x3FF: keep bits 0-9 (mantissa) in di
shr dx, 15          # move bit 15 (sign) down to bit 0
and eax, 31744      # 31744 = 0x7C00: keep bits 10-14 (exponent) in place
movzx edx, dl       # zero-extend the isolated sign bit to 32 bits
sal edx, 15         # shift the sign back up to bit 15
or eax, edx         # eax = exponent | sign
or eax, edi         # merge in the mantissa; the low 16 bits are the result
ret
而 to_half
优化得很好:
# to_half: the whole conversion collapses to one register copy.
to_half:
mov eax, edi        # result (eax) = argument (edi), bits unchanged
ret
我尝试了不同的优化级别(-O1
、-O2
、-Os
),但没有一个能将其优化成我期望的样子。
而 Clang 即使在 -O1
下也能按我的预期完成优化:
# The entire body is a single register copy; no masks or shifts are emitted.
from_half: # @from_half
mov eax, edi        # result (eax) = argument (edi), bits unchanged
ret
# The entire body is a single register copy.
to_half: # @to_half
mov eax, edi        # result (eax) = argument (edi), bits unchanged
ret
我怎样才能让 GCC 把它优化成一个 move?为什么它还没有这样优化?
我已经有一段时间没写 C
代码了,但使用 union
似乎应该可行:
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
/* True when the first byte of a 16-bit 0x0001 is non-zero. */
static bool useUnion;

/* Runs before main() via the GCC constructor attribute and records the
 * result of a one-off byte-order probe in useUnion. */
__attribute__ ((__constructor__)) // supported by gcc compiler
static void initUseUnion()
{
    const union {
        uint16_t word;
        char bytes[2];
    } probe = { 0x0001 };

    /* bytes[0] holds the low-order byte only on a little-endian target. */
    useUnion = (probe.bytes[0] != 0);
}
/*
 * Software model of a 16-bit half-precision float, one bit-field per part.
 * The widths add up to 16 bits. How bit-fields are laid out inside their
 * storage unit is implementation-defined, so whether this struct matches a
 * raw 16-bit pattern depends on the target ABI.
 */
typedef struct half
{
    unsigned short mantissa : 10;   /* declared first */
    unsigned short exponent : 5;
    unsigned short sign     : 1;    /* declared last */
} half;
/* Overlays the bit-field view and the raw 16-bit view on the same storage,
 * so a value written through one member can be read through the other. */
typedef union half_short
{
    half     h;   /* field-by-field view */
    uint16_t s;   /* raw 16-bit pattern */
} half_short;
/* Return the 16-bit pattern of `h`.
 * When useUnion is set the bits are reinterpreted through half_short;
 * otherwise they are rebuilt field by field with shifts. */
unsigned short from_half(half h)
{
    if (!useUnion)
        return h.mantissa | h.exponent << 10 | h.sign << 15;

    half_short pun = { .h = h };
    return pun.s;
}
/* Build a `half` from the 16-bit pattern `s`.
 * When useUnion is set the bits are reinterpreted through half_short;
 * otherwise each field is initialized from the matching shifted value. */
half to_half(unsigned short s)
{
    if (!useUnion) {
        half fields = { s, s >> 10, s >> 15 };
        return fields;
    }

    half_short pun;
    pun.s = s;
    return pun.h;
}
/* Print the flag chosen by the start-up probe (as 0 or 1) and exit. */
int main(int argc, char* argv[])
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    printf("%d\n", useUnion);
    return 0;
}
除了上面基于 union 的回答之外,您还可以尝试以下方法,它回答了您的问题:
How can I get GCC to optimize this into a move?
只需将每个移位的位域表达式转换为 unsigned short
/* Pack mantissa (bits 0-9), exponent (bits 10-14) and sign (bit 15) into one
 * 16-bit value, converting every operand to unsigned short before the ORs. */
unsigned short from_half(half h)
{
/* The shifts are still evaluated in int after promotion; each cast then
 * narrows that intermediate result back to 16 bits. */
return (unsigned short)h.mantissa | (unsigned short)(h.exponent << 10) | (unsigned short)(h.sign << 15);
}
结果是:
# from_half reduced to a single register copy; no masks or shifts remain.
from_half:
mov eax, edi        # result (eax) = argument (edi), bits unchanged
ret
Why isn't it optimized that way already?
我不确定我对这个问题有可靠的答案。看起来,位域在表达式中被隐式提升为 int
这一中间步骤让优化器感到困惑……但这只是一个猜测。
我正在尝试使用以下代码在软件中模拟 16 位半浮点数:
/*
 * Software model of a 16-bit half-precision float, one bit-field per part.
 * The widths add up to 16 bits. How bit-fields are laid out inside their
 * storage unit is implementation-defined, so whether this struct matches a
 * raw 16-bit pattern depends on the target ABI.
 */
typedef struct half
{
    unsigned short mantissa : 10;   /* declared first */
    unsigned short exponent : 5;
    unsigned short sign     : 1;    /* declared last */
} half;
/* Pack the three fields of `h` into one 16-bit value:
 * mantissa in bits 0-9, exponent in bits 10-14, sign in bit 15. */
unsigned short from_half(half h)
{
/* Each bit-field operand is promoted to int before the shifts and ORs;
 * the int result is converted back to unsigned short by the return. */
return h.mantissa | h.exponent << 10 | h.sign << 15;
}
/* Split the 16-bit pattern `s` into the fields of a `half`. */
half to_half(unsigned short s)
{
/* Initializers follow declaration order: mantissa, exponent, sign.
 * Converting to an unsigned bit-field keeps only the low bits that fit,
 * so no explicit masks are written here. */
half result = { s, s >> 10, s >> 15 };
return result;
}
我设置它以便它可以很容易地优化成一个移动指令,但是你瞧,在 from_half
中,GCC 无论如何都会进行位移(即使在 -O3
):
# from_half as emitted by GCC: the 16-bit argument is read from edi and the
# result is left in eax. Each field is isolated and then OR-ed back into the
# position it already occupied.
from_half:
mov edx, edi        # edx = copy of h (used for the sign)
mov eax, edi        # eax = copy of h (used for the exponent)
and di, 1023        # 1023 = 0x3FF: keep bits 0-9 (mantissa) in di
shr dx, 15          # move bit 15 (sign) down to bit 0
and eax, 31744      # 31744 = 0x7C00: keep bits 10-14 (exponent) in place
movzx edx, dl       # zero-extend the isolated sign bit to 32 bits
sal edx, 15         # shift the sign back up to bit 15
or eax, edx         # eax = exponent | sign
or eax, edi         # merge in the mantissa; the low 16 bits are the result
ret
而 to_half
优化得很好:
# to_half: the whole conversion collapses to one register copy.
to_half:
mov eax, edi        # result (eax) = argument (edi), bits unchanged
ret
我尝试了不同的优化级别(-O1
、-O2
、-Os
),但没有一个能将其优化成我期望的样子。
而 Clang 即使在 -O1
下也能按我的预期完成优化:
# The entire body is a single register copy; no masks or shifts are emitted.
from_half: # @from_half
mov eax, edi        # result (eax) = argument (edi), bits unchanged
ret
# The entire body is a single register copy.
to_half: # @to_half
mov eax, edi        # result (eax) = argument (edi), bits unchanged
ret
我怎样才能让 GCC 把它优化成一个 move?为什么它还没有这样优化?
我已经有一段时间没写 C
代码了,但使用 union
似乎应该可行:
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
/* True when the first byte of a 16-bit 0x0001 is non-zero. */
static bool useUnion;

/* Runs before main() via the GCC constructor attribute and records the
 * result of a one-off byte-order probe in useUnion. */
__attribute__ ((__constructor__)) // supported by gcc compiler
static void initUseUnion()
{
    const union {
        uint16_t word;
        char bytes[2];
    } probe = { 0x0001 };

    /* bytes[0] holds the low-order byte only on a little-endian target. */
    useUnion = (probe.bytes[0] != 0);
}
/*
 * Software model of a 16-bit half-precision float, one bit-field per part.
 * The widths add up to 16 bits. How bit-fields are laid out inside their
 * storage unit is implementation-defined, so whether this struct matches a
 * raw 16-bit pattern depends on the target ABI.
 */
typedef struct half
{
    unsigned short mantissa : 10;   /* declared first */
    unsigned short exponent : 5;
    unsigned short sign     : 1;    /* declared last */
} half;
/* Overlays the bit-field view and the raw 16-bit view on the same storage,
 * so a value written through one member can be read through the other. */
typedef union half_short
{
    half     h;   /* field-by-field view */
    uint16_t s;   /* raw 16-bit pattern */
} half_short;
/* Return the 16-bit pattern of `h`.
 * When useUnion is set the bits are reinterpreted through half_short;
 * otherwise they are rebuilt field by field with shifts. */
unsigned short from_half(half h)
{
    if (!useUnion)
        return h.mantissa | h.exponent << 10 | h.sign << 15;

    half_short pun = { .h = h };
    return pun.s;
}
/* Build a `half` from the 16-bit pattern `s`.
 * When useUnion is set the bits are reinterpreted through half_short;
 * otherwise each field is initialized from the matching shifted value. */
half to_half(unsigned short s)
{
    if (!useUnion) {
        half fields = { s, s >> 10, s >> 15 };
        return fields;
    }

    half_short pun;
    pun.s = s;
    return pun.h;
}
/* Print the flag chosen by the start-up probe (as 0 or 1) and exit. */
int main(int argc, char* argv[])
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    printf("%d\n", useUnion);
    return 0;
}
除了上面基于 union 的回答之外,您还可以尝试以下方法,它回答了您的问题:
How can I get GCC to optimize this into a move?
只需将每个移位的位域表达式转换为 unsigned short
/* Pack mantissa (bits 0-9), exponent (bits 10-14) and sign (bit 15) into one
 * 16-bit value, converting every operand to unsigned short before the ORs. */
unsigned short from_half(half h)
{
/* The shifts are still evaluated in int after promotion; each cast then
 * narrows that intermediate result back to 16 bits. */
return (unsigned short)h.mantissa | (unsigned short)(h.exponent << 10) | (unsigned short)(h.sign << 15);
}
结果是:
# from_half reduced to a single register copy; no masks or shifts remain.
from_half:
mov eax, edi        # result (eax) = argument (edi), bits unchanged
ret
Why isn't it optimized that way already?
我不确定我对这个问题有可靠的答案。看起来,位域在表达式中被隐式提升为 int
这一中间步骤让优化器感到困惑……但这只是一个猜测。