sqrtf() 抛出域错误,即使我防范负数
sqrtf() throws a domain error, even though I guard against negative numbers
我正在规范化一个 3D 矢量,clang-9 生成的代码在 sqrtf()
上抛出一个 SIGFPE,即使我在调用它之前做了一个测试。
请注意,我在运行时启用了浮点(FP)异常。
const float lensq = dx*dx + dy*dy + dz*dz;
float invlen = 1.0f;
if (lensq > FLT_EPSILON)
{
const float leng = sqrtf(lensq);
invlen = 1.0f / leng;
}
clang-9 为这段代码生成了如下汇编:
const float lensq = dx*dx + dy*dy + dz*dz;
a11: 44 0f bf f1 movswl %cx,%r14d
a15: 44 89 f0 mov %r14d,%eax
a18: 41 0f af c6 imul %r14d,%eax
a1c: 44 0f bf ee movswl %si,%r13d
a20: 44 89 e9 mov %r13d,%ecx
a23: 41 0f af cd imul %r13d,%ecx
a27: 01 c1 add %eax,%ecx
a29: 44 0f bf fa movswl %dx,%r15d
a2d: 44 89 f8 mov %r15d,%eax
a30: 41 0f af c7 imul %r15d,%eax
a34: c5 f8 28 c7 vmovaps %xmm7,%xmm0
if (lensq > FLT_EPSILON)
a38: 01 c8 add %ecx,%eax
a3a: 74 2d je a69 <surface_extract_cases+0x7f9>
const float lensq = dx*dx + dy*dy + dz*dz;
a3c: c5 a2 2a c0 vcvtsi2ss %eax,%xmm11,%xmm0
a40: c4 c1 78 2e c0 vucomiss %xmm8,%xmm0
a45: 72 09 jb a50 <surface_extract_cases+0x7e0>
a47: c5 fa 51 c0 vsqrtss %xmm0,%xmm0,%xmm0
a4b: eb 18 jmp a65 <surface_extract_cases+0x7f5>
a4d: 0f 1f 00 nopl (%rax)
a50: c5 f8 77 vzeroupper
a53: e8 00 00 00 00 callq a58 <surface_extract_cases+0x7e8>
a58: c4 41 39 ef c0 vpxor %xmm8,%xmm8,%xmm8
a5d: c5 fa 10 3d 00 00 00 vmovss 0x0(%rip),%xmm7 # a65 <surface_extract_cases+0x7f5>
a64: 00
invlen = 1.0f / leng;
SIGFPE 显示此调用堆栈:
#0 __kernel_standard (x=-1952511232, y=-1952511232, type=126) at ../sysdeps/ieee754/k_standard.c:584
#1 0x00007ffff5ea18e1 in __kernel_standard_f (x=<optimized out>, y=<optimized out>, type=<optimized out>) at ../sysdeps/ieee754/k_standardf.c:32
#2 0x000000000042a458 in mc_process_case_instances (caseidx=<optimized out>, numcases=31, fielddensity=0x3c171110, fieldtype=0x3c232510, isoval=<optimized out>, outputv=0x9c0d194 <scratch_surface_v+96744708>, outputn=0x12c0d194 <scratch_surface_n+96744708>, outputm=0x17e8ae3c <scratch_surface_m+32248236>, cases=<optimized out>) at ../../src/osino/src/surface.c:535
#3 surface_extract_cases (fielddensity=0x3c171110, fieldtype=0x3c232510, cases=<optimized out>, isoval=43, gridoff=<optimized out>, xlo=1120, xhi=48, ylo=32, yhi=48, zlo=16, zhi=32, outputv=0x9c09c90 <scratch_surface_v+96731136>, outputn=0x12c09c90 <scratch_surface_n+96731136>, outputm=0x17e89c90 <scratch_surface_m+32243712>, maxtria=65536, threadnr=1) at ../../src/osino/src/surface.c:745
SIGFPE 似乎在 __kernel_standard() 调用内部,由 0 除以 0 引起。
如果我在 __kernel_standard()
中查找类型“126”,那么我看到它是:sqrtf(negative) 并调用此代码:
case 126:
/* sqrt(x<0) */
exc.type = DOMAIN;
exc.name = type < 100 ? "sqrt" : "sqrtf";
if (_LIB_VERSION == _SVID_)
exc.retval = zero;
else
exc.retval = zero/zero;
if (_LIB_VERSION == _POSIX_)
errno = EDOM;
else if (!matherr(&exc)) {
/* if (_LIB_VERSION == _SVID_) {
(void) WRITE2("sqrt: DOMAIN error\n", 19);
} */
errno = EDOM;
}
break;
...在调试器中导致此调用堆栈:
Thread 8 "noisetuner" received signal SIGFPE, Arithmetic exception.
[Switching to Thread 0x7fffccbdd700 (LWP 5838)]
─── Assembly ─────────────────────────────────────────────────────────────────────────────────────────
0x00007ffff5ea038c __kernel_standard+8236 nopl 0x0(%rax)
0x00007ffff5ea0390 __kernel_standard+8240 pxor %xmm0,%xmm0
0x00007ffff5ea0394 __kernel_standard+8244 cmp $0x2,%eax
0x00007ffff5ea0397 __kernel_standard+8247 divsd %xmm0,%xmm0
0x00007ffff5ea039b __kernel_standard+8251 movsd %xmm0,0x30(%rsp)
0x00007ffff5ea03a1 __kernel_standard+8257 jne 0x7ffff5e9f13b <__kernel_standard+3547>
0x00007ffff5ea03a7 __kernel_standard+8263 mov 0x38bc0a(%rip),%rax # 0x7ffff622bfb8
─── Expressions ──────────────────────────────────────────────────────────────────────────────────────
─── History ──────────────────────────────────────────────────────────────────────────────────────────
─── Memory ───────────────────────────────────────────────────────────────────────────────────────────
─── Registers ────────────────────────────────────────────────────────────────────────────────────────
rax 0x0000000000000002 rbx 0x00007ffff622c148 rcx 0x0000000058e94a00
rdx 0x00007ffff5f5a3c7 rsi 0x0000000000008d90 rdi 0x000000000000007e
rbp 0x00007fffccbdca30 rsp 0x00007fffccbdc640 r8 0x0000000000000006
r9 0x0000000000000005 r10 0x0000000000000038 r11 0x00007ffff5ea1460
r12 0x0000000000000001 r13 0x00000000ffff8d90 r14 0x00000000ffff9db0
r15 0x00000000ffff8e10 rip 0x00007ffff5ea0397 eflags [ PF ZF IF RF ]
cs 0x00000033 ss 0x0000002b ds 0x00000000
es 0x00000000 fs 0x00000000 gs 0x00000000
─── Source ───────────────────────────────────────────────────────────────────────────────────────────
Cannot display "/build/glibc-2ORdQG/glibc-2.27/math/../sysdeps/ieee754/k_standard.c" ([Errno 2] No such file or directory: '/build/glibc-2ORdQG/glibc-2.27/math/../sysdeps/ieee754/k_standard.c')
─── Stack ────────────────────────────────────────────────────────────────────────────────────────────
[0] from 0x00007ffff5ea0397 in __kernel_standard+8247 at ../sysdeps/ieee754/k_standard.c:584
arg x = -1952511232
arg y = -1952511232
arg type = 126
[1] from 0x00007ffff5ea18e1 in __kernel_standard_f+17 at ../sysdeps/ieee754/k_standardf.c:32
arg x = <optimized out>
arg y = <optimized out>
arg type = <optimized out>
[+]
问题在使用 clang-9 -O3 参数编译时出现,但在使用 clang-9 -O0 参数编译时不会出现。
我使用的完整命令行:
clang-9 -D_GNU_SOURCE -DAPPVER=1.00 -DUSECOREPROFILE -DNOUSESTEAM -DXWIN -DLANDSCAPE -DBLKMAG=6 -USTORECHARS -USTOREFP16 -DSTORESHORTS -I/home/bram/src/stb/ -I../GBase/src -Isrc -I../../src/dutch-blunt/src -I../../src/osino/src -I../../src/osino/src/../externals/enoki/include -I/usr/local/cuda/include -I/home/bram/src/zstd/lib -I../../src/ThreadTracer -IModels.game/geom `/opt/ode-master/bin/ode-config --cflags` `/usr/bin/sdl2-config --cflags` -g -Wall -pedantic -Wno-missing-braces -mavx2 -mfma -mf16c -MMD -MP -O3 -DDEBUG -c -o ../../src/osino/src/surface.o ../../src/osino/src/surface.c
为什么 clang 会计算负数的平方根?它是否在尝试推测执行,再根据 lensq > FLT_EPSILON 的测试结果来混合(blend)结果?这样做合法吗?
throws a domain error, even though I guard against negative numbers
但是 if (lensq > FLT_EPSILON) 这个检查为时已晚,因为在此之前 dx*dx + dy*dy + dz*dz 已经发生了 int 溢出。“它确实溢出了,导致 lensq 为负数”——有符号整数溢出属于未定义行为(UB)。
编译器可以假定 dx*dx + dy*dy + dz*dz >= 0,进而假定 lensq >= 0.0f,并据此认为 sqrtf(lensq) 总能正常工作,从而进行优化。
去掉 UB。
// const float lensq = dx*dx + dy*dy + dz*dz;
const float lensq = 1LL*dx*dx + 1LL*dy*dy + 1LL*dz*dz;
我正在规范化一个 3D 矢量,clang-9 生成的代码在 sqrtf()
上抛出一个 SIGFPE,即使我在调用它之前做了一个测试。
请注意,我在运行时启用了浮点(FP)异常。
const float lensq = dx*dx + dy*dy + dz*dz;
float invlen = 1.0f;
if (lensq > FLT_EPSILON)
{
const float leng = sqrtf(lensq);
invlen = 1.0f / leng;
}
clang-9 为这段代码生成了如下汇编:
const float lensq = dx*dx + dy*dy + dz*dz;
a11: 44 0f bf f1 movswl %cx,%r14d
a15: 44 89 f0 mov %r14d,%eax
a18: 41 0f af c6 imul %r14d,%eax
a1c: 44 0f bf ee movswl %si,%r13d
a20: 44 89 e9 mov %r13d,%ecx
a23: 41 0f af cd imul %r13d,%ecx
a27: 01 c1 add %eax,%ecx
a29: 44 0f bf fa movswl %dx,%r15d
a2d: 44 89 f8 mov %r15d,%eax
a30: 41 0f af c7 imul %r15d,%eax
a34: c5 f8 28 c7 vmovaps %xmm7,%xmm0
if (lensq > FLT_EPSILON)
a38: 01 c8 add %ecx,%eax
a3a: 74 2d je a69 <surface_extract_cases+0x7f9>
const float lensq = dx*dx + dy*dy + dz*dz;
a3c: c5 a2 2a c0 vcvtsi2ss %eax,%xmm11,%xmm0
a40: c4 c1 78 2e c0 vucomiss %xmm8,%xmm0
a45: 72 09 jb a50 <surface_extract_cases+0x7e0>
a47: c5 fa 51 c0 vsqrtss %xmm0,%xmm0,%xmm0
a4b: eb 18 jmp a65 <surface_extract_cases+0x7f5>
a4d: 0f 1f 00 nopl (%rax)
a50: c5 f8 77 vzeroupper
a53: e8 00 00 00 00 callq a58 <surface_extract_cases+0x7e8>
a58: c4 41 39 ef c0 vpxor %xmm8,%xmm8,%xmm8
a5d: c5 fa 10 3d 00 00 00 vmovss 0x0(%rip),%xmm7 # a65 <surface_extract_cases+0x7f5>
a64: 00
invlen = 1.0f / leng;
SIGFPE 显示此调用堆栈:
#0 __kernel_standard (x=-1952511232, y=-1952511232, type=126) at ../sysdeps/ieee754/k_standard.c:584
#1 0x00007ffff5ea18e1 in __kernel_standard_f (x=<optimized out>, y=<optimized out>, type=<optimized out>) at ../sysdeps/ieee754/k_standardf.c:32
#2 0x000000000042a458 in mc_process_case_instances (caseidx=<optimized out>, numcases=31, fielddensity=0x3c171110, fieldtype=0x3c232510, isoval=<optimized out>, outputv=0x9c0d194 <scratch_surface_v+96744708>, outputn=0x12c0d194 <scratch_surface_n+96744708>, outputm=0x17e8ae3c <scratch_surface_m+32248236>, cases=<optimized out>) at ../../src/osino/src/surface.c:535
#3 surface_extract_cases (fielddensity=0x3c171110, fieldtype=0x3c232510, cases=<optimized out>, isoval=43, gridoff=<optimized out>, xlo=1120, xhi=48, ylo=32, yhi=48, zlo=16, zhi=32, outputv=0x9c09c90 <scratch_surface_v+96731136>, outputn=0x12c09c90 <scratch_surface_n+96731136>, outputm=0x17e89c90 <scratch_surface_m+32243712>, maxtria=65536, threadnr=1) at ../../src/osino/src/surface.c:745
SIGFPE 似乎在 __kernel_standard() 调用内部,由 0 除以 0 引起。
如果我在 __kernel_standard()
中查找类型“126”,那么我看到它是:sqrtf(negative) 并调用此代码:
case 126:
/* sqrt(x<0) */
exc.type = DOMAIN;
exc.name = type < 100 ? "sqrt" : "sqrtf";
if (_LIB_VERSION == _SVID_)
exc.retval = zero;
else
exc.retval = zero/zero;
if (_LIB_VERSION == _POSIX_)
errno = EDOM;
else if (!matherr(&exc)) {
/* if (_LIB_VERSION == _SVID_) {
(void) WRITE2("sqrt: DOMAIN error\n", 19);
} */
errno = EDOM;
}
break;
...在调试器中导致此调用堆栈:
Thread 8 "noisetuner" received signal SIGFPE, Arithmetic exception.
[Switching to Thread 0x7fffccbdd700 (LWP 5838)]
─── Assembly ─────────────────────────────────────────────────────────────────────────────────────────
0x00007ffff5ea038c __kernel_standard+8236 nopl 0x0(%rax)
0x00007ffff5ea0390 __kernel_standard+8240 pxor %xmm0,%xmm0
0x00007ffff5ea0394 __kernel_standard+8244 cmp $0x2,%eax
0x00007ffff5ea0397 __kernel_standard+8247 divsd %xmm0,%xmm0
0x00007ffff5ea039b __kernel_standard+8251 movsd %xmm0,0x30(%rsp)
0x00007ffff5ea03a1 __kernel_standard+8257 jne 0x7ffff5e9f13b <__kernel_standard+3547>
0x00007ffff5ea03a7 __kernel_standard+8263 mov 0x38bc0a(%rip),%rax # 0x7ffff622bfb8
─── Expressions ──────────────────────────────────────────────────────────────────────────────────────
─── History ──────────────────────────────────────────────────────────────────────────────────────────
─── Memory ───────────────────────────────────────────────────────────────────────────────────────────
─── Registers ────────────────────────────────────────────────────────────────────────────────────────
rax 0x0000000000000002 rbx 0x00007ffff622c148 rcx 0x0000000058e94a00
rdx 0x00007ffff5f5a3c7 rsi 0x0000000000008d90 rdi 0x000000000000007e
rbp 0x00007fffccbdca30 rsp 0x00007fffccbdc640 r8 0x0000000000000006
r9 0x0000000000000005 r10 0x0000000000000038 r11 0x00007ffff5ea1460
r12 0x0000000000000001 r13 0x00000000ffff8d90 r14 0x00000000ffff9db0
r15 0x00000000ffff8e10 rip 0x00007ffff5ea0397 eflags [ PF ZF IF RF ]
cs 0x00000033 ss 0x0000002b ds 0x00000000
es 0x00000000 fs 0x00000000 gs 0x00000000
─── Source ───────────────────────────────────────────────────────────────────────────────────────────
Cannot display "/build/glibc-2ORdQG/glibc-2.27/math/../sysdeps/ieee754/k_standard.c" ([Errno 2] No such file or directory: '/build/glibc-2ORdQG/glibc-2.27/math/../sysdeps/ieee754/k_standard.c')
─── Stack ────────────────────────────────────────────────────────────────────────────────────────────
[0] from 0x00007ffff5ea0397 in __kernel_standard+8247 at ../sysdeps/ieee754/k_standard.c:584
arg x = -1952511232
arg y = -1952511232
arg type = 126
[1] from 0x00007ffff5ea18e1 in __kernel_standard_f+17 at ../sysdeps/ieee754/k_standardf.c:32
arg x = <optimized out>
arg y = <optimized out>
arg type = <optimized out>
[+]
问题在使用 clang-9 -O3 参数编译时出现,但在使用 clang-9 -O0 参数编译时不会出现。
我使用的完整命令行:
clang-9 -D_GNU_SOURCE -DAPPVER=1.00 -DUSECOREPROFILE -DNOUSESTEAM -DXWIN -DLANDSCAPE -DBLKMAG=6 -USTORECHARS -USTOREFP16 -DSTORESHORTS -I/home/bram/src/stb/ -I../GBase/src -Isrc -I../../src/dutch-blunt/src -I../../src/osino/src -I../../src/osino/src/../externals/enoki/include -I/usr/local/cuda/include -I/home/bram/src/zstd/lib -I../../src/ThreadTracer -IModels.game/geom `/opt/ode-master/bin/ode-config --cflags` `/usr/bin/sdl2-config --cflags` -g -Wall -pedantic -Wno-missing-braces -mavx2 -mfma -mf16c -MMD -MP -O3 -DDEBUG -c -o ../../src/osino/src/surface.o ../../src/osino/src/surface.c
为什么 clang 会计算负数的平方根?它是否在尝试推测执行,再根据 lensq > FLT_EPSILON 的测试结果来混合(blend)结果?这样做合法吗?
throws a domain error, even though I guard against negative numbers
但是 if (lensq > FLT_EPSILON) 这个检查为时已晚,因为在此之前 dx*dx + dy*dy + dz*dz 已经发生了 int 溢出。“它确实溢出了,导致 lensq 为负数”——有符号整数溢出属于未定义行为(UB)。
编译器可以假定 dx*dx + dy*dy + dz*dz >= 0,进而假定 lensq >= 0.0f,并据此认为 sqrtf(lensq) 总能正常工作,从而进行优化。
去掉 UB。
// const float lensq = dx*dx + dy*dy + dz*dz;
const float lensq = 1LL*dx*dx + 1LL*dy*dy + 1LL*dz*dz;