使用 SSE 模拟标准 Math.pow 函数
Using SSE to mimic the standard Math.pow function
我正在学习如何使用 SSE,于是决定写一段简单的代码来计算 n^d,把它实现为一个由 C 程序调用的函数。
这是我的 NASM 代码:
section .data
; printf format string: three integer conversions (base, exponent, result), NUL-terminated.
resmsg: db '%d^%d = %d', 0
section .bss
section .text
extern printf
; ------------------------------------------------------------
; main: called from a C program. Reads its arguments from the stack,
; raises n to a power with scalar SSE multiplies and prints the
; outcome through printf. Only the n and d arguments are used; the
; other four are loaded into registers but never read afterwards.
; Locals: [ebp-4] = scratch for the SSE result, [ebp-8] = copy of d.
; ------------------------------------------------------------
global main
; Byte offsets of the stack arguments relative to ebp (after the
; prologue, [ebp] holds the saved ebp and [ebp+4] the return address).
T equ 8
n equ 12
d equ 16
m equ 20
Sid equ 24
Sn equ 28
main:
; ------------------------------------------------------------
; Prologue: set up the frame, reserve locals, save registers
; ------------------------------------------------------------
push ebp ; keep the caller's frame pointer
mov ebp, esp ; ebp now anchors this frame
sub esp, 8 ; two dword locals: [ebp-4] and [ebp-8]
push ebx ; ebx, esi and edi are overwritten below, so save them
push esi
push edi
; ------------------------------------------------------------
; Load the six stack arguments into registers
; ------------------------------------------------------------
mov eax, [ebp+T] ; eax = T (overwritten before it is ever read)
mov ebx, [ebp+n] ; ebx = n, kept for the printf call
mov ecx, [ebp+d] ; ecx = d, used as the loop counter
mov edx, [ebp+m] ; edx = m (unused)
mov esi, [ebp+Sid] ; esi = Sid (unused)
mov edi, [ebp+Sn] ; edi = Sn (unused)
mov [ebp-8], ecx ; remember d, because ecx is consumed by the loop
;
; Power loop: multiply xmm0 by xmm1 once per counter step.
; NOTE(review): movss copies the 32 bits at [ebp+n] unchanged and treats
; them as a single-precision float; it performs no int-to-float
; conversion. If the C caller passes n as an int, xmm0/xmm1 do not hold
; the value n - confirm the argument type.
; NOTE(review): xmm0 already starts as the base and is then multiplied
; d times, which yields base^(d+1) rather than base^d.
;
movss xmm0, [ebp+n] ; running product, initialised with the base
movss xmm1, [ebp+n] ; constant multiplier (the base)
loop: mulss xmm0, xmm1 ; product *= base (scalar single-precision)
dec ecx ; one iteration done
cmp ecx,0 ; dec has already set ZF, so this compare is redundant
jnz loop ; repeat until the counter reaches zero
;
; Move the result into eax through the local at [ebp-4].
; NOTE(review): this transfers the raw float bit pattern; eax does not
; receive the value converted to an integer, yet it is printed with %d.
;
movss [ebp-4], xmm0
mov eax, [ebp-4]
;
; printf(resmsg, n, d, result): arguments are pushed right to left
;
push eax ; 4th argument: result
mov ecx, [ebp-8] ; reload d, the loop left ecx at zero
push ecx ; 3rd argument: exponent
push ebx ; 2nd argument: base
push resmsg ; 1st argument: address of the format string
call printf
; NOTE(review): only 16 bytes (four dwords) were pushed for printf.
; Adding 24 leaves esp at ebp-12, so the pops below load edi from the
; saved-ebx slot and esi/ebx from the two locals instead of restoring
; the values saved in the prologue.
add esp, 24 ; intended to drop printf's arguments and the two locals
; ------------------------------------------------------------
; Epilogue
; ------------------------------------------------------------
pop edi ; meant to undo the prologue pushes in reverse order
pop esi
pop ebx
mov esp, ebp ; discard whatever is left of the frame
pop ebp ; back to the caller's frame pointer
ret ; return to the C caller
我期望它打印出:
4^2 = 16
但实际得到的却是:
4^2 = 0
我花了整个下午想解决这个问题,却始终找不到原因。你有什么提示吗?
编辑:
考虑到可能是格式问题,我尝试用下面的代码来转换数据:
movss [ebp-4], xmm0 ; store the single-precision result in the local at [ebp-4]
fld dword [ebp-4] ; pushes that value onto the x87 register stack; memory and eax are left unchanged
mov eax, dword [ebp-4] ; eax still receives the raw float bits, exactly as without the fld
而不是
movss [ebp-4], xmm0 ; store the single-precision result in the local at [ebp-4]
mov eax, [ebp-4] ; eax = the same 32 bits, copied without any float-to-int conversion
但我得到了相同的结果。
MOVSS 移动的是单精度浮点数(32 位),它只会原样复制位模式,不做任何类型转换。我假设 n 是一个整数,那么就不能用 MOVSS 把它加载到 XMM 寄存器中,请改用 CVTSI2SS。
printf 无法直接接收单精度浮点数:在 C 中,传给可变参数函数的 float 会被编译器提升为 double。因此这里用 CVTSS2SI 把结果转换回整数、再用 %d 打印会更方便。所以代码应该是这样的:
...
;
; pow: result = n^d, computed as 1.0 multiplied by n, d times.
; The product starts at 1.0 rather than n so that d = 0 and d = 1 work:
; pre-decrementing the counter before a jnz-terminated loop would let it
; reach 0 or -1 before the first test and spin for about 2^32 iterations.
; A d <= 0 skips the loop and prints 1; negative exponents are not
; supported by this integer demo.
;
    mov ecx, [ebp+d]                ; ecx = exponent, reloaded so the block does not depend on earlier register state
    mov eax, 1
    cvtsi2ss xmm0, eax              ; xmm0 = 1.0f, the running product
    cvtsi2ss xmm1, dword [ebp+n]    ; xmm1 = (float)n: a real int-to-float conversion, not a bit copy
    test ecx, ecx
    jle .pow_done                   ; nothing to multiply for d <= 0
.pow_loop:
    mulss xmm0, xmm1                ; product *= n (scalar single-precision)
    sub ecx, 1
    jnz .pow_loop                   ; ecx > 0 on entry, so it reaches exactly zero
.pow_done:
    cvtss2si eax, xmm0              ; float -> int, because the format string uses %d
;
; Print using C's printf: printf(resmsg, n, d, result), pushed right to left
;
    push eax                        ; result
    push dword [ebp+d]              ; exponent, read from the argument itself
    push dword [ebp+n]              ; base
    push resmsg                     ; string format
    call printf
    add esp, 16                     ; drop only the four dwords pushed for printf
...
我正在学习如何使用 SSE,于是决定写一段简单的代码来计算 n^d,把它实现为一个由 C 程序调用的函数。
这是我的 NASM 代码:
section .data
; printf format string: three integer conversions (base, exponent, result), NUL-terminated.
resmsg: db '%d^%d = %d', 0
section .bss
section .text
extern printf
; ------------------------------------------------------------
; main: called from a C program. Reads its arguments from the stack,
; raises n to a power with scalar SSE multiplies and prints the
; outcome through printf. Only the n and d arguments are used; the
; other four are loaded into registers but never read afterwards.
; Locals: [ebp-4] = scratch for the SSE result, [ebp-8] = copy of d.
; ------------------------------------------------------------
global main
; Byte offsets of the stack arguments relative to ebp (after the
; prologue, [ebp] holds the saved ebp and [ebp+4] the return address).
T equ 8
n equ 12
d equ 16
m equ 20
Sid equ 24
Sn equ 28
main:
; ------------------------------------------------------------
; Prologue: set up the frame, reserve locals, save registers
; ------------------------------------------------------------
push ebp ; keep the caller's frame pointer
mov ebp, esp ; ebp now anchors this frame
sub esp, 8 ; two dword locals: [ebp-4] and [ebp-8]
push ebx ; ebx, esi and edi are overwritten below, so save them
push esi
push edi
; ------------------------------------------------------------
; Load the six stack arguments into registers
; ------------------------------------------------------------
mov eax, [ebp+T] ; eax = T (overwritten before it is ever read)
mov ebx, [ebp+n] ; ebx = n, kept for the printf call
mov ecx, [ebp+d] ; ecx = d, used as the loop counter
mov edx, [ebp+m] ; edx = m (unused)
mov esi, [ebp+Sid] ; esi = Sid (unused)
mov edi, [ebp+Sn] ; edi = Sn (unused)
mov [ebp-8], ecx ; remember d, because ecx is consumed by the loop
;
; Power loop: multiply xmm0 by xmm1 once per counter step.
; NOTE(review): movss copies the 32 bits at [ebp+n] unchanged and treats
; them as a single-precision float; it performs no int-to-float
; conversion. If the C caller passes n as an int, xmm0/xmm1 do not hold
; the value n - confirm the argument type.
; NOTE(review): xmm0 already starts as the base and is then multiplied
; d times, which yields base^(d+1) rather than base^d.
;
movss xmm0, [ebp+n] ; running product, initialised with the base
movss xmm1, [ebp+n] ; constant multiplier (the base)
loop: mulss xmm0, xmm1 ; product *= base (scalar single-precision)
dec ecx ; one iteration done
cmp ecx,0 ; dec has already set ZF, so this compare is redundant
jnz loop ; repeat until the counter reaches zero
;
; Move the result into eax through the local at [ebp-4].
; NOTE(review): this transfers the raw float bit pattern; eax does not
; receive the value converted to an integer, yet it is printed with %d.
;
movss [ebp-4], xmm0
mov eax, [ebp-4]
;
; printf(resmsg, n, d, result): arguments are pushed right to left
;
push eax ; 4th argument: result
mov ecx, [ebp-8] ; reload d, the loop left ecx at zero
push ecx ; 3rd argument: exponent
push ebx ; 2nd argument: base
push resmsg ; 1st argument: address of the format string
call printf
; NOTE(review): only 16 bytes (four dwords) were pushed for printf.
; Adding 24 leaves esp at ebp-12, so the pops below load edi from the
; saved-ebx slot and esi/ebx from the two locals instead of restoring
; the values saved in the prologue.
add esp, 24 ; intended to drop printf's arguments and the two locals
; ------------------------------------------------------------
; Epilogue
; ------------------------------------------------------------
pop edi ; meant to undo the prologue pushes in reverse order
pop esi
pop ebx
mov esp, ebp ; discard whatever is left of the frame
pop ebp ; back to the caller's frame pointer
ret ; return to the C caller
我期望它打印出:
4^2 = 16
但实际得到的却是:
4^2 = 0
我花了整个下午想解决这个问题,却始终找不到原因。你有什么提示吗?
编辑:
考虑到可能是格式问题,我尝试用下面的代码来转换数据:
movss [ebp-4], xmm0
fld dword [ebp-4] ; pushes the dword at [ebp-4] onto the x87 register stack; memory and eax are left unchanged
mov eax, dword [ebp-4] ; eax receives the raw 32 bits stored at [ebp-4], exactly as without the fld
而不是
movss [ebp-4], xmm0 ; store the single-precision result in the local at [ebp-4]
mov eax, [ebp-4] ; eax = the same 32 bits, copied without any float-to-int conversion
但我得到了相同的结果。
MOVSS 移动的是单精度浮点数(32 位),它只会原样复制位模式,不做任何类型转换。我假设 n 是一个整数,那么就不能用 MOVSS 把它加载到 XMM 寄存器中,请改用 CVTSI2SS。
printf 无法直接接收单精度浮点数:在 C 中,传给可变参数函数的 float 会被编译器提升为 double。因此这里用 CVTSS2SI 把结果转换回整数、再用 %d 打印会更方便。所以代码应该是这样的:
...
;
; pow: result = n^d, computed as 1.0 multiplied by n, d times.
; The product starts at 1.0 rather than n so that d = 0 and d = 1 work:
; pre-decrementing the counter before a jnz-terminated loop would let it
; reach 0 or -1 before the first test and spin for about 2^32 iterations.
; A d <= 0 skips the loop and prints 1; negative exponents are not
; supported by this integer demo.
;
    mov ecx, [ebp+d]                ; ecx = exponent, reloaded so the block does not depend on earlier register state
    mov eax, 1
    cvtsi2ss xmm0, eax              ; xmm0 = 1.0f, the running product
    cvtsi2ss xmm1, dword [ebp+n]    ; xmm1 = (float)n: a real int-to-float conversion, not a bit copy
    test ecx, ecx
    jle .pow_done                   ; nothing to multiply for d <= 0
.pow_loop:
    mulss xmm0, xmm1                ; product *= n (scalar single-precision)
    sub ecx, 1
    jnz .pow_loop                   ; ecx > 0 on entry, so it reaches exactly zero
.pow_done:
    cvtss2si eax, xmm0              ; float -> int, because the format string uses %d
;
; Print using C's printf: printf(resmsg, n, d, result), pushed right to left
;
    push eax                        ; result
    push dword [ebp+d]              ; exponent, read from the argument itself
    push dword [ebp+n]              ; base
    push resmsg                     ; string format
    call printf
    add esp, 16                     ; drop only the four dwords pushed for printf
...