为什么 PowerPC 需要这些额外的指令,才能让一段原始机器码函数正常工作?
Why does powerpc need this additional fluff to make a raw machine code function work?
我正在为 PowerPC 开发一个简单的 JIT 编译器,我按照 https://github.com/spencertipping/jit-tutorial 中的示例来了解如何使用它。
问题是,第二个示例 "jitproto.c" 中的恒等函数(identity function)无法直接移植到 PowerPC:只使用 "LWA" 和 "BLR" 两条指令时,执行就会触发段错误。
最后我使用了SLJIT编译器(https://github.com/linux-on-ibm-z/sljit)的机器码输出来查看我做错了什么,
我看到它在我认为的函数之前生成了 12 个指令字。
那么这些指令在做什么?
为什么我不能像在 x86 中那样直接启动函数?
可以在 PPC64 上使用 C99 编译器编译代码(在 powermac 和 power8 服务器中测试过)。
#include <stdio.h>
#include <stdlib.h>
#include <endian.h>
#include <sys/mman.h>
/* Pointer to a generated function taking no argument and returning long. */
typedef long (*fn0)(void);
/* Pointer to a generated function taking one long and returning long. */
typedef long (*fn1)(long);

/*
 * Instruction stream for the identity function, dumped from SLJIT.
 * The mnemonics are the ones GDB shows for these 13 words. The three
 * trailing zero words are padding; compile_identity() overwrites the
 * last two of them (words 14 and 15) with the start address of the code.
 */
unsigned int code[] = {
    /* save state */
    0x7c0802a6, /* mflr r0          */
    0xfbe1fff8, /* std  r31,-8(r1)  */
    0xfbc1fff0, /* std  r30,-16(r1) */
    0xf8010010, /* std  r0,16(r1)   */
    /* body */
    0x3be00000, /* li   r31,0       */
    0x7c7e1b78, /* mr   r30,r3      */
    /* open and close the 128-byte stack frame */
    0xf821ff81, /* stdu r1,-128(r1) */
    0x38210080, /* addi r1,r1,128   */
    /* restore state */
    0xe8010010, /* ld   r0,16(r1)   */
    0xebc1fff0, /* ld   r30,-16(r1) */
    0xebe1fff8, /* ld   r31,-8(r1)  */
    /* return */
    0x7c0803a6, /* mtlr r0          */
    0x4e800020, /* blr              */
    /* padding */
    0x00000000,
    0x00000000,
    0x00000000
};
/*
 * Copies the identity-function instruction stream into a fresh
 * read/write/execute mapping and returns a pointer to it.
 *
 * Layout of the 64-byte mapping (16 words of 4 bytes):
 *   words 0..13  - copied from code[]
 *   words 14..15 - one 64-bit slot holding the start address of the mapping
 *
 * Returns the start of the mapping cast to fn1, or NULL if mmap() fails.
 * The caller releases the region with munmap(ptr, 16*sizeof(int)).
 */
fn1 compile_identity(void) {
    enum {
        REGION_WORDS = 16, /* total mapping size in 32-bit words */
        INSN_WORDS = 14,   /* words copied from code[] */
        ENTRY_SLOT = 7     /* 64-bit slot index == words 14..15 */
    };
    const size_t region_bytes = REGION_WORDS * sizeof(int);

    //allocate exec memory
    unsigned int *memory = mmap(NULL,         // address
                                region_bytes, // size
                                PROT_READ | PROT_WRITE | PROT_EXEC,
                                MAP_PRIVATE | MAP_ANONYMOUS,
                                -1,           // fd (not used here)
                                0);           // offset (not used here)
    if (memory == MAP_FAILED) {
        /* Writing through MAP_FAILED would crash; report and bail out. */
        perror("mmap");
        return NULL;
    }

    //copy instructions
    for (int i = 0; i < INSN_WORDS; ++i) {
        memory[i] = code[i];
    }

    // Store the start address in the last 64-bit slot; without it the
    // function only works on ppc64le. main() calls through this slot on
    // big-endian builds.
    // NOTE(review): presumably this is the entry word of an ELFv1 function
    // descriptor; only that one doubleword is written here - confirm.
    ((unsigned long long *)memory)[ENTRY_SLOT] = (unsigned long long)memory;

#if defined(__GNUC__)
    /* Make the freshly written instructions visible to instruction fetch. */
    __builtin___clear_cache((char *)memory, (char *)memory + region_bytes);
#endif

    return (fn1)memory;
}
int main() {
void * test = compile_identity();
//print stuff to check if its right
printf("Pointer %p\n%p\n",test,((char*)test)[0]);
for (int i = 0; i< 16; ++i){
printf("INS %8x\n",((unsigned int*)test)[i]);
}
//load pointer containing function start address, for ppc64 BE and LE
#if __BYTE_ORDER == __BIG_ENDIAN
fn1 f = (fn1*) ((unsigned long long*)test+7);
#elif __BYTE_ORDER == __LITTLE_ENDIAN
fn1 f = test;
#endif
//test function
printf("%d\n",f(4));
//free exec memory
munmap(test, 16*sizeof(int));
return 0;
}
SLJIT 原始代码的 objdump 输出
asm.bin: file format binary
Disassembly of section .data:
0000000000000000 <.data>:
0: 7c 08 02 a6 lhzu r16,2172(r2)
4: fb e1 ff f8 .long 0xf8ffe1fb
8: fb c1 ff f0 xxsel vs39,vs31,vs56,vs39
c: fb a1 ff e8 .long 0xe8ffa1fb
10: fb 81 ff e0 lq r6,-32272(r31)
14: f8 01 00 10 ps_msub f0,f0,f7,f0
18: 3b e0 00 00 .long 0xe03b
1c: 7c 7e 1b 78 .long 0x781b7e7c
20: 7c 9d 23 78 .long 0x78239d7c
24: 7c bc 2b 78 .long 0x782bbc7c
28: f8 21 ff 71 andi. r31,r15,8696
2c: 7f a3 eb 78 .long 0x78eba37f
30: 38 21 00 90 stw r0,8504(0)
34: e8 01 00 10 vmsumshm v0,v0,v0,v7
38: eb 81 ff e0 lq r6,-32288(r31)
3c: eb a1 ff e8 .long 0xe8ffa1eb
40: eb c1 ff f0 psq_st f7,491(r31),1,4
44: eb e1 ff f8 .long 0xf8ffe1eb
48: 7c 08 03 a6 lhzu r16,2172(r3) #These two instructions should have been enough in x86
4c: 4e 80 00 20 subfic r0,r0,-32690 #
GDB 反汇编程序输出
0x00003ffff7ff9000: mflr r0
0x00003ffff7ff9004: std r31,-8(r1)
0x00003ffff7ff9008: std r30,-16(r1)
0x00003ffff7ff900c: std r0,16(r1)
0x00003ffff7ff9010: li r31,0
0x00003ffff7ff9014: mr r30,r3
0x00003ffff7ff9018: stdu r1,-128(r1)
0x00003ffff7ff901c: addi r1,r1,128
0x00003ffff7ff9020: ld r0,16(r1)
0x00003ffff7ff9024: ld r30,-16(r1)
0x00003ffff7ff9028: ld r31,-8(r1)
0x00003ffff7ff902c: mtlr r0
0x00003ffff7ff9030: blr
这些指令是按照 PPC64 ABI 建立栈帧布局所必需的:函数序言保存链接寄存器(LR)以及 r30、r31,并分配 128 字节的栈帧;函数尾声再把它们恢复并返回。参见:
http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#STACK
我正在为 PowerPC 开发一个简单的 JIT 编译器,我按照 https://github.com/spencertipping/jit-tutorial 中的示例来了解如何使用它。
问题是,第二个示例 "jitproto.c" 中的恒等函数(identity function)无法直接移植到 PowerPC:只使用 "LWA" 和 "BLR" 两条指令时,执行就会触发段错误。
最后我使用了SLJIT编译器(https://github.com/linux-on-ibm-z/sljit)的机器码输出来查看我做错了什么, 我看到它在我认为的函数之前生成了 12 个指令字。
那么这些指令在做什么? 为什么我不能像在 x86 中那样直接启动函数?
可以在 PPC64 上使用 C99 编译器编译代码(在 powermac 和 power8 服务器中测试过)。
#include <stdio.h>
#include <stdlib.h>
#include <endian.h>
#include <sys/mman.h>
/* Pointer to a generated function taking no argument and returning long. */
typedef long (*fn0)(void);
/* Pointer to a generated function taking one long and returning long. */
typedef long (*fn1)(long);

/*
 * Instruction stream for the identity function, dumped from SLJIT.
 * The mnemonics are the ones GDB shows for these 13 words. The three
 * trailing zero words are padding; compile_identity() overwrites the
 * last two of them (words 14 and 15) with the start address of the code.
 */
unsigned int code[] = {
    /* save state */
    0x7c0802a6, /* mflr r0          */
    0xfbe1fff8, /* std  r31,-8(r1)  */
    0xfbc1fff0, /* std  r30,-16(r1) */
    0xf8010010, /* std  r0,16(r1)   */
    /* body */
    0x3be00000, /* li   r31,0       */
    0x7c7e1b78, /* mr   r30,r3      */
    /* open and close the 128-byte stack frame */
    0xf821ff81, /* stdu r1,-128(r1) */
    0x38210080, /* addi r1,r1,128   */
    /* restore state */
    0xe8010010, /* ld   r0,16(r1)   */
    0xebc1fff0, /* ld   r30,-16(r1) */
    0xebe1fff8, /* ld   r31,-8(r1)  */
    /* return */
    0x7c0803a6, /* mtlr r0          */
    0x4e800020, /* blr              */
    /* padding */
    0x00000000,
    0x00000000,
    0x00000000
};
/*
 * Copies the identity-function instruction stream into a fresh
 * read/write/execute mapping and returns a pointer to it.
 *
 * Layout of the 64-byte mapping (16 words of 4 bytes):
 *   words 0..13  - copied from code[]
 *   words 14..15 - one 64-bit slot holding the start address of the mapping
 *
 * Returns the start of the mapping cast to fn1, or NULL if mmap() fails.
 * The caller releases the region with munmap(ptr, 16*sizeof(int)).
 */
fn1 compile_identity(void) {
    enum {
        REGION_WORDS = 16, /* total mapping size in 32-bit words */
        INSN_WORDS = 14,   /* words copied from code[] */
        ENTRY_SLOT = 7     /* 64-bit slot index == words 14..15 */
    };
    const size_t region_bytes = REGION_WORDS * sizeof(int);

    //allocate exec memory
    unsigned int *memory = mmap(NULL,         // address
                                region_bytes, // size
                                PROT_READ | PROT_WRITE | PROT_EXEC,
                                MAP_PRIVATE | MAP_ANONYMOUS,
                                -1,           // fd (not used here)
                                0);           // offset (not used here)
    if (memory == MAP_FAILED) {
        /* Writing through MAP_FAILED would crash; report and bail out. */
        perror("mmap");
        return NULL;
    }

    //copy instructions
    for (int i = 0; i < INSN_WORDS; ++i) {
        memory[i] = code[i];
    }

    // Store the start address in the last 64-bit slot; without it the
    // function only works on ppc64le. main() calls through this slot on
    // big-endian builds.
    // NOTE(review): presumably this is the entry word of an ELFv1 function
    // descriptor; only that one doubleword is written here - confirm.
    ((unsigned long long *)memory)[ENTRY_SLOT] = (unsigned long long)memory;

#if defined(__GNUC__)
    /* Make the freshly written instructions visible to instruction fetch. */
    __builtin___clear_cache((char *)memory, (char *)memory + region_bytes);
#endif

    return (fn1)memory;
}
int main() {
void * test = compile_identity();
//print stuff to check if its right
printf("Pointer %p\n%p\n",test,((char*)test)[0]);
for (int i = 0; i< 16; ++i){
printf("INS %8x\n",((unsigned int*)test)[i]);
}
//load pointer containing function start address, for ppc64 BE and LE
#if __BYTE_ORDER == __BIG_ENDIAN
fn1 f = (fn1*) ((unsigned long long*)test+7);
#elif __BYTE_ORDER == __LITTLE_ENDIAN
fn1 f = test;
#endif
//test function
printf("%d\n",f(4));
//free exec memory
munmap(test, 16*sizeof(int));
return 0;
}
SLJIT 原始代码的 objdump 输出
asm.bin: file format binary
Disassembly of section .data:
0000000000000000 <.data>:
0: 7c 08 02 a6 lhzu r16,2172(r2)
4: fb e1 ff f8 .long 0xf8ffe1fb
8: fb c1 ff f0 xxsel vs39,vs31,vs56,vs39
c: fb a1 ff e8 .long 0xe8ffa1fb
10: fb 81 ff e0 lq r6,-32272(r31)
14: f8 01 00 10 ps_msub f0,f0,f7,f0
18: 3b e0 00 00 .long 0xe03b
1c: 7c 7e 1b 78 .long 0x781b7e7c
20: 7c 9d 23 78 .long 0x78239d7c
24: 7c bc 2b 78 .long 0x782bbc7c
28: f8 21 ff 71 andi. r31,r15,8696
2c: 7f a3 eb 78 .long 0x78eba37f
30: 38 21 00 90 stw r0,8504(0)
34: e8 01 00 10 vmsumshm v0,v0,v0,v7
38: eb 81 ff e0 lq r6,-32288(r31)
3c: eb a1 ff e8 .long 0xe8ffa1eb
40: eb c1 ff f0 psq_st f7,491(r31),1,4
44: eb e1 ff f8 .long 0xf8ffe1eb
48: 7c 08 03 a6 lhzu r16,2172(r3) #These two instructions should have been enough in x86
4c: 4e 80 00 20 subfic r0,r0,-32690 #
GDB 反汇编程序输出
0x00003ffff7ff9000: mflr r0
0x00003ffff7ff9004: std r31,-8(r1)
0x00003ffff7ff9008: std r30,-16(r1)
0x00003ffff7ff900c: std r0,16(r1)
0x00003ffff7ff9010: li r31,0
0x00003ffff7ff9014: mr r30,r3
0x00003ffff7ff9018: stdu r1,-128(r1)
0x00003ffff7ff901c: addi r1,r1,128
0x00003ffff7ff9020: ld r0,16(r1)
0x00003ffff7ff9024: ld r30,-16(r1)
0x00003ffff7ff9028: ld r31,-8(r1)
0x00003ffff7ff902c: mtlr r0
0x00003ffff7ff9030: blr
这些指令是按照 PPC64 ABI 建立栈帧布局所必需的:函数序言保存链接寄存器(LR)以及 r30、r31,并分配 128 字节的栈帧;函数尾声再把它们恢复并返回。参见: http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#STACK