为什么 PowerPC 需要这些额外的"填充"指令,才能让一段原始机器码函数正常工作?

Why does powerpc need this additional fluff to make a raw machine code function work?

我正在为 PowerPC 开发一个简单的 JIT 编译器,我按照 https://github.com/spencertipping/jit-tutorial 中的示例来了解如何使用它。

问题是第二个例子 "jitproto.c" 中的恒等函数(identity function)无法直接移植到 PowerPC:只使用 "LWA" 和 "BLR" 两条指令时,执行只会导致段错误。

最后,我借助 SLJIT 编译器(https://github.com/linux-on-ibm-z/sljit)输出的机器码来查看自己哪里做错了,结果发现它在我认为是函数本体的指令之前还生成了 12 个指令字。

那么这些指令在做什么? 为什么我不能像在 x86 中那样直接启动函数?

可以在 PPC64 上使用 C99 编译器编译代码(在 powermac 和 power8 服务器中测试过)。

#include <stdio.h>
#include <stdlib.h>
#include <endian.h>
#include <sys/mman.h>
// Pointer to a function that takes no arguments and returns long.
typedef long (*fn0)(void);
// Pointer to a function that takes one long and returns long.
typedef long (*fn1)(long);

// Instruction stream for the identity function, dumped from SLJIT.
// Each word is annotated with the mnemonic GDB reports for it.
unsigned int code[] = {
    0x7c0802a6,  // mflr  r0            copy the link register into r0
    0xfbe1fff8,  // std   r31,-8(r1)    save r31
    0xfbc1fff0,  // std   r30,-16(r1)   save r30
    0xf8010010,  // std   r0,16(r1)     save the return address

    0x3be00000,  // li    r31,0
    0x7c7e1b78,  // mr    r30,r3        copy r3 into r30
    0xf821ff81,  // stdu  r1,-128(r1)   push a 128-byte stack frame
    0x38210080,  // addi  r1,r1,128     pop that frame again

    0xe8010010,  // ld    r0,16(r1)     reload the return address
    0xebc1fff0,  // ld    r30,-16(r1)   restore r30
    0xebe1fff8,  // ld    r31,-8(r1)    restore r31
    0x7c0803a6,  // mtlr  r0            put the return address back in LR

    0x4e800020,  // blr                 return to the caller
    0x00000000,  // padding
    0x00000000,  // padding
    0x00000000   // padding
};


/*
 * Build an executable copy of the identity-function machine code.
 *
 * Maps an anonymous read/write/execute region of 16 words, copies the first
 * 14 words of `code` into it, and stores the region's own start address in
 * its last doubleword (words 14-15). main() uses that doubleword as the
 * function pointer on big-endian builds.
 *
 * Returns the start of the mapping cast to fn1, or NULL if mmap() fails.
 * The caller releases the region with munmap(ptr, 16 * sizeof(int)).
 */
fn1 compile_identity(void) {
  const size_t size = 16 * sizeof(int);

  // allocate exec memory
  unsigned int *memory = mmap(NULL,             // address
                      size,                     // size
                      PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS,
                      -1,               // fd (not used here)
                      0);               // offset (not used here)
  if (memory == MAP_FAILED) {
    // Writing through MAP_FAILED would crash; report and bail out instead.
    perror("mmap");
    return NULL;
  }

  // copy instructions (13 instruction words plus one zero word)
  for (int i = 0; i < 14; ++i) {
    memory[i] = code[i];
  }
  // copy start address to last pointer, else it only works in ppc64le
  ((unsigned long long*)memory)[7] = (unsigned long long)memory;

#if defined(__GNUC__)
  // Make the freshly stored instructions visible to instruction fetch.
  // Needed on architectures such as PowerPC, where the instruction cache
  // is not kept coherent with data stores.
  __builtin___clear_cache((char *)memory, (char *)memory + size);
#endif

  return (fn1) memory;
}

int main() {
  void * test = compile_identity();
  //print stuff to check if its right
  printf("Pointer %p\n%p\n",test,((char*)test)[0]);
  for (int i = 0; i< 16; ++i){
    printf("INS %8x\n",((unsigned int*)test)[i]);
  }
  //load pointer containing function start address, for ppc64 BE and LE
#if __BYTE_ORDER == __BIG_ENDIAN
  fn1 f = (fn1*) ((unsigned long long*)test+7);
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  fn1 f = test;
#endif
  //test function
  printf("%d\n",f(4));
  //free exec memory
  munmap(test, 16*sizeof(int));
  return 0;
}

SLJIT 原始代码的 objdump 输出

asm.bin:     file format binary


Disassembly of section .data:

0000000000000000 <.data>:
   0:   7c 08 02 a6     lhzu    r16,2172(r2)
   4:   fb e1 ff f8     .long 0xf8ffe1fb
   8:   fb c1 ff f0     xxsel   vs39,vs31,vs56,vs39
   c:   fb a1 ff e8     .long 0xe8ffa1fb
  10:   fb 81 ff e0     lq      r6,-32272(r31)
  14:   f8 01 00 10     ps_msub f0,f0,f7,f0
  18:   3b e0 00 00     .long 0xe03b
  1c:   7c 7e 1b 78     .long 0x781b7e7c
  20:   7c 9d 23 78     .long 0x78239d7c
  24:   7c bc 2b 78     .long 0x782bbc7c
  28:   f8 21 ff 71     andi.   r31,r15,8696
  2c:   7f a3 eb 78     .long 0x78eba37f
  30:   38 21 00 90     stw     r0,8504(0)
  34:   e8 01 00 10     vmsumshm v0,v0,v0,v7
  38:   eb 81 ff e0     lq      r6,-32288(r31)
  3c:   eb a1 ff e8     .long 0xe8ffa1eb
  40:   eb c1 ff f0     psq_st  f7,491(r31),1,4
  44:   eb e1 ff f8     .long 0xf8ffe1eb
  48:   7c 08 03 a6     lhzu    r16,2172(r3)  #These two instructions should have been enough in x86
  4c:   4e 80 00 20     subfic  r0,r0,-32690  #

GDB 反汇编程序输出

   0x00003ffff7ff9000:  mflr    r0
   0x00003ffff7ff9004:  std     r31,-8(r1)
   0x00003ffff7ff9008:  std     r30,-16(r1)
   0x00003ffff7ff900c:  std     r0,16(r1)
   0x00003ffff7ff9010:  li      r31,0
   0x00003ffff7ff9014:  mr      r30,r3
   0x00003ffff7ff9018:  stdu    r1,-128(r1)
   0x00003ffff7ff901c:  addi    r1,r1,128
   0x00003ffff7ff9020:  ld      r0,16(r1)
   0x00003ffff7ff9024:  ld      r30,-16(r1)
   0x00003ffff7ff9028:  ld      r31,-8(r1)
   0x00003ffff7ff902c:  mtlr    r0
   0x00003ffff7ff9030:  blr

这些指令是按照 PPC64 ABI 的要求建立栈帧布局所必需的。参见: http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#STACK