Capstone 反汇编器(Python 绑定)在每个偏移量上都返回相同的指令

capstone disassembler python returns same instructions on every offset

我正在尝试使用具有 python 绑定的 capstone 对 PE 文件进行反汇编。

import pefile
from capstone import *

# Raw string literal: in a normal literal "\U" starts a Unicode escape
# (a SyntaxError on Python 3) and "\f" is a form feed, so the Windows path
# would never reach pefile intact.
exe_file = r'C:\Users\Philip\file.exe'
pe = pefile.PE(exe_file)

# find text section
offset = False
for section in pe.sections:
    # section names are compared as 8-byte, NUL-padded byte strings
    if section.Name == b'.text\x00\x00\x00':
        offset = section.VirtualAddress
        break

# read the complete file from disk as raw bytes
with open(exe_file, 'rb') as f:
    code = f.read()

# start disassembling text section
md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True
if offset:
    for i in md.disasm(code, offset):
        print('0x%x:\t%s\t%s' % (i.address, i.mnemonic, i.op_str))

但它在每个偏移处都返回相同的 ASM 指令。

0x1000: dec ebp
0x1001: pop edx
0x1002: nop 
0x1003: add byte ptr [ebx], al
0x1005: add byte ptr [eax], al
0x1007: add byte ptr [eax + eax], al
0x100a: add byte ptr [eax], al
# same buffer as before; only the second argument is changed to 0x2000
for insn in md.disasm(code, 0x2000):
    print('0x%x:\t%s\t%s' % (insn.address, insn.mnemonic, insn.op_str))
0x2000: dec ebp
0x2001: pop edx
0x2002: nop 
0x2003: add byte ptr [ebx], al
0x2005: add byte ptr [eax], al
0x2007: add byte ptr [eax + eax], al
0x200a: add byte ptr [eax], al

如果我继续循环,我会得到源源不断的相同输出。

from typing import Iterable, Any, Tuple

def signal_last(it: Iterable[Any]) -> Iterable[Tuple[bool, Any]]:
    """Yield ``(is_last, item)`` pairs for every item of *it*.

    The flag is ``False`` for every item except the final one, for which it
    is ``True``. The input is consumed lazily, one item ahead of what has
    been yielded.

    Args:
        it: Any iterable; it is only iterated once.

    Yields:
        Tuples ``(is_last, item)`` in the original order. Nothing is yielded
        for an empty iterable.
    """
    iterator = iter(it)
    try:
        pending = next(iterator)
    except StopIteration:
        # Empty input. Letting StopIteration escape from a generator body is
        # converted to RuntimeError (PEP 479), so finish cleanly instead.
        return
    for value in iterator:
        # `pending` is known not to be the last item once a successor exists.
        yield False, pending
        pending = value
    yield True, pending

offset = 0x1000
while True:  # no exit condition: this keeps running until interrupted
    # every pass hands the same `code` object to md.disasm together with
    # the current value of `offset`
    for last, i in signal_last(md.disasm(code, offset)):
        print('0x%x:\t%s\t%s' % (i.address, i.mnemonic, i.op_str))
        if last:
            # next pass starts one past the address of the last instruction
            # that was decoded in this pass
            offset = i.address + 1
0xbc113:    dec ebp
0xbc114:    pop edx
0xbc115:    nop 
0xbc116:    add byte ptr [ebx], al
0xbc118:    add byte ptr [eax], al
0xbc11a:    add byte ptr [eax + eax], al
0xbc11d:    add byte ptr [eax], al
0xbc11e:    dec ebp
0xbc11f:    pop edx
0xbc120:    nop 
0xbc121:    add byte ptr [ebx], al
0xbc123:    add byte ptr [eax], al
0xbc125:    add byte ptr [eax + eax], al
0xbc128:    add byte ptr [eax], al
0xbc129:    dec ebp
0xbc12a:    pop edx
0xbc12b:    nop 
0xbc12c:    add byte ptr [ebx], al
0xbc12e:    add byte ptr [eax], al
0xbc130:    add byte ptr [eax + eax], al
0xbc133:    add byte ptr [eax], al
0xbc134:    dec ebp
0xbc135:    pop edx
0xbc136:    nop 
0xbc137:    add byte ptr [ebx], al
0xbc139:    add byte ptr [eax], al
0xbc13b:    add byte ptr [eax + eax], al
0xbc13e:    add byte ptr [eax], al
0xbc13f:    dec ebp
0xbc140:    pop edx
0xbc141:    nop 
0xbc142:    add byte ptr [ebx], al
0xbc144:    add byte ptr [eax], al
0xbc146:    add byte ptr [eax + eax], al
0xbc149:    add byte ptr [eax], al
0xbc14a:    dec ebp
0xbc14b:    pop edx
0xbc14c:    nop 
0xbc14d:    add byte ptr [ebx], al
0xbc14f:    add byte ptr [eax], al
0xbc151:    add byte ptr [eax + eax], al
0xbc154:    add byte ptr [eax], al
0xbc155:    dec ebp
0xbc156:    pop edx
0xbc157:    nop 
0xbc158:    add byte ptr [ebx], al
0xbc15a:    add byte ptr [eax], al
0xbc15c:    add byte ptr [eax + eax], al
0xbc15f:    add byte ptr [eax], al
0xbc160:    dec ebp
0xbc161:    pop edx
0xbc162:    nop 
0xbc163:    add byte ptr [ebx], al
0xbc165:    add byte ptr [eax], al
0xbc167:    add byte ptr [eax + eax], al
0xbc16a:    add byte ptr [eax], al
0xbc16b:    dec ebp
0xbc16c:    pop edx
0xbc16d:    nop 
0xbc16e:    add byte ptr [ebx], al
0xbc170:    add byte ptr [eax], al
0xbc172:    add byte ptr [eax + eax], al
0xbc175:    add byte ptr [eax], al

有谁知道我做错了什么?因为我认为 Capstone 不应该这样工作。

disasm 总是从传入的 code 缓冲区的起始处开始反汇编;第二个参数只是第一条指令的地址标签,并不会让它在缓冲区内跳转。因此,您应该传入与代码节(.text)对应的原始数据,而不是从 PE 文件开头(即 PE 头所在的位置)开始的整个文件内容:

import pefile
from capstone import *

# Raw string literal: in a normal literal "\U" starts a Unicode escape
# (a SyntaxError on Python 3) and "\f" is a form feed, so the Windows path
# would never reach pefile intact.
exe_file = r'C:\Users\Philip\file.exe'
pe = pefile.PE(exe_file)

# find text section
text_section = None
for section in pe.sections:
    # section names are compared as 8-byte, NUL-padded byte strings
    if section.Name == b'.text\x00\x00\x00':
        text_section = section
        break

if text_section is None:
    # Without a .text section there is nothing to disassemble; stop with a
    # clear message instead of failing later on an undefined name.
    raise SystemExit('no .text section found in %s' % exe_file)

# Address reported for the first instruction (the section's VirtualAddress).
offset = text_section.VirtualAddress

# Raw bytes of the section itself. The memory-mapped image is laid out by
# virtual address, so slicing it with file offsets (PointerToRawData) picks
# the wrong bytes whenever file and section alignment differ; get_data()
# returns the section's own data directly.
code = text_section.get_data()

# start disassembling text section
md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True
for i in md.disasm(code, offset):
    print('0x%x:\t%s\t%s' % (i.address, i.mnemonic, i.op_str))