Capstone 反汇编器(Python 绑定)在每个偏移量上都返回相同的指令
capstone disassembler python returns same instructions on every offset
我正在尝试使用具有 python 绑定的 capstone 对 PE 文件进行反汇编。
import pefile
from capstone import *
# NOTE(review): '\U' in a non-raw string literal is parsed as a Unicode
# escape on Python 3; a raw string (r'...') avoids that.
exe_file = 'C:\Users\Philip\file.exe'
pe = pefile.PE(exe_file)
# find text section
# offset stays False when no section is named '.text'
offset = False
for section in pe.sections:
    if section.Name == b'.text\x00\x00\x00':
        offset = section.VirtualAddress
        break
# Reads the entire file, so code[0] is the first byte of the file rather
# than the first byte of the .text section.
with open(exe_file, 'rb') as f:
    code = f.read()
# start disassembling text section
md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True
if offset:
    # The second argument to disasm() only sets the address reported for the
    # first instruction; decoding still begins at the start of `code`.
    for i in md.disasm(code, offset):
        print('0x%x:\t%s\t%s' % (i.address, i.mnemonic, i.op_str))
但它在每个偏移处都返回相同的 ASM 指令。
0x1000: dec ebp
0x1001: pop edx
0x1002: nop
0x1003: add byte ptr [ebx], al
0x1005: add byte ptr [eax], al
0x1007: add byte ptr [eax + eax], al
0x100a: add byte ptr [eax], al
# Same `code` buffer with a different start address: only the printed
# addresses change, the decoded instructions do not.
for i in md.disasm(code, 0x2000):
    print('0x%x:\t%s\t%s' %(i.address, i.mnemonic, i.op_str))
0x2000: dec ebp
0x2001: pop edx
0x2002: nop
0x2003: add byte ptr [ebx], al
0x2005: add byte ptr [eax], al
0x2007: add byte ptr [eax + eax], al
0x200a: add byte ptr [eax], al
如果我继续循环,我会得到源源不断的相同输出。
from typing import Iterable, Any, Tuple
def signal_last(it: Iterable[Any]) -> Iterable[Tuple[bool, Any]]:
    """Yield ``(is_last, item)`` pairs for every item of *it*.

    ``is_last`` is ``True`` only for the final item. An empty input yields
    nothing.

    Args:
        it: Any iterable; it is consumed lazily, one item ahead.

    Returns:
        A generator of ``(bool, item)`` tuples in the original order.
    """
    iterator = iter(it)
    try:
        ret_var = next(iterator)
    except StopIteration:
        # A StopIteration escaping a generator body becomes RuntimeError
        # (PEP 479), so finish cleanly on empty input instead.
        return
    # Stay one item behind so we know whether another item follows.
    for value in iterator:
        yield False, ret_var
        ret_var = value
    yield True, ret_var
offset = 0x1000
# NOTE(review): nothing in this loop breaks out of it.
while True:
    # Every pass hands the same `code` buffer to disasm(); only `offset`
    # (the address label of the first instruction) changes.
    for last, i in signal_last(md.disasm(code, offset)):
        print('0x%x:\t%s\t%s' % (i.address, i.mnemonic, i.op_str))
        if last:
            # The next pass is labelled one past the address of the last
            # instruction decoded in this pass.
            offset = i.address + 1
0xbc113: dec ebp
0xbc114: pop edx
0xbc115: nop
0xbc116: add byte ptr [ebx], al
0xbc118: add byte ptr [eax], al
0xbc11a: add byte ptr [eax + eax], al
0xbc11d: add byte ptr [eax], al
0xbc11e: dec ebp
0xbc11f: pop edx
0xbc120: nop
0xbc121: add byte ptr [ebx], al
0xbc123: add byte ptr [eax], al
0xbc125: add byte ptr [eax + eax], al
0xbc128: add byte ptr [eax], al
0xbc129: dec ebp
0xbc12a: pop edx
0xbc12b: nop
0xbc12c: add byte ptr [ebx], al
0xbc12e: add byte ptr [eax], al
0xbc130: add byte ptr [eax + eax], al
0xbc133: add byte ptr [eax], al
0xbc134: dec ebp
0xbc135: pop edx
0xbc136: nop
0xbc137: add byte ptr [ebx], al
0xbc139: add byte ptr [eax], al
0xbc13b: add byte ptr [eax + eax], al
0xbc13e: add byte ptr [eax], al
0xbc13f: dec ebp
0xbc140: pop edx
0xbc141: nop
0xbc142: add byte ptr [ebx], al
0xbc144: add byte ptr [eax], al
0xbc146: add byte ptr [eax + eax], al
0xbc149: add byte ptr [eax], al
0xbc14a: dec ebp
0xbc14b: pop edx
0xbc14c: nop
0xbc14d: add byte ptr [ebx], al
0xbc14f: add byte ptr [eax], al
0xbc151: add byte ptr [eax + eax], al
0xbc154: add byte ptr [eax], al
0xbc155: dec ebp
0xbc156: pop edx
0xbc157: nop
0xbc158: add byte ptr [ebx], al
0xbc15a: add byte ptr [eax], al
0xbc15c: add byte ptr [eax + eax], al
0xbc15f: add byte ptr [eax], al
0xbc160: dec ebp
0xbc161: pop edx
0xbc162: nop
0xbc163: add byte ptr [ebx], al
0xbc165: add byte ptr [eax], al
0xbc167: add byte ptr [eax + eax], al
0xbc16a: add byte ptr [eax], al
0xbc16b: dec ebp
0xbc16c: pop edx
0xbc16d: nop
0xbc16e: add byte ptr [ebx], al
0xbc170: add byte ptr [eax], al
0xbc172: add byte ptr [eax + eax], al
0xbc175: add byte ptr [eax], al
有谁知道我做错了什么?因为我认为 Capstone 不应该这样工作。
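`md.disasm(code, offset)` 总是从传入的 `code` 缓冲区的第一个字节开始反汇编;第二个参数只是第一条指令显示的地址,而不是缓冲区内的偏移量。因此您看到的 `dec ebp` / `pop edx` / `nop` 正是 PE 文件开头的字节 `4D 5A 90`(即 "MZ" 签名)。您应该传递与代码节对应的原始数据,而不是从 PE 头所在的文件开头开始的数据: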
import pefile
from capstone import *
# Raw string: in a normal literal '\U' starts a Unicode escape, which is a
# SyntaxError on Python 3.
exe_file = r'C:\Users\Philip\file.exe'
pe = pefile.PE(exe_file)

# find text section
offset = None  # RVA used as the address of the first disassembled instruction
code = None    # raw bytes of the .text section
for section in pe.sections:
    if section.Name == b'.text\x00\x00\x00':
        offset = section.VirtualAddress
        # get_data() returns this section's own bytes. The memory-mapped
        # image is laid out by RVA, so slicing it with file offsets
        # (PointerToRawData) picks the wrong bytes whenever the two differ.
        code = section.get_data()
        break

# start disassembling text section
md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True
# Test `code` rather than `offset` so a missing .text section is skipped
# instead of raising NameError.
if code is not None:
    # disasm() decodes from code[0]; `offset` only labels the addresses.
    for i in md.disasm(code, offset):
        print('0x%x:\t%s\t%s' % (i.address, i.mnemonic, i.op_str))
我正在尝试使用具有 python 绑定的 capstone 对 PE 文件进行反汇编。
import pefile
from capstone import *
# NOTE(review): '\U' in a non-raw string literal is parsed as a Unicode
# escape on Python 3; a raw string (r'...') avoids that.
exe_file = 'C:\Users\Philip\file.exe'
pe = pefile.PE(exe_file)
# find text section
# offset stays False when no section is named '.text'
offset = False
for section in pe.sections:
    if section.Name == b'.text\x00\x00\x00':
        offset = section.VirtualAddress
        break
# Reads the entire file, so code[0] is the first byte of the file rather
# than the first byte of the .text section.
with open(exe_file, 'rb') as f:
    code = f.read()
# start disassembling text section
md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True
if offset:
    # The second argument to disasm() only sets the address reported for the
    # first instruction; decoding still begins at the start of `code`.
    for i in md.disasm(code, offset):
        print('0x%x:\t%s\t%s' % (i.address, i.mnemonic, i.op_str))
但它在每个偏移处都返回相同的 ASM 指令。
0x1000: dec ebp
0x1001: pop edx
0x1002: nop
0x1003: add byte ptr [ebx], al
0x1005: add byte ptr [eax], al
0x1007: add byte ptr [eax + eax], al
0x100a: add byte ptr [eax], al
# Same `code` buffer with a different start address: only the printed
# addresses change, the decoded instructions do not.
for i in md.disasm(code, 0x2000):
    print('0x%x:\t%s\t%s' %(i.address, i.mnemonic, i.op_str))
0x2000: dec ebp
0x2001: pop edx
0x2002: nop
0x2003: add byte ptr [ebx], al
0x2005: add byte ptr [eax], al
0x2007: add byte ptr [eax + eax], al
0x200a: add byte ptr [eax], al
如果我继续循环,我会得到源源不断的相同输出。
from typing import Iterable, Any, Tuple
def signal_last(it: Iterable[Any]) -> Iterable[Tuple[bool, Any]]:
    """Yield ``(is_last, item)`` pairs for every item of *it*.

    ``is_last`` is ``True`` only for the final item. An empty input yields
    nothing.

    Args:
        it: Any iterable; it is consumed lazily, one item ahead.

    Returns:
        A generator of ``(bool, item)`` tuples in the original order.
    """
    iterator = iter(it)
    try:
        ret_var = next(iterator)
    except StopIteration:
        # A StopIteration escaping a generator body becomes RuntimeError
        # (PEP 479), so finish cleanly on empty input instead.
        return
    # Stay one item behind so we know whether another item follows.
    for value in iterator:
        yield False, ret_var
        ret_var = value
    yield True, ret_var
offset = 0x1000
# NOTE(review): nothing in this loop breaks out of it.
while True:
    # Every pass hands the same `code` buffer to disasm(); only `offset`
    # (the address label of the first instruction) changes.
    for last, i in signal_last(md.disasm(code, offset)):
        print('0x%x:\t%s\t%s' % (i.address, i.mnemonic, i.op_str))
        if last:
            # The next pass is labelled one past the address of the last
            # instruction decoded in this pass.
            offset = i.address + 1
0xbc113: dec ebp
0xbc114: pop edx
0xbc115: nop
0xbc116: add byte ptr [ebx], al
0xbc118: add byte ptr [eax], al
0xbc11a: add byte ptr [eax + eax], al
0xbc11d: add byte ptr [eax], al
0xbc11e: dec ebp
0xbc11f: pop edx
0xbc120: nop
0xbc121: add byte ptr [ebx], al
0xbc123: add byte ptr [eax], al
0xbc125: add byte ptr [eax + eax], al
0xbc128: add byte ptr [eax], al
0xbc129: dec ebp
0xbc12a: pop edx
0xbc12b: nop
0xbc12c: add byte ptr [ebx], al
0xbc12e: add byte ptr [eax], al
0xbc130: add byte ptr [eax + eax], al
0xbc133: add byte ptr [eax], al
0xbc134: dec ebp
0xbc135: pop edx
0xbc136: nop
0xbc137: add byte ptr [ebx], al
0xbc139: add byte ptr [eax], al
0xbc13b: add byte ptr [eax + eax], al
0xbc13e: add byte ptr [eax], al
0xbc13f: dec ebp
0xbc140: pop edx
0xbc141: nop
0xbc142: add byte ptr [ebx], al
0xbc144: add byte ptr [eax], al
0xbc146: add byte ptr [eax + eax], al
0xbc149: add byte ptr [eax], al
0xbc14a: dec ebp
0xbc14b: pop edx
0xbc14c: nop
0xbc14d: add byte ptr [ebx], al
0xbc14f: add byte ptr [eax], al
0xbc151: add byte ptr [eax + eax], al
0xbc154: add byte ptr [eax], al
0xbc155: dec ebp
0xbc156: pop edx
0xbc157: nop
0xbc158: add byte ptr [ebx], al
0xbc15a: add byte ptr [eax], al
0xbc15c: add byte ptr [eax + eax], al
0xbc15f: add byte ptr [eax], al
0xbc160: dec ebp
0xbc161: pop edx
0xbc162: nop
0xbc163: add byte ptr [ebx], al
0xbc165: add byte ptr [eax], al
0xbc167: add byte ptr [eax + eax], al
0xbc16a: add byte ptr [eax], al
0xbc16b: dec ebp
0xbc16c: pop edx
0xbc16d: nop
0xbc16e: add byte ptr [ebx], al
0xbc170: add byte ptr [eax], al
0xbc172: add byte ptr [eax + eax], al
0xbc175: add byte ptr [eax], al
有谁知道我做错了什么?因为我认为 Capstone 不应该这样工作。
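`md.disasm(code, offset)` 总是从传入的 `code` 缓冲区的第一个字节开始反汇编;第二个参数只是第一条指令显示的地址,而不是缓冲区内的偏移量。因此您看到的 `dec ebp` / `pop edx` / `nop` 正是 PE 文件开头的字节 `4D 5A 90`(即 "MZ" 签名)。您应该传递与代码节对应的原始数据,而不是从 PE 头所在的文件开头开始的数据: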
import pefile
from capstone import *
# Raw string: in a normal literal '\U' starts a Unicode escape, which is a
# SyntaxError on Python 3.
exe_file = r'C:\Users\Philip\file.exe'
pe = pefile.PE(exe_file)

# find text section
offset = None  # RVA used as the address of the first disassembled instruction
code = None    # raw bytes of the .text section
for section in pe.sections:
    if section.Name == b'.text\x00\x00\x00':
        offset = section.VirtualAddress
        # get_data() returns this section's own bytes. The memory-mapped
        # image is laid out by RVA, so slicing it with file offsets
        # (PointerToRawData) picks the wrong bytes whenever the two differ.
        code = section.get_data()
        break

# start disassembling text section
md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True
# Test `code` rather than `offset` so a missing .text section is skipped
# instead of raising NameError.
if code is not None:
    # disasm() decodes from code[0]; `offset` only labels the addresses.
    for i in md.disasm(code, offset):
        print('0x%x:\t%s\t%s' % (i.address, i.mnemonic, i.op_str))