IDA pro 了解反汇编细节
IDA pro understanding disassembly specifics
目前正在尝试将旧游戏的字符串到内存复制功能从 x86 机器代码反汇编为 C++。
从下面的函数中,我看到代码会检查字符串是否大于为赋值保留的目标空间。
如果不是,则进入直接复制字符串的 else 分支。
如果是,则它从 string[*(this_dest-1) - *this_dest] 位置截断字符串。
我想知道如何使伪代码看起来更像 C++?
到目前为止,我找不到 *(this_dest-1) - *this_dest 的解决方案。
我只是用它来学习如何更好地反汇编,欢迎任何其他提示!
// Hex-Rays pseudocode for the routine at 00406660 (kept verbatim).
// Copies the bytes in [strStart_a2, strEnd_a3) to the address read from *this
// and returns `this`.
// NOTE(review): `this` is typed `char *` here, but the disassembly reads dwords
// at [ebx] and [ebx+4]; `*this` / `*(this + 1)` therefore look like two pointer
// fields of an object. Retyping `this` as a struct pointer should clean up the
// output - confirm in IDA.
char *__thiscall copyA2strToA1mem(char *this, const char *strStart_a2, char *strEnd_a3)
{
void *dest_v4; // edi
size_t strLength_v5; // eax
size_t destLength_v6; // ecx
const void *v7; // edi
void *v8; // esi
char *i; // esi
// First field: where the copy is written to.
dest_v4 = *this;
strLength_v5 = strEnd_a3 - strStart_a2;
// Second field minus first field: number of bytes currently spanned by the two fields.
destLength_v6 = *(this + 1) - *this;
if ( strEnd_a3 - strStart_a2 > destLength_v6 )// source length > destination length? (unsigned compare)
{
// Source is longer: fill the existing span, then hand each remaining
// source byte to sub_401D20 one at a time.
qmemcpy(dest_v4, strStart_a2, destLength_v6);
for ( i = &strStart_a2[*(this + 1) - *this]; i != strEnd_a3; ++i )
sub_401D20(this, *i);
}
else
{
// Source fits: copy it whole.
qmemcpy(dest_v4, strStart_a2, strLength_v5);
v7 = *(this + 1);
v8 = (strLength_v5 + *this);
// v8 is the end of the copied data, v7 the old second field.
if ( v8 != v7 )
{
// Move the single byte at the old end down to the new end, then add
// (v8 - v7) to the second field so that it equals v8.
memmove(v8, v7, 1u);
*(this + 1) += v8 - v7;
return this;
}
}
return this;
这里是汇编代码:
; copyA2strToA1mem - IDA listing, kept verbatim; only ';' comment lines were added.
; In:  ecx = object pointer, [esp+4] = strStart_a2, [esp+8] = strEnd_a3.
; Out: eax = the incoming ecx value. Both exits use 'retn 8', so the callee pops the two arguments.
00406660 copyA2strToA1mem proc near ; CODE XREF: sub_406430+1AB↑p
00406660 ; sub_409130+1CC↓p ...
00406660
00406660 strStart_a2 = dword ptr 4
00406660 strEnd_a3 = dword ptr 8
00406660
; Prologue: edx = strStart, ebp = strEnd, ebx = object pointer,
; ecx = dword at [ebx+4], edi = dword at [ebx].
00406660 000 mov edx, [esp+strStart_a2]
00406664 000 push ebx
00406665 004 push ebp
00406666 008 mov ebp, [esp+8+strEnd_a3]
0040666A 008 push esi
0040666B 00C mov ebx, ecx
0040666D 00C mov ecx, [ebx+4]
00406670 00C push edi
00406671 010 mov edi, [ebx]
; eax = strEnd - strStart (source length); ecx = [ebx+4] - [ebx] (destination length).
00406673 010 mov eax, ebp
00406675 010 sub eax, edx
00406677 010 sub ecx, edi
; 'ja' is an unsigned compare: branch when source length > destination length.
00406679 010 cmp eax, ecx
0040667B 010 mov esi, edx
0040667D 010 ja short loc_4066BA
; Source fits: copy eax bytes from esi to edi (eax/4 dwords, then the eax&3 leftover bytes).
0040667F 010 mov ecx, eax
00406681 010 mov edx, ecx
00406683 010 shr ecx, 2
00406686 010 rep movsd
00406688 010 mov ecx, edx
0040668A 010 and ecx, 3
0040668D 010 rep movsb
; esi = [ebx] + source length (end of the copied data); edi = [ebx+4]. Nothing more to do if equal.
0040668F 010 mov esi, [ebx]
00406691 010 mov edi, [ebx+4]
00406694 010 add esi, eax
00406696 010 cmp esi, edi
00406698 010 jz short loc_4066E4
; Size = edi - edi + 1, i.e. always 1: memmove(Dst = esi, Src = edi, 1).
0040669A 010 mov eax, edi
0040669C 010 sub eax, edi
0040669E 010 add eax, 1
004066A1 010 push eax ; Size
004066A2 014 push edi ; Src
004066A3 018 push esi ; Dst
004066A4 01C call memmove
004066A9 01C add esp, 0Ch
; [ebx+4] += (Dst - Src), so [ebx+4] ends up equal to Dst (assuming memmove preserved esi/edi).
004066AC 010 sub esi, edi
004066AE 010 add [ebx+4], esi
004066B1 010 pop edi
004066B2 00C pop esi
004066B3 008 pop ebp
004066B4 004 mov eax, ebx
004066B6 004 pop ebx
004066B7 000 retn 8
004066BA ; ---------------------------------------------------------------------------
004066BA
; Source is longer: copy ecx (destination length) bytes from esi to edi.
004066BA loc_4066BA: ; CODE XREF: copyA2strToA1mem+1D↑j
004066BA 010 mov eax, ecx
004066BC 010 shr ecx, 2
004066BF 010 rep movsd
004066C1 010 mov ecx, eax
004066C3 010 and ecx, 3
004066C6 010 rep movsb
; esi = strStart + ([ebx+4] - [ebx]): first source byte that was not copied.
004066C8 010 mov esi, [ebx+4]
004066CB 010 sub esi, [ebx]
004066CD 010 add esi, edx
004066CF 010 cmp esi, ebp
004066D1 010 jz short loc_4066E4
004066D3
; Loop: pass each remaining source byte to sub_401D20 with ecx = object pointer, until esi == strEnd.
004066D3 loc_4066D3: ; CODE XREF: copyA2strToA1mem+82↓j
004066D3 010 mov cl, [esi]
004066D5 010 push ecx
004066D6 014 mov ecx, ebx
004066D8 014 call sub_401D20
004066DD 010 add esi, 1
004066E0 010 cmp esi, ebp
004066E2 010 jnz short loc_4066D3
004066E4
; Common exit: restore saved registers and return the object pointer in eax.
004066E4 loc_4066E4: ; CODE XREF: copyA2strToA1mem+38↑j
004066E4 ; copyA2strToA1mem+71↑j
004066E4 010 pop edi
004066E5 00C pop esi
004066E6 008 pop ebp
004066E7 004 mov eax, ebx
004066E9 004 pop ebx
004066EA 000 retn 8
004066EA copyA2strToA1mem endp
ebx 被设置为 ecx 的初始值;如果调用约定确实是 __thiscall,那么它就是 this 指针。
[ebx](即 [ebx+0])指的是 this 中位于字节偏移 0 处的数据成员。
[ebx+4] 指的是 this 中位于字节偏移 4 处的数据成员。
这两个数据成员的使用方式,我们可以假设它们是 32 位指针,并且它们相互关联,因此它们很可能是缓冲区的开始和结束指针。
据我所知,如果我正确翻译了这段汇编,C++ 代码将类似于以下内容:
/**
 * Hand-written reconstruction of the routine at 00406660.
 *
 * Copies the bytes in [strStart, strEnd) into the object's buffer.
 *  - If the source fits in the current [bufferStart, bufferEnd) span, it is
 *    copied whole; when the copy ends before bufferEnd, the single byte at
 *    bufferEnd is moved to the new end and bufferEnd is pulled back to it.
 *  - Otherwise the existing span is filled and every remaining source byte
 *    is passed to sub_401D20() one at a time.
 *
 * @param strStart first byte of the source range
 * @param strEnd   one past the last byte of the source range
 * @return this
 *
 * NOTE(review): the one-byte memmove looks like a string terminator being
 * carried along, as in an erase(first, last) idiom - confirm against the
 * callers and against sub_401D20.
 */
SomeClass* SomeClass::copyA2strToA1mem(char *strStart, char *strEnd)
{
    // Unsigned on purpose: the disassembly compares the two lengths with
    // `ja` (unsigned above), so a signed `int` comparison would not match it.
    // The target is 32-bit x86, hence 32-bit lengths.
    unsigned int strLen = static_cast<unsigned int>(strEnd - strStart);
    unsigned int bufferLen = static_cast<unsigned int>(this->bufferEnd - this->bufferStart);

    if (strLen <= bufferLen)
    {
        qmemcpy(this->bufferStart, strStart, strLen);

        char *src = this->bufferEnd;
        char *dst = this->bufferStart + strLen;
        if (dst != src)
        {
            // The size is computed as (src - src) + 1 in the listing, i.e. always 1.
            memmove(dst, src, 1);
            // Same as bufferEnd = dst; written the way the code does it.
            this->bufferEnd += (dst - src);
        }
    }
    else
    {
        // Fill what is already there, then append the rest byte by byte.
        qmemcpy(this->bufferStart, strStart, bufferLen);
        strStart += bufferLen;
        while (strStart != strEnd)
        {
            this->sub_401D20(*strStart++);
        }
    }
    return this;
}
memmove() 有点棘手。我认为代码是在检查复制后的字符串末尾与缓冲区末尾之间是否有间隙;如果有,它实际上会把缓冲区末尾的 1 个字节移动到复制后的字符串末尾,然后让 bufferEnd 指向该字节。至于为什么这样做,谁知道呢。
目前正在尝试将旧游戏的字符串到内存复制功能从 x86 机器代码反汇编为 C++。
从下面的函数中,我看到代码会检查字符串是否大于为赋值保留的目标空间。
如果不是,则进入直接复制字符串的 else 分支。
如果是,则它从 string[*(this_dest-1) - *this_dest] 位置截断字符串。
我想知道如何使伪代码看起来更像 C++?
到目前为止,我找不到 *(this_dest-1) - *this_dest 的解决方案。
我只是用它来学习如何更好地反汇编,欢迎任何其他提示!
// Hex-Rays pseudocode for the routine at 00406660 (kept verbatim).
// Copies the bytes in [strStart_a2, strEnd_a3) to the address read from *this
// and returns `this`.
// NOTE(review): `this` is typed `char *` here, but the disassembly reads dwords
// at [ebx] and [ebx+4]; `*this` / `*(this + 1)` therefore look like two pointer
// fields of an object. Retyping `this` as a struct pointer should clean up the
// output - confirm in IDA.
char *__thiscall copyA2strToA1mem(char *this, const char *strStart_a2, char *strEnd_a3)
{
void *dest_v4; // edi
size_t strLength_v5; // eax
size_t destLength_v6; // ecx
const void *v7; // edi
void *v8; // esi
char *i; // esi
// First field: where the copy is written to.
dest_v4 = *this;
strLength_v5 = strEnd_a3 - strStart_a2;
// Second field minus first field: number of bytes currently spanned by the two fields.
destLength_v6 = *(this + 1) - *this;
if ( strEnd_a3 - strStart_a2 > destLength_v6 )// source length > destination length? (unsigned compare)
{
// Source is longer: fill the existing span, then hand each remaining
// source byte to sub_401D20 one at a time.
qmemcpy(dest_v4, strStart_a2, destLength_v6);
for ( i = &strStart_a2[*(this + 1) - *this]; i != strEnd_a3; ++i )
sub_401D20(this, *i);
}
else
{
// Source fits: copy it whole.
qmemcpy(dest_v4, strStart_a2, strLength_v5);
v7 = *(this + 1);
v8 = (strLength_v5 + *this);
// v8 is the end of the copied data, v7 the old second field.
if ( v8 != v7 )
{
// Move the single byte at the old end down to the new end, then add
// (v8 - v7) to the second field so that it equals v8.
memmove(v8, v7, 1u);
*(this + 1) += v8 - v7;
return this;
}
}
return this;
这里是汇编代码:
; copyA2strToA1mem - IDA listing, kept verbatim; only ';' comment lines were added.
; In:  ecx = object pointer, [esp+4] = strStart_a2, [esp+8] = strEnd_a3.
; Out: eax = the incoming ecx value. Both exits use 'retn 8', so the callee pops the two arguments.
00406660 copyA2strToA1mem proc near ; CODE XREF: sub_406430+1AB↑p
00406660 ; sub_409130+1CC↓p ...
00406660
00406660 strStart_a2 = dword ptr 4
00406660 strEnd_a3 = dword ptr 8
00406660
; Prologue: edx = strStart, ebp = strEnd, ebx = object pointer,
; ecx = dword at [ebx+4], edi = dword at [ebx].
00406660 000 mov edx, [esp+strStart_a2]
00406664 000 push ebx
00406665 004 push ebp
00406666 008 mov ebp, [esp+8+strEnd_a3]
0040666A 008 push esi
0040666B 00C mov ebx, ecx
0040666D 00C mov ecx, [ebx+4]
00406670 00C push edi
00406671 010 mov edi, [ebx]
; eax = strEnd - strStart (source length); ecx = [ebx+4] - [ebx] (destination length).
00406673 010 mov eax, ebp
00406675 010 sub eax, edx
00406677 010 sub ecx, edi
; 'ja' is an unsigned compare: branch when source length > destination length.
00406679 010 cmp eax, ecx
0040667B 010 mov esi, edx
0040667D 010 ja short loc_4066BA
; Source fits: copy eax bytes from esi to edi (eax/4 dwords, then the eax&3 leftover bytes).
0040667F 010 mov ecx, eax
00406681 010 mov edx, ecx
00406683 010 shr ecx, 2
00406686 010 rep movsd
00406688 010 mov ecx, edx
0040668A 010 and ecx, 3
0040668D 010 rep movsb
; esi = [ebx] + source length (end of the copied data); edi = [ebx+4]. Nothing more to do if equal.
0040668F 010 mov esi, [ebx]
00406691 010 mov edi, [ebx+4]
00406694 010 add esi, eax
00406696 010 cmp esi, edi
00406698 010 jz short loc_4066E4
; Size = edi - edi + 1, i.e. always 1: memmove(Dst = esi, Src = edi, 1).
0040669A 010 mov eax, edi
0040669C 010 sub eax, edi
0040669E 010 add eax, 1
004066A1 010 push eax ; Size
004066A2 014 push edi ; Src
004066A3 018 push esi ; Dst
004066A4 01C call memmove
004066A9 01C add esp, 0Ch
; [ebx+4] += (Dst - Src), so [ebx+4] ends up equal to Dst (assuming memmove preserved esi/edi).
004066AC 010 sub esi, edi
004066AE 010 add [ebx+4], esi
004066B1 010 pop edi
004066B2 00C pop esi
004066B3 008 pop ebp
004066B4 004 mov eax, ebx
004066B6 004 pop ebx
004066B7 000 retn 8
004066BA ; ---------------------------------------------------------------------------
004066BA
; Source is longer: copy ecx (destination length) bytes from esi to edi.
004066BA loc_4066BA: ; CODE XREF: copyA2strToA1mem+1D↑j
004066BA 010 mov eax, ecx
004066BC 010 shr ecx, 2
004066BF 010 rep movsd
004066C1 010 mov ecx, eax
004066C3 010 and ecx, 3
004066C6 010 rep movsb
; esi = strStart + ([ebx+4] - [ebx]): first source byte that was not copied.
004066C8 010 mov esi, [ebx+4]
004066CB 010 sub esi, [ebx]
004066CD 010 add esi, edx
004066CF 010 cmp esi, ebp
004066D1 010 jz short loc_4066E4
004066D3
; Loop: pass each remaining source byte to sub_401D20 with ecx = object pointer, until esi == strEnd.
004066D3 loc_4066D3: ; CODE XREF: copyA2strToA1mem+82↓j
004066D3 010 mov cl, [esi]
004066D5 010 push ecx
004066D6 014 mov ecx, ebx
004066D8 014 call sub_401D20
004066DD 010 add esi, 1
004066E0 010 cmp esi, ebp
004066E2 010 jnz short loc_4066D3
004066E4
; Common exit: restore saved registers and return the object pointer in eax.
004066E4 loc_4066E4: ; CODE XREF: copyA2strToA1mem+38↑j
004066E4 ; copyA2strToA1mem+71↑j
004066E4 010 pop edi
004066E5 00C pop esi
004066E6 008 pop ebp
004066E7 004 mov eax, ebx
004066E9 004 pop ebx
004066EA 000 retn 8
004066EA copyA2strToA1mem endp
ebx 被设置为 ecx 的初始值;如果调用约定确实是 __thiscall,那么它就是 this 指针。
[ebx](即 [ebx+0])指的是 this 中位于字节偏移 0 处的数据成员。
[ebx+4] 指的是 this 中位于字节偏移 4 处的数据成员。
这两个数据成员的使用方式,我们可以假设它们是 32 位指针,并且它们相互关联,因此它们很可能是缓冲区的开始和结束指针。
据我所知,如果我正确翻译了这段汇编,C++ 代码将类似于以下内容:
/**
 * Hand-written reconstruction of the routine at 00406660.
 *
 * Copies the bytes in [strStart, strEnd) into the object's buffer.
 *  - If the source fits in the current [bufferStart, bufferEnd) span, it is
 *    copied whole; when the copy ends before bufferEnd, the single byte at
 *    bufferEnd is moved to the new end and bufferEnd is pulled back to it.
 *  - Otherwise the existing span is filled and every remaining source byte
 *    is passed to sub_401D20() one at a time.
 *
 * @param strStart first byte of the source range
 * @param strEnd   one past the last byte of the source range
 * @return this
 *
 * NOTE(review): the one-byte memmove looks like a string terminator being
 * carried along, as in an erase(first, last) idiom - confirm against the
 * callers and against sub_401D20.
 */
SomeClass* SomeClass::copyA2strToA1mem(char *strStart, char *strEnd)
{
    // Unsigned on purpose: the disassembly compares the two lengths with
    // `ja` (unsigned above), so a signed `int` comparison would not match it.
    // The target is 32-bit x86, hence 32-bit lengths.
    unsigned int strLen = static_cast<unsigned int>(strEnd - strStart);
    unsigned int bufferLen = static_cast<unsigned int>(this->bufferEnd - this->bufferStart);

    if (strLen <= bufferLen)
    {
        qmemcpy(this->bufferStart, strStart, strLen);

        char *src = this->bufferEnd;
        char *dst = this->bufferStart + strLen;
        if (dst != src)
        {
            // The size is computed as (src - src) + 1 in the listing, i.e. always 1.
            memmove(dst, src, 1);
            // Same as bufferEnd = dst; written the way the code does it.
            this->bufferEnd += (dst - src);
        }
    }
    else
    {
        // Fill what is already there, then append the rest byte by byte.
        qmemcpy(this->bufferStart, strStart, bufferLen);
        strStart += bufferLen;
        while (strStart != strEnd)
        {
            this->sub_401D20(*strStart++);
        }
    }
    return this;
}
memmove() 有点棘手。我认为代码是在检查复制后的字符串末尾与缓冲区末尾之间是否有间隙;如果有,它实际上会把缓冲区末尾的 1 个字节移动到复制后的字符串末尾,然后让 bufferEnd 指向该字节。至于为什么这样做,谁知道呢。