如何简化引用自身作为参数的函数? (这是什么意思)
How to simplify a function referencing itself as argument ? (and what does this mean)
我经常遇到一个函数调用自己(在IDA生成的伪代码中),例如:
result = (**(__int64 (__fastcall ***)(volatile signed __int32 *))lambda)(lambda);
- 既然反汇编是
call qword ptr [rax]
,我把c伪代码翻译成result = lambda();
不就可以了吗?
- 为什么在伪代码中函数将自身作为参数?
- 当出现
lambda + 8i64
(即 call qword ptr [rax+8]
)这样的调用时会发生什么情况?
这里有更完整的上下文:
/*
 * IDA decompiler output. Reads the pointer stored in a1[1]; if it is non-null,
 * atomically decrements the 32-bit counter at byte offset 8 of the pointed-to
 * object. When that counter's previous value was 1, the first entry of the
 * function-pointer table referenced by the object's first qword is called with
 * the object itself as the only argument; the 32-bit counter at byte offset 12
 * is then decremented and, if its previous value was also 1, the table entry
 * at byte offset 8 is called the same way.
 * On every path a1[1] is set to positionOrCounter and *a1 to newPtr.
 *
 * NOTE(review): `result` is never assigned when a1[1] is null, so the returned
 * value is indeterminate on that path -- presumably the real function returns
 * nothing; confirm before relying on it.
 * NOTE(review): two counters plus two table calls looks like a reference-counted
 * control block being released; the names `lambda`, `newPtr` and
 * `positionOrCounter` are analyst guesses -- confirm.
 */
__int64 __fastcall CR_maybeParseWithLambda(_QWORD *a1, __int64 newPtr, __int64 positionOrCounter)
{
volatile signed __int32 *lambda; // rdi
__int64 result; // rax
lambda = (volatile signed __int32 *)a1[1];
if ( lambda )
{
// Adding 0xFFFFFFFF to a 32-bit value is a decrement by one; the intrinsic
// yields the value held before the addition. lambda + 2 is byte offset 8.
result = (unsigned int)_InterlockedExchangeAdd(lambda + 2, 0xFFFFFFFF);
if ( (_DWORD)result == 1 )
{
// *lambda is a pointer to a table of function pointers and **lambda is the
// table's first entry. lambda is passed as the argument, so this is not a
// function calling "itself".
result = (**(__int64 (__fastcall ***)(volatile signed __int32 *))lambda)(lambda);
// lambda + 3 is byte offset 12: a second counter, decremented the same way.
if ( _InterlockedExchangeAdd(lambda + 3, 0xFFFFFFFF) == 1 )
// Same table, entry at byte offset 8, again with lambda as the argument.
result = (*(__int64 (__fastcall **)(volatile signed __int32 *))(*(_QWORD *)lambda + 8i64))(lambda);
a1[1] = positionOrCounter;
*a1 = newPtr;
}
else
{
a1[1] = positionOrCounter;
*a1 = newPtr;
}
}
else
{
a1[1] = positionOrCounter;
*a1 = newPtr;
}
return result;
}
同样来自 IDA 的反汇编:
.text:0000000180005F70 ; __int64 __fastcall CR_maybeParseWithLambda(_QWORD *a1, __int64 newPtr, __int64 positionOrCounter)
.text:0000000180005F70 CR_maybeParseWithLambda proc near ; CODE XREF: sub_180005B10+10F↑p
.text:0000000180005F70 ; sub_180005B10+14A↑p ...
.text:0000000180005F70
.text:0000000180005F70 arg_0 = qword ptr 8
.text:0000000180005F70 arg_8 = qword ptr 10h
.text:0000000180005F70 arg_10 = qword ptr 18h
.text:0000000180005F70 arg_18 = qword ptr 20h
.text:0000000180005F70
.text:0000000180005F70 mov [rsp+arg_8], rbx
.text:0000000180005F75 mov [rsp+arg_10], rbp
.text:0000000180005F7A mov [rsp+arg_18], rsi
.text:0000000180005F7F push rdi
.text:0000000180005F80 sub rsp, 20h
.text:0000000180005F84 mov rdi, [rcx+8]
.text:0000000180005F88 mov rsi, r8
.text:0000000180005F8B mov rbp, rdx
.text:0000000180005F8E mov rbx, rcx
.text:0000000180005F91 test rdi, rdi
.text:0000000180005F94 jz short loc_180005FF3
.text:0000000180005F96
.text:0000000180005F96 loc_180005F96: ; DATA XREF: .rdata:0000000180401E74↓o
.text:0000000180005F96 ; .rdata:0000000180401E84↓o ...
.text:0000000180005F96 mov [rsp+28h+arg_0], r14
.text:0000000180005F9B or r14d, 0FFFFFFFFh
.text:0000000180005F9F mov eax, r14d
.text:0000000180005FA2 lock xadd [rdi+8], eax
.text:0000000180005FA7 cmp eax, 1
.text:0000000180005FAA jnz short loc_180005FEA
.text:0000000180005FAC mov rax, [rdi]
.text:0000000180005FAF mov rcx, rdi
.text:0000000180005FB2 call qword ptr [rax]
.text:0000000180005FB4 lock xadd [rdi+0Ch], r14d
.text:0000000180005FBA cmp r14d, 1
.text:0000000180005FBE jnz short loc_180005FC9
.text:0000000180005FC0 mov rax, [rdi]
.text:0000000180005FC3 mov rcx, rdi
.text:0000000180005FC6 call qword ptr [rax+8]
.text:0000000180005FC9
.text:0000000180005FC9 loc_180005FC9: ; CODE XREF: CR_maybeParseWithLambda+4E↑j
.text:0000000180005FC9 mov [rbx+8], rsi
.text:0000000180005FCD mov [rbx], rbp
.text:0000000180005FD0
.text:0000000180005FD0 loc_180005FD0: ; CODE XREF: CR_maybeParseWithLambda+81↓j
.text:0000000180005FD0 mov r14, [rsp+28h+arg_0]
.text:0000000180005FD5
.text:0000000180005FD5 loc_180005FD5: ; CODE XREF: CR_maybeParseWithLambda+8A↓j
.text:0000000180005FD5 ; DATA XREF: .pdata:0000000180483888↓o ...
.text:0000000180005FD5 mov rbx, [rsp+28h+arg_8]
.text:0000000180005FDA mov rbp, [rsp+28h+arg_10]
.text:0000000180005FDF mov rsi, [rsp+28h+arg_18]
.text:0000000180005FE4 add rsp, 20h
.text:0000000180005FE8 pop rdi
.text:0000000180005FE9 retn
- Since the disassembly is
call qword ptr [rax]
, can't this be simplified into result = lambda();
if I translate the pseudo code in c ?
不行。反编译器检测到传入的变量可能是被调用函数的参数。
例如,void f()
和 void f(int)
函数都使用单个 call
汇编指令调用,只不过在后一种情况下,调用者会在调用函数之前先把 int
值移入相应的寄存器。
您可以更改 lambda
的类型来避免这种情况。
- Why in the pseudo code the function is having itself as argument ?
请非常仔细地阅读汇编代码和反编译代码。 lambda
本身并不是函数指针,要从中得到函数指针,必须解引用两次。所以它可能是这样的(伪 C++ 代码):
// Pseudo-C++ sketch of the two-level indirection: A holds a pointer to B,
// and B holds the function pointer that ends up being called.
// NOTE(review): FunctionType is declared as int(int) while the call below
// passes `lambda` (an A*); treat the signature as illustrative only.
using FunctionType=int(int);
struct B{
FunctionType* functionPointer;
};
struct A{
B* b;
};
A* lambda; // the variable name is a little misleading, given this interpretation.
// NOTE(review): as written, *(*lambda).b is the B object itself, not its
// functionPointer member; presumably (*(*lambda).b).functionPointer is meant.
auto functionPointer=(*(*lambda).b);
functionPointer(lambda);
鉴于双重取消引用,B
很可能实际上是一个 vftable(尽管在这些情况下,该函数通常使用 __thiscall
约定调用)——所以代码可以这样写:
// Illustration only (not meant to compile as-is): the hidden virtual-function
// table is modelled as an explicit struct so that the shape of a virtual call
// becomes visible.
struct Base{
virtual void someFunction(){}
virtual void otherFunction(){}
};
struct Base_vftableType{ // compiler-generated
void (*someFunction)(Base*); // explicit (this) argument shown
// Second slot: with 64-bit function pointers it sits 8 bytes after the first.
void (*otherFunction)(Base*); // explicit (this) argument shown
};
struct Derived: Base{
// NOTE(review): the table pointer is written out as a Derived member for
// illustration; presumably the real hidden pointer sits at offset 0 of the
// object -- confirm for the target compiler.
Base_vftableType *vftable; // compiler-generated
void someFunction(){ /* ... */ }
};
Base_vftableType derived_vftable{ /* ... */ }; // compiler-generated vftable
Derived *a;
// the function call is something like this in pseudo-C
// (and probably how it will be displayed in IDA):
a->vftable->someFunction(a);
- What is happening when there is a call such as
lambda + 8i64
(ie., call qword ptr [rax+8]
) ?
同样,vftable 中可以有多个函数,而 +
只是用来取得表中其他函数的地址。
假设函数指针为 64 位(8 字节),+8
对应的就是表中的第二个函数。
另请参阅:c++ - How to organize vtables in IDA Pro? - Reverse Engineering Stack Exchange
我经常遇到一个函数调用自己(在IDA生成的伪代码中),例如:
result = (**(__int64 (__fastcall ***)(volatile signed __int32 *))lambda)(lambda);
- 既然反汇编是
call qword ptr [rax]
,我把c伪代码翻译成result = lambda();
不就可以了吗?
- 为什么在伪代码中函数将自身作为参数?
- 当出现
lambda + 8i64
(即 call qword ptr [rax+8]
)这样的调用时会发生什么情况?
这里有更完整的上下文:
/*
 * IDA decompiler output. Reads the pointer stored in a1[1]; if it is non-null,
 * atomically decrements the 32-bit counter at byte offset 8 of the pointed-to
 * object. When that counter's previous value was 1, the first entry of the
 * function-pointer table referenced by the object's first qword is called with
 * the object itself as the only argument; the 32-bit counter at byte offset 12
 * is then decremented and, if its previous value was also 1, the table entry
 * at byte offset 8 is called the same way.
 * On every path a1[1] is set to positionOrCounter and *a1 to newPtr.
 *
 * NOTE(review): `result` is never assigned when a1[1] is null, so the returned
 * value is indeterminate on that path -- presumably the real function returns
 * nothing; confirm before relying on it.
 * NOTE(review): two counters plus two table calls looks like a reference-counted
 * control block being released; the names `lambda`, `newPtr` and
 * `positionOrCounter` are analyst guesses -- confirm.
 */
__int64 __fastcall CR_maybeParseWithLambda(_QWORD *a1, __int64 newPtr, __int64 positionOrCounter)
{
volatile signed __int32 *lambda; // rdi
__int64 result; // rax
lambda = (volatile signed __int32 *)a1[1];
if ( lambda )
{
// Adding 0xFFFFFFFF to a 32-bit value is a decrement by one; the intrinsic
// yields the value held before the addition. lambda + 2 is byte offset 8.
result = (unsigned int)_InterlockedExchangeAdd(lambda + 2, 0xFFFFFFFF);
if ( (_DWORD)result == 1 )
{
// *lambda is a pointer to a table of function pointers and **lambda is the
// table's first entry. lambda is passed as the argument, so this is not a
// function calling "itself".
result = (**(__int64 (__fastcall ***)(volatile signed __int32 *))lambda)(lambda);
// lambda + 3 is byte offset 12: a second counter, decremented the same way.
if ( _InterlockedExchangeAdd(lambda + 3, 0xFFFFFFFF) == 1 )
// Same table, entry at byte offset 8, again with lambda as the argument.
result = (*(__int64 (__fastcall **)(volatile signed __int32 *))(*(_QWORD *)lambda + 8i64))(lambda);
a1[1] = positionOrCounter;
*a1 = newPtr;
}
else
{
a1[1] = positionOrCounter;
*a1 = newPtr;
}
}
else
{
a1[1] = positionOrCounter;
*a1 = newPtr;
}
return result;
}
同样来自 IDA 的反汇编:
.text:0000000180005F70 ; __int64 __fastcall CR_maybeParseWithLambda(_QWORD *a1, __int64 newPtr, __int64 positionOrCounter)
.text:0000000180005F70 CR_maybeParseWithLambda proc near ; CODE XREF: sub_180005B10+10F↑p
.text:0000000180005F70 ; sub_180005B10+14A↑p ...
.text:0000000180005F70
.text:0000000180005F70 arg_0 = qword ptr 8
.text:0000000180005F70 arg_8 = qword ptr 10h
.text:0000000180005F70 arg_10 = qword ptr 18h
.text:0000000180005F70 arg_18 = qword ptr 20h
.text:0000000180005F70
.text:0000000180005F70 mov [rsp+arg_8], rbx
.text:0000000180005F75 mov [rsp+arg_10], rbp
.text:0000000180005F7A mov [rsp+arg_18], rsi
.text:0000000180005F7F push rdi
.text:0000000180005F80 sub rsp, 20h
.text:0000000180005F84 mov rdi, [rcx+8]
.text:0000000180005F88 mov rsi, r8
.text:0000000180005F8B mov rbp, rdx
.text:0000000180005F8E mov rbx, rcx
.text:0000000180005F91 test rdi, rdi
.text:0000000180005F94 jz short loc_180005FF3
.text:0000000180005F96
.text:0000000180005F96 loc_180005F96: ; DATA XREF: .rdata:0000000180401E74↓o
.text:0000000180005F96 ; .rdata:0000000180401E84↓o ...
.text:0000000180005F96 mov [rsp+28h+arg_0], r14
.text:0000000180005F9B or r14d, 0FFFFFFFFh
.text:0000000180005F9F mov eax, r14d
.text:0000000180005FA2 lock xadd [rdi+8], eax
.text:0000000180005FA7 cmp eax, 1
.text:0000000180005FAA jnz short loc_180005FEA
.text:0000000180005FAC mov rax, [rdi]
.text:0000000180005FAF mov rcx, rdi
.text:0000000180005FB2 call qword ptr [rax]
.text:0000000180005FB4 lock xadd [rdi+0Ch], r14d
.text:0000000180005FBA cmp r14d, 1
.text:0000000180005FBE jnz short loc_180005FC9
.text:0000000180005FC0 mov rax, [rdi]
.text:0000000180005FC3 mov rcx, rdi
.text:0000000180005FC6 call qword ptr [rax+8]
.text:0000000180005FC9
.text:0000000180005FC9 loc_180005FC9: ; CODE XREF: CR_maybeParseWithLambda+4E↑j
.text:0000000180005FC9 mov [rbx+8], rsi
.text:0000000180005FCD mov [rbx], rbp
.text:0000000180005FD0
.text:0000000180005FD0 loc_180005FD0: ; CODE XREF: CR_maybeParseWithLambda+81↓j
.text:0000000180005FD0 mov r14, [rsp+28h+arg_0]
.text:0000000180005FD5
.text:0000000180005FD5 loc_180005FD5: ; CODE XREF: CR_maybeParseWithLambda+8A↓j
.text:0000000180005FD5 ; DATA XREF: .pdata:0000000180483888↓o ...
.text:0000000180005FD5 mov rbx, [rsp+28h+arg_8]
.text:0000000180005FDA mov rbp, [rsp+28h+arg_10]
.text:0000000180005FDF mov rsi, [rsp+28h+arg_18]
.text:0000000180005FE4 add rsp, 20h
.text:0000000180005FE8 pop rdi
.text:0000000180005FE9 retn
- Since the disassembly is
call qword ptr [rax]
, can't this be simplified into result = lambda();
if I translate the pseudo code in c ?
不行。反编译器检测到传入的变量可能是被调用函数的参数。
例如,void f()
和 void f(int)
函数都使用单个 call
汇编指令调用,只不过在后一种情况下,调用者会在调用函数之前先把 int
值移入相应的寄存器。
您可以更改 lambda
的类型来避免这种情况。
- Why in the pseudo code the function is having itself as argument ?
请非常仔细地阅读汇编代码和反编译代码。 lambda
本身并不是函数指针,要从中得到函数指针,必须解引用两次。所以它可能是这样的(伪 C++ 代码):
// Pseudo-C++ sketch of the two-level indirection: A holds a pointer to B,
// and B holds the function pointer that ends up being called.
// NOTE(review): FunctionType is declared as int(int) while the call below
// passes `lambda` (an A*); treat the signature as illustrative only.
using FunctionType=int(int);
struct B{
FunctionType* functionPointer;
};
struct A{
B* b;
};
A* lambda; // the variable name is a little misleading, given this interpretation.
// NOTE(review): as written, *(*lambda).b is the B object itself, not its
// functionPointer member; presumably (*(*lambda).b).functionPointer is meant.
auto functionPointer=(*(*lambda).b);
functionPointer(lambda);
鉴于双重取消引用,B
很可能实际上是一个 vftable(尽管在这些情况下,该函数通常使用 __thiscall
约定调用)——所以代码可以这样写:
// Illustration only (not meant to compile as-is): the hidden virtual-function
// table is modelled as an explicit struct so that the shape of a virtual call
// becomes visible.
struct Base{
virtual void someFunction(){}
virtual void otherFunction(){}
};
struct Base_vftableType{ // compiler-generated
void (*someFunction)(Base*); // explicit (this) argument shown
// Second slot: with 64-bit function pointers it sits 8 bytes after the first.
void (*otherFunction)(Base*); // explicit (this) argument shown
};
struct Derived: Base{
// NOTE(review): the table pointer is written out as a Derived member for
// illustration; presumably the real hidden pointer sits at offset 0 of the
// object -- confirm for the target compiler.
Base_vftableType *vftable; // compiler-generated
void someFunction(){ /* ... */ }
};
Base_vftableType derived_vftable{ /* ... */ }; // compiler-generated vftable
Derived *a;
// the function call is something like this in pseudo-C
// (and probably how it will be displayed in IDA):
a->vftable->someFunction(a);
- What is happening when there is a call such as
lambda + 8i64
(ie., call qword ptr [rax+8]
) ?
同样,vftable 中可以有多个函数,而 +
只是用来取得表中其他函数的地址。
假设函数指针为 64 位(8 字节),+8
对应的就是表中的第二个函数。
另请参阅:c++ - How to organize vtables in IDA Pro? - Reverse Engineering Stack Exchange