在 C++ 中编译使用虚继承的类时,Clang 如何计算虚表(vtable)中 GEP 指令的索引?
How does Clang compute indices of GEP instructions in virtual tables when compiling virtual inheritance classes in C++?
我试图了解 Clang 是如何编译 C++ 中使用虚继承的类的。这是我的代码:
// test.cpp
#include <stdio.h>
int global_obj;
int *global_ptr = &global_obj;
// Polymorphic base class that declares a single virtual member function.
class A {
public:
// Returns the int that i points to; i is dereferenced without a null check.
virtual int f(int *i) { return *i; }
};
// B derives from A through virtual inheritance and adds no members of its own.
class B: virtual public A { // A is a virtual base class of B
};
// Heap-allocates a B, calls the virtual function f through the B pointer with
// the address of global_obj, and returns the result as the exit status.
// argc and argv are unused.
int main(int argc, char **argv)
{
int *ptr = &global_obj;
// The object is never deleted in this snippet.
B *pb = new B;
// B does not override f, so this virtual call dispatches to A::f.
int a = pb->f(ptr);
return a;
}
我的编译命令是:
clang -O0 -Xclang -disable-llvm-passes -Xclang -disable-O0-optnone -c -emit-llvm test.cpp
opt -mem2reg test.bc
以下是编译后的 LLVM 位码,其中 _ZN1BC1Ev 和 _ZN1AC2Ev 分别是类 B 和类 A 的构造函数编译后的结果:
%class.B = type { %class.A }
%class.A = type { i32 (...)** }
@global_obj = global i32 0, align 4
@global_ptr = global i32* @global_obj, align 8
@_ZTV1B = linkonce_odr unnamed_addr constant { [5 x i8*] } { [5 x i8*] [i8* null, i8* null, i8* null, i8* null, i8* bitcast (i32 (%class.A*, i32*)* @_ZN1A1fEPi to i8*)] }, align 8
@_ZTT1B = linkonce_odr unnamed_addr constant [2 x i8*] [i8* bitcast (i8** getelementptr inbounds ({ [5 x i8*] }, { [5 x i8*] }* @_ZTV1B, i32 0, inrange i32 0, i32 4) to i8*), i8* bitcast (i8** getelementptr inbounds ({ [5 x i8*] }, { [5 x i8*] }* @_ZTV1B, i32 0, inrange i32 0, i32 4) to i8*)], align 8
@_ZTV1A = linkonce_odr unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%class.A*, i32*)* @_ZN1A1fEPi to i8*)] }, align 8
; Function Attrs: noinline norecurse ssp uwtable
define i32 @main(i32, i8**) #0 {
%3 = call i8* @_Znwm(i64 8) #3
%4 = bitcast i8* %3 to %class.B*
call void @_ZN1BC1Ev(%class.B* %4) #4
%5 = bitcast %class.B* %4 to i8**
%6 = load i8*, i8** %5, align 8
%7 = getelementptr i8, i8* %6, i64 -32
%8 = bitcast i8* %7 to i64*
%9 = load i64, i64* %8, align 8
%10 = bitcast %class.B* %4 to i8*
%11 = getelementptr inbounds i8, i8* %10, i64 %9
%12 = bitcast i8* %11 to %class.A*
%13 = bitcast %class.A* %12 to i32 (%class.A*, i32*)***
%14 = load i32 (%class.A*, i32*)**, i32 (%class.A*, i32*)*** %13, align 8
%15 = getelementptr inbounds i32 (%class.A*, i32*)*, i32 (%class.A*, i32*)** %14, i64 0
%16 = load i32 (%class.A*, i32*)*, i32 (%class.A*, i32*)** %15, align 8
%17 = call i32 %16(%class.A* %12, i32* @global_obj)
ret i32 %17
}
; Function Attrs: nobuiltin
declare noalias i8* @_Znwm(i64) #1
; Function Attrs: noinline nounwind ssp uwtable
define linkonce_odr void @_ZN1BC1Ev(%class.B*) unnamed_addr #2 align 2 {
%2 = bitcast %class.B* %0 to %class.A*
call void @_ZN1AC2Ev(%class.A* %2) #4
%3 = bitcast %class.B* %0 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*] }, { [5 x i8*] }* @_ZTV1B, i32 0, inrange i32 0, i32 4) to i32 (...)**), i32 (...)*** %3, align 8
%4 = bitcast %class.B* %0 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*] }, { [5 x i8*] }* @_ZTV1B, i32 0, inrange i32 0, i32 4) to i32 (...)**), i32 (...)*** %4, align 8
ret void
}
; Function Attrs: noinline nounwind ssp uwtable
define linkonce_odr void @_ZN1AC2Ev(%class.A*) unnamed_addr #2 align 2 {
%2 = bitcast %class.A* %0 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %2, align 8
ret void
}
; Function Attrs: noinline nounwind ssp uwtable
define linkonce_odr i32 @_ZN1A1fEPi(%class.A*, i32*) unnamed_addr #2 align 2 {
%3 = load i32, i32* %1, align 4
ret i32 %3
}
我了解到 Clang 会为类 A 和类 B 的对象引入虚表(vtable)。
但是在深入研究 main 函数的编译结果时,我不太明白为什么 Clang 会在 main 函数中引入一个索引为 -32 的 GEP,以及下一个 GEP 中索引 %9 的值是多少。为什么它不能在编译时确定?
%7 = getelementptr i8, i8* %6, i64 -32
%8 = bitcast i8* %7 to i64*
%9 = load i64, i64* %8, align 8
%10 = bitcast %class.B* %4 to i8*
%11 = getelementptr inbounds i8, i8* %10, i64 %9
有谁知道 Clang 为什么这样做?
非常感谢您阅读我很长的问题!
虚继承基类的位置无法在编译时确定,需要推迟到运行时。虚基类偏移量(vbase 偏移量)存放在 vtable 中,因此您的代码首先加载 vtable 指针:
%5 = bitcast %class.B* %4 to i8**
%6 = load i8*, i8** %5, align 8
然后加载 vbase 偏移量(位于 vptr - 32 处的预定义位置;构造函数存入的 vptr 指向 _ZTV1B 的第 4 项,每项 8 字节,因此 vptr - 32 即第 0 项,在本例中其值为 0):
%7 = getelementptr i8, i8* %6, i64 -32
%8 = bitcast i8* %7 to i64*
%9 = load i64, i64* %8, align 8
该偏移量用于计算基类子对象的地址:
%10 = bitcast %class.B* %4 to i8*
%11 = getelementptr inbounds i8, i8* %10, i64 %9
%12 = bitcast i8* %11 to %class.A*
然后从基类的 vtable 中加载指向虚函数的指针:
%13 = bitcast %class.A* %12 to i32 (%class.A*, i32*)***
%14 = load i32 (%class.A*, i32*)**, i32 (%class.A*, i32*)*** %13, align 8
%15 = getelementptr inbounds i32 (%class.A*, i32*)*, i32 (%class.A*, i32*)** %14, i64 0
%16 = load i32 (%class.A*, i32*)*, i32 (%class.A*, i32*)** %15, align 8
最后调用它:
%17 = call i32 %16(%class.A* %12, i32* @global_obj)
您可以在 Itanium ABI 中找到有关如何组织 vtable 的更多详细信息(但请注意,胆小者不宜阅读)。
我试图了解 Clang 是如何编译 C++ 中使用虚继承的类的。这是我的代码:
// test.cpp
#include <stdio.h>
int global_obj;
int *global_ptr = &global_obj;
// Polymorphic base class that declares a single virtual member function.
class A {
public:
// Returns the int that i points to; i is dereferenced without a null check.
virtual int f(int *i) { return *i; }
};
// B derives from A through virtual inheritance and adds no members of its own.
class B: virtual public A { // A is a virtual base class of B
};
// Heap-allocates a B, calls the virtual function f through the B pointer with
// the address of global_obj, and returns the result as the exit status.
// argc and argv are unused.
int main(int argc, char **argv)
{
int *ptr = &global_obj;
// The object is never deleted in this snippet.
B *pb = new B;
// B does not override f, so this virtual call dispatches to A::f.
int a = pb->f(ptr);
return a;
}
我的编译命令是:
clang -O0 -Xclang -disable-llvm-passes -Xclang -disable-O0-optnone -c -emit-llvm test.cpp
opt -mem2reg test.bc
以下是编译后的 LLVM 位码,其中 _ZN1BC1Ev 和 _ZN1AC2Ev 分别是类 B 和类 A 的构造函数编译后的结果:
%class.B = type { %class.A }
%class.A = type { i32 (...)** }
@global_obj = global i32 0, align 4
@global_ptr = global i32* @global_obj, align 8
@_ZTV1B = linkonce_odr unnamed_addr constant { [5 x i8*] } { [5 x i8*] [i8* null, i8* null, i8* null, i8* null, i8* bitcast (i32 (%class.A*, i32*)* @_ZN1A1fEPi to i8*)] }, align 8
@_ZTT1B = linkonce_odr unnamed_addr constant [2 x i8*] [i8* bitcast (i8** getelementptr inbounds ({ [5 x i8*] }, { [5 x i8*] }* @_ZTV1B, i32 0, inrange i32 0, i32 4) to i8*), i8* bitcast (i8** getelementptr inbounds ({ [5 x i8*] }, { [5 x i8*] }* @_ZTV1B, i32 0, inrange i32 0, i32 4) to i8*)], align 8
@_ZTV1A = linkonce_odr unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (i32 (%class.A*, i32*)* @_ZN1A1fEPi to i8*)] }, align 8
; Function Attrs: noinline norecurse ssp uwtable
define i32 @main(i32, i8**) #0 {
%3 = call i8* @_Znwm(i64 8) #3
%4 = bitcast i8* %3 to %class.B*
call void @_ZN1BC1Ev(%class.B* %4) #4
%5 = bitcast %class.B* %4 to i8**
%6 = load i8*, i8** %5, align 8
%7 = getelementptr i8, i8* %6, i64 -32
%8 = bitcast i8* %7 to i64*
%9 = load i64, i64* %8, align 8
%10 = bitcast %class.B* %4 to i8*
%11 = getelementptr inbounds i8, i8* %10, i64 %9
%12 = bitcast i8* %11 to %class.A*
%13 = bitcast %class.A* %12 to i32 (%class.A*, i32*)***
%14 = load i32 (%class.A*, i32*)**, i32 (%class.A*, i32*)*** %13, align 8
%15 = getelementptr inbounds i32 (%class.A*, i32*)*, i32 (%class.A*, i32*)** %14, i64 0
%16 = load i32 (%class.A*, i32*)*, i32 (%class.A*, i32*)** %15, align 8
%17 = call i32 %16(%class.A* %12, i32* @global_obj)
ret i32 %17
}
; Function Attrs: nobuiltin
declare noalias i8* @_Znwm(i64) #1
; Function Attrs: noinline nounwind ssp uwtable
define linkonce_odr void @_ZN1BC1Ev(%class.B*) unnamed_addr #2 align 2 {
%2 = bitcast %class.B* %0 to %class.A*
call void @_ZN1AC2Ev(%class.A* %2) #4
%3 = bitcast %class.B* %0 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*] }, { [5 x i8*] }* @_ZTV1B, i32 0, inrange i32 0, i32 4) to i32 (...)**), i32 (...)*** %3, align 8
%4 = bitcast %class.B* %0 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [5 x i8*] }, { [5 x i8*] }* @_ZTV1B, i32 0, inrange i32 0, i32 4) to i32 (...)**), i32 (...)*** %4, align 8
ret void
}
; Function Attrs: noinline nounwind ssp uwtable
define linkonce_odr void @_ZN1AC2Ev(%class.A*) unnamed_addr #2 align 2 {
%2 = bitcast %class.A* %0 to i32 (...)***
store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %2, align 8
ret void
}
; Function Attrs: noinline nounwind ssp uwtable
define linkonce_odr i32 @_ZN1A1fEPi(%class.A*, i32*) unnamed_addr #2 align 2 {
%3 = load i32, i32* %1, align 4
ret i32 %3
}
我了解到 Clang 会为类 A 和类 B 的对象引入虚表(vtable)。
但是在深入研究 main 函数的编译结果时,我不太明白为什么 Clang 会在 main 函数中引入一个索引为 -32 的 GEP,以及下一个 GEP 中索引 %9 的值是多少。为什么它不能在编译时确定?
%7 = getelementptr i8, i8* %6, i64 -32
%8 = bitcast i8* %7 to i64*
%9 = load i64, i64* %8, align 8
%10 = bitcast %class.B* %4 to i8*
%11 = getelementptr inbounds i8, i8* %10, i64 %9
有谁知道 Clang 为什么这样做?
非常感谢您阅读我很长的问题!
虚继承基类的位置无法在编译时确定,需要推迟到运行时。虚基类偏移量(vbase 偏移量)存放在 vtable 中,因此您的代码首先加载 vtable 指针:
%5 = bitcast %class.B* %4 to i8**
%6 = load i8*, i8** %5, align 8
然后加载 vbase 偏移量(位于 vptr - 32 处的预定义位置;构造函数存入的 vptr 指向 _ZTV1B 的第 4 项,每项 8 字节,因此 vptr - 32 即第 0 项,在本例中其值为 0):
%7 = getelementptr i8, i8* %6, i64 -32
%8 = bitcast i8* %7 to i64*
%9 = load i64, i64* %8, align 8
该偏移量用于计算基类子对象的地址:
%10 = bitcast %class.B* %4 to i8*
%11 = getelementptr inbounds i8, i8* %10, i64 %9
%12 = bitcast i8* %11 to %class.A*
然后从基类的 vtable 中加载指向虚函数的指针:
%13 = bitcast %class.A* %12 to i32 (%class.A*, i32*)***
%14 = load i32 (%class.A*, i32*)**, i32 (%class.A*, i32*)*** %13, align 8
%15 = getelementptr inbounds i32 (%class.A*, i32*)*, i32 (%class.A*, i32*)** %14, i64 0
%16 = load i32 (%class.A*, i32*)*, i32 (%class.A*, i32*)** %15, align 8
最后调用它:
%17 = call i32 %16(%class.A* %12, i32* @global_obj)
您可以在 Itanium ABI 中找到有关如何组织 vtable 的更多详细信息(但请注意,胆小者不宜阅读)。