如何在 MachineInstr 中获取指令?

How to get Instruction in MachineInstr?

我想知道变量在真实寄存器(如 X86 的 EAX、EBX ...)层面上的依赖关系。因此,我编写了一个 IR pass,用来识别 IR 层面的依赖关系。这个 pass 使用了我在 Value 类中新添加的两个位域变量:unsigned HasDependency : 1; 和 unsigned HasMaybeDependency : 1;
      .
      .
// Use the same type as the bitfield above so that MSVC will pack them.
unsigned IsUsedByMD : 1;
unsigned HasName : 1;
unsigned HasHungOffUses : 1;
unsigned HasDescriptor : 1;
// The next two one-bit flags are the additions made for the dependency pass.
// Presumably they back the "(Perpect)" and "(Maybe)" classifications printed
// by the pass — TODO confirm against the pass implementation.
// NOTE(review): adding these bits appears to be why the sizeof(Value)
// static_assert further down is commented out — confirm the size impact.
unsigned HasDependency : 1;
unsigned HasMaybeDependency : 1;
      .
      .
      .
// Accessors for the two dependency flags. The setters only ever raise a flag;
// no way to clear a flag is visible in this excerpt.

// Marks this value as having a dependency.
void setDependency() { HasDependency = true; }
// Marks this value as possibly having a dependency.
void setMaybeDependency() { HasMaybeDependency = true; }
// Returns whether setDependency() has been called on this value.
bool hasDependency() const { return HasDependency; }
// Returns whether setMaybeDependency() has been called on this value.
bool hasMaybeDependency() const { return HasMaybeDependency; }

  //static_assert(sizeof(Value) == 2 * sizeof(void *) + 2 * sizeof(unsigned),
  //              "Value too big");

当应用于这样的代码片段时:

// Callee that is only declared in this sample; its body lives elsewhere.
extern int foo_called(int a);

// Sample input for the dependency pass: the local `a` is tagged with the
// annotation string "xxx", and the loop feeds `a` into a call whose result is
// accumulated back into `a`. The function always returns 0.
int foo(int k)
{
    // NOTE(review): `a` is declared without an initializer and is read in the
    // loop below before anything is stored to it.
    int __attribute__((annotate("xxx"))) a;
    for (int i = 0; i < k; i++)
    {
        // `c` combines the annotated variable with the parameter.
        int c = a + k;
        // The call result is added back into the annotated variable.
        a += foo_called(c);
    }
    return 0;
}

产生这个位码:

define i32 @"?foo@@YAHH@Z"(i32 %k) local_unnamed_addr #0 {
entry:
  %a = alloca i32, align 4
  %0 = bitcast i32* %a to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #2
  call void @llvm.var.annotation(i8* nonnull %0, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.1, i32 0, i32 0), i32 17)
  %cmp7 = icmp sgt i32 %k, 0
  br i1 %cmp7, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:                                   ; preds = %entry
  %.pre = load i32, i32* %a, align 4, !tbaa !3
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %entry
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #2
  ret i32 0

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %1 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add2, %for.body ]
  %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  %add = add nsw i32 %1, %k
  %call = call i32 @"?foo_called@@YAHH@Z"(i32 %add)
  %2 = load i32, i32* %a, align 4, !tbaa !3
  %add2 = add nsw i32 %2, %call
  store i32 %add2, i32* %a, align 4, !tbaa !3
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, %k
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

declare i32 @"?foo_called@@YAHH@Z"(i32) local_unnamed_addr #3

对以上位码运行该 pass 的结果为:

Function - ?foo@@YAHH@Z
    Annotated Variable List :
        - Annotated : a(message: xxx)

    Annotated-Variable : a
        (Perpect)  %add2 = add nsw i32 %2, %call
        (Perpect)  %2 = load i32, i32* %a, align 4, !tbaa !3
        (Perpect)  %a = alloca i32, align 4
        (Perpect)  %cmp7 = icmp sgt i32 %k, 0
        (Maybe)  %exitcond = icmp eq i32 %inc, %k
        (Maybe)  %inc = add nuw nsw i32 %i.08, 1
        (Maybe)  %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
        (Perpect)  %call = call i32 @"?foo_called@@YAHH@Z"(i32 %add)
        (Perpect)  %add = add nsw i32 %1, %k
        (Perpect)  %1 = phi i32 [ %.pre, %for.body.lr.ph ], [ %add2, %for.body ]
        (Perpect)  %.pre = load i32, i32* %a, align 4, !tbaa !3

我参照 SelectionDAGISel.cpp 中的 SelectAllBasicBlocks 函数,试图在后端获取这些信息,但用下面的方法只能得到 AllocaInst、StoreInst 和 LoadInst:

// Walk every machine instruction of the function and print the IR Value that
// is attached to each of its memory operands, skipping operands without one.
for (MachineBasicBlock &Block : mf) {
  for (MachineInstr &MI : Block) {
    MachineInstr::mmo_iterator It = MI.memoperands_begin();
    const MachineInstr::mmo_iterator End = MI.memoperands_end();
    while (It != End) {
      const Value *IRValue = (*It)->getValue();
      if (IRValue)
        errs() << *IRValue << "\n";
      ++It;
    }
  }
}

我怎样才能知道 MachineInstr 与 Instruction 之间的对应关系?如果 LLVM 没有提供这样的功能,需要修改哪些部分?

这不是常规做法,而是一个技巧(hack)。不过这个方法对我非常有用。如果你知道常规的做法,请留言告诉我。

我使用 DebugLoc 解决了这个问题。DebugLoc 用来表示 .c/.cpp 源文件中的行号、列号、函数名等信息。这些信息会从 LLVM IR 开始一直保留到 MachineInstr。

所以,如果能保证 DebugLoc 在你的编译流程中没有其他用途,就可以把所需类对象的地址存放在原本保存行号的字段里。这样就可以在合适的时机把 DebugLoc 的行号转换回所需的类对象指针。(列号字段也可以使用,不过要注意列号必须小于 2^16。)注意行号字段是 32 位的 unsigned,因此这个技巧只适用于指针宽度不超过 32 位的主机。

下面详细介绍我使用的方法

修改以下文件并重新构建(Re-Build)你的项目。

LLVM 为了最大化内存效率使用了多种设计模式,因此我无法轻易修改这些类。

首先,修改 DebugLoc 的打印例程。打开 DebugLoc.cpp,像下面这样去掉打印 DIScope 的代码。这样做可以避免运行时错误(runtime error)。

void DebugLoc::print(raw_ostream &OS) const {
  if (!Loc)
    return;

  // Print source line info.
  //auto *Scope = cast<DIScope>(getScope());
  //OS << Scope->getFilename();
  OS << ':' << getLine();
  if (getCol() != 0)
    OS << ':' << getCol();

其次,修改验证器(Verifier)。可以参考下面的改动。

void Verifier::visitDILocation(const DILocation &N) {
-  AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
-           "location requires a valid scope", &N, N.getRawScope());
+  //AssertDI(N.getRawScope() && isa<DILocalScope>(N.getRawScope()),
+  //         "location requires a valid scope", &N, N.getRawScope());
   if (auto *IA = N.getRawInlinedAt())
     AssertDI(isa<DILocation>(IA), "inlined-at should be a location", &N, IA);
}

第三,要在 DebugLoc 中注册一个类对象,需要一些固定的准备步骤。为此需要创建一个初始化函数。

// File-local context and metadata node used when building the substitute
// DebugLocs; `md` is what getDebugLoc() passes as the scope argument.
// NOTE(review): this is a separate LLVMContext, not the one owning the module
// being compiled — confirm that mixing contexts is acceptable here.
static LLVMContext cnt;
static MDNode *md;

// Excerpt from the initialization function: builds a placeholder node (line
// 100, column 100, scope created from a null argument) and stores it in `md`.
md = MDNode::get(cnt, DILocation::get(cnt, 100, 100, DIScope::get(cnt, nullptr)));

最后,创建注册函数。

/// Packs \p info into a DebugLoc so that the pointer travels with an
/// instruction through the backend.
///
/// The manager's address is stored in the line field, the column is set to
/// 0xFFFF, and the file-level `md` node is passed as the scope.
///
/// \param info manager whose address is encoded; ownership is not taken here.
/// \return a DebugLoc whose line field holds the address of \p info.
static DebugLoc getDebugLoc(DependencyInstrInfoManager *info)
{
  // The line field is a plain `unsigned`. On hosts where a pointer is wider
  // than that, the address cannot round-trip, so refuse to build rather than
  // silently truncating it and crashing when the line is decoded later.
  static_assert(sizeof(DependencyInstrInfoManager *) <= sizeof(unsigned),
                "a DependencyInstrInfoManager pointer must fit in the "
                "DebugLoc line field");

  // Go through uintptr_t: converting a pointer directly to a narrower integer
  // type with reinterpret_cast is ill-formed.
  const unsigned encodedLine =
      static_cast<unsigned>(reinterpret_cast<uintptr_t>(info));
  return DebugLoc::get(encodedLine, static_cast<uint16_t>(-1), md);
}

// Attaches one more DependencyInstrInfo record to instruction `I`, creating
// the per-instruction manager on first use. The `...`, `S` and `T` below are
// placeholders for arguments elided from this excerpt.
static void setDebugLoc(Instruction *I, ...)
{
  DependencyInstrInfoManager *mgr;
  if (I->getDebugLoc()) {
    // A DebugLoc is already attached: decode its line field back into the
    // manager pointer.
    // NOTE(review): this treats any existing DebugLoc as one produced by
    // getDebugLoc(); a genuine source location would be decoded into a bogus
    // pointer — confirm real debug info is never present here.
    mgr = reinterpret_cast<DependencyInstrInfoManager *>
      (I->getDebugLoc()->getLine());
  } else {
    // First record for this instruction: allocate a manager and encode its
    // address into the instruction's DebugLoc.
    // NOTE(review): no matching delete is visible in this excerpt.
    mgr = new DependencyInstrInfoManager();
    I->setDebugLoc(getDebugLoc(mgr));
  }
  mgr->addInfo(new DependencyInstrInfo(I, S, T, ...));
}

DependencyInstrInfoManager 就是用来解决上述问题的类。

最后,你可以在 XXXMCInstLower.cpp(例如 X86MCInstLower.cpp)的 EmitInstruction() 中打印自己的信息。下面的代码是我这个案例中输出信息的示例。

void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
  X86MCInstLower MCInstLowering(*MF, *this);
  const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();

  if (MI->getDebugLoc()) {
    DependencyInstrInfoManager *mgr = reinterpret_cast<DependencyInstrInfoManager *>
      (MI->getDebugLoc()->getLine());
    mgr->doFolding();
    for (auto DI : *mgr)
      OutStreamer->AddComment(DI->getInfo());
  }

依赖标记

我用这个方法做了依赖标记。

// Sample input for the dependency-marking output shown below: two locals are
// tagged "a" and "b" through the ANNORATE macro (defined outside this
// excerpt), and each is updated from a call inside the loop.
int foo(int k)
{
  int ANNORATE("b") b = 0;
  int ANNORATE("a") a = 0;

  for (int i = 0; i < k; i++)
  {
    // `c` is derived from `a` and is passed to both calls below.
    int c = a + k;
    // `d` is derived from `b` but is not used afterwards.
    int d = b + k;
    a += foo_called(c);
    b += foo_called2(c);
  }

  // The result combines `a` with a call that takes `b`.
  return a + foo_called(b);
}

# BB#1:                                 # %for.body.preheader
movl    %esi, %ebx
.p2align    4, 0x90
LBB0_2 : # %for.body
# =>This Inner Loop Header: Depth=1
  addl  %esi, %edi              # [Perpect, Source:b]
# [Perpect, Source: a]
  pushl %edi                    # [Maybe, Source:b]
# [Perpect, Source: a]
  calll "?foo_called@@YAHH@Z"   # [Maybe, Source:b]
# [Perpect, Source: a]
  addl  $4, %esp              # [Maybe, Source:b]
# [Perpect, Source: a]
  addl  %eax, 4(%esp)
  pushl %edi                    # [Perpect, Source:b]
  calll "?foo_called2@@YAHH@Z"  # [Perpect, Source:b]
  addl  $4, %esp              # [Perpect, Source:b]
  addl  (%esp), %eax          # [Annotated, Source:b]
  movl  4(%esp), %edi           # [Perpect, Source:b]
# [Perpect, Source: a]
  decl  %ebx                    # [Maybe, Source:b]
  movl  %eax, (%esp)
  jne   LBB0_2
  jmp   LBB0_4