From a09ea2c3534d12f194f740180e09a229e0b2200f Mon Sep 17 00:00:00 2001
From: xiongzhou4
Date: Wed, 12 Jun 2024 17:12:36 +0800
Subject: [PATCH 1/2] [AArch64] Add AArch64 support for inline.

Enable the BOLT inliner on AArch64:

- MCPlusBuilder::isPush() returns false by default instead of hitting
  llvm_unreachable.
- AArch64MCPlusBuilder implements getStackPointer() and createCall().
- Inliner::inlineCallsInFunction() skips callees that contain an indirect
  branch or a branch whose target is not a basic block of the callee.
- Add AArch64 inline tests and their inputs.

---
Review note: the AArch64 tests now check AArch64 mnemonics (bl / mov / ret)
instead of x86 ones, and the Inliner.cpp hunk was reworked. The post-image
blob hashes on the "index" lines of the edited files were not regenerated.

 bolt/include/bolt/Core/MCPlusBuilder.h        |  5 +--
 bolt/lib/Passes/Inliner.cpp                   | 29 +++++++++++++++++
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   | 10 ++++++
 bolt/test/AArch64/Inputs/inline-foo.c         |  5 +++
 bolt/test/AArch64/Inputs/inline-main.c        |  5 +++
 bolt/test/AArch64/Inputs/inlined.cpp          | 23 ++++++++++++++
 bolt/test/AArch64/Inputs/inlinee.cpp          |  3 ++
 bolt/test/AArch64/Inputs/jmp_opt.cpp          |  7 +++++
 bolt/test/AArch64/Inputs/jmp_opt2.cpp         |  3 ++
 bolt/test/AArch64/Inputs/jmp_opt3.cpp         |  3 ++
 bolt/test/AArch64/inline-debug-info.test      | 20 ++++++++++++
 bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++
 bolt/test/AArch64/jmp-optimization.test       | 14 +++++++++
 13 files changed, 134 insertions(+), 4 deletions(-)
 create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c
 create mode 100644 bolt/test/AArch64/Inputs/inline-main.c
 create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp
 create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp
 create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp
 create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp
 create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp
 create mode 100644 bolt/test/AArch64/inline-debug-info.test
 create mode 100644 bolt/test/AArch64/inlined-function-mixed.test
 create mode 100644 bolt/test/AArch64/jmp-optimization.test

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index db3f7e7f1..56d0228cd 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -573,10 +573,7 @@ public:
     return 0;
   }
 
-  virtual bool isPush(const MCInst &Inst) const {
-    llvm_unreachable("not implemented");
-    return false;
-  }
+  virtual bool isPush(const MCInst &Inst) const { return false; }
 
   /// Return the width, in bytes, of the memory access performed by \p Inst, if
   /// this is a push instruction.  Return zero otherwise.
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index 8dcb8934f..67dd294fb 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -465,6 +465,35 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
                         << ". Size change: " << SizeAfterInlining
                         << " bytes.\n");
 
+      // Skip callees containing AArch64 branches that cannot be inlined:
+      //   # Indirect branches, e.g., BR.
+      //   # Branches whose target is not a basic block of the callee, i.e.
+      //     branches used to make a function call.
+      if (BC.isAArch64()) {
+        const auto &MIB = *BC.MIB;
+        auto IsUninlinableBranch = [&](const MCInst &Inst) {
+          if (MIB.isPseudo(Inst) || !MIB.isBranch(Inst))
+            return false;
+          // Check this first so no target lookup is done for these.
+          if (MIB.isIndirectBranch(Inst))
+            return true;
+          const MCSymbol *TargetSymbol = MIB.getTargetSymbol(Inst);
+          return !TargetSymbol ||
+                 !TargetFunction->getBasicBlockForLabel(TargetSymbol);
+        };
+        auto HasUninlinableBranch = [&]() {
+          for (const BinaryBasicBlock &CalleeBB : *TargetFunction)
+            for (const MCInst &Inst : CalleeBB)
+              if (IsUninlinableBranch(Inst))
+                return true;
+          return false;
+        };
+        if (HasUninlinableBranch()) {
+          ++InstIt;
+          continue;
+        }
+      }
+
       std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);
 
       DidInlining = true;
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index d109a5d52..acf21ba23 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -34,6 +34,8 @@ public:
                        const MCRegisterInfo *RegInfo)
       : MCPlusBuilder(Analysis, Info, RegInfo) {}
 
+  MCPhysReg getStackPointer() const override { return AArch64::SP; }
+
   bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
               CompFuncTy Comp) const override {
     const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
@@ -816,6 +818,14 @@ public:
 
   int getUncondBranchEncodingSize() const override { return 28; }
 
+  bool createCall(MCInst &Inst, const MCSymbol *Target,
+                  MCContext *Ctx) override {
+    Inst.setOpcode(AArch64::BL);
+    Inst.addOperand(MCOperand::createExpr(
+        MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
+    return true;
+  }
+
   bool createTailCall(MCInst &Inst, const MCSymbol *Target,
                       MCContext *Ctx) override {
     Inst.setOpcode(AArch64::B);
diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c
new file mode 100644
index 000000000..1307c13f2
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inline-foo.c
@@ -0,0 +1,5 @@
+#include "stub.h"
+
+void foo() {
+  puts("Hello world!\n");
+}
diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c
new file mode 100644
index 000000000..7853d2b63
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inline-main.c
@@ -0,0 +1,5 @@
+extern void foo();
+int main() {
+  foo();
+  return 0;
+}
diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp
new file mode 100644
index 000000000..a6ff9e262
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inlined.cpp
@@ -0,0 +1,23 @@
+extern "C" int printf(const char*, ...);
+extern const char* question();
+
+inline int answer() __attribute__((always_inline));
+inline int answer() { return 42; }
+
+int main(int argc, char *argv[]) {
+  int ans;
+  if (argc == 1) {
+    ans = 0;
+  } else {
+    ans = argc;
+  }
+  printf("%s\n", question());
+  for (int i = 0; i < 10; ++i) {
+    int x = answer();
+    int y = answer();
+    ans += x - y;
+  }
+  // padding to make sure question() is inlineable
+  asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;");
+  return ans;
+}
diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp
new file mode 100644
index 000000000..edb7ab145
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inlinee.cpp
@@ -0,0 +1,3 @@
+const char* question() {
+  return "What do you get if you multiply six by nine?";
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp
new file mode 100644
index 000000000..cd6d53c35
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp
@@ -0,0 +1,7 @@
+int g();
+
+int main() {
+  int x = g();
+  int y = x*x;
+  return y;
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
new file mode 100644
index 000000000..80b853d63
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
@@ -0,0 +1,3 @@
+int f() {
+  return 0;
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
new file mode 100644
index 000000000..7fb551163
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
@@ -0,0 +1,3 @@
+int f();
+
+int g() { return f(); }
diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test
new file mode 100644
index 000000000..e20e5e31e
--- /dev/null
+++ b/bolt/test/AArch64/inline-debug-info.test
@@ -0,0 +1,20 @@
+## Check that BOLT correctly prints and updates debug info for inlined
+## functions.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
+# RUN:   -I%p/../Inputs -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \
+# RUN:   --print-only=main --print-after-lowering --force-inline=foo \
+# RUN:   -o %t.bolt \
+# RUN:   | FileCheck %s
+
+## The call to puts() should come from inline-foo.c:
+# CHECK: bl {{.*}} # debug line {{.*}}inline-foo.c:4:3
+
+# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \
+# RUN:   | FileCheck %s -check-prefix=CHECK-OBJDUMP
+
+## Dump of main() should include debug info from inline-foo.c after inlining:
+# CHECK-OBJDUMP: inline-foo.c:4
diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test
new file mode 100644
index 000000000..5a87bdde9
--- /dev/null
+++ b/bolt/test/AArch64/inlined-function-mixed.test
@@ -0,0 +1,11 @@
+# Make sure inlining from a unit with debug info into unit without
+# debug info does not cause a crash.
+
+RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp   -c -o %T/inlined.o
+RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp   -c -o %T/inlinee.o -g
+RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t
+
+RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \
+RUN:   --inline-small-functions --force-inline=main | FileCheck %s
+
+CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten
diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test
new file mode 100644
index 000000000..92f4b9a14
--- /dev/null
+++ b/bolt/test/AArch64/jmp-optimization.test
@@ -0,0 +1,14 @@
+# Tests the optimization of functions that just do a tail call in the beginning.
+
+# This test has commands that rely on shell capabilities that won't execute
+# correctly on Windows e.g. unsupported parameter expansion
+REQUIRES: shell
+
+RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
+RUN: llvm-bolt -inline-small-functions %t -o %t.bolt
+RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
+
+CHECK: <main>:
+CHECK-NOT: bl
+CHECK: mov w0, wzr
+CHECK: ret
-- 
2.33.0