275 lines
9.5 KiB
Diff
275 lines
9.5 KiB
Diff
From a09ea2c3534d12f194f740180e09a229e0b2200f Mon Sep 17 00:00:00 2001
|
|
From: xiongzhou4 <xiongzhou4@huawei.com>
|
|
Date: Wed, 12 Jun 2024 17:12:36 +0800
|
|
Subject: [PATCH 1/2] [AArch64] Add AArch64 support for inline.
|
|
|
|
---
|
|
bolt/include/bolt/Core/MCPlusBuilder.h | 5 +--
|
|
bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++
|
|
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 10 ++++++
|
|
bolt/test/AArch64/Inputs/inline-foo.c | 5 +++
|
|
bolt/test/AArch64/Inputs/inline-main.c | 5 +++
|
|
bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++
|
|
bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++
|
|
bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++
|
|
bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++
|
|
bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++
|
|
bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++
|
|
bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++
|
|
bolt/test/AArch64/jmp-optimization.test | 14 +++++++++
|
|
13 files changed, 136 insertions(+), 4 deletions(-)
|
|
create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c
|
|
create mode 100644 bolt/test/AArch64/Inputs/inline-main.c
|
|
create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp
|
|
create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp
|
|
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp
|
|
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
|
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
|
create mode 100644 bolt/test/AArch64/inline-debug-info.test
|
|
create mode 100644 bolt/test/AArch64/inlined-function-mixed.test
|
|
create mode 100644 bolt/test/AArch64/jmp-optimization.test
|
|
|
|
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
|
|
index db3f7e7f1..56d0228cd 100644
|
|
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
|
|
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
|
|
@@ -573,10 +573,7 @@ public:
|
|
return 0;
|
|
}
|
|
|
|
- virtual bool isPush(const MCInst &Inst) const {
|
|
- llvm_unreachable("not implemented");
|
|
- return false;
|
|
- }
|
|
+ virtual bool isPush(const MCInst &Inst) const { return false; }
|
|
|
|
/// Return the width, in bytes, of the memory access performed by \p Inst, if
|
|
/// this is a push instruction. Return zero otherwise.
|
|
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
|
|
index 8dcb8934f..67dd294fb 100644
|
|
--- a/bolt/lib/Passes/Inliner.cpp
|
|
+++ b/bolt/lib/Passes/Inliner.cpp
|
|
@@ -465,6 +465,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
|
|
<< ". Size change: " << SizeAfterInlining
|
|
<< " bytes.\n");
|
|
|
|
+      // On AArch64, skip callees containing branches that cannot be inlined:
|
|
+      //   # Indirect branches, e.g., BR.
|
|
+      //   # Branches whose target is not a block of the callee (calls).
|
|
+      if (BC.isAArch64()) {
|
|
+        auto &MIB = *BC.MIB;
|
|
+        bool Skip = false;
|
|
+        for (const BinaryBasicBlock &BB : *TargetFunction) {
|
|
+          for (MCInst Inst : BB) {
|
|
+            if (MIB.isPseudo(Inst))
|
|
+              continue;
|
|
+            // Inst is a by-value copy; strip annotations from the copy only.
|
|
+            MIB.stripAnnotations(Inst, false);
|
|
+            if (!MIB.isBranch(Inst))
|
|
+              continue;
|
|
+            // Test the indirect case first so the label lookup is skipped.
|
|
+            if (MIB.isIndirectBranch(Inst) ||
|
|
+                !TargetFunction->getBasicBlockForLabel(
|
|
+                    MIB.getTargetSymbol(Inst))) {
|
|
+              Skip = true;
|
|
+              break;
|
|
+            }
|
|
+          }
|
|
+          if (Skip)
|
|
+            break;
|
|
+        }
|
|
+        if (Skip) {
|
|
+          ++InstIt;
|
|
+          continue;
|
|
+        }
|
|
+      }
|
|
+
|
|
std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);
|
|
|
|
DidInlining = true;
|
|
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
|
index d109a5d52..acf21ba23 100644
|
|
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
|
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
|
@@ -34,6 +34,8 @@ public:
|
|
const MCRegisterInfo *RegInfo)
|
|
: MCPlusBuilder(Analysis, Info, RegInfo) {}
|
|
|
|
+ MCPhysReg getStackPointer() const override { return AArch64::SP; }
|
|
+
|
|
bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
|
|
CompFuncTy Comp) const override {
|
|
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
|
|
@@ -816,6 +818,14 @@ public:
|
|
|
|
int getUncondBranchEncodingSize() const override { return 28; }
|
|
|
|
+ bool createCall(MCInst &Inst, const MCSymbol *Target,
|
|
+ MCContext *Ctx) override {
|
|
+ Inst.setOpcode(AArch64::BL);
|
|
+ Inst.addOperand(MCOperand::createExpr(
|
|
+ MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
|
|
+ return true;
|
|
+ }
|
|
+
|
|
bool createTailCall(MCInst &Inst, const MCSymbol *Target,
|
|
MCContext *Ctx) override {
|
|
Inst.setOpcode(AArch64::B);
|
|
diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c
|
|
new file mode 100644
|
|
index 000000000..1307c13f2
|
|
--- /dev/null
|
|
+++ b/bolt/test/AArch64/Inputs/inline-foo.c
|
|
@@ -0,0 +1,5 @@
|
|
+#include "stub.h"
|
|
+
|
|
+void foo() {
|
|
+ puts("Hello world!\n");
|
|
+}
|
|
diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c
|
|
new file mode 100644
|
|
index 000000000..7853d2b63
|
|
--- /dev/null
|
|
+++ b/bolt/test/AArch64/Inputs/inline-main.c
|
|
@@ -0,0 +1,5 @@
|
|
+extern void foo();
|
|
+int main() {
|
|
+ foo();
|
|
+ return 0;
|
|
+}
|
|
diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp
|
|
new file mode 100644
|
|
index 000000000..a6ff9e262
|
|
--- /dev/null
|
|
+++ b/bolt/test/AArch64/Inputs/inlined.cpp
|
|
@@ -0,0 +1,23 @@
|
|
+extern "C" int printf(const char*, ...);
|
|
+extern const char* question();
|
|
+
|
|
+inline int answer() __attribute__((always_inline));
|
|
+inline int answer() { return 42; }
|
|
+
|
|
+int main(int argc, char *argv[]) {
|
|
+ int ans;
|
|
+ if (argc == 1) {
|
|
+ ans = 0;
|
|
+ } else {
|
|
+ ans = argc;
|
|
+ }
|
|
+ printf("%s\n", question());
|
|
+ for (int i = 0; i < 10; ++i) {
|
|
+ int x = answer();
|
|
+ int y = answer();
|
|
+ ans += x - y;
|
|
+ }
|
|
+ // padding to make sure question() is inlineable
|
|
+ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;");
|
|
+ return ans;
|
|
+}
|
|
diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp
|
|
new file mode 100644
|
|
index 000000000..edb7ab145
|
|
--- /dev/null
|
|
+++ b/bolt/test/AArch64/Inputs/inlinee.cpp
|
|
@@ -0,0 +1,3 @@
|
|
+const char* question() {
|
|
+ return "What do you get if you multiply six by nine?";
|
|
+}
|
|
diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp
|
|
new file mode 100644
|
|
index 000000000..cd6d53c35
|
|
--- /dev/null
|
|
+++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp
|
|
@@ -0,0 +1,7 @@
|
|
+int g();
|
|
+
|
|
+int main() {
|
|
+ int x = g();
|
|
+ int y = x*x;
|
|
+ return y;
|
|
+}
|
|
diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
|
new file mode 100644
|
|
index 000000000..80b853d63
|
|
--- /dev/null
|
|
+++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
|
@@ -0,0 +1,3 @@
|
|
+int f() {
|
|
+ return 0;
|
|
+}
|
|
diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
|
new file mode 100644
|
|
index 000000000..7fb551163
|
|
--- /dev/null
|
|
+++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
|
@@ -0,0 +1,3 @@
|
|
+int f();
|
|
+
|
|
+int g() { return f(); }
|
|
diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test
|
|
new file mode 100644
|
|
index 000000000..e20e5e31e
|
|
--- /dev/null
|
|
+++ b/bolt/test/AArch64/inline-debug-info.test
|
|
@@ -0,0 +1,20 @@
|
|
+## Check that BOLT correctly prints and updates debug info for inlined
|
|
+## functions.
|
|
+
|
|
+# REQUIRES: system-linux
|
|
+
|
|
+# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
|
|
+# RUN: -I%p/../Inputs -o %t.exe -Wl,-q
|
|
+# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \
|
|
+# RUN: --print-only=main --print-after-lowering --force-inline=foo \
|
|
+# RUN: -o %t.bolt \
|
|
+# RUN: | FileCheck %s
|
|
+
|
|
+## The call to puts() should come from inline-foo.c (AArch64 calls use BL):
|
|
+# CHECK: bl {{.*}} # debug line {{.*}}inline-foo.c:4:3
|
|
+
|
|
+# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \
|
|
+# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP
|
|
+
|
|
+## Dump of main() should include debug info from inline-foo.c after inlining:
|
|
+# CHECK-OBJDUMP: inline-foo.c:4
|
|
diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test
|
|
new file mode 100644
|
|
index 000000000..5a87bdde9
|
|
--- /dev/null
|
|
+++ b/bolt/test/AArch64/inlined-function-mixed.test
|
|
@@ -0,0 +1,11 @@
|
|
+# Make sure inlining from a unit with debug info into a unit without
|
|
+# debug info does not cause a crash.
|
|
+
|
|
+RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o
|
|
+RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g
|
|
+RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t
|
|
+
|
|
+RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \
|
|
+RUN: --inline-small-functions --force-inline=main | FileCheck %s
|
|
+
|
|
+CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten
|
|
diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test
|
|
new file mode 100644
|
|
index 000000000..92f4b9a14
|
|
--- /dev/null
|
|
+++ b/bolt/test/AArch64/jmp-optimization.test
|
|
@@ -0,0 +1,14 @@
|
|
+# Tests the optimization of functions that just do a tail call in the beginning.
|
|
+
|
|
+# This test has commands that rely on shell capabilities that won't execute
|
|
+# correctly on Windows e.g. unsupported brace expansion
|
|
+REQUIRES: shell
|
|
+
|
|
+RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
|
|
+RUN: llvm-bolt -inline-small-functions %t -o %t.bolt
|
|
+RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
|
|
+
|
|
+CHECK: <main>:
|
|
+CHECK-NOT: bl
|
|
+CHECK: mov w0, {{wzr|#0x0}}
|
|
+CHECK: ret
|
|
--
|
|
2.33.0
|
|
|