llvm-bolt/0003-AArch64-Add-AArch64-support-for-inline.patch
rfwang07 e94a123c64 Sync patch from 2203sp4
(cherry picked from commit 814120db4bcb4fae1fe36b043cdeee2ee0d0a47e)
2024-11-22 10:45:19 +08:00

275 lines
9.5 KiB
Diff

From a09ea2c3534d12f194f740180e09a229e0b2200f Mon Sep 17 00:00:00 2001
From: xiongzhou4 <xiongzhou4@huawei.com>
Date: Wed, 12 Jun 2024 17:12:36 +0800
Subject: [PATCH 1/2] [AArch64] Add AArch64 support for inline.
---
bolt/include/bolt/Core/MCPlusBuilder.h | 5 +--
bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 10 ++++++
bolt/test/AArch64/Inputs/inline-foo.c | 5 +++
bolt/test/AArch64/Inputs/inline-main.c | 5 +++
bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++
bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++
bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++
bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++
bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++
bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++
bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++
bolt/test/AArch64/jmp-optimization.test | 14 +++++++++
13 files changed, 136 insertions(+), 4 deletions(-)
create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c
create mode 100644 bolt/test/AArch64/Inputs/inline-main.c
create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp
create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp
create mode 100644 bolt/test/AArch64/inline-debug-info.test
create mode 100644 bolt/test/AArch64/inlined-function-mixed.test
create mode 100644 bolt/test/AArch64/jmp-optimization.test
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index db3f7e7f1..56d0228cd 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -573,10 +573,7 @@ public:
return 0;
}
- virtual bool isPush(const MCInst &Inst) const {
- llvm_unreachable("not implemented");
- return false;
- }
+ virtual bool isPush(const MCInst &Inst) const { return false; }
/// Return the width, in bytes, of the memory access performed by \p Inst, if
/// this is a push instruction. Return zero otherwise.
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
index 8dcb8934f..67dd294fb 100644
--- a/bolt/lib/Passes/Inliner.cpp
+++ b/bolt/lib/Passes/Inliner.cpp
@@ -465,6 +465,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
<< ". Size change: " << SizeAfterInlining
<< " bytes.\n");
+      // Skip callees containing A64 branches that cannot be inlined:
+      //  - indirect branches, e.g. BR;
+      //  - branches that leave the callee, i.e. are used as function calls.
+      if (BC.isAArch64()) {
+        auto &MIB = *BC.MIB;
+        bool Skip = false;
+        for (const BinaryBasicBlock &BB : *TargetFunction) {
+          for (MCInst Inst : BB) { // By value: only the copy is stripped.
+            if (MIB.isPseudo(Inst))
+              continue;
+
+            MIB.stripAnnotations(Inst, false);
+            if (!MIB.isBranch(Inst))
+              continue;
+            // Look the target up only once the branch is known to be direct.
+            if (MIB.isIndirectBranch(Inst) ||
+                !TargetFunction->getBasicBlockForLabel(
+                    MIB.getTargetSymbol(Inst))) {
+              Skip = true;
+              break;
+            }
+          }
+          if (Skip)
+            break;
+        }
+        if (Skip) {
+          ++InstIt;
+          continue;
+        }
+      }
+
std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);
DidInlining = true;
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index d109a5d52..acf21ba23 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -34,6 +34,8 @@ public:
const MCRegisterInfo *RegInfo)
: MCPlusBuilder(Analysis, Info, RegInfo) {}
+ MCPhysReg getStackPointer() const override { return AArch64::SP; }
+
bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
CompFuncTy Comp) const override {
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
@@ -816,6 +818,14 @@ public:
int getUncondBranchEncodingSize() const override { return 28; }
+  bool createCall(MCInst &Inst, const MCSymbol *Target,
+                  MCContext *Ctx) override {
+    auto *Ref = MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx);
+    Inst.setOpcode(AArch64::BL); // BL: direct branch-with-link, i.e. a call.
+    Inst.addOperand(MCOperand::createExpr(Ref));
+    return true;
+  }
+
bool createTailCall(MCInst &Inst, const MCSymbol *Target,
MCContext *Ctx) override {
Inst.setOpcode(AArch64::B);
diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c
new file mode 100644
index 000000000..1307c13f2
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inline-foo.c
@@ -0,0 +1,5 @@
+#include "stub.h"
+
+void foo() {
+ puts("Hello world!\n");
+}
diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c
new file mode 100644
index 000000000..7853d2b63
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inline-main.c
@@ -0,0 +1,5 @@
+extern void foo();
+int main() {
+ foo();
+ return 0;
+}
diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp
new file mode 100644
index 000000000..a6ff9e262
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inlined.cpp
@@ -0,0 +1,23 @@
+extern "C" int printf(const char*, ...);
+extern const char* question();
+
+inline int answer() __attribute__((always_inline));
+inline int answer() { return 42; }
+
+int main(int argc, char *argv[]) {
+ int ans;
+ if (argc == 1) {
+ ans = 0;
+ } else {
+ ans = argc;
+ }
+ printf("%s\n", question());
+ for (int i = 0; i < 10; ++i) {
+ int x = answer();
+ int y = answer();
+ ans += x - y;
+ }
+ // padding to make sure question() is inlineable
+ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;");
+ return ans;
+}
diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp
new file mode 100644
index 000000000..edb7ab145
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/inlinee.cpp
@@ -0,0 +1,3 @@
+const char* question() {
+ return "What do you get if you multiply six by nine?";
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp
new file mode 100644
index 000000000..cd6d53c35
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp
@@ -0,0 +1,7 @@
+int g();
+
+int main() {
+ int x = g();
+ int y = x*x;
+ return y;
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
new file mode 100644
index 000000000..80b853d63
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
@@ -0,0 +1,3 @@
+int f() {
+ return 0;
+}
diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
new file mode 100644
index 000000000..7fb551163
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
@@ -0,0 +1,3 @@
+int f();
+
+int g() { return f(); }
diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test
new file mode 100644
index 000000000..e20e5e31e
--- /dev/null
+++ b/bolt/test/AArch64/inline-debug-info.test
@@ -0,0 +1,20 @@
+## Check that BOLT correctly prints and updates debug info for inlined
+## functions.
+
+# REQUIRES: system-linux
+
+# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
+# RUN:   -I%p/../Inputs -o %t.exe -Wl,-q
+# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \
+# RUN:   --print-only=main --print-after-lowering --force-inline=foo \
+# RUN:   -o %t.bolt \
+# RUN:   | FileCheck %s
+
+## The call to puts() (a BL on AArch64) should come from inline-foo.c:
+# CHECK: bl {{.*}} # debug line {{.*}}inline-foo.c:4:3
+
+# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \
+# RUN:   | FileCheck %s -check-prefix=CHECK-OBJDUMP
+
+## Dump of main() should include debug info from inline-foo.c after inlining:
+# CHECK-OBJDUMP: inline-foo.c:4
diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test
new file mode 100644
index 000000000..5a87bdde9
--- /dev/null
+++ b/bolt/test/AArch64/inlined-function-mixed.test
@@ -0,0 +1,11 @@
+# Make sure inlining from a unit with debug info into a unit without
+# debug info does not cause a crash.
+
+RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o
+RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g
+RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t
+
+RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \
+RUN: --inline-small-functions --force-inline=main | FileCheck %s
+
+CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten
diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test
new file mode 100644
index 000000000..92f4b9a14
--- /dev/null
+++ b/bolt/test/AArch64/jmp-optimization.test
@@ -0,0 +1,14 @@
+# Tests the optimization of functions that just do a tail call in the beginning.
+
+# This test has commands that rely on shell capabilities that won't execute
+# correctly on Windows, e.g. the brace expansion in the compile command.
+REQUIRES: shell
+
+RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
+RUN: llvm-bolt --inline-small-functions %t -o %t.bolt
+RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
+
+CHECK: <main>:
+CHECK-NOT: {{[[:space:]]}}bl{{[[:space:]]}}
+CHECK: mov w0, {{wzr|#0x0}}
+CHECK: ret
--
2.33.0