Sync patch from 2203sp4
(cherry picked from commit 814120db4bcb4fae1fe36b043cdeee2ee0d0a47e)
This commit is contained in:
parent
7d26121e7b
commit
e94a123c64
126
0001-Fix-trap-value-for-non-X86.patch
Normal file
126
0001-Fix-trap-value-for-non-X86.patch
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
From 868d8c360b3e1e5f291cb3e0dae0777a4529228f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Denis Revunov <revunov.denis@huawei-partners.com>
|
||||||
|
Date: Thu, 27 Jul 2023 11:48:08 -0400
|
||||||
|
Subject: [PATCH] Fix trap value for non-X86
|
||||||
|
|
||||||
|
The trap value used by BOLT was assumed to be a single-byte instruction.
|
||||||
|
It made some functions unaligned on AArch64 (e.g. exceptions-instrumentation test)
|
||||||
|
and caused emission failures. Fix that by changing fill value to StringRef.
|
||||||
|
|
||||||
|
Reviewed By: rafauler
|
||||||
|
|
||||||
|
Differential Revision: https://reviews.llvm.org/D158191
|
||||||
|
---
|
||||||
|
bolt/include/bolt/Core/MCPlusBuilder.h | 9 ++++++---
|
||||||
|
bolt/lib/Core/BinaryEmitter.cpp | 4 ++--
|
||||||
|
bolt/lib/Rewrite/RewriteInstance.cpp | 6 ++++--
|
||||||
|
bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 4 ++++
|
||||||
|
bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp | 4 ++++
|
||||||
|
bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 2 +-
|
||||||
|
6 files changed, 21 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||||
|
index 56d0228cd..beb06751d 100644
|
||||||
|
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||||
|
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||||
|
@@ -636,9 +636,12 @@ public:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
- /// If non-zero, this is used to fill the executable space with instructions
|
||||||
|
- /// that will trap. Defaults to 0.
|
||||||
|
- virtual unsigned getTrapFillValue() const { return 0; }
|
||||||
|
+ /// Used to fill the executable space with instructions
|
||||||
|
+ /// that will trap.
|
||||||
|
+ virtual StringRef getTrapFillValue() const {
|
||||||
|
+ llvm_unreachable("not implemented");
|
||||||
|
+ return StringRef();
|
||||||
|
+ }
|
||||||
|
|
||||||
|
/// Interface and basic functionality of a MCInstMatcher. The idea is to make
|
||||||
|
/// it easy to match one or more MCInsts against a tree-like pattern and
|
||||||
|
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
|
||||||
|
index c4129615a..df076c81d 100644
|
||||||
|
--- a/bolt/lib/Core/BinaryEmitter.cpp
|
||||||
|
+++ b/bolt/lib/Core/BinaryEmitter.cpp
|
||||||
|
@@ -376,7 +376,7 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function,
|
||||||
|
}
|
||||||
|
|
||||||
|
if (opts::MarkFuncs)
|
||||||
|
- Streamer.emitIntValue(BC.MIB->getTrapFillValue(), 1);
|
||||||
|
+ Streamer.emitBytes(BC.MIB->getTrapFillValue());
|
||||||
|
|
||||||
|
// Emit CFI end
|
||||||
|
if (Function.hasCFI())
|
||||||
|
@@ -420,7 +420,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
|
||||||
|
// case, the call site entries in that LSDA have 0 as offset to the landing
|
||||||
|
// pad, which the runtime interprets as "no handler". To prevent this,
|
||||||
|
// insert some padding.
|
||||||
|
- Streamer.emitIntValue(BC.MIB->getTrapFillValue(), 1);
|
||||||
|
+ Streamer.emitBytes(BC.MIB->getTrapFillValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Track the first emitted instruction with debug info.
|
||||||
|
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
|
||||||
|
index fe8c134b8..c6ea0b009 100644
|
||||||
|
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
|
||||||
|
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
|
||||||
|
@@ -5273,8 +5273,10 @@ void RewriteInstance::rewriteFile() {
|
||||||
|
if (!BF.getFileOffset() || !BF.isEmitted())
|
||||||
|
continue;
|
||||||
|
OS.seek(BF.getFileOffset());
|
||||||
|
- for (unsigned I = 0; I < BF.getMaxSize(); ++I)
|
||||||
|
- OS.write((unsigned char)BC->MIB->getTrapFillValue());
|
||||||
|
+ StringRef TrapInstr = BC->MIB->getTrapFillValue();
|
||||||
|
+ unsigned NInstr = BF.getMaxSize() / TrapInstr.size();
|
||||||
|
+ for (unsigned I = 0; I < NInstr; ++I)
|
||||||
|
+ OS.write(TrapInstr.data(), TrapInstr.size());
|
||||||
|
}
|
||||||
|
OS.seek(SavedPos);
|
||||||
|
}
|
||||||
|
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||||
|
index acf21ba23..cd66b654e 100644
|
||||||
|
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||||
|
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||||
|
@@ -1142,6 +1142,10 @@ public:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ StringRef getTrapFillValue() const override {
|
||||||
|
+ return StringRef("\0\0\0\0", 4);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
bool createReturn(MCInst &Inst) const override {
|
||||||
|
Inst.setOpcode(AArch64::RET);
|
||||||
|
Inst.clear();
|
||||||
|
diff --git a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
|
||||||
|
index ec5bca852..badc1bde8 100644
|
||||||
|
--- a/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
|
||||||
|
+++ b/bolt/lib/Target/RISCV/RISCVMCPlusBuilder.cpp
|
||||||
|
@@ -171,6 +171,10 @@ public:
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ StringRef getTrapFillValue() const override {
|
||||||
|
+ return StringRef("\0\0\0\0", 4);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
|
||||||
|
const MCSymbol *&TBB, const MCSymbol *&FBB,
|
||||||
|
MCInst *&CondBranch,
|
||||||
|
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
|
||||||
|
index 3ee161d0b..5e3c01a1c 100644
|
||||||
|
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
|
||||||
|
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
|
||||||
|
@@ -397,7 +397,7 @@ public:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- unsigned getTrapFillValue() const override { return 0xCC; }
|
||||||
|
+ StringRef getTrapFillValue() const override { return StringRef("\314", 1); }
|
||||||
|
|
||||||
|
struct IndJmpMatcherFrag1 : MCInstMatcher {
|
||||||
|
std::unique_ptr<MCInstMatcher> Base;
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
44
0002-Add-test-for-emitting-trap-value.patch
Normal file
44
0002-Add-test-for-emitting-trap-value.patch
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
From e4ae238a42296a84bc819dd1fb61f3c699952f17 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Denis Revunov <rnovds@gmail.com>
|
||||||
|
Date: Thu, 17 Aug 2023 18:30:07 +0300
|
||||||
|
Subject: [PATCH] Add test for emitting trap value
|
||||||
|
|
||||||
|
Reviewed By: rafauler
|
||||||
|
|
||||||
|
Differential Revision: https://reviews.llvm.org/D158191
|
||||||
|
---
|
||||||
|
bolt/test/runtime/mark-funcs.c | 22 ++++++++++++++++++++++
|
||||||
|
1 file changed, 22 insertions(+)
|
||||||
|
create mode 100644 bolt/test/runtime/mark-funcs.c
|
||||||
|
|
||||||
|
diff --git a/bolt/test/runtime/mark-funcs.c b/bolt/test/runtime/mark-funcs.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..a8586ca8b
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/runtime/mark-funcs.c
|
||||||
|
@@ -0,0 +1,22 @@
|
||||||
|
+#include <stdio.h>
|
||||||
|
+
|
||||||
|
+int dummy() {
|
||||||
|
+ printf("Dummy called\n");
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int main(int argc, char **argv) {
|
||||||
|
+ if (dummy() != 0)
|
||||||
|
+ return 1;
|
||||||
|
+ printf("Main called\n");
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+// Check that emitting trap value works properly and
|
||||||
|
+// does not break functions
|
||||||
|
+// REQUIRES: system-linux
|
||||||
|
+// RUN: %clangxx -Wl,-q %s -o %t.exe
|
||||||
|
+// RUN: %t.exe | FileCheck %s
|
||||||
|
+// CHECK: Dummy called
|
||||||
|
+// CHECK-NEXT: Main called
|
||||||
|
+// RUN: llvm-bolt %t.exe -o %t.exe.bolt -lite=false --mark-funcs
|
||||||
|
+// RUN: %t.exe.bolt | FileCheck %s
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
274
0003-AArch64-Add-AArch64-support-for-inline.patch
Normal file
274
0003-AArch64-Add-AArch64-support-for-inline.patch
Normal file
@ -0,0 +1,274 @@
|
|||||||
|
From a09ea2c3534d12f194f740180e09a229e0b2200f Mon Sep 17 00:00:00 2001
|
||||||
|
From: xiongzhou4 <xiongzhou4@huawei.com>
|
||||||
|
Date: Wed, 12 Jun 2024 17:12:36 +0800
|
||||||
|
Subject: [PATCH 1/2] [AArch64] Add AArch64 support for inline.
|
||||||
|
|
||||||
|
---
|
||||||
|
bolt/include/bolt/Core/MCPlusBuilder.h | 5 +--
|
||||||
|
bolt/lib/Passes/Inliner.cpp | 31 +++++++++++++++++++
|
||||||
|
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 10 ++++++
|
||||||
|
bolt/test/AArch64/Inputs/inline-foo.c | 5 +++
|
||||||
|
bolt/test/AArch64/Inputs/inline-main.c | 5 +++
|
||||||
|
bolt/test/AArch64/Inputs/inlined.cpp | 23 ++++++++++++++
|
||||||
|
bolt/test/AArch64/Inputs/inlinee.cpp | 3 ++
|
||||||
|
bolt/test/AArch64/Inputs/jmp_opt.cpp | 7 +++++
|
||||||
|
bolt/test/AArch64/Inputs/jmp_opt2.cpp | 3 ++
|
||||||
|
bolt/test/AArch64/Inputs/jmp_opt3.cpp | 3 ++
|
||||||
|
bolt/test/AArch64/inline-debug-info.test | 20 ++++++++++++
|
||||||
|
bolt/test/AArch64/inlined-function-mixed.test | 11 +++++++
|
||||||
|
bolt/test/AArch64/jmp-optimization.test | 14 +++++++++
|
||||||
|
13 files changed, 136 insertions(+), 4 deletions(-)
|
||||||
|
create mode 100644 bolt/test/AArch64/Inputs/inline-foo.c
|
||||||
|
create mode 100644 bolt/test/AArch64/Inputs/inline-main.c
|
||||||
|
create mode 100644 bolt/test/AArch64/Inputs/inlined.cpp
|
||||||
|
create mode 100644 bolt/test/AArch64/Inputs/inlinee.cpp
|
||||||
|
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt.cpp
|
||||||
|
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
||||||
|
create mode 100644 bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
||||||
|
create mode 100644 bolt/test/AArch64/inline-debug-info.test
|
||||||
|
create mode 100644 bolt/test/AArch64/inlined-function-mixed.test
|
||||||
|
create mode 100644 bolt/test/AArch64/jmp-optimization.test
|
||||||
|
|
||||||
|
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||||
|
index db3f7e7f1..56d0228cd 100644
|
||||||
|
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||||
|
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
|
||||||
|
@@ -573,10 +573,7 @@ public:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
- virtual bool isPush(const MCInst &Inst) const {
|
||||||
|
- llvm_unreachable("not implemented");
|
||||||
|
- return false;
|
||||||
|
- }
|
||||||
|
+ virtual bool isPush(const MCInst &Inst) const { return false; }
|
||||||
|
|
||||||
|
/// Return the width, in bytes, of the memory access performed by \p Inst, if
|
||||||
|
/// this is a push instruction. Return zero otherwise.
|
||||||
|
diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp
|
||||||
|
index 8dcb8934f..67dd294fb 100644
|
||||||
|
--- a/bolt/lib/Passes/Inliner.cpp
|
||||||
|
+++ b/bolt/lib/Passes/Inliner.cpp
|
||||||
|
@@ -465,6 +465,37 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) {
|
||||||
|
<< ". Size change: " << SizeAfterInlining
|
||||||
|
<< " bytes.\n");
|
||||||
|
|
||||||
|
+// Skip situations where some A64 instructions can't be inlined:
|
||||||
|
+// # Indirect branch, e.g., BR.
|
||||||
|
+// # Branch instructions but used to make a function call.
|
||||||
|
+ if (BC.isAArch64()) {
|
||||||
|
+ auto &MIB = *BC.MIB;
|
||||||
|
+ bool skip = false;
|
||||||
|
+ for (const BinaryBasicBlock &BB : *TargetFunction) {
|
||||||
|
+ for (MCInst Inst : BB) {
|
||||||
|
+ if (MIB.isPseudo(Inst))
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ MIB.stripAnnotations(Inst, false);
|
||||||
|
+
|
||||||
|
+ if (MIB.isBranch(Inst)) {
|
||||||
|
+ const BinaryBasicBlock *TargetBB =
|
||||||
|
+ TargetFunction->getBasicBlockForLabel(MIB.getTargetSymbol(Inst));
|
||||||
|
+ if (MIB.isIndirectBranch(Inst) || !TargetBB) {
|
||||||
|
+ skip = true;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ if (skip)
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ if (skip) {
|
||||||
|
+ ++InstIt;
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
std::tie(BB, InstIt) = inlineCall(*BB, InstIt, *TargetFunction);
|
||||||
|
|
||||||
|
DidInlining = true;
|
||||||
|
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||||
|
index d109a5d52..acf21ba23 100644
|
||||||
|
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||||
|
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
||||||
|
@@ -34,6 +34,8 @@ public:
|
||||||
|
const MCRegisterInfo *RegInfo)
|
||||||
|
: MCPlusBuilder(Analysis, Info, RegInfo) {}
|
||||||
|
|
||||||
|
+ MCPhysReg getStackPointer() const override { return AArch64::SP; }
|
||||||
|
+
|
||||||
|
bool equals(const MCTargetExpr &A, const MCTargetExpr &B,
|
||||||
|
CompFuncTy Comp) const override {
|
||||||
|
const auto &AArch64ExprA = cast<AArch64MCExpr>(A);
|
||||||
|
@@ -816,6 +818,14 @@ public:
|
||||||
|
|
||||||
|
int getUncondBranchEncodingSize() const override { return 28; }
|
||||||
|
|
||||||
|
+ bool createCall(MCInst &Inst, const MCSymbol *Target,
|
||||||
|
+ MCContext *Ctx) override {
|
||||||
|
+ Inst.setOpcode(AArch64::BL);
|
||||||
|
+ Inst.addOperand(MCOperand::createExpr(
|
||||||
|
+ MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx)));
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
bool createTailCall(MCInst &Inst, const MCSymbol *Target,
|
||||||
|
MCContext *Ctx) override {
|
||||||
|
Inst.setOpcode(AArch64::B);
|
||||||
|
diff --git a/bolt/test/AArch64/Inputs/inline-foo.c b/bolt/test/AArch64/Inputs/inline-foo.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..1307c13f2
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/AArch64/Inputs/inline-foo.c
|
||||||
|
@@ -0,0 +1,5 @@
|
||||||
|
+#include "stub.h"
|
||||||
|
+
|
||||||
|
+void foo() {
|
||||||
|
+ puts("Hello world!\n");
|
||||||
|
+}
|
||||||
|
diff --git a/bolt/test/AArch64/Inputs/inline-main.c b/bolt/test/AArch64/Inputs/inline-main.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..7853d2b63
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/AArch64/Inputs/inline-main.c
|
||||||
|
@@ -0,0 +1,5 @@
|
||||||
|
+extern void foo();
|
||||||
|
+int main() {
|
||||||
|
+ foo();
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff --git a/bolt/test/AArch64/Inputs/inlined.cpp b/bolt/test/AArch64/Inputs/inlined.cpp
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..a6ff9e262
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/AArch64/Inputs/inlined.cpp
|
||||||
|
@@ -0,0 +1,23 @@
|
||||||
|
+extern "C" int printf(const char*, ...);
|
||||||
|
+extern const char* question();
|
||||||
|
+
|
||||||
|
+inline int answer() __attribute__((always_inline));
|
||||||
|
+inline int answer() { return 42; }
|
||||||
|
+
|
||||||
|
+int main(int argc, char *argv[]) {
|
||||||
|
+ int ans;
|
||||||
|
+ if (argc == 1) {
|
||||||
|
+ ans = 0;
|
||||||
|
+ } else {
|
||||||
|
+ ans = argc;
|
||||||
|
+ }
|
||||||
|
+ printf("%s\n", question());
|
||||||
|
+ for (int i = 0; i < 10; ++i) {
|
||||||
|
+ int x = answer();
|
||||||
|
+ int y = answer();
|
||||||
|
+ ans += x - y;
|
||||||
|
+ }
|
||||||
|
+ // padding to make sure question() is inlineable
|
||||||
|
+ asm("nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;");
|
||||||
|
+ return ans;
|
||||||
|
+}
|
||||||
|
diff --git a/bolt/test/AArch64/Inputs/inlinee.cpp b/bolt/test/AArch64/Inputs/inlinee.cpp
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..edb7ab145
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/AArch64/Inputs/inlinee.cpp
|
||||||
|
@@ -0,0 +1,3 @@
|
||||||
|
+const char* question() {
|
||||||
|
+ return "What do you get if you multiply six by nine?";
|
||||||
|
+}
|
||||||
|
diff --git a/bolt/test/AArch64/Inputs/jmp_opt.cpp b/bolt/test/AArch64/Inputs/jmp_opt.cpp
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..cd6d53c35
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/AArch64/Inputs/jmp_opt.cpp
|
||||||
|
@@ -0,0 +1,7 @@
|
||||||
|
+int g();
|
||||||
|
+
|
||||||
|
+int main() {
|
||||||
|
+ int x = g();
|
||||||
|
+ int y = x*x;
|
||||||
|
+ return y;
|
||||||
|
+}
|
||||||
|
diff --git a/bolt/test/AArch64/Inputs/jmp_opt2.cpp b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..80b853d63
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/AArch64/Inputs/jmp_opt2.cpp
|
||||||
|
@@ -0,0 +1,3 @@
|
||||||
|
+int f() {
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff --git a/bolt/test/AArch64/Inputs/jmp_opt3.cpp b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..7fb551163
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/AArch64/Inputs/jmp_opt3.cpp
|
||||||
|
@@ -0,0 +1,3 @@
|
||||||
|
+int f();
|
||||||
|
+
|
||||||
|
+int g() { return f(); }
|
||||||
|
diff --git a/bolt/test/AArch64/inline-debug-info.test b/bolt/test/AArch64/inline-debug-info.test
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..e20e5e31e
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/AArch64/inline-debug-info.test
|
||||||
|
@@ -0,0 +1,20 @@
|
||||||
|
+## Check that BOLT correctly prints and updates debug info for inlined
|
||||||
|
+## functions.
|
||||||
|
+
|
||||||
|
+# REQUIRES: system-linux
|
||||||
|
+
|
||||||
|
+# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
|
||||||
|
+# RUN: -I%p/../Inputs -o %t.exe -Wl,-q
|
||||||
|
+# RUN: llvm-bolt %t.exe --update-debug-sections --print-debug-info \
|
||||||
|
+# RUN: --print-only=main --print-after-lowering --force-inline=foo \
|
||||||
|
+# RUN: -o %t.bolt \
|
||||||
|
+# RUN: | FileCheck %s
|
||||||
|
+
|
||||||
|
+## The call to puts() should come from inline-foo.c:
|
||||||
|
+# CHECK: callq {{.*}} # debug line {{.*}}inline-foo.c:4:3
|
||||||
|
+
|
||||||
|
+# RUN: llvm-objdump --disassemble-symbols=main -d --line-numbers %t.bolt \
|
||||||
|
+# RUN: | FileCheck %s -check-prefix=CHECK-OBJDUMP
|
||||||
|
+
|
||||||
|
+## Dump of main() should include debug info from inline-foo.c after inlining:
|
||||||
|
+# CHECK-OBJDUMP: inline-foo.c:4
|
||||||
|
diff --git a/bolt/test/AArch64/inlined-function-mixed.test b/bolt/test/AArch64/inlined-function-mixed.test
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..5a87bdde9
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/AArch64/inlined-function-mixed.test
|
||||||
|
@@ -0,0 +1,11 @@
|
||||||
|
+# Make sure inlining from a unit with debug info into unit without
|
||||||
|
+# debug info does not cause a crash.
|
||||||
|
+
|
||||||
|
+RUN: %clangxx %cxxflags %S/Inputs/inlined.cpp -c -o %T/inlined.o
|
||||||
|
+RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g
|
||||||
|
+RUN: %clangxx %cxxflags %T/inlined.o %T/inlinee.o -o %t
|
||||||
|
+
|
||||||
|
+RUN: llvm-bolt %t -o %t.bolt --update-debug-sections --reorder-blocks=reverse \
|
||||||
|
+RUN: --inline-small-functions --force-inline=main | FileCheck %s
|
||||||
|
+
|
||||||
|
+CHECK-NOT: BOLT: 0 out of {{.*}} functions were overwritten
|
||||||
|
diff --git a/bolt/test/AArch64/jmp-optimization.test b/bolt/test/AArch64/jmp-optimization.test
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..92f4b9a14
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/AArch64/jmp-optimization.test
|
||||||
|
@@ -0,0 +1,14 @@
|
||||||
|
+# Tests the optimization of functions that just do a tail call in the beginning.
|
||||||
|
+
|
||||||
|
+# This test has commands that rely on shell capabilities that won't execute
|
||||||
|
+# correctly on Windows e.g. unsupported parameter expansion
|
||||||
|
+REQUIRES: shell
|
||||||
|
+
|
||||||
|
+RUN: %clang %cflags -O2 %S/Inputs/jmp_opt{,2,3}.cpp -o %t
|
||||||
|
+RUN: llvm-bolt -inline-small-functions %t -o %t.bolt
|
||||||
|
+RUN: llvm-objdump -d %t.bolt --print-imm-hex | FileCheck %s
|
||||||
|
+
|
||||||
|
+CHECK: <main>:
|
||||||
|
+CHECK-NOT: call
|
||||||
|
+CHECK: xorl %eax, %eax
|
||||||
|
+CHECK: retq
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
170
0004-Bolt-Solving-pie-support-issue.patch
Normal file
170
0004-Bolt-Solving-pie-support-issue.patch
Normal file
@ -0,0 +1,170 @@
|
|||||||
|
From a28084a4adff2340dd02c2c0c42f4997f76b3ffa Mon Sep 17 00:00:00 2001
|
||||||
|
From: rfwang07 <wangrufeng5@huawei.com>
|
||||||
|
Date: Fri, 21 Jun 2024 11:16:44 +0800
|
||||||
|
Subject: [PATCH] [Bolt] Solving pie support issue
|
||||||
|
|
||||||
|
---
|
||||||
|
bolt/lib/Core/BinaryContext.cpp | 25 +++++++++++++++++++----
|
||||||
|
bolt/test/perf2bolt/Inputs/perf_test.c | 26 ++++++++++++++++++++++++
|
||||||
|
bolt/test/perf2bolt/Inputs/perf_test.lds | 13 ++++++++++++
|
||||||
|
bolt/test/perf2bolt/lit.local.cfg | 4 ++++
|
||||||
|
bolt/test/perf2bolt/perf_test.test | 17 ++++++++++++++++
|
||||||
|
bolt/unittests/Core/BinaryContext.cpp | 21 +++++++++++++++++++
|
||||||
|
6 files changed, 102 insertions(+), 4 deletions(-)
|
||||||
|
create mode 100644 bolt/test/perf2bolt/Inputs/perf_test.c
|
||||||
|
create mode 100644 bolt/test/perf2bolt/Inputs/perf_test.lds
|
||||||
|
create mode 100644 bolt/test/perf2bolt/lit.local.cfg
|
||||||
|
create mode 100644 bolt/test/perf2bolt/perf_test.test
|
||||||
|
|
||||||
|
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
|
||||||
|
index 2d2b35ee2..ab9f0b844 100644
|
||||||
|
--- a/bolt/lib/Core/BinaryContext.cpp
|
||||||
|
+++ b/bolt/lib/Core/BinaryContext.cpp
|
||||||
|
@@ -1880,10 +1880,27 @@ BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress,
|
||||||
|
// Find a segment with a matching file offset.
|
||||||
|
for (auto &KV : SegmentMapInfo) {
|
||||||
|
const SegmentInfo &SegInfo = KV.second;
|
||||||
|
- if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) == FileOffset) {
|
||||||
|
- // Use segment's aligned memory offset to calculate the base address.
|
||||||
|
- const uint64_t MemOffset = alignDown(SegInfo.Address, SegInfo.Alignment);
|
||||||
|
- return MMapAddress - MemOffset;
|
||||||
|
+ // FileOffset is obtained from the perf event,
|
||||||
|
+ // and it is equal to alignDown(SegInfo.FileOffset, pagesize).
|
||||||
|
+ // If the pagesize is not equal to SegInfo.Alignment,
|
||||||
|
+ // FileOffset and SegInfo.FileOffset should be aligned first,
|
||||||
|
+ // and then judge whether they are equal.
|
||||||
|
+ if (alignDown(SegInfo.FileOffset, SegInfo.Alignment) ==
|
||||||
|
+ alignDown(FileOffset, SegInfo.Alignment)) {
|
||||||
|
+ // The function's offset from base address in VAS is aligned by pagesize
|
||||||
|
+ // instead of SegInfo.Alignment. Pagesize can't be obtained from perf events.
|
||||||
|
+ // However, the ELF document says that SegInfo.FileOffset should be equal
|
||||||
|
+ // to SegInfo.Address, modulo the pagesize.
|
||||||
|
+ // Reference: https://refspecs.linuxfoundation.org/elf/elf.pdf
|
||||||
|
+
|
||||||
|
+ // So alignDown(SegInfo.Address, pagesize) can be calculated by:
|
||||||
|
+ // alignDown(SegInfo.Address, pagesize)
|
||||||
|
+ // = SegInfo.Address - (SegInfo.Address % pagesize)
|
||||||
|
+ // = SegInfo.Address - (SegInfo.FileOffset % pagesize)
|
||||||
|
+ // = SegInfo.Address - SegInfo.FileOffset +
|
||||||
|
+ // alignDown(SegInfo.FileOffset, pagesize)
|
||||||
|
+ // = SegInfo.Address - SegInfo.FileOffset + FileOffset
|
||||||
|
+ return MMapAddress - (SegInfo.Address - SegInfo.FileOffset + FileOffset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/bolt/test/perf2bolt/Inputs/perf_test.c b/bolt/test/perf2bolt/Inputs/perf_test.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..ff5ecf7a8
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/perf2bolt/Inputs/perf_test.c
|
||||||
|
@@ -0,0 +1,26 @@
|
||||||
|
+#include <stdio.h>
|
||||||
|
+#include <stdlib.h>
|
||||||
|
+#include <unistd.h>
|
||||||
|
+
|
||||||
|
+int add(int a, int b) { return a + b; }
|
||||||
|
+int minus(int a, int b) { return a - b; }
|
||||||
|
+int multiple(int a, int b) { return a * b; }
|
||||||
|
+int divide(int a, int b) {
|
||||||
|
+ if (b == 0)
|
||||||
|
+ return 0;
|
||||||
|
+ return a / b;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int main() {
|
||||||
|
+ int a = 16;
|
||||||
|
+ int b = 8;
|
||||||
|
+
|
||||||
|
+ for (int i = 1; i < 100000; i++) {
|
||||||
|
+ add(a, b);
|
||||||
|
+ minus(a, b);
|
||||||
|
+ multiple(a, b);
|
||||||
|
+ divide(a, b);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff --git a/bolt/test/perf2bolt/Inputs/perf_test.lds b/bolt/test/perf2bolt/Inputs/perf_test.lds
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..9cb4ebbf1
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/perf2bolt/Inputs/perf_test.lds
|
||||||
|
@@ -0,0 +1,13 @@
|
||||||
|
+SECTIONS {
|
||||||
|
+ . = SIZEOF_HEADERS;
|
||||||
|
+ .interp : { *(.interp) }
|
||||||
|
+ .note.gnu.build-id : { *(.note.gnu.build-id) }
|
||||||
|
+ . = 0x212e8;
|
||||||
|
+ .dynsym : { *(.dynsym) }
|
||||||
|
+ . = 0x31860;
|
||||||
|
+ .text : { *(.text*) }
|
||||||
|
+ . = 0x41c20;
|
||||||
|
+ .fini_array : { *(.fini_array) }
|
||||||
|
+ . = 0x54e18;
|
||||||
|
+ .data : { *(.data) }
|
||||||
|
+}
|
||||||
|
diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..87a96ec34
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/perf2bolt/lit.local.cfg
|
||||||
|
@@ -0,0 +1,4 @@
|
||||||
|
+import shutil
|
||||||
|
+
|
||||||
|
+if shutil.which("perf") != None:
|
||||||
|
+ config.available_features.add("perf")
|
||||||
|
diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..fe6e015ab
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/perf2bolt/perf_test.test
|
||||||
|
@@ -0,0 +1,17 @@
|
||||||
|
+# Check perf2bolt binary function which was compiled with pie
|
||||||
|
+
|
||||||
|
+REQUIRES: system-linux, perf
|
||||||
|
+
|
||||||
|
+RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t
|
||||||
|
+RUN: perf record -e cycles:u -o %t2 -- %t
|
||||||
|
+RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s
|
||||||
|
+
|
||||||
|
+CHECK-NOT: PERF2BOLT-ERROR
|
||||||
|
+CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
|
||||||
|
+
|
||||||
|
+RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
|
||||||
|
+RUN: perf record -e cycles:u -o %t5 -- %t4
|
||||||
|
+RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id 2>&1 | FileCheck %s --check-prefix=CHECK-NO-PIE
|
||||||
|
+
|
||||||
|
+CHECK-NO-PIE-NOT: PERF2BOLT-ERROR
|
||||||
|
+CHECK-NO-PIE-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
|
||||||
|
diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp
|
||||||
|
index bac264141..5a80cb4a2 100644
|
||||||
|
--- a/bolt/unittests/Core/BinaryContext.cpp
|
||||||
|
+++ b/bolt/unittests/Core/BinaryContext.cpp
|
||||||
|
@@ -83,3 +83,24 @@ TEST_P(BinaryContextTester, BaseAddress) {
|
||||||
|
BaseAddress = BC->getBaseAddressForMapping(0x7f13f5556000, 0x137a000);
|
||||||
|
ASSERT_FALSE(BaseAddress.has_value());
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+TEST_P(BinaryContextTester, BaseAddress2) {
|
||||||
|
+ // Check that base address calculation is correct for a binary if the
|
||||||
|
+ // alignment in the ELF file is different from the pagesize.
|
||||||
|
+ // The segment layout is as follows:
|
||||||
|
+ BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000};
|
||||||
|
+ BC->SegmentMapInfo[0x31860] =
|
||||||
|
+ SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000};
|
||||||
|
+ BC->SegmentMapInfo[0x41c20] =
|
||||||
|
+ SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000};
|
||||||
|
+ BC->SegmentMapInfo[0x54e18] =
|
||||||
|
+ SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000};
|
||||||
|
+
|
||||||
|
+ std::optional<uint64_t> BaseAddress =
|
||||||
|
+ BC->getBaseAddressForMapping(0xaaaaea444000, 0x21000);
|
||||||
|
+ ASSERT_TRUE(BaseAddress.has_value());
|
||||||
|
+ ASSERT_EQ(*BaseAddress, 0xaaaaea413000ULL);
|
||||||
|
+
|
||||||
|
+ BaseAddress = BC->getBaseAddressForMapping(0xaaaaea444000, 0x11000);
|
||||||
|
+ ASSERT_FALSE(BaseAddress.has_value());
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.39.2 (Apple Git-143)
|
||||||
|
|
||||||
130
0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch
Normal file
130
0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
From 28e7e71251dc4b79c29aa0d4904cb424f9081455 Mon Sep 17 00:00:00 2001
|
||||||
|
From: rfwang07 <wangrufeng5@huawei.com>
|
||||||
|
Date: Fri, 21 Jun 2024 11:23:42 +0800
|
||||||
|
Subject: [PATCH] [BOLT][AArch64] Don't change layout in PatchEntries
|
||||||
|
|
||||||
|
---
|
||||||
|
bolt/lib/Passes/PatchEntries.cpp | 11 ++++++++
|
||||||
|
bolt/test/AArch64/patch-entries.s | 36 ++++++++++++++++++++++++
|
||||||
|
bolt/unittests/Core/BinaryContext.cpp | 40 +++++++++++++++++++++++++++
|
||||||
|
3 files changed, 87 insertions(+)
|
||||||
|
create mode 100644 bolt/test/AArch64/patch-entries.s
|
||||||
|
|
||||||
|
diff --git a/bolt/lib/Passes/PatchEntries.cpp b/bolt/lib/Passes/PatchEntries.cpp
|
||||||
|
index 02a044d8b..ee7512d89 100644
|
||||||
|
--- a/bolt/lib/Passes/PatchEntries.cpp
|
||||||
|
+++ b/bolt/lib/Passes/PatchEntries.cpp
|
||||||
|
@@ -98,6 +98,17 @@ void PatchEntries::runOnFunctions(BinaryContext &BC) {
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!Success) {
|
||||||
|
+ // We can't change output layout for AArch64 due to LongJmp pass
|
||||||
|
+ if (BC.isAArch64()) {
|
||||||
|
+ if (opts::ForcePatch) {
|
||||||
|
+ errs() << "BOLT-ERROR: unable to patch entries in " << Function
|
||||||
|
+ << "\n";
|
||||||
|
+ exit(1);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
// If the original function entries cannot be patched, then we cannot
|
||||||
|
// safely emit new function body.
|
||||||
|
errs() << "BOLT-WARNING: failed to patch entries in " << Function
|
||||||
|
diff --git a/bolt/test/AArch64/patch-entries.s b/bolt/test/AArch64/patch-entries.s
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..cf6f72a0b
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/AArch64/patch-entries.s
|
||||||
|
@@ -0,0 +1,36 @@
|
||||||
|
+# This test checks patch entries functionality
|
||||||
|
+
|
||||||
|
+# REQUIRES: system-linux
|
||||||
|
+
|
||||||
|
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
|
||||||
|
+# RUN: %s -o %t.o
|
||||||
|
+# RUN: %clang %cflags -pie %t.o -o %t.exe -nostdlib -Wl,-q
|
||||||
|
+# RUN: llvm-bolt %t.exe -o %t.bolt --use-old-text=0 --lite=0 --skip-funcs=_start
|
||||||
|
+# RUN: llvm-objdump -dz %t.bolt | FileCheck %s
|
||||||
|
+
|
||||||
|
+# CHECK: <pathedEntries.org.0>:
|
||||||
|
+# CHECK-NEXT: adrp x16, 0x[[#%x,ADRP:]]
|
||||||
|
+# CHECK-NEXT: add x16, x16, #0x[[#%x,ADD:]]
|
||||||
|
+# CHECK-NEXT: br x16
|
||||||
|
+
|
||||||
|
+# CHECK: [[#ADRP + ADD]] <pathedEntries>:
|
||||||
|
+# CHECK-NEXT: [[#ADRP + ADD]]: {{.*}} ret
|
||||||
|
+
|
||||||
|
+.text
|
||||||
|
+.balign 4
|
||||||
|
+.global pathedEntries
|
||||||
|
+.type pathedEntries, %function
|
||||||
|
+pathedEntries:
|
||||||
|
+ .rept 32
|
||||||
|
+ nop
|
||||||
|
+ .endr
|
||||||
|
+ ret
|
||||||
|
+.size pathedEntries, .-pathedEntries
|
||||||
|
+
|
||||||
|
+.global _start
|
||||||
|
+.type _start, %function
|
||||||
|
+_start:
|
||||||
|
+ bl pathedEntries
|
||||||
|
+ .inst 0xdeadbeef
|
||||||
|
+ ret
|
||||||
|
+.size _start, .-_start
|
||||||
|
diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp
|
||||||
|
index 5a80cb4a2..7ac1c1435 100644
|
||||||
|
--- a/bolt/unittests/Core/BinaryContext.cpp
|
||||||
|
+++ b/bolt/unittests/Core/BinaryContext.cpp
|
||||||
|
@@ -62,6 +62,46 @@ INSTANTIATE_TEST_SUITE_P(X86, BinaryContextTester,
|
||||||
|
INSTANTIATE_TEST_SUITE_P(AArch64, BinaryContextTester,
|
||||||
|
::testing::Values(Triple::aarch64));
|
||||||
|
|
||||||
|
+TEST_P(BinaryContextTester, FlushPendingRelocCALL26) {
|
||||||
|
+ if (GetParam() != Triple::aarch64)
|
||||||
|
+ GTEST_SKIP();
|
||||||
|
+
|
||||||
|
+ // This test checks that encodeValueAArch64 used by flushPendingRelocations
|
||||||
|
+ // returns correctly encoded values for CALL26 relocation for both backward
|
||||||
|
+ // and forward branches.
|
||||||
|
+ //
|
||||||
|
+ // The offsets layout is:
|
||||||
|
+ // 4: func1
|
||||||
|
+ // 8: bl func1
|
||||||
|
+ // 12: bl func2
|
||||||
|
+ // 16: func2
|
||||||
|
+
|
||||||
|
+ char Data[20] = {};
|
||||||
|
+ BinarySection &BS = BC->registerOrUpdateSection(
|
||||||
|
+ ".text", ELF::SHT_PROGBITS, ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
|
||||||
|
+ (uint8_t *)Data, sizeof(Data), 4);
|
||||||
|
+ MCSymbol *RelSymbol1 = BC->getOrCreateGlobalSymbol(4, "Func1");
|
||||||
|
+ ASSERT_TRUE(RelSymbol1);
|
||||||
|
+ BS.addRelocation(8, RelSymbol1, ELF::R_AARCH64_CALL26, 0, 0, true);
|
||||||
|
+ MCSymbol *RelSymbol2 = BC->getOrCreateGlobalSymbol(16, "Func2");
|
||||||
|
+ ASSERT_TRUE(RelSymbol2);
|
||||||
|
+ BS.addRelocation(12, RelSymbol2, ELF::R_AARCH64_CALL26, 0, 0, true);
|
||||||
|
+
|
||||||
|
+ std::error_code EC;
|
||||||
|
+ SmallVector<char> Vect(sizeof(Data));
|
||||||
|
+ raw_svector_ostream OS(Vect);
|
||||||
|
+
|
||||||
|
+ BS.flushPendingRelocations(OS, [&](const MCSymbol *S) {
|
||||||
|
+ return S == RelSymbol1 ? 4 : S == RelSymbol2 ? 16 : 0;
|
||||||
|
+ });
|
||||||
|
+
|
||||||
|
+ const uint8_t Func1Call[4] = {255, 255, 255, 151};
|
||||||
|
+ const uint8_t Func2Call[4] = {1, 0, 0, 148};
|
||||||
|
+
|
||||||
|
+ EXPECT_FALSE(memcmp(Func1Call, &Vect[8], 4)) << "Wrong backward call value\n";
|
||||||
|
+ EXPECT_FALSE(memcmp(Func2Call, &Vect[12], 4)) << "Wrong forward call value\n";
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
#endif
|
||||||
|
|
||||||
|
TEST_P(BinaryContextTester, BaseAddress) {
|
||||||
|
--
|
||||||
|
2.39.2 (Apple Git-143)
|
||||||
|
|
||||||
1820
0006-AArch64-Add-CFG-block-count-correction-optimization.patch
Normal file
1820
0006-AArch64-Add-CFG-block-count-correction-optimization.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,96 @@
|
|||||||
|
From 6c8933e1a095028d648a5a26aecee0f569304dd0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: sinan <sinan.lin@linux.alibaba.com>
|
||||||
|
Date: Wed, 7 Aug 2024 18:02:42 +0800
|
||||||
|
Subject: [PATCH] [BOLT] Skip PLT search for zero-value weak reference symbols
|
||||||
|
(#69136)
|
||||||
|
|
||||||
|
Take a common weak-reference pattern as an example:
|
||||||
|
```
|
||||||
|
__attribute__((weak)) void undef_weak_fun();
|
||||||
|
|
||||||
|
if (&undef_weak_fun)
|
||||||
|
undef_weak_fun();
|
||||||
|
```
|
||||||
|
|
||||||
|
In this case, an undefined weak symbol `undef_weak_fun` has an address
|
||||||
|
of zero, and BOLT incorrectly changes the relocation for the
|
||||||
|
corresponding symbol to symbol@PLT, leading to incorrect runtime
|
||||||
|
behavior.
|
||||||
|
---
|
||||||
|
bolt/lib/Rewrite/RewriteInstance.cpp | 11 +++++-
|
||||||
|
.../AArch64/update-weak-reference-symbol.s | 34 +++++++++++++++++++
|
||||||
|
2 files changed, 44 insertions(+), 1 deletion(-)
|
||||||
|
create mode 100644 bolt/test/AArch64/update-weak-reference-symbol.s
|
||||||
|
|
||||||
|
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
|
||||||
|
index 78b4889bf2ae..d2e2ca2f7553 100644
|
||||||
|
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
|
||||||
|
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
|
||||||
|
@@ -2143,6 +2143,14 @@ bool RewriteInstance::analyzeRelocation(
|
||||||
|
if (!Relocation::isSupported(RType))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
+ auto IsWeakReference = [](const SymbolRef &Symbol) {
|
||||||
|
+ Expected<uint32_t> SymFlagsOrErr = Symbol.getFlags();
|
||||||
|
+ if (!SymFlagsOrErr)
|
||||||
|
+ return false;
|
||||||
|
+ return (*SymFlagsOrErr & SymbolRef::SF_Undefined) &&
|
||||||
|
+ (*SymFlagsOrErr & SymbolRef::SF_Weak);
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
const bool IsAArch64 = BC->isAArch64();
|
||||||
|
|
||||||
|
const size_t RelSize = Relocation::getSizeForType(RType);
|
||||||
|
@@ -2174,7 +2182,8 @@ bool RewriteInstance::analyzeRelocation(
|
||||||
|
// Section symbols are marked as ST_Debug.
|
||||||
|
IsSectionRelocation = (cantFail(Symbol.getType()) == SymbolRef::ST_Debug);
|
||||||
|
// Check for PLT entry registered with symbol name
|
||||||
|
- if (!SymbolAddress && (IsAArch64 || BC->isRISCV())) {
|
||||||
|
+ if (!SymbolAddress && !IsWeakReference(Symbol) &&
|
||||||
|
+ (IsAArch64 || BC->isRISCV())) {
|
||||||
|
const BinaryData *BD = BC->getPLTBinaryDataByName(SymbolName);
|
||||||
|
SymbolAddress = BD ? BD->getAddress() : 0;
|
||||||
|
}
|
||||||
|
diff --git a/bolt/test/AArch64/update-weak-reference-symbol.s b/bolt/test/AArch64/update-weak-reference-symbol.s
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000000..600a06b8b6d8
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/bolt/test/AArch64/update-weak-reference-symbol.s
|
||||||
|
@@ -0,0 +1,34 @@
|
||||||
|
+// This test checks whether BOLT can correctly handle relocations against weak symbols.
|
||||||
|
+
|
||||||
|
+// RUN: %clang %cflags -Wl,-z,notext -shared -Wl,-q %s -o %t.so
|
||||||
|
+// RUN: llvm-bolt %t.so -o %t.so.bolt
|
||||||
|
+// RUN: llvm-nm -n %t.so.bolt > %t.out.txt
|
||||||
|
+// RUN: llvm-objdump -dj .rodata %t.so.bolt >> %t.out.txt
|
||||||
|
+// RUN: FileCheck %s --input-file=%t.out.txt
|
||||||
|
+
|
||||||
|
+# CHECK: w func_1
|
||||||
|
+# CHECK: {{0+}}[[#%x,ADDR:]] W func_2
|
||||||
|
+
|
||||||
|
+# CHECK: {{.*}} <.rodata>:
|
||||||
|
+# CHECK-NEXT: {{.*}} .word 0x00000000
|
||||||
|
+# CHECK-NEXT: {{.*}} .word 0x00000000
|
||||||
|
+# CHECK-NEXT: {{.*}} .word 0x{{[0]+}}[[#ADDR]]
|
||||||
|
+# CHECK-NEXT: {{.*}} .word 0x00000000
|
||||||
|
+
|
||||||
|
+ .text
|
||||||
|
+ .weak func_2
|
||||||
|
+ .weak func_1
|
||||||
|
+ .global wow
|
||||||
|
+ .type wow, %function
|
||||||
|
+wow:
|
||||||
|
+ bl func_1
|
||||||
|
+ bl func_2
|
||||||
|
+ ret
|
||||||
|
+ .type func_2, %function
|
||||||
|
+func_2:
|
||||||
|
+ ret
|
||||||
|
+ .section .rodata
|
||||||
|
+.LC0:
|
||||||
|
+ .xword func_1
|
||||||
|
+.LC1:
|
||||||
|
+ .xword func_2
|
||||||
|
--
|
||||||
|
2.39.3 (Apple Git-146)
|
||||||
|
|
||||||
58
0008-merge-fdata-Support-process-no_lbr-profile-file.patch
Normal file
58
0008-merge-fdata-Support-process-no_lbr-profile-file.patch
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
From 583d41ce046670eae7a59fb678a9e959cf0af061 Mon Sep 17 00:00:00 2001
|
||||||
|
From: liyancheng <412998149@qq.com>
|
||||||
|
Date: Tue, 10 Sep 2024 15:09:51 +0800
|
||||||
|
Subject: [PATCH] [merge-fdata] Support processing no_lbr profile file
|
||||||
|
|
||||||
|
---
|
||||||
|
bolt/tools/merge-fdata/merge-fdata.cpp | 21 ++++++++++++++++++++-
|
||||||
|
1 file changed, 20 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/bolt/tools/merge-fdata/merge-fdata.cpp b/bolt/tools/merge-fdata/merge-fdata.cpp
|
||||||
|
index 757f05366..147e18639 100644
|
||||||
|
--- a/bolt/tools/merge-fdata/merge-fdata.cpp
|
||||||
|
+++ b/bolt/tools/merge-fdata/merge-fdata.cpp
|
||||||
|
@@ -261,6 +261,7 @@ bool isYAML(const StringRef Filename) {
|
||||||
|
void mergeLegacyProfiles(const SmallVectorImpl<std::string> &Filenames) {
|
||||||
|
errs() << "Using legacy profile format.\n";
|
||||||
|
std::optional<bool> BoltedCollection;
|
||||||
|
+ std::optional<bool> NoLBRMode;
|
||||||
|
std::mutex BoltedCollectionMutex;
|
||||||
|
typedef StringMap<uint64_t> ProfileTy;
|
||||||
|
|
||||||
|
@@ -294,6 +295,22 @@ void mergeLegacyProfiles(const SmallVectorImpl<std::string> &Filenames) {
|
||||||
|
BoltedCollection = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ // Check whether the profile buffer starts with the "no_lbr" header
|
||||||
|
+ if (Buf.startswith("no_lbr")) {
|
||||||
|
+ if (!NoLBRMode.value_or(true))
|
||||||
|
+ report_error(
|
||||||
|
+ Filename,
|
||||||
|
+ "cannot mix profile collected with lbr and non-lbr info");
|
||||||
|
+ NoLBRMode = true;
|
||||||
|
+ Buf = Buf.drop_front(Buf.find_first_of("\n"));
|
||||||
|
+ } else {
|
||||||
|
+ if (NoLBRMode.value_or(false))
|
||||||
|
+ report_error(
|
||||||
|
+ Filename,
|
||||||
|
+ "cannot mix profile collected with lbr and non-lbr info");
|
||||||
|
+ NoLBRMode = false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
Profile = &Profiles[tid];
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -329,7 +346,9 @@ void mergeLegacyProfiles(const SmallVectorImpl<std::string> &Filenames) {
|
||||||
|
MergedProfile.insert_or_assign(Key, Count);
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (BoltedCollection)
|
||||||
|
+ if (NoLBRMode)
|
||||||
|
+ output() << "no_lbr cycles:u:\n";
|
||||||
|
+ else if (BoltedCollection)
|
||||||
|
output() << "boltedcollection\n";
|
||||||
|
for (const auto &[Key, Value] : MergedProfile)
|
||||||
|
output() << Key << " " << Value << "\n";
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
2630
0009-support-aarch64-instrumentation.patch
Normal file
2630
0009-support-aarch64-instrumentation.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,74 @@
|
|||||||
|
From 43aa1ec5b46baf032cf2fee22d765a195d40cf59 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?=E7=89=9F=E6=96=87=E9=BE=99?= <muwl182@163.com>
|
||||||
|
Date: Mon, 18 Nov 2024 02:13:25 +0000
|
||||||
|
Subject: [PATCH] [AArch64] Add hybrid guess approach for edge weight
|
||||||
|
estimation
|
||||||
|
|
||||||
|
---
|
||||||
|
bolt/lib/Passes/MCF.cpp | 33 +++++++++++++++++++++++++++++++--
|
||||||
|
1 file changed, 31 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/bolt/lib/Passes/MCF.cpp b/bolt/lib/Passes/MCF.cpp
|
||||||
|
index c3898d2dc..a6455bbeb 100644
|
||||||
|
--- a/bolt/lib/Passes/MCF.cpp
|
||||||
|
+++ b/bolt/lib/Passes/MCF.cpp
|
||||||
|
@@ -36,6 +36,11 @@ static cl::opt<bool> IterativeGuess(
|
||||||
|
cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
|
||||||
|
cl::Hidden, cl::cat(BoltOptCategory));
|
||||||
|
|
||||||
|
+static cl::opt<bool> HybridGuess(
|
||||||
|
+ "hybrid-guess",
|
||||||
|
+ cl::desc("in non-LBR mode, guess edge counts using hybird estimation technique"),
|
||||||
|
+ cl::Hidden, cl::cat(BoltOptCategory));
|
||||||
|
+
|
||||||
|
static cl::opt<bool> UseRArcs(
|
||||||
|
"mcf-use-rarcs",
|
||||||
|
cl::desc("in MCF, consider the possibility of cancelling flow to balance "
|
||||||
|
@@ -350,6 +355,27 @@ void guessEdgeByIterativeApproach(BinaryFunction &BF) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+void guessEdgeByHybridApproach(BinaryFunction &BF,
|
||||||
|
+ EdgeWeightMap &PredEdgeWeights,
|
||||||
|
+ EdgeWeightMap &SuccEdgeWeights) {
|
||||||
|
+ for (BinaryBasicBlock &BB : BF) {
|
||||||
|
+ for (BinaryBasicBlock *Pred : BB.predecessors()) {
|
||||||
|
+ double RelativeExecSucc = SuccEdgeWeights[std::make_pair(Pred, &BB)];
|
||||||
|
+ double RelativeExec = PredEdgeWeights[std::make_pair(Pred, &BB)];
|
||||||
|
+ RelativeExec *= BB.getExecutionCount();
|
||||||
|
+ RelativeExecSucc *= Pred->getExecutionCount();
|
||||||
|
+ BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
|
||||||
|
+ if ((static_cast<uint64_t>(RelativeExec) != 0) && (static_cast<uint64_t>(RelativeExecSucc) != 0)) {
|
||||||
|
+ BI.Count = (static_cast<uint64_t>(RelativeExec) + RelativeExecSucc) / 2;
|
||||||
|
+ } else if (static_cast<uint64_t>(RelativeExec) != 0) {
|
||||||
|
+ BI.Count = static_cast<uint64_t>(RelativeExec);
|
||||||
|
+ } else if (static_cast<uint64_t>(RelativeExecSucc) != 0) {
|
||||||
|
+ BI.Count = static_cast<uint64_t>(RelativeExecSucc);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/// Associate each basic block with the BinaryLoop object corresponding to the
|
||||||
|
/// innermost loop containing this block.
|
||||||
|
DenseMap<const BinaryBasicBlock *, const BinaryLoop *>
|
||||||
|
@@ -454,11 +480,14 @@ void estimateEdgeCounts(BinaryFunction &BF) {
|
||||||
|
equalizeBBCounts(Info, BF);
|
||||||
|
LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts"));
|
||||||
|
}
|
||||||
|
- if (opts::IterativeGuess)
|
||||||
|
+ if (opts::IterativeGuess) {
|
||||||
|
guessEdgeByIterativeApproach(BF);
|
||||||
|
- else
|
||||||
|
+ } else if (opts::HybridGuess) {
|
||||||
|
+ guessEdgeByHybridApproach(BF, PredEdgeWeights, SuccEdgeWeights);
|
||||||
|
+ } else {
|
||||||
|
guessEdgeByRelHotness(BF, /*UseSuccs=*/false, PredEdgeWeights,
|
||||||
|
SuccEdgeWeights);
|
||||||
|
+ }
|
||||||
|
recalculateBBCounts(BF, /*AllEdges=*/false);
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.25.1
|
||||||
|
|
||||||
226
0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch
Normal file
226
0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch
Normal file
@ -0,0 +1,226 @@
|
|||||||
|
From 525a2d44443547c0349198df18286f594d62d557 Mon Sep 17 00:00:00 2001
|
||||||
|
From: rfwang07 <wangrufeng5@huawei.com>
|
||||||
|
Date: Tue, 19 Nov 2024 09:48:40 +0800
|
||||||
|
Subject: [PATCH] support D-FOT addrs data parsing for optimized binary
|
||||||
|
|
||||||
|
---
|
||||||
|
bolt/include/bolt/Profile/DataAggregator.h | 31 ++++++++
|
||||||
|
bolt/lib/Profile/DataAggregator.cpp | 86 +++++++++++++++++++++-
|
||||||
|
2 files changed, 113 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
|
||||||
|
index cc237a6..d352f1b 100644
|
||||||
|
--- a/bolt/include/bolt/Profile/DataAggregator.h
|
||||||
|
+++ b/bolt/include/bolt/Profile/DataAggregator.h
|
||||||
|
@@ -102,6 +102,12 @@ private:
|
||||||
|
Type EntryType;
|
||||||
|
};
|
||||||
|
|
||||||
|
+ /// Used for parsing specific libkperf input files.
|
||||||
|
+ struct LibkperfDataEntry {
|
||||||
|
+ uint64_t Addr;
|
||||||
|
+ uint64_t Count;
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
struct Trace {
|
||||||
|
uint64_t From;
|
||||||
|
uint64_t To;
|
||||||
|
@@ -300,6 +306,9 @@ private:
|
||||||
|
/// Parse pre-aggregated LBR samples created by an external tool
|
||||||
|
ErrorOr<AggregatedLBREntry> parseAggregatedLBREntry();
|
||||||
|
|
||||||
|
+ /// Parse libkperf samples created by D-FOT
|
||||||
|
+ ErrorOr<LibkperfDataEntry> parseLibkperfDataEntry();
|
||||||
|
+
|
||||||
|
/// Parse either buildid:offset or just offset, representing a location in the
|
||||||
|
/// binary. Used exclusevely for pre-aggregated LBR samples.
|
||||||
|
ErrorOr<Location> parseLocationOrOffset();
|
||||||
|
@@ -417,10 +426,32 @@ private:
|
||||||
|
/// B 4b196f 4b19e0 2 0
|
||||||
|
void parsePreAggregated();
|
||||||
|
|
||||||
|
+ /// Coordinate reading and parsing of a libkperf file.
|
||||||
|
+ /// The regular perf2bolt aggregation job is to read perf output directly.
|
||||||
|
+ /// But in the oeaware framework, sampling is done by libkperf.
|
||||||
|
+ /// For data collected by sampling the BOLT-optimized binary,
|
||||||
|
+ /// oeaware can export addrs and counts.
|
||||||
|
+ /// In perf2bolt, with the help of the BAT section,
|
||||||
|
+ /// this data is converted to a profile that is usable for the original binary.
|
||||||
|
+ ///
|
||||||
|
+ /// File format syntax:
|
||||||
|
+ /// - first line: <event type>
|
||||||
|
+ /// - the other lines: <addr> <count>
|
||||||
|
+ ///
|
||||||
|
+ /// Example:
|
||||||
|
+ /// cycles
|
||||||
|
+ /// 40f544 1
|
||||||
|
+ /// 40f750 2
|
||||||
|
+ /// 40f810 53
|
||||||
|
+ void parseLibkperfFile();
|
||||||
|
+
|
||||||
|
/// Parse the full output of pre-aggregated LBR samples generated by
|
||||||
|
/// an external tool.
|
||||||
|
std::error_code parsePreAggregatedLBRSamples();
|
||||||
|
|
||||||
|
+ /// Parse the libkperf samples
|
||||||
|
+ std::error_code parseLibkperfSamples();
|
||||||
|
+
|
||||||
|
/// Process parsed pre-aggregated data.
|
||||||
|
void processPreAggregated();
|
||||||
|
|
||||||
|
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
|
||||||
|
index 24dbe34..509e7c9 100644
|
||||||
|
--- a/bolt/lib/Profile/DataAggregator.cpp
|
||||||
|
+++ b/bolt/lib/Profile/DataAggregator.cpp
|
||||||
|
@@ -85,6 +85,11 @@ cl::opt<bool> ReadPreAggregated(
|
||||||
|
"pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
|
||||||
|
cl::cat(AggregatorCategory));
|
||||||
|
|
||||||
|
+cl::opt<bool> ReadLibkperfFile(
|
||||||
|
+ "libkperf", cl::desc("skip perf and read data from a libkperf file format, "
|
||||||
|
+ "only for continuous optimizing with BAT"),
|
||||||
|
+ cl::cat(AggregatorCategory));
|
||||||
|
+
|
||||||
|
static cl::opt<bool>
|
||||||
|
TimeAggregator("time-aggr",
|
||||||
|
cl::desc("time BOLT aggregator"),
|
||||||
|
@@ -157,8 +162,8 @@ void DataAggregator::findPerfExecutable() {
|
||||||
|
void DataAggregator::start() {
|
||||||
|
outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
|
||||||
|
|
||||||
|
- // Don't launch perf for pre-aggregated files
|
||||||
|
- if (opts::ReadPreAggregated)
|
||||||
|
+ // Don't launch perf for pre-aggregated files and libkperf files
|
||||||
|
+ if (opts::ReadPreAggregated || opts::ReadLibkperfFile)
|
||||||
|
return;
|
||||||
|
|
||||||
|
findPerfExecutable();
|
||||||
|
@@ -193,7 +198,7 @@ void DataAggregator::start() {
|
||||||
|
}
|
||||||
|
|
||||||
|
void DataAggregator::abort() {
|
||||||
|
- if (opts::ReadPreAggregated)
|
||||||
|
+ if (opts::ReadPreAggregated || opts::ReadLibkperfFile)
|
||||||
|
return;
|
||||||
|
|
||||||
|
std::string Error;
|
||||||
|
@@ -313,6 +318,8 @@ void DataAggregator::processFileBuildID(StringRef FileBuildID) {
|
||||||
|
bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
|
||||||
|
if (opts::ReadPreAggregated)
|
||||||
|
return true;
|
||||||
|
+ if (opts::ReadLibkperfFile)
|
||||||
|
+ return true;
|
||||||
|
|
||||||
|
Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
|
||||||
|
if (!FD) {
|
||||||
|
@@ -359,6 +366,27 @@ void DataAggregator::parsePreAggregated() {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+void DataAggregator::parseLibkperfFile() {
|
||||||
|
+ std::string Error;
|
||||||
|
+
|
||||||
|
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
|
||||||
|
+ MemoryBuffer::getFileOrSTDIN(Filename);
|
||||||
|
+ if (std::error_code EC = MB.getError()) {
|
||||||
|
+ errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
|
||||||
|
+ << EC.message() << "\n";
|
||||||
|
+ exit(1);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ FileBuf = std::move(*MB);
|
||||||
|
+ ParsingBuf = FileBuf->getBuffer();
|
||||||
|
+ Col = 0;
|
||||||
|
+ Line = 0;
|
||||||
|
+ if (parseLibkperfSamples()) {
|
||||||
|
+ errs() << "PERF2BOLT: failed to parse libkperf samples\n";
|
||||||
|
+ exit(1);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
|
||||||
|
outs() << "PERF2BOLT: writing data for autofdo tools...\n";
|
||||||
|
NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
|
||||||
|
@@ -502,6 +530,11 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
|
||||||
|
return Error::success();
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (opts::ReadLibkperfFile) {
|
||||||
|
+ parseLibkperfFile();
|
||||||
|
+ return Error::success();
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
|
||||||
|
outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
|
||||||
|
processFileBuildID(*FileBuildID);
|
||||||
|
@@ -608,7 +641,7 @@ bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
|
||||||
|
void DataAggregator::processProfile(BinaryContext &BC) {
|
||||||
|
if (opts::ReadPreAggregated)
|
||||||
|
processPreAggregated();
|
||||||
|
- else if (opts::BasicAggregation)
|
||||||
|
+ else if (opts::BasicAggregation || opts::ReadLibkperfFile)
|
||||||
|
processBasicEvents();
|
||||||
|
else
|
||||||
|
processBranchEvents();
|
||||||
|
@@ -1206,6 +1239,28 @@ ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
|
||||||
|
return Location(true, BuildID.get(), Offset.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
+ErrorOr<DataAggregator::LibkperfDataEntry>
|
||||||
|
+DataAggregator::parseLibkperfDataEntry() {
|
||||||
|
+ // <hex addr> <count>
|
||||||
|
+ while (checkAndConsumeFS()) {
|
||||||
|
+ }
|
||||||
|
+ ErrorOr<uint64_t> Addr = parseHexField(FieldSeparator);
|
||||||
|
+ if (std::error_code EC = Addr.getError())
|
||||||
|
+ return EC;
|
||||||
|
+ while (checkAndConsumeFS()) {
|
||||||
|
+ }
|
||||||
|
+ ErrorOr<uint64_t> Count = parseNumberField(FieldSeparator, true);
|
||||||
|
+ if (std::error_code EC = Count.getError())
|
||||||
|
+ return EC;
|
||||||
|
+
|
||||||
|
+ if (!checkAndConsumeNewLine()) {
|
||||||
|
+ reportError("expected end of line");
|
||||||
|
+ return make_error_code(llvm::errc::io_error);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return LibkperfDataEntry{Addr.get(), Count.get()};
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
ErrorOr<DataAggregator::AggregatedLBREntry>
|
||||||
|
DataAggregator::parseAggregatedLBREntry() {
|
||||||
|
while (checkAndConsumeFS()) {
|
||||||
|
@@ -1712,6 +1767,29 @@ void DataAggregator::processMemEvents() {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+std::error_code DataAggregator::parseLibkperfSamples() {
|
||||||
|
+ outs() << "PERF2BOLT: parsing libkperf data...\n";
|
||||||
|
+ NamedRegionTimer T("parseLibkperfData", "Parsing libkperf data",
|
||||||
|
+ TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
|
||||||
|
+ bool FirstLine = true;
|
||||||
|
+ while (hasData()) {
|
||||||
|
+ if (FirstLine) {
|
||||||
|
+ ErrorOr<StringRef> Event = parseString('\n');
|
||||||
|
+ if (std::error_code EC = Event.getError())
|
||||||
|
+ return EC;
|
||||||
|
+ EventNames.insert(Event.get());
|
||||||
|
+ FirstLine = false;
|
||||||
|
+ }
|
||||||
|
+ ErrorOr<LibkperfDataEntry> KperfEntry = parseLibkperfDataEntry();
|
||||||
|
+ if (std::error_code EC = KperfEntry.getError())
|
||||||
|
+ return EC;
|
||||||
|
+
|
||||||
|
+ BasicSamples[KperfEntry->Addr] += KperfEntry->Count;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return std::error_code();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
|
||||||
|
outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
|
||||||
|
NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
|
||||||
|
--
|
||||||
|
2.39.5 (Apple Git-154)
|
||||||
|
|
||||||
@ -22,7 +22,7 @@
|
|||||||
|
|
||||||
Name: %{pkg_name}
|
Name: %{pkg_name}
|
||||||
Version: %{bolt_version}
|
Version: %{bolt_version}
|
||||||
Release: 1
|
Release: 2
|
||||||
Summary: BOLT is a post-link optimizer developed to speed up large applications
|
Summary: BOLT is a post-link optimizer developed to speed up large applications
|
||||||
License: Apache 2.0
|
License: Apache 2.0
|
||||||
URL: https://github.com/llvm/llvm-project/tree/main/bolt
|
URL: https://github.com/llvm/llvm-project/tree/main/bolt
|
||||||
@ -30,10 +30,17 @@ URL: https://github.com/llvm/llvm-project/tree/main/bolt
|
|||||||
Source0: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{bolt_version}/%{bolt_srcdir}.tar.xz
|
Source0: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{bolt_version}/%{bolt_srcdir}.tar.xz
|
||||||
Source1: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{bolt_version}/%{bolt_srcdir}.tar.xz.sig
|
Source1: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{bolt_version}/%{bolt_srcdir}.tar.xz.sig
|
||||||
|
|
||||||
# BOLT is not respecting the component split of LLVM and requires some private
|
Patch1: 0001-Fix-trap-value-for-non-X86.patch
|
||||||
# headers in order to compile itself. Try to disable as much libraries as
|
Patch2: 0002-Add-test-for-emitting-trap-value.patch
|
||||||
# possible in order to reduce build time.
|
Patch3: 0003-AArch64-Add-AArch64-support-for-inline.patch
|
||||||
#Patch0: rm-llvm-libs.diff
|
Patch4: 0004-Bolt-Solving-pie-support-issue.patch
|
||||||
|
Patch5: 0005-BOLT-AArch64-Don-t-change-layout-in-PatchEntries.patch
|
||||||
|
Patch6: 0006-AArch64-Add-CFG-block-count-correction-optimization.patch
|
||||||
|
Patch7: 0007-BOLT-Skip-PLT-search-for-zero-value-weak-reference-symbols.patch
|
||||||
|
Patch8: 0008-merge-fdata-Support-process-no_lbr-profile-file.patch
|
||||||
|
Patch9: 0009-support-aarch64-instrumentation.patch
|
||||||
|
Patch10: 0010-AArch64-Add-hybrid-guess-approach-for-edge-weight-estimation.patch
|
||||||
|
Patch11: 0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch
|
||||||
|
|
||||||
BuildRequires: gcc
|
BuildRequires: gcc
|
||||||
BuildRequires: gcc-c++
|
BuildRequires: gcc-c++
|
||||||
@ -84,7 +91,6 @@ Documentation for the BOLT optimizer
|
|||||||
-DLLVM_TARGETS_TO_BUILD="AArch64"
|
-DLLVM_TARGETS_TO_BUILD="AArch64"
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
|
||||||
# Set LD_LIBRARY_PATH now because we skip rpath generation and the build uses
|
# Set LD_LIBRARY_PATH now because we skip rpath generation and the build uses
|
||||||
# some just built libraries.
|
# some just built libraries.
|
||||||
export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}
|
export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}
|
||||||
@ -104,6 +110,9 @@ find %{buildroot}%{install_prefix} \
|
|||||||
! -name "libbolt_rt_instr.a" \
|
! -name "libbolt_rt_instr.a" \
|
||||||
-type f,l -exec rm -f '{}' \;
|
-type f,l -exec rm -f '{}' \;
|
||||||
|
|
||||||
|
%ifarch aarch64
|
||||||
|
find %{buildroot}%{install_prefix} -name "libbolt_rt_hugify.a" -type f,l -exec rm -f '{}' \;
|
||||||
|
%endif
|
||||||
|
|
||||||
# Remove files installed during the build phase.
|
# Remove files installed during the build phase.
|
||||||
rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a
|
rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a
|
||||||
@ -120,7 +129,7 @@ mv bolt/README.md bolt/docs/*.md %{buildroot}%{install_docdir}
|
|||||||
rm bolt/test/cache+-deprecated.test bolt/test/bolt-icf.test bolt/test/R_ABS.pic.lld.cpp
|
rm bolt/test/cache+-deprecated.test bolt/test/bolt-icf.test bolt/test/R_ABS.pic.lld.cpp
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}//%{_vpath_builddir}/%{_lib}
|
export LD_LIBRARY_PATH=%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}
|
||||||
export DESTDIR=%{buildroot}
|
export DESTDIR=%{buildroot}
|
||||||
%ninja_build check-bolt
|
%ninja_build check-bolt
|
||||||
|
|
||||||
@ -136,9 +145,9 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a
|
|||||||
%{install_bindir}/perf2bolt
|
%{install_bindir}/perf2bolt
|
||||||
%{install_bindir}/llvm-bolt-heatmap
|
%{install_bindir}/llvm-bolt-heatmap
|
||||||
|
|
||||||
|
%{install_libdir}/libbolt_rt_instr.a
|
||||||
%ifarch x86_64
|
%ifarch x86_64
|
||||||
%{install_libdir}/libbolt_rt_hugify.a
|
%{install_libdir}/libbolt_rt_hugify.a
|
||||||
%{install_libdir}/libbolt_rt_instr.a
|
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%exclude %{_builddir}/%{bolt_srcdir}/lib/*
|
%exclude %{_builddir}/%{bolt_srcdir}/lib/*
|
||||||
@ -146,8 +155,13 @@ rm -f %{buildroot}/%{_builddir}/%{bolt_srcdir}/%{_vpath_builddir}/%{_lib}/lib*.a
|
|||||||
%files doc
|
%files doc
|
||||||
%doc %{install_docdir}
|
%doc %{install_docdir}
|
||||||
|
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu Nov 21 2024 rfwang07 <wangrufeng5@huawei.com> 17.0.6-2
|
||||||
|
- Type:backport
|
||||||
|
- ID:NA
|
||||||
|
- SUG:NA
|
||||||
|
- DESC: Sync patch from 2203sp4
|
||||||
|
|
||||||
* Mon Dec 4 2023 zhoujing <zhoujing106@huawei.com> 17.0.6-1
|
* Mon Dec 4 2023 zhoujing <zhoujing106@huawei.com> 17.0.6-1
|
||||||
- Update to 17.0.6
|
- Update to 17.0.6
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user