2631 lines
94 KiB
Diff
2631 lines
94 KiB
Diff
From a7d826d3985dd886523df050949f1c3c151df636 Mon Sep 17 00:00:00 2001
|
|
From: rfwang07 <wangrufeng5@huawei.com>
|
|
Date: Thu, 31 Oct 2024 15:34:10 +0800
|
|
Subject: [PATCH] support aarch64 instrumentation
|
|
|
|
---
|
|
bolt/CMakeLists.txt | 6 +-
|
|
bolt/include/bolt/Core/MCPlusBuilder.h | 24 +-
|
|
bolt/lib/Core/BinaryFunction.cpp | 6 +
|
|
bolt/lib/Passes/Instrumentation.cpp | 28 +-
|
|
bolt/lib/Passes/MCF.cpp | 1 +
|
|
bolt/lib/Passes/TailDuplication.cpp | 2 +-
|
|
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 446 +++++++++++++++++-
|
|
bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 67 +--
|
|
bolt/runtime/CMakeLists.txt | 12 +-
|
|
bolt/runtime/common.h | 417 ++--------------
|
|
bolt/runtime/instr.cpp | 61 ++-
|
|
bolt/runtime/sys_aarch64.h | 394 ++++++++++++++++
|
|
bolt/runtime/sys_x86_64.h | 360 ++++++++++++++
|
|
bolt/test/AArch64/exclusive-instrument.s | 39 ++
|
|
bolt/test/X86/asm-dump.c | 5 +-
|
|
...olt-address-translation-internal-call.test | 9 +-
|
|
.../test/X86/instrumentation-eh_frame_hdr.cpp | 2 +-
|
|
bolt/test/X86/internal-call-instrument.s | 24 +-
|
|
bolt/test/X86/tail-duplication-pass.s | 9 +
|
|
bolt/test/assume-abi.test | 7 +
|
|
.../AArch64/Inputs/basic-instrumentation.s | 9 +
|
|
.../AArch64/basic-instrumentation.test | 22 +
|
|
.../AArch64/instrumentation-ind-call.c | 38 ++
|
|
.../{X86 => }/Inputs/exceptions_split.cpp | 16 +-
|
|
.../runtime/X86/instrumentation-tail-call.s | 6 +-
|
|
.../{X86 => }/exceptions-instrumentation.test | 0
|
|
.../{X86 => }/pie-exceptions-split.test | 4 +-
|
|
27 files changed, 1545 insertions(+), 469 deletions(-)
|
|
create mode 100644 bolt/runtime/sys_aarch64.h
|
|
create mode 100644 bolt/runtime/sys_x86_64.h
|
|
create mode 100644 bolt/test/AArch64/exclusive-instrument.s
|
|
create mode 100644 bolt/test/assume-abi.test
|
|
create mode 100644 bolt/test/runtime/AArch64/Inputs/basic-instrumentation.s
|
|
create mode 100644 bolt/test/runtime/AArch64/basic-instrumentation.test
|
|
create mode 100644 bolt/test/runtime/AArch64/instrumentation-ind-call.c
|
|
rename bolt/test/runtime/{X86 => }/Inputs/exceptions_split.cpp (85%)
|
|
rename bolt/test/runtime/{X86 => }/exceptions-instrumentation.test (100%)
|
|
rename bolt/test/runtime/{X86 => }/pie-exceptions-split.test (95%)
|
|
|
|
diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt
|
|
index 4ff90c1..89462f8 100644
|
|
--- a/bolt/CMakeLists.txt
|
|
+++ b/bolt/CMakeLists.txt
|
|
@@ -32,10 +32,10 @@ foreach (tgt ${BOLT_TARGETS_TO_BUILD})
|
|
endforeach()
|
|
|
|
set(BOLT_ENABLE_RUNTIME_default OFF)
|
|
-if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64"
|
|
+if ((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64"
|
|
+ OR CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
|
|
AND (CMAKE_SYSTEM_NAME STREQUAL "Linux"
|
|
- OR CMAKE_SYSTEM_NAME STREQUAL "Darwin")
|
|
- AND "X86" IN_LIST BOLT_TARGETS_TO_BUILD)
|
|
+ OR CMAKE_SYSTEM_NAME STREQUAL "Darwin"))
|
|
set(BOLT_ENABLE_RUNTIME_default ON)
|
|
endif()
|
|
option(BOLT_ENABLE_RUNTIME "Enable BOLT runtime" ${BOLT_ENABLE_RUNTIME_default})
|
|
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
|
|
index beb0675..e6945c9 100644
|
|
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
|
|
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
|
|
@@ -498,9 +498,9 @@ public:
|
|
}
|
|
|
|
/// Create increment contents of target by 1 for Instrumentation
|
|
- virtual InstructionListType createInstrIncMemory(const MCSymbol *Target,
|
|
- MCContext *Ctx,
|
|
- bool IsLeaf) const {
|
|
+ virtual InstructionListType
|
|
+ createInstrIncMemory(const MCSymbol *Target, MCContext *Ctx, bool IsLeaf,
|
|
+ unsigned CodePointerSize) const {
|
|
llvm_unreachable("not implemented");
|
|
return InstructionListType();
|
|
}
|
|
@@ -620,6 +620,11 @@ public:
|
|
return false;
|
|
}
|
|
|
|
+ virtual bool isAArch64Exclusive(const MCInst &Inst) const {
|
|
+ llvm_unreachable("not implemented");
|
|
+ return false;
|
|
+ }
|
|
+
|
|
virtual bool isCleanRegXOR(const MCInst &Inst) const {
|
|
llvm_unreachable("not implemented");
|
|
return false;
|
|
@@ -1597,18 +1602,11 @@ public:
|
|
return false;
|
|
}
|
|
|
|
- virtual void createLoadImmediate(MCInst &Inst, const MCPhysReg Dest,
|
|
- uint32_t Imm) const {
|
|
+ virtual InstructionListType createLoadImmediate(const MCPhysReg Dest,
|
|
+ uint64_t Imm) const {
|
|
llvm_unreachable("not implemented");
|
|
}
|
|
|
|
- /// Create instruction to increment contents of target by 1
|
|
- virtual bool createIncMemory(MCInst &Inst, const MCSymbol *Target,
|
|
- MCContext *Ctx) const {
|
|
- llvm_unreachable("not implemented");
|
|
- return false;
|
|
- }
|
|
-
|
|
/// Create a fragment of code (sequence of instructions) that load a 32-bit
|
|
/// address from memory, zero-extends it to 64 and jump to it (indirect jump).
|
|
virtual bool
|
|
@@ -1969,7 +1967,7 @@ public:
|
|
}
|
|
|
|
virtual InstructionListType createSymbolTrampoline(const MCSymbol *TgtSym,
|
|
- MCContext *Ctx) const {
|
|
+ MCContext *Ctx) {
|
|
llvm_unreachable("not implemented");
|
|
return InstructionListType();
|
|
}
|
|
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
|
|
index 5b44a76..b79bd58 100644
|
|
--- a/bolt/lib/Core/BinaryFunction.cpp
|
|
+++ b/bolt/lib/Core/BinaryFunction.cpp
|
|
@@ -2305,6 +2305,12 @@ void BinaryFunction::removeConditionalTailCalls() {
|
|
|
|
// This branch is no longer a conditional tail call.
|
|
BC.MIB->unsetConditionalTailCall(*CTCInstr);
|
|
+
|
|
+ // Move offset from CTCInstr to TailCallInstr.
|
|
+ if (std::optional<uint32_t> Offset = BC.MIB->getOffset(*CTCInstr)) {
|
|
+ BC.MIB->setOffset(TailCallInstr, *Offset);
|
|
+ BC.MIB->clearOffset(*CTCInstr);
|
|
+ }
|
|
}
|
|
|
|
insertBasicBlocks(std::prev(end()), std::move(NewBlocks),
|
|
diff --git a/bolt/lib/Passes/Instrumentation.cpp b/bolt/lib/Passes/Instrumentation.cpp
|
|
index fae6770..72adb31 100644
|
|
--- a/bolt/lib/Passes/Instrumentation.cpp
|
|
+++ b/bolt/lib/Passes/Instrumentation.cpp
|
|
@@ -13,6 +13,7 @@
|
|
#include "bolt/Passes/Instrumentation.h"
|
|
#include "bolt/Core/ParallelUtilities.h"
|
|
#include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
|
|
+#include "bolt/Utils/CommandLineOpts.h"
|
|
#include "bolt/Utils/Utils.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/RWMutex.h"
|
|
@@ -85,6 +86,24 @@ cl::opt<bool> InstrumentCalls("instrument-calls",
|
|
namespace llvm {
|
|
namespace bolt {
|
|
|
|
+static bool hasAArch64ExclusiveMemop(BinaryFunction &Function) {
|
|
+ // FIXME ARMv8-a architecture reference manual says that software must avoid
|
|
+ // having any explicit memory accesses between exclusive load and associated
|
|
+ // store instruction. So for now skip instrumentation for functions that have
|
|
+ // these instructions, since it might lead to runtime deadlock.
|
|
+ BinaryContext &BC = Function.getBinaryContext();
|
|
+ for (const BinaryBasicBlock &BB : Function)
|
|
+ for (const MCInst &Inst : BB)
|
|
+ if (BC.MIB->isAArch64Exclusive(Inst)) {
|
|
+ if (opts::Verbosity >= 1)
|
|
+ outs() << "BOLT-INSTRUMENTER: Function " << Function
|
|
+ << " has exclusive instructions, skip instrumentation\n";
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ return false;
|
|
+}
|
|
+
|
|
uint32_t Instrumentation::getFunctionNameIndex(const BinaryFunction &Function) {
|
|
auto Iter = FuncToStringIdx.find(&Function);
|
|
if (Iter != FuncToStringIdx.end())
|
|
@@ -176,7 +195,8 @@ Instrumentation::createInstrumentationSnippet(BinaryContext &BC, bool IsLeaf) {
|
|
auto L = BC.scopeLock();
|
|
MCSymbol *Label = BC.Ctx->createNamedTempSymbol("InstrEntry");
|
|
Summary->Counters.emplace_back(Label);
|
|
- return BC.MIB->createInstrIncMemory(Label, BC.Ctx.get(), IsLeaf);
|
|
+ return BC.MIB->createInstrIncMemory(Label, BC.Ctx.get(), IsLeaf,
|
|
+ BC.AsmInfo->getCodePointerSize());
|
|
}
|
|
|
|
// Helper instruction sequence insertion function
|
|
@@ -287,6 +307,9 @@ void Instrumentation::instrumentFunction(BinaryFunction &Function,
|
|
if (BC.isMachO() && Function.hasName("___GLOBAL_init_65535/1"))
|
|
return;
|
|
|
|
+ if (BC.isAArch64() && hasAArch64ExclusiveMemop(Function))
|
|
+ return;
|
|
+
|
|
SplitWorklistTy SplitWorklist;
|
|
SplitInstrsTy SplitInstrs;
|
|
|
|
@@ -504,9 +527,6 @@ void Instrumentation::instrumentFunction(BinaryFunction &Function,
|
|
}
|
|
|
|
void Instrumentation::runOnFunctions(BinaryContext &BC) {
|
|
- if (!BC.isX86())
|
|
- return;
|
|
-
|
|
const unsigned Flags = BinarySection::getFlags(/*IsReadOnly=*/false,
|
|
/*IsText=*/false,
|
|
/*IsAllocatable=*/true);
|
|
diff --git a/bolt/lib/Passes/MCF.cpp b/bolt/lib/Passes/MCF.cpp
|
|
index ec04012..c3898d2 100644
|
|
--- a/bolt/lib/Passes/MCF.cpp
|
|
+++ b/bolt/lib/Passes/MCF.cpp
|
|
@@ -262,6 +262,7 @@ bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
|
|
continue;
|
|
|
|
Pred->getBranchInfo(*BB).Count = Guessed;
|
|
+ GuessedArcs.insert(std::make_pair(Pred, BB));
|
|
return true;
|
|
}
|
|
llvm_unreachable("Expected unguessed arc");
|
|
diff --git a/bolt/lib/Passes/TailDuplication.cpp b/bolt/lib/Passes/TailDuplication.cpp
|
|
index c04efd7..7141d5d 100644
|
|
--- a/bolt/lib/Passes/TailDuplication.cpp
|
|
+++ b/bolt/lib/Passes/TailDuplication.cpp
|
|
@@ -303,7 +303,7 @@ TailDuplication::aggressiveDuplicate(BinaryBasicBlock &BB,
|
|
if (isInCacheLine(BB, Tail))
|
|
return BlocksToDuplicate;
|
|
|
|
- BinaryBasicBlock *CurrBB = &BB;
|
|
+ BinaryBasicBlock *CurrBB = &Tail;
|
|
while (CurrBB) {
|
|
LLVM_DEBUG(dbgs() << "Aggressive tail duplication: adding "
|
|
<< CurrBB->getName() << " to duplication list\n";);
|
|
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
|
index cd66b65..3f6497e 100644
|
|
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
|
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
|
|
@@ -16,6 +16,9 @@
|
|
#include "Utils/AArch64BaseInfo.h"
|
|
#include "bolt/Core/MCPlusBuilder.h"
|
|
#include "llvm/BinaryFormat/ELF.h"
|
|
+#include "llvm/MC/MCContext.h"
|
|
+#include "llvm/MC/MCFixupKindInfo.h"
|
|
+#include "llvm/MC/MCInstBuilder.h"
|
|
#include "llvm/MC/MCInstrInfo.h"
|
|
#include "llvm/MC/MCRegisterInfo.h"
|
|
#include "llvm/Support/Debug.h"
|
|
@@ -28,6 +31,100 @@ using namespace bolt;
|
|
|
|
namespace {
|
|
|
|
+static void getSystemFlag(MCInst &Inst, MCPhysReg RegName) {
|
|
+ Inst.setOpcode(AArch64::MRS);
|
|
+ Inst.clear();
|
|
+ Inst.addOperand(MCOperand::createReg(RegName));
|
|
+ Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
|
|
+}
|
|
+
|
|
+static void setSystemFlag(MCInst &Inst, MCPhysReg RegName) {
|
|
+ Inst.setOpcode(AArch64::MSR);
|
|
+ Inst.clear();
|
|
+ Inst.addOperand(MCOperand::createImm(AArch64SysReg::NZCV));
|
|
+ Inst.addOperand(MCOperand::createReg(RegName));
|
|
+}
|
|
+
|
|
+static void createPushRegisters(MCInst &Inst, MCPhysReg Reg1, MCPhysReg Reg2) {
|
|
+ Inst.clear();
|
|
+ unsigned NewOpcode = AArch64::STPXpre;
|
|
+ Inst.setOpcode(NewOpcode);
|
|
+ Inst.addOperand(MCOperand::createReg(AArch64::SP));
|
|
+ Inst.addOperand(MCOperand::createReg(Reg1));
|
|
+ Inst.addOperand(MCOperand::createReg(Reg2));
|
|
+ Inst.addOperand(MCOperand::createReg(AArch64::SP));
|
|
+ Inst.addOperand(MCOperand::createImm(-2));
|
|
+}
|
|
+
|
|
+static void createPopRegisters(MCInst &Inst, MCPhysReg Reg1, MCPhysReg Reg2) {
|
|
+ Inst.clear();
|
|
+ unsigned NewOpcode = AArch64::LDPXpost;
|
|
+ Inst.setOpcode(NewOpcode);
|
|
+ Inst.addOperand(MCOperand::createReg(AArch64::SP));
|
|
+ Inst.addOperand(MCOperand::createReg(Reg1));
|
|
+ Inst.addOperand(MCOperand::createReg(Reg2));
|
|
+ Inst.addOperand(MCOperand::createReg(AArch64::SP));
|
|
+ Inst.addOperand(MCOperand::createImm(2));
|
|
+}
|
|
+
|
|
+static void loadReg(MCInst &Inst, MCPhysReg To, MCPhysReg From) {
|
|
+ Inst.setOpcode(AArch64::LDRXui);
|
|
+ Inst.clear();
|
|
+ if (From == AArch64::SP) {
|
|
+ Inst.setOpcode(AArch64::LDRXpost);
|
|
+ Inst.addOperand(MCOperand::createReg(From));
|
|
+ Inst.addOperand(MCOperand::createReg(To));
|
|
+ Inst.addOperand(MCOperand::createReg(From));
|
|
+ Inst.addOperand(MCOperand::createImm(16));
|
|
+ } else {
|
|
+ Inst.addOperand(MCOperand::createReg(To));
|
|
+ Inst.addOperand(MCOperand::createReg(From));
|
|
+ Inst.addOperand(MCOperand::createImm(0));
|
|
+ }
|
|
+}
|
|
+
|
|
+static void storeReg(MCInst &Inst, MCPhysReg From, MCPhysReg To) {
|
|
+ Inst.setOpcode(AArch64::STRXui);
|
|
+ Inst.clear();
|
|
+ if (To == AArch64::SP) {
|
|
+ Inst.setOpcode(AArch64::STRXpre);
|
|
+ Inst.addOperand(MCOperand::createReg(To));
|
|
+ Inst.addOperand(MCOperand::createReg(From));
|
|
+ Inst.addOperand(MCOperand::createReg(To));
|
|
+ Inst.addOperand(MCOperand::createImm(-16));
|
|
+ } else {
|
|
+ Inst.addOperand(MCOperand::createReg(From));
|
|
+ Inst.addOperand(MCOperand::createReg(To));
|
|
+ Inst.addOperand(MCOperand::createImm(0));
|
|
+ }
|
|
+}
|
|
+
|
|
+static void atomicAdd(MCInst &Inst, MCPhysReg RegTo, MCPhysReg RegCnt) {
|
|
+ // NOTE: Supports only ARM with LSE extension
|
|
+ Inst.setOpcode(AArch64::LDADDX);
|
|
+ Inst.clear();
|
|
+ Inst.addOperand(MCOperand::createReg(AArch64::XZR));
|
|
+ Inst.addOperand(MCOperand::createReg(RegCnt));
|
|
+ Inst.addOperand(MCOperand::createReg(RegTo));
|
|
+}
|
|
+
|
|
+static void createMovz(MCInst &Inst, MCPhysReg Reg, uint64_t Imm) {
|
|
+ assert(Imm <= UINT16_MAX && "Invalid Imm size");
|
|
+ Inst.clear();
|
|
+ Inst.setOpcode(AArch64::MOVZXi);
|
|
+ Inst.addOperand(MCOperand::createReg(Reg));
|
|
+ Inst.addOperand(MCOperand::createImm(Imm & 0xFFFF));
|
|
+ Inst.addOperand(MCOperand::createImm(0));
|
|
+}
|
|
+
|
|
+static InstructionListType createIncMemory(MCPhysReg RegTo, MCPhysReg RegTmp) {
|
|
+ InstructionListType Insts;
|
|
+ Insts.emplace_back();
|
|
+ createMovz(Insts.back(), RegTmp, 1);
|
|
+ Insts.emplace_back();
|
|
+ atomicAdd(Insts.back(), RegTo, RegTmp);
|
|
+ return Insts;
|
|
+}
|
|
class AArch64MCPlusBuilder : public MCPlusBuilder {
|
|
public:
|
|
AArch64MCPlusBuilder(const MCInstrAnalysis *Analysis, const MCInstrInfo *Info,
|
|
@@ -176,6 +273,34 @@ public:
|
|
return isLDRB(Inst) || isLDRH(Inst) || isLDRW(Inst) || isLDRX(Inst);
|
|
}
|
|
|
|
+ bool isAArch64Exclusive(const MCInst &Inst) const override {
|
|
+ return (Inst.getOpcode() == AArch64::LDXPX ||
|
|
+ Inst.getOpcode() == AArch64::LDXPW ||
|
|
+ Inst.getOpcode() == AArch64::LDXRX ||
|
|
+ Inst.getOpcode() == AArch64::LDXRW ||
|
|
+ Inst.getOpcode() == AArch64::LDXRH ||
|
|
+ Inst.getOpcode() == AArch64::LDXRB ||
|
|
+ Inst.getOpcode() == AArch64::STXPX ||
|
|
+ Inst.getOpcode() == AArch64::STXPW ||
|
|
+ Inst.getOpcode() == AArch64::STXRX ||
|
|
+ Inst.getOpcode() == AArch64::STXRW ||
|
|
+ Inst.getOpcode() == AArch64::STXRH ||
|
|
+ Inst.getOpcode() == AArch64::STXRB ||
|
|
+ Inst.getOpcode() == AArch64::LDAXPX ||
|
|
+ Inst.getOpcode() == AArch64::LDAXPW ||
|
|
+ Inst.getOpcode() == AArch64::LDAXRX ||
|
|
+ Inst.getOpcode() == AArch64::LDAXRW ||
|
|
+ Inst.getOpcode() == AArch64::LDAXRH ||
|
|
+ Inst.getOpcode() == AArch64::LDAXRB ||
|
|
+ Inst.getOpcode() == AArch64::STLXPX ||
|
|
+ Inst.getOpcode() == AArch64::STLXPW ||
|
|
+ Inst.getOpcode() == AArch64::STLXRX ||
|
|
+ Inst.getOpcode() == AArch64::STLXRW ||
|
|
+ Inst.getOpcode() == AArch64::STLXRH ||
|
|
+ Inst.getOpcode() == AArch64::STLXRB ||
|
|
+ Inst.getOpcode() == AArch64::CLREX);
|
|
+ }
|
|
+
|
|
bool isLoadFromStack(const MCInst &Inst) const {
|
|
if (!isLoad(Inst))
|
|
return false;
|
|
@@ -207,6 +332,40 @@ public:
|
|
return Inst.getOpcode() == AArch64::BLR;
|
|
}
|
|
|
|
+ MCPhysReg getSpRegister(int Size) const {
|
|
+ switch (Size) {
|
|
+ case 4:
|
|
+ return AArch64::WSP;
|
|
+ case 8:
|
|
+ return AArch64::SP;
|
|
+ default:
|
|
+ llvm_unreachable("Unexpected size");
|
|
+ }
|
|
+ }
|
|
+
|
|
+ MCPhysReg getIntArgRegister(unsigned ArgNo) const override {
|
|
+ switch (ArgNo) {
|
|
+ case 0:
|
|
+ return AArch64::X0;
|
|
+ case 1:
|
|
+ return AArch64::X1;
|
|
+ case 2:
|
|
+ return AArch64::X2;
|
|
+ case 3:
|
|
+ return AArch64::X3;
|
|
+ case 4:
|
|
+ return AArch64::X4;
|
|
+ case 5:
|
|
+ return AArch64::X5;
|
|
+ case 6:
|
|
+ return AArch64::X6;
|
|
+ case 7:
|
|
+ return AArch64::X7;
|
|
+ default:
|
|
+ return getNoRegister();
|
|
+ }
|
|
+ }
|
|
+
|
|
bool hasPCRelOperand(const MCInst &Inst) const override {
|
|
// ADRP is blacklisted and is an exception. Even though it has a
|
|
// PC-relative operand, this operand is not a complete symbol reference
|
|
@@ -313,6 +472,22 @@ public:
|
|
return true;
|
|
}
|
|
|
|
+ void getCalleeSavedRegs(BitVector &Regs) const override {
|
|
+ Regs |= getAliases(AArch64::X18);
|
|
+ Regs |= getAliases(AArch64::X19);
|
|
+ Regs |= getAliases(AArch64::X20);
|
|
+ Regs |= getAliases(AArch64::X21);
|
|
+ Regs |= getAliases(AArch64::X22);
|
|
+ Regs |= getAliases(AArch64::X23);
|
|
+ Regs |= getAliases(AArch64::X24);
|
|
+ Regs |= getAliases(AArch64::X25);
|
|
+ Regs |= getAliases(AArch64::X26);
|
|
+ Regs |= getAliases(AArch64::X27);
|
|
+ Regs |= getAliases(AArch64::X28);
|
|
+ Regs |= getAliases(AArch64::LR);
|
|
+ Regs |= getAliases(AArch64::FP);
|
|
+ }
|
|
+
|
|
const MCExpr *getTargetExprFor(MCInst &Inst, const MCExpr *Expr,
|
|
MCContext &Ctx,
|
|
uint64_t RelType) const override {
|
|
@@ -818,6 +993,22 @@ public:
|
|
|
|
int getUncondBranchEncodingSize() const override { return 28; }
|
|
|
|
+ InstructionListType createCmpJE(MCPhysReg RegNo, int64_t Imm,
|
|
+ const MCSymbol *Target,
|
|
+ MCContext *Ctx) const override {
|
|
+ InstructionListType Code;
|
|
+ Code.emplace_back(MCInstBuilder(AArch64::SUBSXri)
|
|
+ .addReg(RegNo)
|
|
+ .addReg(RegNo)
|
|
+ .addImm(Imm)
|
|
+ .addImm(0));
|
|
+ Code.emplace_back(MCInstBuilder(AArch64::Bcc)
|
|
+ .addImm(Imm)
|
|
+ .addExpr(MCSymbolRefExpr::create(
|
|
+ Target, MCSymbolRefExpr::VK_None, *Ctx)));
|
|
+ return Code;
|
|
+ }
|
|
+
|
|
bool createCall(MCInst &Inst, const MCSymbol *Target,
|
|
MCContext *Ctx) override {
|
|
Inst.setOpcode(AArch64::BL);
|
|
@@ -828,12 +1019,7 @@ public:
|
|
|
|
bool createTailCall(MCInst &Inst, const MCSymbol *Target,
|
|
MCContext *Ctx) override {
|
|
- Inst.setOpcode(AArch64::B);
|
|
- Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
|
|
- Inst, MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx),
|
|
- *Ctx, 0)));
|
|
- setTailCall(Inst);
|
|
- return true;
|
|
+ return createDirectCall(Inst, Target, Ctx, /*IsTailCall*/ true);
|
|
}
|
|
|
|
void createLongTailCall(InstructionListType &Seq, const MCSymbol *Target,
|
|
@@ -882,6 +1068,18 @@ public:
|
|
|
|
bool isStore(const MCInst &Inst) const override { return false; }
|
|
|
|
+ bool createDirectCall(MCInst &Inst, const MCSymbol *Target, MCContext *Ctx,
|
|
+ bool IsTailCall) override {
|
|
+ Inst.setOpcode(IsTailCall ? AArch64::B : AArch64::BL);
|
|
+ Inst.clear();
|
|
+ Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
|
|
+ Inst, MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx),
|
|
+ *Ctx, 0)));
|
|
+ if (IsTailCall)
|
|
+ convertJmpToTailCall(Inst);
|
|
+ return true;
|
|
+ }
|
|
+
|
|
bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
|
|
const MCSymbol *&TBB, const MCSymbol *&FBB,
|
|
MCInst *&CondBranch,
|
|
@@ -1153,6 +1351,242 @@ public:
|
|
return true;
|
|
}
|
|
|
|
+ bool createStackPointerIncrement(
|
|
+ MCInst &Inst, int Size,
|
|
+ bool NoFlagsClobber = false /*unused for AArch64*/) const override {
|
|
+ Inst.setOpcode(AArch64::SUBXri);
|
|
+ Inst.clear();
|
|
+ Inst.addOperand(MCOperand::createReg(AArch64::SP));
|
|
+ Inst.addOperand(MCOperand::createReg(AArch64::SP));
|
|
+ Inst.addOperand(MCOperand::createImm(Size));
|
|
+ Inst.addOperand(MCOperand::createImm(0));
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ bool createStackPointerDecrement(
|
|
+ MCInst &Inst, int Size,
|
|
+ bool NoFlagsClobber = false /*unused for AArch64*/) const override {
|
|
+ Inst.setOpcode(AArch64::ADDXri);
|
|
+ Inst.clear();
|
|
+ Inst.addOperand(MCOperand::createReg(AArch64::SP));
|
|
+ Inst.addOperand(MCOperand::createReg(AArch64::SP));
|
|
+ Inst.addOperand(MCOperand::createImm(Size));
|
|
+ Inst.addOperand(MCOperand::createImm(0));
|
|
+ return true;
|
|
+ }
|
|
+
|
|
+ void createIndirectBranch(MCInst &Inst, MCPhysReg MemBaseReg,
|
|
+ int64_t Disp) const {
|
|
+ Inst.setOpcode(AArch64::BR);
|
|
+ Inst.addOperand(MCOperand::createReg(MemBaseReg));
|
|
+ }
|
|
+
|
|
+ InstructionListType createInstrumentedIndCallHandlerExitBB() const override {
|
|
+ InstructionListType Insts(5);
|
|
+ // Code sequence for instrumented indirect call handler:
|
|
+ // msr nzcv, x1
|
|
+ // ldp x0, x1, [sp], #16
|
|
+ // ldr x16, [sp], #16
|
|
+ // ldp x0, x1, [sp], #16
|
|
+ // br x16
|
|
+ setSystemFlag(Insts[0], AArch64::X1);
|
|
+ createPopRegisters(Insts[1], AArch64::X0, AArch64::X1);
|
|
+ // Here we load address of the next function which should be called in the
|
|
+ // original binary to X16 register. Writing to X16 is permitted without
|
|
+ // needing to restore.
|
|
+ loadReg(Insts[2], AArch64::X16, AArch64::SP);
|
|
+ createPopRegisters(Insts[3], AArch64::X0, AArch64::X1);
|
|
+ createIndirectBranch(Insts[4], AArch64::X16, 0);
|
|
+ return Insts;
|
|
+ }
|
|
+
|
|
+ InstructionListType
|
|
+ createInstrumentedIndTailCallHandlerExitBB() const override {
|
|
+ return createInstrumentedIndCallHandlerExitBB();
|
|
+ }
|
|
+
|
|
+ InstructionListType createGetter(MCContext *Ctx, const char *name) const {
|
|
+ InstructionListType Insts(4);
|
|
+ MCSymbol *Locs = Ctx->getOrCreateSymbol(name);
|
|
+ InstructionListType Addr = materializeAddress(Locs, Ctx, AArch64::X0);
|
|
+ std::copy(Addr.begin(), Addr.end(), Insts.begin());
|
|
+ assert(Addr.size() == 2 && "Invalid Addr size");
|
|
+ loadReg(Insts[2], AArch64::X0, AArch64::X0);
|
|
+ createReturn(Insts[3]);
|
|
+ return Insts;
|
|
+ }
|
|
+
|
|
+ InstructionListType createNumCountersGetter(MCContext *Ctx) const override {
|
|
+ return createGetter(Ctx, "__bolt_num_counters");
|
|
+ }
|
|
+
|
|
+ InstructionListType
|
|
+ createInstrLocationsGetter(MCContext *Ctx) const override {
|
|
+ return createGetter(Ctx, "__bolt_instr_locations");
|
|
+ }
|
|
+
|
|
+ InstructionListType createInstrTablesGetter(MCContext *Ctx) const override {
|
|
+ return createGetter(Ctx, "__bolt_instr_tables");
|
|
+ }
|
|
+
|
|
+ InstructionListType createInstrNumFuncsGetter(MCContext *Ctx) const override {
|
|
+ return createGetter(Ctx, "__bolt_instr_num_funcs");
|
|
+ }
|
|
+
|
|
+ void convertIndirectCallToLoad(MCInst &Inst, MCPhysReg Reg) override {
|
|
+ bool IsTailCall = isTailCall(Inst);
|
|
+ if (IsTailCall)
|
|
+ removeAnnotation(Inst, MCPlus::MCAnnotation::kTailCall);
|
|
+ if (Inst.getOpcode() == AArch64::BR || Inst.getOpcode() == AArch64::BLR) {
|
|
+ Inst.setOpcode(AArch64::ORRXrs);
|
|
+ Inst.insert(Inst.begin(), MCOperand::createReg(Reg));
|
|
+ Inst.insert(Inst.begin() + 1, MCOperand::createReg(AArch64::XZR));
|
|
+ Inst.insert(Inst.begin() + 3, MCOperand::createImm(0));
|
|
+ return;
|
|
+ }
|
|
+ llvm_unreachable("not implemented");
|
|
+ }
|
|
+
|
|
+ InstructionListType createLoadImmediate(const MCPhysReg Dest,
|
|
+ uint64_t Imm) const override {
|
|
+ InstructionListType Insts(4);
|
|
+ int Shift = 48;
|
|
+ for (int I = 0; I < 4; I++, Shift -= 16) {
|
|
+ Insts[I].setOpcode(AArch64::MOVKXi);
|
|
+ Insts[I].addOperand(MCOperand::createReg(Dest));
|
|
+ Insts[I].addOperand(MCOperand::createReg(Dest));
|
|
+ Insts[I].addOperand(MCOperand::createImm((Imm >> Shift) & 0xFFFF));
|
|
+ Insts[I].addOperand(MCOperand::createImm(Shift));
|
|
+ }
|
|
+ return Insts;
|
|
+ }
|
|
+
|
|
+ void createIndirectCallInst(MCInst &Inst, bool IsTailCall,
|
|
+ MCPhysReg Reg) const {
|
|
+ Inst.clear();
|
|
+ Inst.setOpcode(IsTailCall ? AArch64::BR : AArch64::BLR);
|
|
+ Inst.addOperand(MCOperand::createReg(Reg));
|
|
+ }
|
|
+
|
|
+ InstructionListType createInstrumentedIndirectCall(MCInst &&CallInst,
|
|
+ MCSymbol *HandlerFuncAddr,
|
|
+ int CallSiteID,
|
|
+ MCContext *Ctx) override {
|
|
+ InstructionListType Insts;
|
|
+ // Code sequence used to enter indirect call instrumentation helper:
|
|
+ // stp x0, x1, [sp, #-16]! createPushRegisters
|
|
+ // mov target x0 convertIndirectCallToLoad -> orr x0 target xzr
|
|
+ // mov x1 CallSiteID createLoadImmediate ->
|
|
+ // movk x1, #0x0, lsl #48
|
|
+ // movk x1, #0x0, lsl #32
|
|
+ // movk x1, #0x0, lsl #16
|
|
+ // movk x1, #0x0
|
|
+ // stp x0, x1, [sp, #-16]!
|
|
+ // bl *HandlerFuncAddr createIndirectCall ->
|
|
+ // adr x0 *HandlerFuncAddr -> adrp + add
|
|
+ // blr x0
|
|
+ Insts.emplace_back();
|
|
+ createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
|
|
+ Insts.emplace_back(CallInst);
|
|
+ convertIndirectCallToLoad(Insts.back(), AArch64::X0);
|
|
+ InstructionListType LoadImm =
|
|
+ createLoadImmediate(getIntArgRegister(1), CallSiteID);
|
|
+ Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
|
|
+ Insts.emplace_back();
|
|
+ createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
|
|
+ Insts.resize(Insts.size() + 2);
|
|
+ InstructionListType Addr =
|
|
+ materializeAddress(HandlerFuncAddr, Ctx, AArch64::X0);
|
|
+ assert(Addr.size() == 2 && "Invalid Addr size");
|
|
+ std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
|
|
+ Insts.emplace_back();
|
|
+ createIndirectCallInst(Insts.back(), isTailCall(CallInst), AArch64::X0);
|
|
+
|
|
+ // Carry over metadata including tail call marker if present.
|
|
+ stripAnnotations(Insts.back());
|
|
+ moveAnnotations(std::move(CallInst), Insts.back());
|
|
+
|
|
+ return Insts;
|
|
+ }
|
|
+
|
|
+ InstructionListType
|
|
+ createInstrumentedIndCallHandlerEntryBB(const MCSymbol *InstrTrampoline,
|
|
+ const MCSymbol *IndCallHandler,
|
|
+ MCContext *Ctx) override {
|
|
+ // Code sequence used to check whether InstrTampoline was initialized
|
|
+ // and call it if so, returns via IndCallHandler
|
|
+ // stp x0, x1, [sp, #-16]!
|
|
+ // mrs x1, nzcv
|
|
+ // adr x0, InstrTrampoline -> adrp + add
|
|
+ // ldr x0, [x0]
|
|
+ // subs x0, x0, #0x0
|
|
+ // b.eq IndCallHandler
|
|
+ // str x30, [sp, #-16]!
|
|
+ // blr x0
|
|
+ // ldr x30, [sp], #16
|
|
+ // b IndCallHandler
|
|
+ InstructionListType Insts;
|
|
+ Insts.emplace_back();
|
|
+ createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
|
|
+ Insts.emplace_back();
|
|
+ getSystemFlag(Insts.back(), getIntArgRegister(1));
|
|
+ Insts.emplace_back();
|
|
+ Insts.emplace_back();
|
|
+ InstructionListType Addr =
|
|
+ materializeAddress(InstrTrampoline, Ctx, AArch64::X0);
|
|
+ std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
|
|
+ assert(Addr.size() == 2 && "Invalid Addr size");
|
|
+ Insts.emplace_back();
|
|
+ loadReg(Insts.back(), AArch64::X0, AArch64::X0);
|
|
+ InstructionListType cmpJmp =
|
|
+ createCmpJE(AArch64::X0, 0, IndCallHandler, Ctx);
|
|
+ Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
|
|
+ Insts.emplace_back();
|
|
+ storeReg(Insts.back(), AArch64::LR, AArch64::SP);
|
|
+ Insts.emplace_back();
|
|
+ Insts.back().setOpcode(AArch64::BLR);
|
|
+ Insts.back().addOperand(MCOperand::createReg(AArch64::X0));
|
|
+ Insts.emplace_back();
|
|
+ loadReg(Insts.back(), AArch64::LR, AArch64::SP);
|
|
+ Insts.emplace_back();
|
|
+ createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
|
|
+ return Insts;
|
|
+ }
|
|
+
|
|
+ InstructionListType
|
|
+ createInstrIncMemory(const MCSymbol *Target, MCContext *Ctx, bool IsLeaf,
|
|
+ unsigned CodePointerSize) const override {
|
|
+ unsigned int I = 0;
|
|
+ InstructionListType Instrs(IsLeaf ? 12 : 10);
|
|
+
|
|
+ if (IsLeaf)
|
|
+ createStackPointerIncrement(Instrs[I++], 128);
|
|
+ createPushRegisters(Instrs[I++], AArch64::X0, AArch64::X1);
|
|
+ getSystemFlag(Instrs[I++], AArch64::X1);
|
|
+ InstructionListType Addr = materializeAddress(Target, Ctx, AArch64::X0);
|
|
+ assert(Addr.size() == 2 && "Invalid Addr size");
|
|
+ std::copy(Addr.begin(), Addr.end(), Instrs.begin() + I);
|
|
+ I += Addr.size();
|
|
+ storeReg(Instrs[I++], AArch64::X2, AArch64::SP);
|
|
+ InstructionListType Insts = createIncMemory(AArch64::X0, AArch64::X2);
|
|
+ assert(Insts.size() == 2 && "Invalid Insts size");
|
|
+ std::copy(Insts.begin(), Insts.end(), Instrs.begin() + I);
|
|
+ I += Insts.size();
|
|
+ loadReg(Instrs[I++], AArch64::X2, AArch64::SP);
|
|
+ setSystemFlag(Instrs[I++], AArch64::X1);
|
|
+ createPopRegisters(Instrs[I++], AArch64::X0, AArch64::X1);
|
|
+ if (IsLeaf)
|
|
+ createStackPointerDecrement(Instrs[I++], 128);
|
|
+ return Instrs;
|
|
+ }
|
|
+
|
|
+ std::vector<MCInst> createSymbolTrampoline(const MCSymbol *TgtSym,
|
|
+ MCContext *Ctx) override {
|
|
+ std::vector<MCInst> Insts;
|
|
+ createShortJmp(Insts, TgtSym, Ctx, /*IsTailCall*/ true);
|
|
+ return Insts;
|
|
+ }
|
|
+
|
|
InstructionListType materializeAddress(const MCSymbol *Target, MCContext *Ctx,
|
|
MCPhysReg RegName,
|
|
int64_t Addend = 0) const override {
|
|
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
|
|
index 5e3c01a..25b6970 100644
|
|
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
|
|
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
|
|
@@ -61,6 +61,25 @@ bool isADDri(const MCInst &Inst) {
|
|
Inst.getOpcode() == X86::ADD64ri8;
|
|
}
|
|
|
|
+// Create instruction to increment contents of target by 1
|
|
+static InstructionListType createIncMemory(const MCSymbol *Target,
|
|
+ MCContext *Ctx) {
|
|
+ InstructionListType Insts;
|
|
+ Insts.emplace_back();
|
|
+ Insts.back().setOpcode(X86::LOCK_INC64m);
|
|
+ Insts.back().clear();
|
|
+ Insts.back().addOperand(MCOperand::createReg(X86::RIP)); // BaseReg
|
|
+ Insts.back().addOperand(MCOperand::createImm(1)); // ScaleAmt
|
|
+ Insts.back().addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
|
|
+
|
|
+ Insts.back().addOperand(MCOperand::createExpr(
|
|
+ MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None,
|
|
+ *Ctx))); // Displacement
|
|
+ Insts.back().addOperand(
|
|
+ MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
|
|
+ return Insts;
|
|
+}
|
|
+
|
|
#define GET_INSTRINFO_OPERAND_TYPES_ENUM
|
|
#define GET_INSTRINFO_OPERAND_TYPE
|
|
#define GET_INSTRINFO_MEM_OPERAND_SIZE
|
|
@@ -2309,28 +2328,15 @@ public:
|
|
return true;
|
|
}
|
|
|
|
- void createLoadImmediate(MCInst &Inst, const MCPhysReg Dest,
|
|
- uint32_t Imm) const override {
|
|
- Inst.setOpcode(X86::MOV64ri32);
|
|
- Inst.clear();
|
|
- Inst.addOperand(MCOperand::createReg(Dest));
|
|
- Inst.addOperand(MCOperand::createImm(Imm));
|
|
- }
|
|
-
|
|
- bool createIncMemory(MCInst &Inst, const MCSymbol *Target,
|
|
- MCContext *Ctx) const override {
|
|
-
|
|
- Inst.setOpcode(X86::LOCK_INC64m);
|
|
- Inst.clear();
|
|
- Inst.addOperand(MCOperand::createReg(X86::RIP)); // BaseReg
|
|
- Inst.addOperand(MCOperand::createImm(1)); // ScaleAmt
|
|
- Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // IndexReg
|
|
-
|
|
- Inst.addOperand(MCOperand::createExpr(
|
|
- MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None,
|
|
- *Ctx))); // Displacement
|
|
- Inst.addOperand(MCOperand::createReg(X86::NoRegister)); // AddrSegmentReg
|
|
- return true;
|
|
+ InstructionListType createLoadImmediate(const MCPhysReg Dest,
|
|
+ uint64_t Imm) const override {
|
|
+ InstructionListType Insts;
|
|
+ Insts.emplace_back();
|
|
+ Insts.back().setOpcode(X86::MOV64ri32);
|
|
+ Insts.back().clear();
|
|
+ Insts.back().addOperand(MCOperand::createReg(Dest));
|
|
+ Insts.back().addOperand(MCOperand::createImm(Imm));
|
|
+ return Insts;
|
|
}
|
|
|
|
bool createIJmp32Frag(SmallVectorImpl<MCInst> &Insts,
|
|
@@ -3057,9 +3063,9 @@ public:
|
|
Inst.clear();
|
|
}
|
|
|
|
- InstructionListType createInstrIncMemory(const MCSymbol *Target,
|
|
- MCContext *Ctx,
|
|
- bool IsLeaf) const override {
|
|
+ InstructionListType
|
|
+ createInstrIncMemory(const MCSymbol *Target, MCContext *Ctx, bool IsLeaf,
|
|
+ unsigned CodePointerSize) const override {
|
|
InstructionListType Instrs(IsLeaf ? 13 : 11);
|
|
unsigned int I = 0;
|
|
|
|
@@ -3079,7 +3085,10 @@ public:
|
|
createClearRegWithNoEFlagsUpdate(Instrs[I++], X86::RAX, 8);
|
|
createX86SaveOVFlagToRegister(Instrs[I++], X86::AL);
|
|
// LOCK INC
|
|
- createIncMemory(Instrs[I++], Target, Ctx);
|
|
+ InstructionListType IncMem = createIncMemory(Target, Ctx);
|
|
+ assert(IncMem.size() == 1 && "Invalid IncMem size");
|
|
+ std::copy(IncMem.begin(), IncMem.end(), Instrs.begin() + I);
|
|
+ I += IncMem.size();
|
|
// POPF
|
|
createAddRegImm(Instrs[I++], X86::AL, 127, 1);
|
|
createPopRegister(Instrs[I++], X86::RAX, 8);
|
|
@@ -3153,8 +3162,8 @@ public:
|
|
}
|
|
Insts.emplace_back();
|
|
createPushRegister(Insts.back(), TempReg, 8);
|
|
- Insts.emplace_back();
|
|
- createLoadImmediate(Insts.back(), TempReg, CallSiteID);
|
|
+ InstructionListType LoadImm = createLoadImmediate(TempReg, CallSiteID);
|
|
+ Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
|
|
Insts.emplace_back();
|
|
createPushRegister(Insts.back(), TempReg, 8);
|
|
|
|
@@ -3264,7 +3273,7 @@ public:
|
|
}
|
|
|
|
InstructionListType createSymbolTrampoline(const MCSymbol *TgtSym,
|
|
- MCContext *Ctx) const override {
|
|
+ MCContext *Ctx) override {
|
|
InstructionListType Insts(1);
|
|
createUncondBranch(Insts[0], TgtSym, Ctx);
|
|
return Insts;
|
|
diff --git a/bolt/runtime/CMakeLists.txt b/bolt/runtime/CMakeLists.txt
|
|
index 8472ce0..838c8cb 100644
|
|
--- a/bolt/runtime/CMakeLists.txt
|
|
+++ b/bolt/runtime/CMakeLists.txt
|
|
@@ -27,8 +27,14 @@ set(BOLT_RT_FLAGS
|
|
-fno-exceptions
|
|
-fno-rtti
|
|
-fno-stack-protector
|
|
- -mno-sse
|
|
- -fPIC)
|
|
+ -fPIC
|
|
+ -mgeneral-regs-only)
|
|
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
|
|
+ set(BOLT_RT_FLAGS ${BOLT_RT_FLAGS} "-mno-sse")
|
|
+endif()
|
|
+if (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
|
|
+ set(BOLT_RT_FLAGS ${BOLT_RT_FLAGS} "-mno-outline-atomics")
|
|
+endif()
|
|
|
|
# Don't let the compiler think it can create calls to standard libs
|
|
target_compile_options(bolt_rt_instr PRIVATE ${BOLT_RT_FLAGS})
|
|
@@ -39,7 +45,7 @@ target_include_directories(bolt_rt_hugify PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
|
install(TARGETS bolt_rt_instr DESTINATION "lib${LLVM_LIBDIR_SUFFIX}")
|
|
install(TARGETS bolt_rt_hugify DESTINATION "lib${LLVM_LIBDIR_SUFFIX}")
|
|
|
|
-if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*")
|
|
+if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang.*" AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
|
|
add_library(bolt_rt_instr_osx STATIC
|
|
instr.cpp
|
|
${CMAKE_CURRENT_BINARY_DIR}/config.h
|
|
diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h
|
|
index 9e6f175..9b9965b 100644
|
|
--- a/bolt/runtime/common.h
|
|
+++ b/bolt/runtime/common.h
|
|
@@ -6,10 +6,6 @@
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
-#if !defined(__x86_64__)
|
|
-#error "For x86_64 only"
|
|
-#endif
|
|
-
|
|
#if defined(__linux__)
|
|
|
|
#include <cstddef>
|
|
@@ -44,44 +40,6 @@ typedef int int32_t;
|
|
#error "For Linux or MacOS only"
|
|
#endif
|
|
|
|
-// Save all registers while keeping 16B stack alignment
|
|
-#define SAVE_ALL \
|
|
- "push %%rax\n" \
|
|
- "push %%rbx\n" \
|
|
- "push %%rcx\n" \
|
|
- "push %%rdx\n" \
|
|
- "push %%rdi\n" \
|
|
- "push %%rsi\n" \
|
|
- "push %%rbp\n" \
|
|
- "push %%r8\n" \
|
|
- "push %%r9\n" \
|
|
- "push %%r10\n" \
|
|
- "push %%r11\n" \
|
|
- "push %%r12\n" \
|
|
- "push %%r13\n" \
|
|
- "push %%r14\n" \
|
|
- "push %%r15\n" \
|
|
- "sub $8, %%rsp\n"
|
|
-
|
|
-// Mirrors SAVE_ALL
|
|
-#define RESTORE_ALL \
|
|
- "add $8, %%rsp\n" \
|
|
- "pop %%r15\n" \
|
|
- "pop %%r14\n" \
|
|
- "pop %%r13\n" \
|
|
- "pop %%r12\n" \
|
|
- "pop %%r11\n" \
|
|
- "pop %%r10\n" \
|
|
- "pop %%r9\n" \
|
|
- "pop %%r8\n" \
|
|
- "pop %%rbp\n" \
|
|
- "pop %%rsi\n" \
|
|
- "pop %%rdi\n" \
|
|
- "pop %%rdx\n" \
|
|
- "pop %%rcx\n" \
|
|
- "pop %%rbx\n" \
|
|
- "pop %%rax\n"
|
|
-
|
|
#define PROT_READ 0x1 /* Page can be read. */
|
|
#define PROT_WRITE 0x2 /* Page can be written. */
|
|
#define PROT_EXEC 0x4 /* Page can be executed. */
|
|
@@ -165,141 +123,41 @@ int memcmp(const void *s1, const void *s2, size_t n) {
|
|
// Anonymous namespace covering everything but our library entry point
|
|
namespace {
|
|
|
|
-// Get the difference between runtime addrress of .text section and
|
|
-// static address in section header table. Can be extracted from arbitrary
|
|
-// pc value recorded at runtime to get the corresponding static address, which
|
|
-// in turn can be used to search for indirect call description. Needed because
|
|
-// indirect call descriptions are read-only non-relocatable data.
|
|
-uint64_t getTextBaseAddress() {
|
|
- uint64_t DynAddr;
|
|
- uint64_t StaticAddr;
|
|
- __asm__ volatile("leaq __hot_end(%%rip), %0\n\t"
|
|
- "movabsq $__hot_end, %1\n\t"
|
|
- : "=r"(DynAddr), "=r"(StaticAddr));
|
|
- return DynAddr - StaticAddr;
|
|
-}
|
|
-
|
|
-constexpr uint32_t BufSize = 10240;
|
|
-
|
|
-#define _STRINGIFY(x) #x
|
|
-#define STRINGIFY(x) _STRINGIFY(x)
|
|
-
|
|
-uint64_t __read(uint64_t fd, const void *buf, uint64_t count) {
|
|
- uint64_t ret;
|
|
-#if defined(__APPLE__)
|
|
-#define READ_SYSCALL 0x2000003
|
|
-#else
|
|
-#define READ_SYSCALL 0
|
|
-#endif
|
|
- __asm__ __volatile__("movq $" STRINGIFY(READ_SYSCALL) ", %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(fd), "S"(buf), "d"(count)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-uint64_t __write(uint64_t fd, const void *buf, uint64_t count) {
|
|
- uint64_t ret;
|
|
-#if defined(__APPLE__)
|
|
-#define WRITE_SYSCALL 0x2000004
|
|
-#else
|
|
-#define WRITE_SYSCALL 1
|
|
-#endif
|
|
- __asm__ __volatile__("movq $" STRINGIFY(WRITE_SYSCALL) ", %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(fd), "S"(buf), "d"(count)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-void *__mmap(uint64_t addr, uint64_t size, uint64_t prot, uint64_t flags,
|
|
- uint64_t fd, uint64_t offset) {
|
|
-#if defined(__APPLE__)
|
|
-#define MMAP_SYSCALL 0x20000c5
|
|
-#else
|
|
-#define MMAP_SYSCALL 9
|
|
-#endif
|
|
- void *ret;
|
|
- register uint64_t r8 asm("r8") = fd;
|
|
- register uint64_t r9 asm("r9") = offset;
|
|
- register uint64_t r10 asm("r10") = flags;
|
|
- __asm__ __volatile__("movq $" STRINGIFY(MMAP_SYSCALL) ", %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(addr), "S"(size), "d"(prot), "r"(r10), "r"(r8),
|
|
- "r"(r9)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-uint64_t __munmap(void *addr, uint64_t size) {
|
|
-#if defined(__APPLE__)
|
|
-#define MUNMAP_SYSCALL 0x2000049
|
|
-#else
|
|
-#define MUNMAP_SYSCALL 11
|
|
-#endif
|
|
- uint64_t ret;
|
|
- __asm__ __volatile__("movq $" STRINGIFY(MUNMAP_SYSCALL) ", %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(addr), "S"(size)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
+struct dirent64 {
|
|
+ uint64_t d_ino; /* Inode number */
|
|
+ int64_t d_off; /* Offset to next linux_dirent */
|
|
+ unsigned short d_reclen; /* Length of this linux_dirent */
|
|
+ unsigned char d_type;
|
|
+ char d_name[]; /* Filename (null-terminated) */
|
|
+ /* length is actually (d_reclen - 2 -
|
|
+ offsetof(struct linux_dirent, d_name)) */
|
|
+};
|
|
|
|
-#define SIG_BLOCK 0
|
|
-#define SIG_UNBLOCK 1
|
|
-#define SIG_SETMASK 2
|
|
+/* Length of the entries in `struct utsname' is 65. */
|
|
+#define _UTSNAME_LENGTH 65
|
|
|
|
-static const uint64_t MaskAllSignals[] = {-1ULL};
|
|
+struct UtsNameTy {
|
|
+ char sysname[_UTSNAME_LENGTH]; /* Operating system name (e.g., "Linux") */
|
|
+ char nodename[_UTSNAME_LENGTH]; /* Name within "some implementation-defined
|
|
+ network" */
|
|
+ char release[_UTSNAME_LENGTH]; /* Operating system release (e.g., "2.6.28") */
|
|
+ char version[_UTSNAME_LENGTH]; /* Operating system version */
|
|
+ char machine[_UTSNAME_LENGTH]; /* Hardware identifier */
|
|
+ char domainname[_UTSNAME_LENGTH]; /* NIS or YP domain name */
|
|
+};
|
|
|
|
-uint64_t __sigprocmask(int how, const void *set, void *oldset) {
|
|
-#if defined(__APPLE__)
|
|
-#define SIGPROCMASK_SYSCALL 0x2000030
|
|
-#else
|
|
-#define SIGPROCMASK_SYSCALL 14
|
|
-#endif
|
|
- uint64_t ret;
|
|
- register long r10 asm("r10") = sizeof(uint64_t);
|
|
- __asm__ __volatile__("movq $" STRINGIFY(SIGPROCMASK_SYSCALL) ", %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(how), "S"(set), "d"(oldset), "r"(r10)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
+struct timespec {
|
|
+ uint64_t tv_sec; /* seconds */
|
|
+ uint64_t tv_nsec; /* nanoseconds */
|
|
+};
|
|
|
|
-uint64_t __getpid() {
|
|
- uint64_t ret;
|
|
-#if defined(__APPLE__)
|
|
-#define GETPID_SYSCALL 20
|
|
+#if defined(__aarch64__)
|
|
+#include "sys_aarch64.h"
|
|
#else
|
|
-#define GETPID_SYSCALL 39
|
|
+#include "sys_x86_64.h"
|
|
#endif
|
|
- __asm__ __volatile__("movq $" STRINGIFY(GETPID_SYSCALL) ", %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- :
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
|
|
-uint64_t __exit(uint64_t code) {
|
|
-#if defined(__APPLE__)
|
|
-#define EXIT_SYSCALL 0x2000001
|
|
-#else
|
|
-#define EXIT_SYSCALL 231
|
|
-#endif
|
|
- uint64_t ret;
|
|
- __asm__ __volatile__("movq $" STRINGIFY(EXIT_SYSCALL) ", %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(code)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
+constexpr uint32_t BufSize = 10240;
|
|
|
|
// Helper functions for writing strings to the .fdata file. We intentionally
|
|
// avoid using libc names to make it clear it is our impl.
|
|
@@ -415,219 +273,6 @@ static bool scanUInt32(const char *&Buf, const char *End, uint32_t &Ret) {
|
|
return false;
|
|
}
|
|
|
|
-#if !defined(__APPLE__)
|
|
-// We use a stack-allocated buffer for string manipulation in many pieces of
|
|
-// this code, including the code that prints each line of the fdata file. This
|
|
-// buffer needs to accomodate large function names, but shouldn't be arbitrarily
|
|
-// large (dynamically allocated) for simplicity of our memory space usage.
|
|
-
|
|
-// Declare some syscall wrappers we use throughout this code to avoid linking
|
|
-// against system libc.
|
|
-uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) {
|
|
- uint64_t ret;
|
|
- __asm__ __volatile__("movq $2, %%rax\n"
|
|
- "syscall"
|
|
- : "=a"(ret)
|
|
- : "D"(pathname), "S"(flags), "d"(mode)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-struct dirent {
|
|
- unsigned long d_ino; /* Inode number */
|
|
- unsigned long d_off; /* Offset to next linux_dirent */
|
|
- unsigned short d_reclen; /* Length of this linux_dirent */
|
|
- char d_name[]; /* Filename (null-terminated) */
|
|
- /* length is actually (d_reclen - 2 -
|
|
- offsetof(struct linux_dirent, d_name)) */
|
|
-};
|
|
-
|
|
-long __getdents(unsigned int fd, dirent *dirp, size_t count) {
|
|
- long ret;
|
|
- __asm__ __volatile__("movq $78, %%rax\n"
|
|
- "syscall"
|
|
- : "=a"(ret)
|
|
- : "D"(fd), "S"(dirp), "d"(count)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-uint64_t __readlink(const char *pathname, char *buf, size_t bufsize) {
|
|
- uint64_t ret;
|
|
- __asm__ __volatile__("movq $89, %%rax\n"
|
|
- "syscall"
|
|
- : "=a"(ret)
|
|
- : "D"(pathname), "S"(buf), "d"(bufsize)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-uint64_t __lseek(uint64_t fd, uint64_t pos, uint64_t whence) {
|
|
- uint64_t ret;
|
|
- __asm__ __volatile__("movq $8, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(fd), "S"(pos), "d"(whence)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-int __ftruncate(uint64_t fd, uint64_t length) {
|
|
- int ret;
|
|
- __asm__ __volatile__("movq $77, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(fd), "S"(length)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-int __close(uint64_t fd) {
|
|
- uint64_t ret;
|
|
- __asm__ __volatile__("movq $3, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(fd)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-int __madvise(void *addr, size_t length, int advice) {
|
|
- int ret;
|
|
- __asm__ __volatile__("movq $28, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(addr), "S"(length), "d"(advice)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-#define _UTSNAME_LENGTH 65
|
|
-
|
|
-struct UtsNameTy {
|
|
- char sysname[_UTSNAME_LENGTH]; /* Operating system name (e.g., "Linux") */
|
|
- char nodename[_UTSNAME_LENGTH]; /* Name within "some implementation-defined
|
|
- network" */
|
|
- char release[_UTSNAME_LENGTH]; /* Operating system release (e.g., "2.6.28") */
|
|
- char version[_UTSNAME_LENGTH]; /* Operating system version */
|
|
- char machine[_UTSNAME_LENGTH]; /* Hardware identifier */
|
|
- char domainname[_UTSNAME_LENGTH]; /* NIS or YP domain name */
|
|
-};
|
|
-
|
|
-int __uname(struct UtsNameTy *Buf) {
|
|
- int Ret;
|
|
- __asm__ __volatile__("movq $63, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(Ret)
|
|
- : "D"(Buf)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return Ret;
|
|
-}
|
|
-
|
|
-struct timespec {
|
|
- uint64_t tv_sec; /* seconds */
|
|
- uint64_t tv_nsec; /* nanoseconds */
|
|
-};
|
|
-
|
|
-uint64_t __nanosleep(const timespec *req, timespec *rem) {
|
|
- uint64_t ret;
|
|
- __asm__ __volatile__("movq $35, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(req), "S"(rem)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-int64_t __fork() {
|
|
- uint64_t ret;
|
|
- __asm__ __volatile__("movq $57, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- :
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-int __mprotect(void *addr, size_t len, int prot) {
|
|
- int ret;
|
|
- __asm__ __volatile__("movq $10, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(addr), "S"(len), "d"(prot)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-uint64_t __getppid() {
|
|
- uint64_t ret;
|
|
- __asm__ __volatile__("movq $110, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- :
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-int __setpgid(uint64_t pid, uint64_t pgid) {
|
|
- int ret;
|
|
- __asm__ __volatile__("movq $109, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(pid), "S"(pgid)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-uint64_t __getpgid(uint64_t pid) {
|
|
- uint64_t ret;
|
|
- __asm__ __volatile__("movq $121, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(pid)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-int __kill(uint64_t pid, int sig) {
|
|
- int ret;
|
|
- __asm__ __volatile__("movq $62, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(pid), "S"(sig)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-int __fsync(int fd) {
|
|
- int ret;
|
|
- __asm__ __volatile__("movq $74, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(ret)
|
|
- : "D"(fd)
|
|
- : "cc", "rcx", "r11", "memory");
|
|
- return ret;
|
|
-}
|
|
-
|
|
-// %rdi %rsi %rdx %r10 %r8
|
|
-// sys_prctl int option unsigned unsigned unsigned unsigned
|
|
-// long arg2 long arg3 long arg4 long arg5
|
|
-int __prctl(int Option, unsigned long Arg2, unsigned long Arg3,
|
|
- unsigned long Arg4, unsigned long Arg5) {
|
|
- int Ret;
|
|
- register long rdx asm("rdx") = Arg3;
|
|
- register long r8 asm("r8") = Arg5;
|
|
- register long r10 asm("r10") = Arg4;
|
|
- __asm__ __volatile__("movq $157, %%rax\n"
|
|
- "syscall\n"
|
|
- : "=a"(Ret)
|
|
- : "D"(Option), "S"(Arg2), "d"(rdx), "r"(r10), "r"(r8)
|
|
- :);
|
|
- return Ret;
|
|
-}
|
|
-
|
|
-#endif
|
|
-
|
|
void reportError(const char *Msg, uint64_t Size) {
|
|
__write(2, Msg, Size);
|
|
__exit(1);
|
|
@@ -644,6 +289,12 @@ void assert(bool Assertion, const char *Msg) {
|
|
reportError(Buf, Ptr - Buf);
|
|
}
|
|
|
|
+#define SIG_BLOCK 0
|
|
+#define SIG_UNBLOCK 1
|
|
+#define SIG_SETMASK 2
|
|
+
|
|
+static const uint64_t MaskAllSignals[] = {-1ULL};
|
|
+
|
|
class Mutex {
|
|
volatile bool InUse{false};
|
|
|
|
diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp
|
|
index 96a43f6..cfd113e 100644
|
|
--- a/bolt/runtime/instr.cpp
|
|
+++ b/bolt/runtime/instr.cpp
|
|
@@ -40,7 +40,6 @@
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
-#if defined (__x86_64__)
|
|
#include "common.h"
|
|
|
|
// Enables a very verbose logging to stderr useful when debugging
|
|
@@ -695,12 +694,12 @@ static char *getBinaryPath() {
|
|
assert(static_cast<int64_t>(FDdir) >= 0,
|
|
"failed to open /proc/self/map_files");
|
|
|
|
- while (long Nread = __getdents(FDdir, (struct dirent *)Buf, BufSize)) {
|
|
+ while (long Nread = __getdents64(FDdir, (struct dirent64 *)Buf, BufSize)) {
|
|
assert(static_cast<int64_t>(Nread) != -1, "failed to get folder entries");
|
|
|
|
- struct dirent *d;
|
|
+ struct dirent64 *d;
|
|
for (long Bpos = 0; Bpos < Nread; Bpos += d->d_reclen) {
|
|
- d = (struct dirent *)(Buf + Bpos);
|
|
+ d = (struct dirent64 *)(Buf + Bpos);
|
|
|
|
uint64_t StartAddress, EndAddress;
|
|
if (!parseAddressRange(d->d_name, StartAddress, EndAddress))
|
|
@@ -1668,6 +1667,17 @@ instrumentIndirectCall(uint64_t Target, uint64_t IndCallID) {
|
|
/// as well as the target address for the call
|
|
extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
|
|
{
|
|
+#if defined(__aarch64__)
|
|
+ // clang-format off
|
|
+ __asm__ __volatile__(SAVE_ALL
|
|
+ "ldp x0, x1, [sp, #288]\n"
|
|
+ "bl instrumentIndirectCall\n"
|
|
+ RESTORE_ALL
|
|
+ "ret\n"
|
|
+ :::);
|
|
+ // clang-format on
|
|
+#else
|
|
+ // clang-format off
|
|
__asm__ __volatile__(SAVE_ALL
|
|
"mov 0xa0(%%rsp), %%rdi\n"
|
|
"mov 0x98(%%rsp), %%rsi\n"
|
|
@@ -1675,10 +1685,23 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
|
|
RESTORE_ALL
|
|
"ret\n"
|
|
:::);
|
|
+ // clang-format on
|
|
+#endif
|
|
}
|
|
|
|
extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
|
|
{
|
|
+#if defined(__aarch64__)
|
|
+ // clang-format off
|
|
+ __asm__ __volatile__(SAVE_ALL
|
|
+ "ldp x0, x1, [sp, #288]\n"
|
|
+ "bl instrumentIndirectCall\n"
|
|
+ RESTORE_ALL
|
|
+ "ret\n"
|
|
+ :::);
|
|
+ // clang-format on
|
|
+#else
|
|
+ // clang-format off
|
|
__asm__ __volatile__(SAVE_ALL
|
|
"mov 0x98(%%rsp), %%rdi\n"
|
|
"mov 0x90(%%rsp), %%rsi\n"
|
|
@@ -1686,21 +1709,48 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
|
|
RESTORE_ALL
|
|
"ret\n"
|
|
:::);
|
|
+ // clang-format on
|
|
+#endif
|
|
}
|
|
|
|
/// This is hooking ELF's entry, it needs to save all machine state.
|
|
extern "C" __attribute((naked)) void __bolt_instr_start()
|
|
{
|
|
+#if defined(__aarch64__)
|
|
+ // clang-format off
|
|
+ __asm__ __volatile__(SAVE_ALL
|
|
+ "bl __bolt_instr_setup\n"
|
|
+ RESTORE_ALL
|
|
+ "adrp x16, __bolt_start_trampoline\n"
|
|
+ "add x16, x16, #:lo12:__bolt_start_trampoline\n"
|
|
+ "br x16\n"
|
|
+ :::);
|
|
+ // clang-format on
|
|
+#else
|
|
+ // clang-format off
|
|
__asm__ __volatile__(SAVE_ALL
|
|
"call __bolt_instr_setup\n"
|
|
RESTORE_ALL
|
|
"jmp __bolt_start_trampoline\n"
|
|
:::);
|
|
+ // clang-format on
|
|
+#endif
|
|
}
|
|
|
|
/// This is hooking into ELF's DT_FINI
|
|
extern "C" void __bolt_instr_fini() {
|
|
- __bolt_fini_trampoline();
|
|
+#if defined(__aarch64__)
|
|
+ // clang-format off
|
|
+ __asm__ __volatile__(SAVE_ALL
|
|
+ "adrp x16, __bolt_fini_trampoline\n"
|
|
+ "add x16, x16, #:lo12:__bolt_fini_trampoline\n"
|
|
+ "blr x16\n"
|
|
+ RESTORE_ALL
|
|
+ :::);
|
|
+ // clang-format on
|
|
+#else
|
|
+ __asm__ __volatile__("call __bolt_fini_trampoline\n" :::);
|
|
+#endif
|
|
if (__bolt_instr_sleep_time == 0) {
|
|
int FD = openProfile();
|
|
__bolt_instr_data_dump(FD);
|
|
@@ -1752,4 +1802,3 @@ void _bolt_instr_fini() {
|
|
}
|
|
|
|
#endif
|
|
-#endif
|
|
diff --git a/bolt/runtime/sys_aarch64.h b/bolt/runtime/sys_aarch64.h
|
|
new file mode 100644
|
|
index 0000000..77c9cfc
|
|
--- /dev/null
|
|
+++ b/bolt/runtime/sys_aarch64.h
|
|
@@ -0,0 +1,394 @@
|
|
+#ifndef LLVM_TOOLS_LLVM_BOLT_SYS_AARCH64
|
|
+#define LLVM_TOOLS_LLVM_BOLT_SYS_AARCH64
|
|
+
|
|
+// Save all registers while keeping 16B stack alignment
|
|
+#define SAVE_ALL \
|
|
+ "stp x0, x1, [sp, #-16]!\n" \
|
|
+ "stp x2, x3, [sp, #-16]!\n" \
|
|
+ "stp x4, x5, [sp, #-16]!\n" \
|
|
+ "stp x6, x7, [sp, #-16]!\n" \
|
|
+ "stp x8, x9, [sp, #-16]!\n" \
|
|
+ "stp x10, x11, [sp, #-16]!\n" \
|
|
+ "stp x12, x13, [sp, #-16]!\n" \
|
|
+ "stp x14, x15, [sp, #-16]!\n" \
|
|
+ "stp x16, x17, [sp, #-16]!\n" \
|
|
+ "stp x18, x19, [sp, #-16]!\n" \
|
|
+ "stp x20, x21, [sp, #-16]!\n" \
|
|
+ "stp x22, x23, [sp, #-16]!\n" \
|
|
+ "stp x24, x25, [sp, #-16]!\n" \
|
|
+ "stp x26, x27, [sp, #-16]!\n" \
|
|
+ "stp x28, x29, [sp, #-16]!\n" \
|
|
+ "str x30, [sp,#-16]!\n"
|
|
+// Mirrors SAVE_ALL
|
|
+#define RESTORE_ALL \
|
|
+ "ldr x30, [sp], #16\n" \
|
|
+ "ldp x28, x29, [sp], #16\n" \
|
|
+ "ldp x26, x27, [sp], #16\n" \
|
|
+ "ldp x24, x25, [sp], #16\n" \
|
|
+ "ldp x22, x23, [sp], #16\n" \
|
|
+ "ldp x20, x21, [sp], #16\n" \
|
|
+ "ldp x18, x19, [sp], #16\n" \
|
|
+ "ldp x16, x17, [sp], #16\n" \
|
|
+ "ldp x14, x15, [sp], #16\n" \
|
|
+ "ldp x12, x13, [sp], #16\n" \
|
|
+ "ldp x10, x11, [sp], #16\n" \
|
|
+ "ldp x8, x9, [sp], #16\n" \
|
|
+ "ldp x6, x7, [sp], #16\n" \
|
|
+ "ldp x4, x5, [sp], #16\n" \
|
|
+ "ldp x2, x3, [sp], #16\n" \
|
|
+ "ldp x0, x1, [sp], #16\n"
|
|
+
|
|
+// Anonymous namespace covering everything but our library entry point
|
|
+namespace {
|
|
+
|
|
+// Get the difference between runtime addrress of .text section and
|
|
+// static address in section header table. Can be extracted from arbitrary
|
|
+// pc value recorded at runtime to get the corresponding static address, which
|
|
+// in turn can be used to search for indirect call description. Needed because
|
|
+// indirect call descriptions are read-only non-relocatable data.
|
|
+uint64_t getTextBaseAddress() {
|
|
+ uint64_t DynAddr;
|
|
+ uint64_t StaticAddr;
|
|
+ __asm__ volatile("b .instr%=\n\t"
|
|
+ ".StaticAddr%=:\n\t"
|
|
+ ".dword __hot_end\n\t"
|
|
+ ".instr%=:\n\t"
|
|
+ "ldr %0, .StaticAddr%=\n\t"
|
|
+ "adrp %1, __hot_end\n\t"
|
|
+ "add %1, %1, :lo12:__hot_end\n\t"
|
|
+ : "=r"(StaticAddr), "=r"(DynAddr));
|
|
+ return DynAddr - StaticAddr;
|
|
+}
|
|
+
|
|
+uint64_t __read(uint64_t fd, const void *buf, uint64_t count) {
|
|
+ uint64_t ret;
|
|
+ register uint64_t x0 __asm__("x0") = fd;
|
|
+ register const void *x1 __asm__("x1") = buf;
|
|
+ register uint64_t x2 __asm__("x2") = count;
|
|
+ register uint32_t w8 __asm__("w8") = 63;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(x2), "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __write(uint64_t fd, const void *buf, uint64_t count) {
|
|
+ uint64_t ret;
|
|
+ register uint64_t x0 __asm__("x0") = fd;
|
|
+ register const void *x1 __asm__("x1") = buf;
|
|
+ register uint64_t x2 __asm__("x2") = count;
|
|
+ register uint32_t w8 __asm__("w8") = 64;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(x2), "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+void *__mmap(uint64_t addr, uint64_t size, uint64_t prot, uint64_t flags,
|
|
+ uint64_t fd, uint64_t offset) {
|
|
+ void *ret;
|
|
+ register uint64_t x0 __asm__("x0") = addr;
|
|
+ register uint64_t x1 __asm__("x1") = size;
|
|
+ register uint64_t x2 __asm__("x2") = prot;
|
|
+ register uint64_t x3 __asm__("x3") = flags;
|
|
+ register uint64_t x4 __asm__("x4") = fd;
|
|
+ register uint64_t x5 __asm__("x5") = offset;
|
|
+ register uint32_t w8 __asm__("w8") = 222;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __munmap(void *addr, uint64_t size) {
|
|
+ uint64_t ret;
|
|
+ register void *x0 __asm__("x0") = addr;
|
|
+ register uint64_t x1 __asm__("x1") = size;
|
|
+ register uint32_t w8 __asm__("w8") = 215;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __exit(uint64_t code) {
|
|
+ uint64_t ret;
|
|
+ register uint64_t x0 __asm__("x0") = code;
|
|
+ register uint32_t w8 __asm__("w8") = 94;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret), "+r"(x0)
|
|
+ : "r"(w8)
|
|
+ : "cc", "memory", "x1");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) {
|
|
+ uint64_t ret;
|
|
+ register int x0 __asm__("x0") = -100;
|
|
+ register const char *x1 __asm__("x1") = pathname;
|
|
+ register uint64_t x2 __asm__("x2") = flags;
|
|
+ register uint64_t x3 __asm__("x3") = mode;
|
|
+ register uint32_t w8 __asm__("w8") = 56;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(x2), "r"(x3), "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+long __getdents64(unsigned int fd, dirent64 *dirp, size_t count) {
|
|
+ long ret;
|
|
+ register unsigned int x0 __asm__("x0") = fd;
|
|
+ register dirent64 *x1 __asm__("x1") = dirp;
|
|
+ register size_t x2 __asm__("x2") = count;
|
|
+ register uint32_t w8 __asm__("w8") = 61;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(x2), "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __readlink(const char *pathname, char *buf, size_t bufsize) {
|
|
+ uint64_t ret;
|
|
+ register int x0 __asm__("x0") = -100;
|
|
+ register const char *x1 __asm__("x1") = pathname;
|
|
+ register char *x2 __asm__("x2") = buf;
|
|
+ register size_t x3 __asm__("x3") = bufsize;
|
|
+ register uint32_t w8 __asm__("w8") = 78; // readlinkat
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(x2), "r"(x3), "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __lseek(uint64_t fd, uint64_t pos, uint64_t whence) {
|
|
+ uint64_t ret;
|
|
+ register uint64_t x0 __asm__("x0") = fd;
|
|
+ register uint64_t x1 __asm__("x1") = pos;
|
|
+ register uint64_t x2 __asm__("x2") = whence;
|
|
+ register uint32_t w8 __asm__("w8") = 62;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(x2), "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __ftruncate(uint64_t fd, uint64_t length) {
|
|
+ int ret;
|
|
+ register uint64_t x0 __asm__("x0") = fd;
|
|
+ register uint64_t x1 __asm__("x1") = length;
|
|
+ register uint32_t w8 __asm__("w8") = 46;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %w0, w0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __close(uint64_t fd) {
|
|
+ int ret;
|
|
+ register uint64_t x0 __asm__("x0") = fd;
|
|
+ register uint32_t w8 __asm__("w8") = 57;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %w0, w0"
|
|
+ : "=r"(ret), "+r"(x0)
|
|
+ : "r"(w8)
|
|
+ : "cc", "memory", "x1");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __madvise(void *addr, size_t length, int advice) {
|
|
+ int ret;
|
|
+ register void *x0 __asm__("x0") = addr;
|
|
+ register size_t x1 __asm__("x1") = length;
|
|
+ register int x2 __asm__("x2") = advice;
|
|
+ register uint32_t w8 __asm__("w8") = 233;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %w0, w0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(x2), "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __uname(struct UtsNameTy *buf) {
|
|
+ int ret;
|
|
+ register UtsNameTy *x0 __asm__("x0") = buf;
|
|
+ register uint32_t w8 __asm__("w8") = 160;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %w0, w0"
|
|
+ : "=r"(ret), "+r"(x0)
|
|
+ : "r"(w8)
|
|
+ : "cc", "memory", "x1");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __nanosleep(const timespec *req, timespec *rem) {
|
|
+ uint64_t ret;
|
|
+ register const timespec *x0 __asm__("x0") = req;
|
|
+ register timespec *x1 __asm__("x1") = rem;
|
|
+ register uint32_t w8 __asm__("w8") = 101;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int64_t __fork() {
|
|
+ uint64_t ret;
|
|
+ // clone instead of fork with flags
|
|
+ // "CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD"
|
|
+ register uint64_t x0 __asm__("x0") = 0x1200011;
|
|
+ register uint64_t x1 __asm__("x1") = 0;
|
|
+ register uint64_t x2 __asm__("x2") = 0;
|
|
+ register uint64_t x3 __asm__("x3") = 0;
|
|
+ register uint64_t x4 __asm__("x4") = 0;
|
|
+ register uint32_t w8 __asm__("w8") = 220;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(x2), "r"(x3), "r"(x4), "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __mprotect(void *addr, size_t len, int prot) {
|
|
+ int ret;
|
|
+ register void *x0 __asm__("x0") = addr;
|
|
+ register size_t x1 __asm__("x1") = len;
|
|
+ register int x2 __asm__("x2") = prot;
|
|
+ register uint32_t w8 __asm__("w8") = 226;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %w0, w0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(x2), "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __getpid() {
|
|
+ uint64_t ret;
|
|
+ register uint32_t w8 __asm__("w8") = 172;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret)
|
|
+ : "r"(w8)
|
|
+ : "cc", "memory", "x0", "x1");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __getppid() {
|
|
+ uint64_t ret;
|
|
+ register uint32_t w8 __asm__("w8") = 173;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret)
|
|
+ : "r"(w8)
|
|
+ : "cc", "memory", "x0", "x1");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __setpgid(uint64_t pid, uint64_t pgid) {
|
|
+ int ret;
|
|
+ register uint64_t x0 __asm__("x0") = pid;
|
|
+ register uint64_t x1 __asm__("x1") = pgid;
|
|
+ register uint32_t w8 __asm__("w8") = 154;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %w0, w0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __getpgid(uint64_t pid) {
|
|
+ uint64_t ret;
|
|
+ register uint64_t x0 __asm__("x0") = pid;
|
|
+ register uint32_t w8 __asm__("w8") = 155;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret), "+r"(x0)
|
|
+ : "r"(w8)
|
|
+ : "cc", "memory", "x1");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __kill(uint64_t pid, int sig) {
|
|
+ int ret;
|
|
+ register uint64_t x0 __asm__("x0") = pid;
|
|
+ register int x1 __asm__("x1") = sig;
|
|
+ register uint32_t w8 __asm__("w8") = 129;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %w0, w0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __fsync(int fd) {
|
|
+ int ret;
|
|
+ register int x0 __asm__("x0") = fd;
|
|
+ register uint32_t w8 __asm__("w8") = 82;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %w0, w0"
|
|
+ : "=r"(ret), "+r"(x0)
|
|
+ : "r"(w8)
|
|
+ : "cc", "memory", "x1");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __sigprocmask(int how, const void *set, void *oldset) {
|
|
+ uint64_t ret;
|
|
+ register int x0 __asm__("x0") = how;
|
|
+ register const void *x1 __asm__("x1") = set;
|
|
+ register void *x2 __asm__("x2") = oldset;
|
|
+ register long x3 asm("x3") = 8;
|
|
+ register uint32_t w8 __asm__("w8") = 135;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %0, x0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(x2), "r"(x3), "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __prctl(int option, unsigned long arg2, unsigned long arg3,
|
|
+ unsigned long arg4, unsigned long arg5) {
|
|
+ int ret;
|
|
+ register int x0 __asm__("x0") = option;
|
|
+ register unsigned long x1 __asm__("x1") = arg2;
|
|
+ register unsigned long x2 __asm__("x2") = arg3;
|
|
+ register unsigned long x3 __asm__("x3") = arg4;
|
|
+ register unsigned long x4 __asm__("x4") = arg5;
|
|
+ register uint32_t w8 __asm__("w8") = 167;
|
|
+ __asm__ __volatile__("svc #0\n"
|
|
+ "mov %w0, w0"
|
|
+ : "=r"(ret), "+r"(x0), "+r"(x1)
|
|
+ : "r"(x2), "r"(x3), "r"(x4), "r"(w8)
|
|
+ : "cc", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+} // anonymous namespace
|
|
+
|
|
+#endif
|
|
diff --git a/bolt/runtime/sys_x86_64.h b/bolt/runtime/sys_x86_64.h
|
|
new file mode 100644
|
|
index 0000000..ca2c693
|
|
--- /dev/null
|
|
+++ b/bolt/runtime/sys_x86_64.h
|
|
@@ -0,0 +1,360 @@
|
|
+#ifndef LLVM_TOOLS_LLVM_BOLT_SYS_X86_64
|
|
+#define LLVM_TOOLS_LLVM_BOLT_SYS_X86_64
|
|
+
|
|
+// Save all registers while keeping 16B stack alignment
|
|
+#define SAVE_ALL \
|
|
+ "push %%rax\n" \
|
|
+ "push %%rbx\n" \
|
|
+ "push %%rcx\n" \
|
|
+ "push %%rdx\n" \
|
|
+ "push %%rdi\n" \
|
|
+ "push %%rsi\n" \
|
|
+ "push %%rbp\n" \
|
|
+ "push %%r8\n" \
|
|
+ "push %%r9\n" \
|
|
+ "push %%r10\n" \
|
|
+ "push %%r11\n" \
|
|
+ "push %%r12\n" \
|
|
+ "push %%r13\n" \
|
|
+ "push %%r14\n" \
|
|
+ "push %%r15\n" \
|
|
+ "sub $8, %%rsp\n"
|
|
+// Mirrors SAVE_ALL
|
|
+#define RESTORE_ALL \
|
|
+ "add $8, %%rsp\n" \
|
|
+ "pop %%r15\n" \
|
|
+ "pop %%r14\n" \
|
|
+ "pop %%r13\n" \
|
|
+ "pop %%r12\n" \
|
|
+ "pop %%r11\n" \
|
|
+ "pop %%r10\n" \
|
|
+ "pop %%r9\n" \
|
|
+ "pop %%r8\n" \
|
|
+ "pop %%rbp\n" \
|
|
+ "pop %%rsi\n" \
|
|
+ "pop %%rdi\n" \
|
|
+ "pop %%rdx\n" \
|
|
+ "pop %%rcx\n" \
|
|
+ "pop %%rbx\n" \
|
|
+ "pop %%rax\n"
|
|
+
|
|
+namespace {
|
|
+
|
|
+// Get the difference between runtime address of .text section and
|
|
+// static address in section header table. Can be extracted from arbitrary
|
|
+// pc value recorded at runtime to get the corresponding static address, which
|
|
+// in turn can be used to search for indirect call description. Needed because
|
|
+// indirect call descriptions are read-only non-relocatable data.
|
|
+uint64_t getTextBaseAddress() {
|
|
+ uint64_t DynAddr;
|
|
+ uint64_t StaticAddr;
|
|
+ __asm__ volatile("leaq __hot_end(%%rip), %0\n\t"
|
|
+ "movabsq $__hot_end, %1\n\t"
|
|
+ : "=r"(DynAddr), "=r"(StaticAddr));
|
|
+ return DynAddr - StaticAddr;
|
|
+}
|
|
+
|
|
+#define _STRINGIFY(x) #x
|
|
+#define STRINGIFY(x) _STRINGIFY(x)
|
|
+
|
|
+uint64_t __read(uint64_t fd, const void *buf, uint64_t count) {
|
|
+ uint64_t ret;
|
|
+#if defined(__APPLE__)
|
|
+#define READ_SYSCALL 0x2000003
|
|
+#else
|
|
+#define READ_SYSCALL 0
|
|
+#endif
|
|
+ __asm__ __volatile__("movq $" STRINGIFY(READ_SYSCALL) ", %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(fd), "S"(buf), "d"(count)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __write(uint64_t fd, const void *buf, uint64_t count) {
|
|
+ uint64_t ret;
|
|
+#if defined(__APPLE__)
|
|
+#define WRITE_SYSCALL 0x2000004
|
|
+#else
|
|
+#define WRITE_SYSCALL 1
|
|
+#endif
|
|
+ __asm__ __volatile__("movq $" STRINGIFY(WRITE_SYSCALL) ", %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(fd), "S"(buf), "d"(count)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+void *__mmap(uint64_t addr, uint64_t size, uint64_t prot, uint64_t flags,
|
|
+ uint64_t fd, uint64_t offset) {
|
|
+#if defined(__APPLE__)
|
|
+#define MMAP_SYSCALL 0x20000c5
|
|
+#else
|
|
+#define MMAP_SYSCALL 9
|
|
+#endif
|
|
+ void *ret;
|
|
+ register uint64_t r8 asm("r8") = fd;
|
|
+ register uint64_t r9 asm("r9") = offset;
|
|
+ register uint64_t r10 asm("r10") = flags;
|
|
+ __asm__ __volatile__("movq $" STRINGIFY(MMAP_SYSCALL) ", %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(addr), "S"(size), "d"(prot), "r"(r10), "r"(r8),
|
|
+ "r"(r9)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __munmap(void *addr, uint64_t size) {
|
|
+#if defined(__APPLE__)
|
|
+#define MUNMAP_SYSCALL 0x2000049
|
|
+#else
|
|
+#define MUNMAP_SYSCALL 11
|
|
+#endif
|
|
+ uint64_t ret;
|
|
+ __asm__ __volatile__("movq $" STRINGIFY(MUNMAP_SYSCALL) ", %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(addr), "S"(size)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __sigprocmask(int how, const void *set, void *oldset) {
|
|
+#if defined(__APPLE__)
|
|
+#define SIGPROCMASK_SYSCALL 0x2000030
|
|
+#else
|
|
+#define SIGPROCMASK_SYSCALL 14
|
|
+#endif
|
|
+ uint64_t ret;
|
|
+ register long r10 asm("r10") = sizeof(uint64_t);
|
|
+ __asm__ __volatile__("movq $" STRINGIFY(SIGPROCMASK_SYSCALL) ", %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(how), "S"(set), "d"(oldset), "r"(r10)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __getpid() {
|
|
+ uint64_t ret;
|
|
+#if defined(__APPLE__)
|
|
+#define GETPID_SYSCALL 20
|
|
+#else
|
|
+#define GETPID_SYSCALL 39
|
|
+#endif
|
|
+ __asm__ __volatile__("movq $" STRINGIFY(GETPID_SYSCALL) ", %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ :
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __exit(uint64_t code) {
|
|
+#if defined(__APPLE__)
|
|
+#define EXIT_SYSCALL 0x2000001
|
|
+#else
|
|
+#define EXIT_SYSCALL 231
|
|
+#endif
|
|
+ uint64_t ret;
|
|
+ __asm__ __volatile__("movq $" STRINGIFY(EXIT_SYSCALL) ", %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(code)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+#if !defined(__APPLE__)
|
|
+// We use a stack-allocated buffer for string manipulation in many pieces of
|
|
+// this code, including the code that prints each line of the fdata file. This
|
|
+// buffer needs to accommodate large function names, but shouldn't be arbitrarily
|
|
+// large (dynamically allocated) for simplicity of our memory space usage.
|
|
+
|
|
+// Declare some syscall wrappers we use throughout this code to avoid linking
|
|
+// against system libc.
|
|
+uint64_t __open(const char *pathname, uint64_t flags, uint64_t mode) {
|
|
+ uint64_t ret;
|
|
+ __asm__ __volatile__("movq $2, %%rax\n"
|
|
+ "syscall"
|
|
+ : "=a"(ret)
|
|
+ : "D"(pathname), "S"(flags), "d"(mode)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+long __getdents64(unsigned int fd, dirent64 *dirp, size_t count) {
|
|
+ long ret;
|
|
+ __asm__ __volatile__("movq $217, %%rax\n"
|
|
+ "syscall"
|
|
+ : "=a"(ret)
|
|
+ : "D"(fd), "S"(dirp), "d"(count)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __readlink(const char *pathname, char *buf, size_t bufsize) {
|
|
+ uint64_t ret;
|
|
+ __asm__ __volatile__("movq $89, %%rax\n"
|
|
+ "syscall"
|
|
+ : "=a"(ret)
|
|
+ : "D"(pathname), "S"(buf), "d"(bufsize)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __lseek(uint64_t fd, uint64_t pos, uint64_t whence) {
|
|
+ uint64_t ret;
|
|
+ __asm__ __volatile__("movq $8, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(fd), "S"(pos), "d"(whence)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __ftruncate(uint64_t fd, uint64_t length) {
|
|
+ int ret;
|
|
+ __asm__ __volatile__("movq $77, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(fd), "S"(length)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __close(uint64_t fd) {
|
|
+ uint64_t ret;
|
|
+ __asm__ __volatile__("movq $3, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(fd)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __madvise(void *addr, size_t length, int advice) {
|
|
+ int ret;
|
|
+ __asm__ __volatile__("movq $28, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(addr), "S"(length), "d"(advice)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __uname(struct UtsNameTy *Buf) {
|
|
+ int Ret;
|
|
+ __asm__ __volatile__("movq $63, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(Ret)
|
|
+ : "D"(Buf)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return Ret;
|
|
+}
|
|
+
|
|
+uint64_t __nanosleep(const timespec *req, timespec *rem) {
|
|
+ uint64_t ret;
|
|
+ __asm__ __volatile__("movq $35, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(req), "S"(rem)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int64_t __fork() {
|
|
+ uint64_t ret;
|
|
+ __asm__ __volatile__("movq $57, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ :
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __mprotect(void *addr, size_t len, int prot) {
|
|
+ int ret;
|
|
+ __asm__ __volatile__("movq $10, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(addr), "S"(len), "d"(prot)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __getppid() {
|
|
+ uint64_t ret;
|
|
+ __asm__ __volatile__("movq $110, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ :
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __setpgid(uint64_t pid, uint64_t pgid) {
|
|
+ int ret;
|
|
+ __asm__ __volatile__("movq $109, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(pid), "S"(pgid)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+uint64_t __getpgid(uint64_t pid) {
|
|
+ uint64_t ret;
|
|
+ __asm__ __volatile__("movq $121, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(pid)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __kill(uint64_t pid, int sig) {
|
|
+ int ret;
|
|
+ __asm__ __volatile__("movq $62, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(pid), "S"(sig)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int __fsync(int fd) {
|
|
+ int ret;
|
|
+ __asm__ __volatile__("movq $74, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(ret)
|
|
+ : "D"(fd)
|
|
+ : "cc", "rcx", "r11", "memory");
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+// %rdi %rsi %rdx %r10 %r8
|
|
+// sys_prctl int option unsigned unsigned unsigned unsigned
|
|
+// long arg2 long arg3 long arg4 long arg5
|
|
+int __prctl(int Option, unsigned long Arg2, unsigned long Arg3,
|
|
+ unsigned long Arg4, unsigned long Arg5) {
|
|
+ int Ret;
|
|
+ register long rdx asm("rdx") = Arg3;
|
|
+ register long r8 asm("r8") = Arg5;
|
|
+ register long r10 asm("r10") = Arg4;
|
|
+ __asm__ __volatile__("movq $157, %%rax\n"
|
|
+ "syscall\n"
|
|
+ : "=a"(Ret)
|
|
+ : "D"(Option), "S"(Arg2), "d"(rdx), "r"(r10), "r"(r8)
|
|
+ :);
|
|
+ return Ret;
|
|
+}
|
|
+
|
|
+#endif
|
|
+
|
|
+} // anonymous namespace
|
|
+
|
|
+#endif
|
|
diff --git a/bolt/test/AArch64/exclusive-instrument.s b/bolt/test/AArch64/exclusive-instrument.s
|
|
new file mode 100644
|
|
index 0000000..502dd83
|
|
--- /dev/null
|
|
+++ b/bolt/test/AArch64/exclusive-instrument.s
|
|
@@ -0,0 +1,39 @@
|
|
+// This test checks that the foo function having exclusive memory access
|
|
+// instructions won't be instrumented.
|
|
+
|
|
+// REQUIRES: system-linux,bolt-runtime,target=aarch64{{.*}}
|
|
+
|
|
+// RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
|
|
+// RUN: %s -o %t.o
|
|
+// RUN: %clang %cflags -fPIC -pie %t.o -o %t.exe -nostdlib -Wl,-q -Wl,-fini=dummy
|
|
+// RUN: llvm-bolt %t.exe -o %t.bolt -instrument -v=1 | FileCheck %s
|
|
+
|
|
+// CHECK: Function foo has exclusive instructions, skip instrumentation
|
|
+
|
|
+.global foo
|
|
+.type foo, %function
|
|
+foo:
|
|
+ ldaxr w9, [x10]
|
|
+ cbnz w9, .Lret
|
|
+ stlxr w12, w11, [x9]
|
|
+ cbz w12, foo
|
|
+ clrex
|
|
+.Lret:
|
|
+ ret
|
|
+.size foo, .-foo
|
|
+
|
|
+.global _start
|
|
+.type _start, %function
|
|
+_start:
|
|
+ cmp x0, #0
|
|
+ b.eq .Lexit
|
|
+ bl foo
|
|
+.Lexit:
|
|
+ ret
|
|
+.size _start, .-_start
|
|
+
|
|
+.global dummy
|
|
+.type dummy, %function
|
|
+dummy:
|
|
+ ret
|
|
+.size dummy, .-dummy
|
|
diff --git a/bolt/test/X86/asm-dump.c b/bolt/test/X86/asm-dump.c
|
|
index 5d85e2a..fdd448e 100644
|
|
--- a/bolt/test/X86/asm-dump.c
|
|
+++ b/bolt/test/X86/asm-dump.c
|
|
@@ -1,13 +1,14 @@
|
|
/**
|
|
* Test for asm-dump functionality.
|
|
*
|
|
- * REQUIRES: system-linux,bolt-runtime
|
|
+ * REQUIRES: x86_64-linux,bolt-runtime
|
|
*
|
|
* Compile the source
|
|
* RUN: %clang -fPIC %s -o %t.exe -Wl,-q
|
|
*
|
|
* Profile collection: instrument the binary
|
|
- * RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata -o %t.instr
|
|
+ * RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata -o \
|
|
+ * RUN: %t.instr
|
|
*
|
|
* Profile collection: run instrumented binary (and capture output)
|
|
* RUN: %t.instr > %t.result
|
|
diff --git a/bolt/test/X86/bolt-address-translation-internal-call.test b/bolt/test/X86/bolt-address-translation-internal-call.test
|
|
index edc32d9..24cb635 100644
|
|
--- a/bolt/test/X86/bolt-address-translation-internal-call.test
|
|
+++ b/bolt/test/X86/bolt-address-translation-internal-call.test
|
|
@@ -4,12 +4,12 @@
|
|
# internal calls) might create new blocks without a mapping to an
|
|
# input block.
|
|
|
|
-# REQUIRES: system-linux,bolt-runtime
|
|
+# REQUIRES: x86_64-linux,bolt-runtime
|
|
|
|
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
|
|
# Delete our BB symbols so BOLT doesn't mark them as entry points
|
|
# RUN: llvm-strip --strip-unneeded %t.o
|
|
-# RUN: %clang %t.o -o %t.exe -Wl,-q
|
|
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
|
|
|
|
# RUN: llvm-bolt --enable-bat %t.exe --relocs -o %t.out | FileCheck %s
|
|
# CHECK: BOLT-INFO: Wrote {{.*}} BAT maps
|
|
@@ -29,6 +29,7 @@ main:
|
|
push %rbx
|
|
sub $0x120,%rsp
|
|
mov $0x3,%rbx
|
|
+ movq rel(%rip), %rdi
|
|
.J1:
|
|
cmp $0x0,%rbx
|
|
je .J2
|
|
@@ -49,4 +50,8 @@ main:
|
|
.J4:
|
|
pop %rbp
|
|
retq
|
|
+end:
|
|
.size main, .-main
|
|
+
|
|
+ .data
|
|
+rel: .quad end
|
|
diff --git a/bolt/test/X86/instrumentation-eh_frame_hdr.cpp b/bolt/test/X86/instrumentation-eh_frame_hdr.cpp
|
|
index f6ebd6b..4ed8be4 100644
|
|
--- a/bolt/test/X86/instrumentation-eh_frame_hdr.cpp
|
|
+++ b/bolt/test/X86/instrumentation-eh_frame_hdr.cpp
|
|
@@ -1,7 +1,7 @@
|
|
// This test checks that .eh_frame_hdr address is in bounds of the last LOAD
|
|
// end address i.e. the section address is smaller then the LOAD end address.
|
|
|
|
-// REQUIRES: system-linux,bolt-runtime
|
|
+// REQUIRES: system-linux,bolt-runtime,target=x86_64{{.*}}
|
|
|
|
// RUN: %clangxx %cxxflags -static -Wl,-q %s -o %t.exe -Wl,--entry=_start
|
|
// RUN: llvm-bolt %t.exe -o %t.instr -instrument \
|
|
diff --git a/bolt/test/X86/internal-call-instrument.s b/bolt/test/X86/internal-call-instrument.s
|
|
index c137174..c393f1d 100644
|
|
--- a/bolt/test/X86/internal-call-instrument.s
|
|
+++ b/bolt/test/X86/internal-call-instrument.s
|
|
@@ -1,15 +1,23 @@
|
|
# This reproduces a bug with instrumentation crashes on internal call
|
|
|
|
-# REQUIRES: system-linux,bolt-runtime
|
|
+# REQUIRES: x86_64-linux,bolt-runtime,target=x86_64{{.*}}
|
|
|
|
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %s -o %t.o
|
|
# Delete our BB symbols so BOLT doesn't mark them as entry points
|
|
# RUN: llvm-strip --strip-unneeded %t.o
|
|
-# RUN: %clang %t.o -o %t.exe -Wl,-q
|
|
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
|
|
|
|
# RUN: llvm-bolt --instrument %t.exe --relocs -o %t.out
|
|
|
|
.text
|
|
+ .globl _start
|
|
+ .type _start, %function
|
|
+ .p2align 4
|
|
+_start:
|
|
+ call main
|
|
+ ret
|
|
+ .size _start, .-_start
|
|
+
|
|
.globl main
|
|
.type main, %function
|
|
.p2align 4
|
|
@@ -20,6 +28,7 @@ main:
|
|
push %rbx
|
|
sub $0x120,%rsp
|
|
mov $0x3,%rbx
|
|
+ movq rel(%rip), %rdi
|
|
.J1:
|
|
cmp $0x0,%rbx
|
|
je .J2
|
|
@@ -40,4 +49,15 @@ main:
|
|
.J4:
|
|
pop %rbp
|
|
retq
|
|
+end:
|
|
.size main, .-main
|
|
+
|
|
+ .globl _fini
|
|
+ .type _fini, %function
|
|
+ .p2align 4
|
|
+_fini:
|
|
+ hlt
|
|
+ .size _fini, .-_fini
|
|
+
|
|
+ .data
|
|
+rel: .quad end
|
|
diff --git a/bolt/test/X86/tail-duplication-pass.s b/bolt/test/X86/tail-duplication-pass.s
|
|
index 677f498..ed50cc5 100644
|
|
--- a/bolt/test/X86/tail-duplication-pass.s
|
|
+++ b/bolt/test/X86/tail-duplication-pass.s
|
|
@@ -7,12 +7,21 @@
|
|
# RUN: llvm-bolt %t.exe --data %t.fdata --reorder-blocks=ext-tsp \
|
|
# RUN: --print-finalized --tail-duplication=moderate \
|
|
# RUN: --tail-duplication-minimum-offset=1 -o %t.out | FileCheck %s
|
|
+# RUN: llvm-bolt %t.exe --data %t.fdata --print-finalized \
|
|
+# RUN: --tail-duplication=aggressive --tail-duplication-minimum-offset=1 \
|
|
+# RUN: -o %t.out | FileCheck %s --check-prefix CHECK-NOLOOP
|
|
|
|
# FDATA: 1 main 2 1 main #.BB2# 0 10
|
|
# FDATA: 1 main 4 1 main #.BB2# 0 20
|
|
# CHECK: BOLT-INFO: tail duplication modified 1 ({{.*}}%) functions; duplicated 1 blocks (1 bytes) responsible for {{.*}} dynamic executions ({{.*}}% of all block executions)
|
|
# CHECK: BB Layout : .LBB00, .Ltail-dup0, .Ltmp0, .Ltmp1
|
|
|
|
+# Check that the successor of Ltail-dup0 is .LBB00, not itself.
|
|
+# CHECK-NOLOOP: .Ltail-dup0 (1 instructions, align : 1)
|
|
+# CHECK-NOLOOP: Predecessors: .LBB00
|
|
+# CHECK-NOLOOP: retq
|
|
+# CHECK-NOLOOP: .Ltmp0 (1 instructions, align : 1)
|
|
+
|
|
.text
|
|
.globl main
|
|
.type main, %function
|
|
diff --git a/bolt/test/assume-abi.test b/bolt/test/assume-abi.test
|
|
new file mode 100644
|
|
index 0000000..688ab01
|
|
--- /dev/null
|
|
+++ b/bolt/test/assume-abi.test
|
|
@@ -0,0 +1,7 @@
|
|
+# Validate the usage of the `--assume-abi` option in conjunction with
|
|
+# options related to the RegAnalysis Pass.
|
|
+
|
|
+REQUIRES: system-linux
|
|
+
|
|
+RUN: %clang %cflags %p/Inputs/hello.c -o %t -Wl,-q
|
|
+RUN: llvm-bolt %t -o %t.bolt --assume-abi --indirect-call-promotion=all
|
|
diff --git a/bolt/test/runtime/AArch64/Inputs/basic-instrumentation.s b/bolt/test/runtime/AArch64/Inputs/basic-instrumentation.s
|
|
new file mode 100644
|
|
index 0000000..fa1ac35
|
|
--- /dev/null
|
|
+++ b/bolt/test/runtime/AArch64/Inputs/basic-instrumentation.s
|
|
@@ -0,0 +1,9 @@
|
|
+ .globl main
|
|
+ .type main, %function
|
|
+main:
|
|
+ sub sp, sp, #16
|
|
+ mov w0, wzr
|
|
+ str wzr, [sp, #12]
|
|
+ add sp, sp, #16
|
|
+ ret
|
|
+.size main, .-main
|
|
diff --git a/bolt/test/runtime/AArch64/basic-instrumentation.test b/bolt/test/runtime/AArch64/basic-instrumentation.test
|
|
new file mode 100644
|
|
index 0000000..0f77b0c
|
|
--- /dev/null
|
|
+++ b/bolt/test/runtime/AArch64/basic-instrumentation.test
|
|
@@ -0,0 +1,22 @@
|
|
+# Try to instrument a very fast test. Input bin will not execute any code during
|
|
+# runtime besides returning zero in main, so it is a good trivial case.
|
|
+REQUIRES: system-linux,bolt-runtime
|
|
+
|
|
+RUN: %clang %p/Inputs/basic-instrumentation.s -Wl,-q -o %t.exe
|
|
+RUN: llvm-bolt %t.exe -o %t --instrument \
|
|
+RUN: --instrumentation-file=%t \
|
|
+RUN: --instrumentation-file-append-pid
|
|
+
|
|
+# Execute program to collect profile
|
|
+RUN: rm %t.*.fdata || echo Nothing to remove
|
|
+RUN: %t
|
|
+
|
|
+# Profile should be written to %t.PID.fdata, check it
|
|
+RUN: mv %t.*.fdata %t.fdata
|
|
+RUN: cat %t.fdata | FileCheck -check-prefix=CHECK %s
|
|
+
|
|
+# Check BOLT works with this profile
|
|
+RUN: llvm-bolt %t.exe --data %t.fdata -o %t.2 --reorder-blocks=cache
|
|
+
|
|
+# The instrumented profile should at least say main was called once
|
|
+CHECK: main 0 0 1{{$}}
|
|
diff --git a/bolt/test/runtime/AArch64/instrumentation-ind-call.c b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
|
|
new file mode 100644
|
|
index 0000000..76ee8c0
|
|
--- /dev/null
|
|
+++ b/bolt/test/runtime/AArch64/instrumentation-ind-call.c
|
|
@@ -0,0 +1,38 @@
|
|
+#include <stdio.h>
|
|
+
|
|
+typedef int (*func_ptr)(int, int);
|
|
+
|
|
+int add(int a, int b) { return a + b; }
|
|
+
|
|
+int main() {
|
|
+ func_ptr fun;
|
|
+ fun = add;
|
|
+ int sum = fun(10, 20); // indirect call to 'add'
|
|
+ printf("The sum is: %d\n", sum);
|
|
+ return 0;
|
|
+}
|
|
+/*
|
|
+REQUIRES: system-linux,bolt-runtime
|
|
+
|
|
+RUN: %clang %cflags %s -o %t.exe -Wl,-q -nopie -fpie
|
|
+
|
|
+RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata \
|
|
+RUN: -o %t.instrumented
|
|
+
|
|
+# Instrumented program needs to finish returning zero
|
|
+RUN: %t.instrumented | FileCheck %s -check-prefix=CHECK-OUTPUT
|
|
+
|
|
+# Test that the instrumented data makes sense
|
|
+RUN: llvm-bolt %t.exe -o %t.bolted --data %t.fdata \
|
|
+RUN: --reorder-blocks=ext-tsp --reorder-functions=hfsort+ \
|
|
+RUN: --print-only=main --print-finalized | FileCheck %s
|
|
+
|
|
+RUN: %t.bolted | FileCheck %s -check-prefix=CHECK-OUTPUT
|
|
+
|
|
+CHECK-OUTPUT: The sum is: 30
|
|
+
|
|
+# Check that our indirect call has 1 hit recorded in the fdata file and that
|
|
+# this was processed correctly by BOLT
|
|
+CHECK: blr x8 # CallProfile: 1 (0 misses) :
|
|
+CHECK-NEXT: { add: 1 (0 misses) }
|
|
+*/
|
|
diff --git a/bolt/test/runtime/X86/Inputs/exceptions_split.cpp b/bolt/test/runtime/Inputs/exceptions_split.cpp
|
|
similarity index 85%
|
|
rename from bolt/test/runtime/X86/Inputs/exceptions_split.cpp
|
|
rename to bolt/test/runtime/Inputs/exceptions_split.cpp
|
|
index 2c136b9..de81adf 100644
|
|
--- a/bolt/test/runtime/X86/Inputs/exceptions_split.cpp
|
|
+++ b/bolt/test/runtime/Inputs/exceptions_split.cpp
|
|
@@ -3,31 +3,25 @@
|
|
//
|
|
// Record performance data with no args. Run test with 2 args.
|
|
|
|
-#include <stdio.h>
|
|
#include <stdint.h>
|
|
+#include <stdio.h>
|
|
|
|
-int foo()
|
|
-{
|
|
- return 0;
|
|
-}
|
|
+int foo() { return 0; }
|
|
|
|
void bar(int a) {
|
|
if (a > 2 && a % 2)
|
|
throw new int();
|
|
}
|
|
|
|
-void filter_only(){
|
|
- foo();
|
|
-}
|
|
+void filter_only() { foo(); }
|
|
|
|
-int main(int argc, char **argv)
|
|
-{
|
|
+int main(int argc, char **argv) {
|
|
unsigned r = 0;
|
|
|
|
uint64_t limit = (argc >= 2 ? 10 : 5000);
|
|
for (uint64_t i = 0; i < limit; ++i) {
|
|
i += foo();
|
|
- try {
|
|
+ try {
|
|
bar(argc);
|
|
try {
|
|
if (argc >= 2)
|
|
diff --git a/bolt/test/runtime/X86/instrumentation-tail-call.s b/bolt/test/runtime/X86/instrumentation-tail-call.s
|
|
index 792d084..dfb12f0 100644
|
|
--- a/bolt/test/runtime/X86/instrumentation-tail-call.s
|
|
+++ b/bolt/test/runtime/X86/instrumentation-tail-call.s
|
|
@@ -14,6 +14,9 @@
|
|
|
|
# CHECK: leaq 0x80(%rsp), %rsp
|
|
|
|
+# RUN: FileCheck %s --input-file %t.fdata --check-prefix=CHECK-FDATA
|
|
+# CHECK-FDATA: 1 main {{.*}} 1 targetFunc 0 0 1
|
|
+
|
|
.text
|
|
.globl main
|
|
.type main, %function
|
|
@@ -32,7 +35,8 @@ main:
|
|
movq %rbp, %rsp
|
|
pop %rbp
|
|
mov -0x10(%rsp),%rax
|
|
- jmp targetFunc
|
|
+ test %rsp, %rsp
|
|
+ jne targetFunc
|
|
|
|
.LBBerror:
|
|
addq $0x20, %rsp
|
|
diff --git a/bolt/test/runtime/X86/exceptions-instrumentation.test b/bolt/test/runtime/exceptions-instrumentation.test
|
|
similarity index 100%
|
|
rename from bolt/test/runtime/X86/exceptions-instrumentation.test
|
|
rename to bolt/test/runtime/exceptions-instrumentation.test
|
|
diff --git a/bolt/test/runtime/X86/pie-exceptions-split.test b/bolt/test/runtime/pie-exceptions-split.test
|
|
similarity index 95%
|
|
rename from bolt/test/runtime/X86/pie-exceptions-split.test
|
|
rename to bolt/test/runtime/pie-exceptions-split.test
|
|
index 124fef6..30f2d02 100644
|
|
--- a/bolt/test/runtime/X86/pie-exceptions-split.test
|
|
+++ b/bolt/test/runtime/pie-exceptions-split.test
|
|
@@ -16,9 +16,9 @@ RUN: --print-only=main 2>&1 | FileCheck %s
|
|
## All calls to printf() should be from exception handling code that was
|
|
## recorded as cold during the profile collection run. Check that the calls
|
|
## are placed after the split point.
|
|
-CHECK-NOT: callq printf
|
|
+CHECK-NOT: printf
|
|
CHECK: HOT-COLD SPLIT POINT
|
|
-CHECK: callq printf
|
|
+CHECK: printf
|
|
|
|
## Verify the output still executes correctly when the exception path is being
|
|
## taken.
|
|
--
|
|
2.39.5 (Apple Git-154)
|
|
|