From 25c9e9c7d4532f6e8962a25c5c7087bf3e3b8445 Mon Sep 17 00:00:00 2001 From: rfwang07 Date: Thu, 25 Jul 2024 14:45:53 +0800 Subject: [PATCH] Add CFG block count correction optimization. --- bolt/include/bolt/Core/BinaryBasicBlock.h | 59 +- .../bolt/Core/BinaryBasicBlockFeature.h | 268 ++++++++ bolt/include/bolt/Passes/FeatureMiner.h | 176 ++++++ bolt/include/bolt/Passes/StaticBranchInfo.h | 108 ++++ bolt/include/bolt/Profile/DataReader.h | 93 ++- bolt/lib/Core/BinaryBasicBlockFeature.cpp | 21 + bolt/lib/Core/CMakeLists.txt | 1 + bolt/lib/Passes/CMakeLists.txt | 2 + bolt/lib/Passes/FeatureMiner.cpp | 572 ++++++++++++++++++ bolt/lib/Passes/StaticBranchInfo.cpp | 143 +++++ bolt/lib/Profile/DataReader.cpp | 120 +++- bolt/lib/Rewrite/RewriteInstance.cpp | 6 + 12 files changed, 1557 insertions(+), 12 deletions(-) create mode 100644 bolt/include/bolt/Core/BinaryBasicBlockFeature.h create mode 100644 bolt/include/bolt/Passes/FeatureMiner.h create mode 100644 bolt/include/bolt/Passes/StaticBranchInfo.h create mode 100644 bolt/lib/Core/BinaryBasicBlockFeature.cpp create mode 100644 bolt/lib/Passes/FeatureMiner.cpp create mode 100644 bolt/lib/Passes/StaticBranchInfo.cpp diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h index 02be9c1d4..a39d38d6b 100644 --- a/bolt/include/bolt/Core/BinaryBasicBlock.h +++ b/bolt/include/bolt/Core/BinaryBasicBlock.h @@ -15,6 +15,7 @@ #ifndef BOLT_CORE_BINARY_BASIC_BLOCK_H #define BOLT_CORE_BINARY_BASIC_BLOCK_H +#include "bolt/Core/BinaryBasicBlockFeature.h" #include "bolt/Core/FunctionLayout.h" #include "bolt/Core/MCPlus.h" #include "llvm/ADT/GraphTraits.h" @@ -25,6 +26,7 @@ #include "llvm/Support/raw_ostream.h" #include #include +#include namespace llvm { class MCCodeEmitter; @@ -147,6 +149,12 @@ private: /// Last computed hash value. 
mutable uint64_t Hash{0}; + std::set ChildrenSet; + + std::set ParentSet; + + BinaryBasicBlockFeature BlockFeatures; + private: BinaryBasicBlock() = delete; BinaryBasicBlock(const BinaryBasicBlock &) = delete; @@ -385,11 +393,14 @@ public: /// If the basic block ends with a conditional branch (possibly followed by /// an unconditional branch) and thus has 2 successors, return a successor /// corresponding to a jump condition which could be true or false. - /// Return nullptr if the basic block does not have a conditional jump. + /// Return the only successor if it's followed by an unconditional branch. + /// Return nullptr otherwise. BinaryBasicBlock *getConditionalSuccessor(bool Condition) { - if (succ_size() != 2) - return nullptr; - return Successors[Condition == true ? 0 : 1]; + if (succ_size() == 2) + return Successors[Condition == true ? 0 : 1]; + if (succ_size() == 1) + return Successors[0]; + return nullptr; } const BinaryBasicBlock *getConditionalSuccessor(bool Condition) const { @@ -410,6 +421,13 @@ public: return const_cast(this)->getFallthrough(); } + /// Return branch info of the first (or only) successor. + const BinaryBranchInfo &getOnlyBranchInfo() const { + assert(BranchInfo.size() > 0 && + "could only be called for blocks with at least 1 successor"); + return BranchInfo[0]; + }; + /// Return branch info corresponding to a taken branch. const BinaryBranchInfo &getTakenBranchInfo() const { assert(BranchInfo.size() == 2 && @@ -818,6 +836,36 @@ public: OutputAddressRange.second = Address; } + /// Sets features of this BB. + void setFeatures(BinaryBasicBlockFeature BBF) { + BlockFeatures = BBF; + } + + /// Gets a copy of the numeric features of this BB. + BinaryBasicBlockFeature getFeatures() { + return BlockFeatures; + } + + /// Gets a copy of the children set of this BB. + std::set getChildrenSet() { + return ChildrenSet; + } + + /// Gets a copy of the parent set of this BB. + std::set getParentSet() { + return ParentSet; + } + + /// Inserts a node into the children set of this BB.
+ void insertChildrenSet(BinaryBasicBlock *Node) { + ChildrenSet.insert(Node); + } + + /// Inserts a node into the parent set of this BB. + void insertParentSet(BinaryBasicBlock *Node) { + ParentSet.insert(Node); + } + /// Gets the memory address range of this BB in the input binary. std::pair getInputAddressRange() const { return InputRange; @@ -991,7 +1039,8 @@ private: #if defined(LLVM_ON_UNIX) /// Keep the size of the BinaryBasicBlock within a reasonable size class /// (jemalloc bucket) on Linux -static_assert(sizeof(BinaryBasicBlock) <= 256); +/// The size threshold is expanded from 256 to 2048 to contain the extra BB features +static_assert(sizeof(BinaryBasicBlock) <= 2048, ""); #endif bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS); diff --git a/bolt/include/bolt/Core/BinaryBasicBlockFeature.h b/bolt/include/bolt/Core/BinaryBasicBlockFeature.h new file mode 100644 index 000000000..2b4809b1a --- /dev/null +++ b/bolt/include/bolt/Core/BinaryBasicBlockFeature.h @@ -0,0 +1,268 @@ +//===- bolt/Core/BinaryBasicBlockFeature.h - Low-level basic block -----*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Features of BinaryBasicBlock +// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_CORE_BINARY_BASIC_BLOCK_FEATURE_H +#define BOLT_CORE_BINARY_BASIC_BLOCK_FEATURE_H + +#include "bolt/Core/FunctionLayout.h" +#include "bolt/Core/MCPlus.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +namespace llvm { + +namespace bolt { + +class BinaryBasicBlockFeature { + +public: + int32_t Opcode; + + int16_t Direction; + + int32_t CmpOpcode; + + int16_t LoopHeader; + + int16_t ProcedureType; + + int64_t Count; + + int64_t FallthroughCount; + + int64_t TotalLoops; + + int64_t LoopDepth; + + int64_t LoopNumBlocks; + + int64_t LocalExitingBlock; + + int64_t LocalLatchBlock; + + int64_t LocalLoopHeader; + + int64_t Call; + + int64_t DeltaTaken; + + int64_t NumLoads; + + int64_t NumCalls; + + int64_t OperandRAType; + + int64_t OperandRBType; + + int64_t BasicBlockSize; + + int64_t NumBasicBlocks; + + int64_t HasIndirectCalls; + + std::vector EndOpcode_vec; + + std::vector LoopHeader_vec; + + std::vector Backedge_vec; + + std::vector Exit_vec; + + std::vector Call_vec; + + std::vector BasicBlockSize_vec; + + std::vector InferenceFeatures; + + uint64_t FuncExec; + + int32_t ParentChildNum; + + int32_t ParentCount; + + int32_t ChildParentNum; + + int32_t ChildCount; + +public: + void setOpcode(const int32_t &BlockOpcode) { Opcode = BlockOpcode; } + + void setDirection(const int16_t &BlockDirection) { + Direction = BlockDirection; + } + + void setCmpOpcode(const int32_t &BlockCmpOpcode) { + CmpOpcode = BlockCmpOpcode; + } + + void setLoopHeader(const int16_t &BlockLoopHeader) { + LoopHeader = BlockLoopHeader; + } + 
+ void setProcedureType(const int16_t &BlockProcedureType) { + ProcedureType = BlockProcedureType; + } + + void setCount(const int64_t &BlockCount) { Count = BlockCount; } + + void setFallthroughCount(const int64_t &BlockFallthroughCount) { + FallthroughCount = BlockFallthroughCount; + } + + void setTotalLoops(const int64_t &BlockTotalLoops) { + TotalLoops = BlockTotalLoops; + } + + void setLoopDepth(const int64_t &BlockLoopDepth) { + LoopDepth = BlockLoopDepth; + } + + void setLoopNumBlocks(const int64_t &BlockLoopNumBlocks) { + LoopNumBlocks = BlockLoopNumBlocks; + } + + void setLocalExitingBlock(const int64_t &BlockLocalExitingBlock) { + LocalExitingBlock = BlockLocalExitingBlock; + } + + void setLocalLatchBlock(const int64_t &BlockLocalLatchBlock) { + LocalLatchBlock = BlockLocalLatchBlock; + } + + void setLocalLoopHeader(const int64_t &BlockLocalLoopHeader) { + LocalLoopHeader = BlockLocalLoopHeader; + } + + void setDeltaTaken(const int64_t &BlockDeltaTaken) { + DeltaTaken = BlockDeltaTaken; + } + + void setNumLoads(const int64_t &BlockNumLoads) { NumLoads = BlockNumLoads; } + + void setNumCalls(const int64_t &BlockNumCalls) { NumCalls = BlockNumCalls; } + + void setOperandRAType(const int64_t &BlockOperandRAType) { + OperandRAType = BlockOperandRAType; + } + + void setOperandRBType(const int64_t &BlockOperandRBType) { + OperandRBType = BlockOperandRBType; + } + + void setBasicBlockSize(const int64_t &BlockBasicBlockSize) { + BasicBlockSize = BlockBasicBlockSize; + } + + void setNumBasicBlocks(const int64_t &BlockNumBasicBlocks) { + NumBasicBlocks = BlockNumBasicBlocks; + } + + void setHasIndirectCalls(const int64_t &BlockHasIndirectCalls) { + HasIndirectCalls = BlockHasIndirectCalls; + } + + void setEndOpcodeVec(const int32_t &EndOpcode) { + EndOpcode_vec.push_back(EndOpcode); + } + + void setLoopHeaderVec(const int16_t &LoopHeader) { + LoopHeader_vec.push_back(LoopHeader); + } + + void setBackedgeVec(const int16_t &Backedge) { + 
Backedge_vec.push_back(Backedge); + } + + void setExitVec(const int16_t &Exit) { Exit_vec.push_back(Exit); } + + void setCallVec(const int16_t &Call) { Call_vec.push_back(Call); } + + void setBasicBlockSizeVec(const int64_t &BasicBlockSize) { + BasicBlockSize_vec.push_back(BasicBlockSize); + } + + void setFunExec(const uint64_t &BlockFuncExec) { FuncExec = BlockFuncExec; } + + void setParentChildNum(const int32_t &BlockParentChildNum) { + ParentChildNum = BlockParentChildNum; + } + + void setParentCount(const int32_t &BlockParentCount) { + ParentCount = BlockParentCount; + } + + void setChildParentNum(const int32_t &BlockChildParentNum) { + ChildParentNum = BlockChildParentNum; + } + + void setChildCount(const int32_t &BlockChildCount) { + ChildCount = BlockChildCount; + } + + void setInferenceFeatures() { + + if (Count == -1 || FallthroughCount == -1) { + return; + } + if (ParentChildNum == -1 && ParentCount == -1 && ChildParentNum == -1 && + ChildCount == -1) { + return; + } + + InferenceFeatures.push_back(static_cast(Direction)); + InferenceFeatures.push_back(static_cast(LoopHeader)); + InferenceFeatures.push_back(static_cast(ProcedureType)); + InferenceFeatures.push_back(static_cast(OperandRAType)); + InferenceFeatures.push_back(static_cast(OperandRBType)); + InferenceFeatures.push_back(static_cast(LoopHeader_vec[0])); + InferenceFeatures.push_back(static_cast(Backedge_vec[0])); + InferenceFeatures.push_back(static_cast(Exit_vec[0])); + InferenceFeatures.push_back(static_cast(LoopHeader_vec[1])); + InferenceFeatures.push_back(static_cast(Call_vec[0])); + InferenceFeatures.push_back(static_cast(LocalExitingBlock)); + InferenceFeatures.push_back(static_cast(HasIndirectCalls)); + InferenceFeatures.push_back(static_cast(LocalLatchBlock)); + InferenceFeatures.push_back(static_cast(LocalLoopHeader)); + InferenceFeatures.push_back(static_cast(Opcode)); + InferenceFeatures.push_back(static_cast(CmpOpcode)); + InferenceFeatures.push_back(static_cast(EndOpcode_vec[0])); 
+ InferenceFeatures.push_back(static_cast(EndOpcode_vec[1])); + InferenceFeatures.push_back(static_cast(FuncExec)); + InferenceFeatures.push_back(static_cast(NumBasicBlocks)); + InferenceFeatures.push_back(static_cast(BasicBlockSize)); + InferenceFeatures.push_back(static_cast(BasicBlockSize_vec[0])); + InferenceFeatures.push_back(static_cast(BasicBlockSize_vec[1])); + InferenceFeatures.push_back(static_cast(LoopNumBlocks)); + InferenceFeatures.push_back(static_cast(NumLoads)); + InferenceFeatures.push_back(static_cast(NumCalls)); + InferenceFeatures.push_back(static_cast(TotalLoops)); + InferenceFeatures.push_back(static_cast(DeltaTaken)); + InferenceFeatures.push_back(static_cast(LoopDepth)); + InferenceFeatures.push_back(static_cast(ParentChildNum)); + InferenceFeatures.push_back(static_cast(ParentCount)); + InferenceFeatures.push_back(static_cast(ChildParentNum)); + InferenceFeatures.push_back(static_cast(ChildCount)); + } + + std::vector getInferenceFeatures() { return InferenceFeatures; } +}; +} // namespace bolt +} // namespace llvm + +#endif \ No newline at end of file diff --git a/bolt/include/bolt/Passes/FeatureMiner.h b/bolt/include/bolt/Passes/FeatureMiner.h new file mode 100644 index 000000000..6170aa62d --- /dev/null +++ b/bolt/include/bolt/Passes/FeatureMiner.h @@ -0,0 +1,176 @@ +//===--- Passes/FeatureMiner.h +//---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// A very simple feature extractor based on Calder's paper +// Evidence-based static branch prediction using machine learning +// https://dl.acm.org/doi/10.1145/239912.239923 +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ +#define LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ + +#include "bolt/Core/BinaryData.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/BinaryLoop.h" +#include "bolt/Passes/BinaryPasses.h" +#include "bolt/Passes/DominatorAnalysis.h" +#include "bolt/Passes/StaticBranchInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include + +namespace llvm { +namespace bolt { + +class FeatureMiner : public BinaryFunctionPass { +private: + std::unique_ptr SBI; + /// BasicBlockInfo - This structure holds feature information about the target + /// BasicBlock of either the taken or the fallthrough paths of a given branch. 
+ struct BasicBlockInfo { + std::optional BranchDominates; // 1 - dominates, 0 - does not dominate + std::optional + BranchPostdominates; // 1 - postdominates, 0 - does not PD + std::optional LoopHeader; // 1 - loop header, 0 - not a loop header + std::optional Backedge; // 1 - loop back, 0 - not a loop back + std::optional Exit; // 1 - loop exit, 0 - not a loop exit + std::optional Call; // 1 - program call, 0 - not a program call + std::optional NumCalls; + std::optional NumLoads; + std::optional NumStores; + std::optional EndOpcode; // 0 = NOTHING + std::string EndOpcodeStr = "UNDEF"; + std::optional BasicBlockSize; + std::string FromFunName = "UNDEF"; + uint32_t FromBb; + std::string ToFunName = "UNDEF"; + uint32_t ToBb; + + std::optional NumCallsExit; + std::optional NumCallsInvoke; + std::optional NumIndirectCalls; + std::optional NumTailCalls; + }; + + typedef std::unique_ptr BBIPtr; + + /// BranchFeaturesInfo - This structure holds feature information about each + /// two-way branch from the program. 
+ struct BranchFeaturesInfo { + std::string OpcodeStr = "UNDEF"; + std::string CmpOpcodeStr = "UNDEF"; + bool Simple = 0; + + std::optional Opcode; + std::optional CmpOpcode; + std::optional Count; + std::optional MissPredicted; + std::optional FallthroughCount; + std::optional FallthroughMissPredicted; + BBIPtr TrueSuccessor = std::make_unique(); + BBIPtr FalseSuccessor = std::make_unique(); + std::optional ProcedureType; // 1 - Leaf, 0 - NonLeaf, 2 - CallSelf + std::optional LoopHeader; // 1 — loop header, 0 - not a loop header + std::optional Direction; // 1 - Forward Branch, 0 - Backward Branch + + std::optional NumOuterLoops; + std::optional TotalLoops; + std::optional MaximumLoopDepth; + std::optional LoopDepth; + std::optional LoopNumExitEdges; + std::optional LoopNumExitBlocks; + std::optional LoopNumExitingBlocks; + std::optional LoopNumLatches; + std::optional LoopNumBlocks; + std::optional LoopNumBackEdges; + std::optional NumLoads; + std::optional NumStores; + + std::optional LocalExitingBlock; + std::optional LocalLatchBlock; + std::optional LocalLoopHeader; + std::optional Call; + + std::optional NumCalls; + std::optional NumCallsExit; + std::optional NumCallsInvoke; + std::optional NumIndirectCalls; + std::optional NumTailCalls; + std::optional NumSelfCalls; + + std::optional NumBasicBlocks; + + std::optional DeltaTaken; + + std::optional OperandRAType; + std::optional OperandRBType; + + std::optional BasicBlockSize; + + std::optional BranchOffset; + }; + + typedef std::unique_ptr BFIPtr; + + std::vector BranchesInfoSet; + + /// getProcedureType - Determines which category the function falls into: + /// Leaf, Non-leaf or Calls-self. + int8_t getProcedureType(BinaryFunction &Function, BinaryContext &BC); + + /// addSuccessorInfo - Discovers feature information for the target successor + /// basic block, and inserts it into the static branch info container. 
+ void addSuccessorInfo(BFIPtr const &BFI, BinaryFunction &Function, + BinaryContext &BC, BinaryBasicBlock &BB, bool SuccType); + + /// extractFeatures - Extracts the feature information for each two-way branch + /// from the program. + void extractFeatures(BinaryFunction &Function, BinaryContext &BC); + + void generateInstFeatures(BinaryContext &BC, BinaryBasicBlock &BB, + BFIPtr const &BFI, int Index); + /// dumpSuccessorFeatures - Dumps the feature information about the target + /// BasicBlock of either the taken or the fallthrough paths of a given branch. + void generateSuccessorFeatures(BBIPtr &Successor, + BinaryBasicBlockFeature *BBF); + + /// dumpFeatures - Dumps the feature information about each two-way branch + /// from the program. + void dumpFeatures(raw_ostream &Printer, uint64_t FunctionAddress, + uint64_t FunctionFrequency); + + /// dumpProfileData - Dumps a limited version of the inout profile data + /// that contains only profile for conditional branches, unconditional + /// branches and terminators that aren't branches. 
+ void dumpProfileData(BinaryFunction &Function, raw_ostream &Printer); + +public: + explicit FeatureMiner(const cl::opt &PrintPass) + : BinaryFunctionPass(PrintPass) {} + + std::ofstream trainPrinter; + + const char *getName() const override { return "feature-miner"; } + + void runOnFunctions(BinaryContext &BC) override; + void inferenceFeatures(BinaryFunction &Function); + void generateProfileFeatures(BinaryBasicBlock *BB, + BinaryBasicBlockFeature *BBF); +}; + +} // namespace bolt +} // namespace llvm + +#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_FEATUREMINER_H_ */ diff --git a/bolt/include/bolt/Passes/StaticBranchInfo.h b/bolt/include/bolt/Passes/StaticBranchInfo.h new file mode 100644 index 000000000..8de8df793 --- /dev/null +++ b/bolt/include/bolt/Passes/StaticBranchInfo.h @@ -0,0 +1,108 @@ +//===------ Passes/StaticBranchInfo.h -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an auxiliary class to the feature miner, static branch probability +// and frequency passes. This class is responsible for finding loop info (loop +// back edges, loop exit edges and loop headers) of a function. It also finds +// basic block info (if a block contains store and call instructions) and if a +// basic block contains a call to the exit. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ +#define LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ + +#include "bolt/Core/BinaryContext.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/BinaryLoop.h" +#include "llvm/MC/MCSymbol.h" +#include + +namespace llvm { +namespace bolt { + +class StaticBranchInfo { + +public: + /// An edge indicates that a control flow may go from a basic block (source) + /// to another one (destination), and this pair of basic blocks will be used + /// to index maps and retrieve content of sets. + typedef std::pair Edge; + +private: + /// Holds the loop headers of a given function. + DenseSet LoopHeaders; + + /// Holds the loop backedges of a given function. + DenseSet BackEdges; + + /// Holds the loop exit edges of a given function. + DenseSet ExitEdges; + + /// Holds the basic blocks of a given function + /// that contain at least one call instruction. + DenseSet CallSet; + + /// Holds the basic blocks of a given function + /// that contain at least one store instruction. + DenseSet StoreSet; + + unsigned NumLoads; + unsigned NumStores; + +public: + unsigned getNumLoads() { return NumLoads; } + + unsigned getNumStores() { return NumStores; } + + /// findLoopEdgesInfo - Finds all loop back edges, loop exit edges + /// and loop headers within the function. + void findLoopEdgesInfo(const BinaryLoopInfo &LoopsInfo); + + /// findBasicBlockInfo - Finds all call and store instructions within + /// the basic blocks of a given function. + void findBasicBlockInfo(const BinaryFunction &Function, BinaryContext &BC); + + /// isBackEdge - Checks if the edge is a loop back edge. + bool isBackEdge(const Edge &CFGEdge) const; + + /// isBackEdge - Checks if the edge is a loop back edge. + bool isBackEdge(const BinaryBasicBlock *SrcBB, + const BinaryBasicBlock *DstBB) const; + + /// isExitEdge - Checks if the edge is a loop exit edge.
+ bool isExitEdge(const BinaryLoop::Edge &CFGEdge) const; + + /// isExitEdge - Checks if the edge is a loop exit edge. + bool isExitEdge(const BinaryBasicBlock *SrcBB, + const BinaryBasicBlock *DstBB) const; + + /// isLoopHeader - Checks if the basic block is a loop header. + bool isLoopHeader(const BinaryBasicBlock *BB) const; + + /// hasCallInst - Checks if the basic block has a call instruction. + bool hasCallInst(const BinaryBasicBlock *BB) const; + + /// hasStoreInst - Checks if the basic block has a store instruction. + bool hasStoreInst(const BinaryBasicBlock *BB) const; + + /// countBackEdges - Compute the number of BB's successor that are back edges. + unsigned countBackEdges(BinaryBasicBlock *BB) const; + + /// countExitEdges - Compute the number of BB's successor that are exit edges. + unsigned countExitEdges(BinaryBasicBlock *BB) const; + + /// clear - Cleans up all the content from the data structs used. + void clear(); +}; + +} // namespace bolt +} // namespace llvm + +#endif /* LLVM_TOOLS_LLVM_BOLT_PASSES_STATICBRANCHINFO_H_ */ diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h index 916b4f7e2..bf732d47c 100644 --- a/bolt/include/bolt/Profile/DataReader.h +++ b/bolt/include/bolt/Profile/DataReader.h @@ -22,6 +22,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include @@ -44,6 +45,15 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LBREntry &LBR) { return OS; } +extern "C" { +typedef void *(*CreateONNXRunnerFunc)(const char *); +typedef void (*DeleteONNXRunnerFunc)(void *); +typedef std::vector (*RunONNXModelFunc)(void *, + const std::vector &, + const std::vector &, + const std::vector &, int); +} + struct Location { bool IsSymbol; StringRef Name; @@ -263,7 +273,8 @@ struct FuncSampleData { class DataReader : public ProfileReaderBase { public: explicit DataReader(StringRef Filename) - : ProfileReaderBase(Filename), Diag(errs()) {} + : 
ProfileReaderBase(Filename), Diag(errs()), onnxRunner(nullptr), + libHandle(nullptr), handleOnnxRuntime(nullptr) {} StringRef getReaderName() const override { return "branch profile reader"; } @@ -282,7 +293,87 @@ public: /// Return all event names used to collect this profile StringSet<> getEventNames() const override { return EventNames; } + ~DataReader() { + // delete onnxrunner; + if (onnxRunner && libHandle && handleOnnxRuntime) { + DeleteONNXRunnerFunc deleteONNXRunner = + (DeleteONNXRunnerFunc)dlsym(libHandle, "deleteONNXRunner"); + deleteONNXRunner(onnxRunner); + dlclose(libHandle); + dlclose(handleOnnxRuntime); + } + } + + /// Initialize the onnxruntime model. + void initializeONNXRunner(const std::string &modelPath) { + if (!onnxRunner && !libHandle && !handleOnnxRuntime) { + handleOnnxRuntime = + dlopen("libonnxruntime.so", RTLD_LAZY | RTLD_GLOBAL); + if (handleOnnxRuntime == nullptr) { + outs() << "error: llvm-bolt failed during loading onnxruntime.so.\n"; + exit(1); + } + libHandle = dlopen("libONNXRunner.so", RTLD_LAZY); + if (libHandle == nullptr) { + outs() << "error: llvm-bolt failed during loading libONNXRunner.so.\n"; + exit(1); + } + CreateONNXRunnerFunc createONNXRunner = + (CreateONNXRunnerFunc)dlsym(libHandle, "createONNXRunner"); + onnxRunner = createONNXRunner(modelPath.c_str()); + } + } + + /// Inference step for predicting the BB counts based on the BB features. 
+ float ONNXInference(const std::vector &input_string, + const std::vector &input_int64, + const std::vector &input_float, int batch_size = 1) { + if (onnxRunner && libHandle) { + RunONNXModelFunc runONNXModel = + (RunONNXModelFunc)dlsym(libHandle, "runONNXModel"); + std::vector model_preds = runONNXModel( + onnxRunner, input_string, input_int64, input_float, batch_size); + if (model_preds.size() <= 0) { + outs() << "error: llvm-bolt model prediction result cannot be empty.\n"; + exit(1); + } + float pred = model_preds[0]; + return pred; + } + return -1.0; + } + + /// Set the annotating threshold for the model prediction. + void setThreshold(float annotate_threshold) { + threshold = annotate_threshold; + } + protected: + /// The onnxruntime model pointer read from the input model path. + void *onnxRunner; + + /// The library handle of the ai4compiler framework. + void *libHandle; + + /// The library handle of the onnxruntime. + void *handleOnnxRuntime; + + /// The annotating threshold for the model prediction; set via setThreshold(). + float threshold; + + /// Return the annotating threshold for the model prediction. + float getThreshold() const { return threshold; } + + /// The counting value of the total modified BB-count number. + uint64_t modified_BB_total = 0; + + /// Increase the total modified BB-count number by the BB number modified + /// within the function. + void addModifiedBBTotal(uint64_t &value) { modified_BB_total += value; } + + /// Return the counting value of the total modified BB-count number. + uint64_t getModifiedBBTotal() const { return modified_BB_total; } + /// Read profile information available for the function.
void readProfile(BinaryFunction &BF); diff --git a/bolt/lib/Core/BinaryBasicBlockFeature.cpp b/bolt/lib/Core/BinaryBasicBlockFeature.cpp new file mode 100644 index 000000000..e1a2a3dd8 --- /dev/null +++ b/bolt/lib/Core/BinaryBasicBlockFeature.cpp @@ -0,0 +1,21 @@ +//===- bolt/Core/BinaryBasicBlockFeature.cpp - Low-level basic block +//-------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the BinaryBasicBlock class. +// +//===----------------------------------------------------------------------===// + +#include "bolt/Core/BinaryBasicBlock.h" +#include "bolt/Core/BinaryBasicBlockFeature.h" + +#define DEBUG_TYPE "bolt" + +namespace llvm { +namespace bolt {} // namespace bolt +} // namespace llvm \ No newline at end of file diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt index a4612fb93..f93147d39 100644 --- a/bolt/lib/Core/CMakeLists.txt +++ b/bolt/lib/Core/CMakeLists.txt @@ -12,6 +12,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_library(LLVMBOLTCore BinaryBasicBlock.cpp + BinaryBasicBlockFeature.cpp BinaryContext.cpp BinaryData.cpp BinaryEmitter.cpp diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt index b8bbe59a6..e9ccea17c 100644 --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt @@ -13,6 +13,7 @@ add_llvm_library(LLVMBOLTPasses DataflowInfoManager.cpp FrameAnalysis.cpp FrameOptimizer.cpp + FeatureMiner.cpp FixRelaxationPass.cpp FixRISCVCallsPass.cpp HFSort.cpp @@ -41,6 +42,7 @@ add_llvm_library(LLVMBOLTPasses StackAvailableExpressions.cpp StackPointerTracking.cpp StackReachingUses.cpp + StaticBranchInfo.cpp StokeInfo.cpp TailDuplication.cpp ThreeWayBranch.cpp diff --git a/bolt/lib/Passes/FeatureMiner.cpp 
b/bolt/lib/Passes/FeatureMiner.cpp new file mode 100644 index 000000000..d93aef648 --- /dev/null +++ b/bolt/lib/Passes/FeatureMiner.cpp @@ -0,0 +1,572 @@ +//===--- Passes/FeatureMiner.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// A very simple feature extractor based on Calder's paper +// Evidence-based static branch prediction using machine learning +// https://dl.acm.org/doi/10.1145/239912.239923 +//===----------------------------------------------------------------------===// + +#include "bolt/Passes/DataflowInfoManager.h" +#include "bolt/Passes/FeatureMiner.h" +#include "bolt/Passes/StaticBranchInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" + +#undef DEBUG_TYPE +#define DEBUG_TYPE "bolt-feature-miner" + +using namespace llvm; +using namespace bolt; + +namespace opts { +extern cl::opt BlockCorrection; + +} // namespace opts + +namespace llvm { +namespace bolt { + +class BinaryFunction; + +int8_t FeatureMiner::getProcedureType(BinaryFunction &Function, + BinaryContext &BC) { + int8_t ProcedureType = 1; + for (auto &BB : Function) { + for (auto &Inst : BB) { + if (BC.MIB->isCall(Inst)) { + ProcedureType = 0; // non-leaf type + if (const auto *CalleeSymbol = BC.MIB->getTargetSymbol(Inst)) { + const auto *Callee = BC.getFunctionForSymbol(CalleeSymbol); + if (Callee && + Callee->getFunctionNumber() == Function.getFunctionNumber()) { + return 2; // call self type + } + } + } + } + } + return ProcedureType; // leaf type +} + +void FeatureMiner::addSuccessorInfo(BFIPtr const &BFI, BinaryFunction &Function, + BinaryContext &BC, BinaryBasicBlock &BB, + bool SuccType) { + + BinaryBasicBlock *Successor = BB.getConditionalSuccessor(SuccType); + + if (!Successor) + return; + + unsigned 
NumCalls{0}; + + for (auto &Inst : BB) { + if (BC.MIB->isCall(Inst)) { + ++NumCalls; + } + } + + BBIPtr SuccBBInfo = std::make_unique(); + + // Check if the successor basic block is a loop header and store it. + SuccBBInfo->LoopHeader = SBI->isLoopHeader(Successor); + + SuccBBInfo->BasicBlockSize = Successor->size(); + + // Check if the edge getting to the successor basic block is a loop + // exit edge and store it. + SuccBBInfo->Exit = SBI->isExitEdge(&BB, Successor); + + // Check if the edge getting to the successor basic block is a loop + // back edge and store it. + SuccBBInfo->Backedge = SBI->isBackEdge(&BB, Successor); + + MCInst *SuccInst = Successor->getTerminatorBefore(nullptr); + + // Store information about the branch type ending the successor basic block + SuccBBInfo->EndOpcode = (SuccInst && BC.MIA->isBranch(*SuccInst)) + ? SuccInst->getOpcode() + : 0; // 0 = NOTHING + + // Record whether a procedure call was found. NOTE(review): NumCalls is + // counted over BB above, not over Successor; confirm which is intended. + SuccBBInfo->Call = (NumCalls > 0) ? 1 // Contains a call instruction + : 0; // Does not contain a call instruction + + uint32_t Offset = BB.getEndOffset(); + + if (SuccType) { + BFI->TrueSuccessor = std::move(SuccBBInfo); + // Check if the taken branch is a forward + // or a backwards branch and store it + BFI->Direction = (Function.isForwardBranch(&BB, Successor) == true) + ?
1 // Forward branch + : 0; // Backwards branch + + auto OnlyBranchInfo = BB.getOnlyBranchInfo(); + BFI->Count = OnlyBranchInfo.Count; + + if (Offset) { + uint32_t TargetOffset = Successor->getInputOffset(); + uint32_t BranchOffset = Offset; + if (BranchOffset != UINT32_MAX && TargetOffset != UINT32_MAX) { + int64_t Delta = static_cast(TargetOffset) - + static_cast(BranchOffset); + BFI->DeltaTaken = std::abs(Delta); + } + } + } else { + if (BB.succ_size() == 2) { + auto FallthroughBranchInfo = BB.getFallthroughBranchInfo(); + BFI->FallthroughCount = FallthroughBranchInfo.Count; + } else { + auto OnlyBranchInfo = BB.getOnlyBranchInfo(); + BFI->FallthroughCount = OnlyBranchInfo.Count; + } + BFI->FalseSuccessor = std::move(SuccBBInfo); + } +} + +void FeatureMiner::extractFeatures(BinaryFunction &Function, + BinaryContext &BC) { + int8_t ProcedureType = getProcedureType(Function, BC); + auto Info = DataflowInfoManager(Function, nullptr, nullptr); + const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo(); + + bool Simple = Function.isSimple(); + const auto &Order = Function.dfs(); + std::string Function_name = Function.getPrintName(); + + for (auto *BBA : Order) { + + auto &BB = *BBA; + + BinaryBasicBlockFeature BBF = BB.getFeatures(); + + unsigned TotalLoops{0}; + unsigned LoopDepth{0}; + unsigned LoopNumBlocks{0}; + + bool LocalExitingBlock{false}; + bool LocalLatchBlock{false}; + bool LocalLoopHeader{false}; + + generateProfileFeatures(&BB, &BBF); + + BinaryLoop *Loop = LoopsInfo.getLoopFor(&BB); + if (Loop) { + SmallVector ExitingBlocks; + Loop->getExitingBlocks(ExitingBlocks); + + SmallVector ExitBlocks; + Loop->getExitBlocks(ExitBlocks); + + SmallVector ExitEdges; + Loop->getExitEdges(ExitEdges); + + SmallVector Latches; + Loop->getLoopLatches(Latches); + + TotalLoops = LoopsInfo.TotalLoops; + LoopDepth = Loop->getLoopDepth(); + LoopNumBlocks = Loop->getNumBlocks(); + LocalExitingBlock = Loop->isLoopExiting(&BB); + LocalLatchBlock = Loop->isLoopLatch(&BB); + 
LocalLoopHeader = ((Loop->getHeader() == (&BB)) ? 1 : 0); + } + + unsigned NumLoads{0}; + unsigned NumCalls{0}; + unsigned NumIndirectCalls{0}; + + for (auto &Inst : BB) { + if (BC.MIB->isLoad(Inst)) { + ++NumLoads; + } else if (BC.MIB->isCall(Inst)) { + ++NumCalls; + if (BC.MIB->isIndirectCall(Inst)) + ++NumIndirectCalls; + } + } + + int Index = -2; + bool LoopHeader = SBI->isLoopHeader(&BB); + + BFIPtr BFI = std::make_unique(); + + BFI->TotalLoops = TotalLoops; + BFI->LoopDepth = LoopDepth; + BFI->LoopNumBlocks = LoopNumBlocks; + BFI->LocalExitingBlock = LocalExitingBlock; + BFI->LocalLatchBlock = LocalLatchBlock; + BFI->LocalLoopHeader = LocalLoopHeader; + BFI->NumCalls = NumCalls; + BFI->BasicBlockSize = BB.size(); + BFI->NumBasicBlocks = Function.size(); + + BFI->NumLoads = NumLoads; + BFI->NumIndirectCalls = NumIndirectCalls; + BFI->LoopHeader = LoopHeader; + BFI->ProcedureType = ProcedureType; + + // Adding taken successor info. + addSuccessorInfo(BFI, Function, BC, BB, true); + // Adding fall through successor info. 
+ addSuccessorInfo(BFI, Function, BC, BB, false); + + MCInst ConditionalInst; + bool hasConditionalBranch = false; + MCInst UnconditionalInst; + bool hasUnconditionalBranch = false; + + for (auto &Inst : BB) { + ++Index; + if (!BC.MIA->isConditionalBranch(Inst) && + !BC.MIA->isUnconditionalBranch(Inst)) + continue; + + generateInstFeatures(BC, BB, BFI, Index); + + if (BC.MIA->isConditionalBranch(Inst)) { + ConditionalInst = Inst; + hasConditionalBranch = true; + } + + if (BC.MIA->isUnconditionalBranch(Inst)) { + UnconditionalInst = Inst; + hasUnconditionalBranch = true; + } + } + + if (hasConditionalBranch) { + BFI->Opcode = ConditionalInst.getOpcode(); + + } else { + if (hasUnconditionalBranch) { + BFI->Opcode = UnconditionalInst.getOpcode(); + + } else { + auto Inst = BB.getLastNonPseudoInstr(); + BFI->Opcode = Inst->getOpcode(); + generateInstFeatures(BC, BB, BFI, Index); + } + } + + auto &FalseSuccessor = BFI->FalseSuccessor; + auto &TrueSuccessor = BFI->TrueSuccessor; + + int16_t ProcedureType = (BFI->ProcedureType.has_value()) + ? static_cast(*(BFI->ProcedureType)) + : -1; + + int64_t Count = + (BFI->Count.has_value()) ? static_cast(*(BFI->Count)) : -1; + + int64_t FallthroughCount = + (BFI->FallthroughCount.has_value()) + ? static_cast(*(BFI->FallthroughCount)) + : -1; + + int16_t LoopHeaderValid = (BFI->LoopHeader.has_value()) + ? static_cast(*(BFI->LoopHeader)) + : -1; + + int64_t TotalLoopsValid = (BFI->TotalLoops.has_value()) + ? static_cast(*(BFI->TotalLoops)) + : -1; + int64_t LoopDepthValid = (BFI->LoopDepth.has_value()) + ? static_cast(*(BFI->LoopDepth)) + : -1; + int64_t LoopNumBlocksValid = + (BFI->LoopNumBlocks.has_value()) + ? static_cast(*(BFI->LoopNumBlocks)) + : -1; + int64_t LocalExitingBlockValid = + (BFI->LocalExitingBlock.has_value()) + ? static_cast(*(BFI->LocalExitingBlock)) + : -1; + + int64_t LocalLatchBlockValid = + (BFI->LocalLatchBlock.has_value()) + ? 
static_cast(*(BFI->LocalLatchBlock)) + : -1; + + int64_t LocalLoopHeaderValid = + (BFI->LocalLoopHeader.has_value()) + ? static_cast(*(BFI->LocalLoopHeader)) + : -1; + + int32_t CmpOpcode = (BFI->CmpOpcode.has_value()) + ? static_cast(*(BFI->CmpOpcode)) + : -1; + + int64_t OperandRAType = (BFI->OperandRAType.has_value()) + ? static_cast(*(BFI->OperandRAType)) + : 10; + + int64_t OperandRBType = (BFI->OperandRBType.has_value()) + ? static_cast(*(BFI->OperandRBType)) + : 10; + int16_t Direction = (BFI->Direction.has_value()) + ? static_cast(*(BFI->Direction)) + : -1; + + int64_t DeltaTaken = (BFI->DeltaTaken.has_value()) + ? static_cast(*(BFI->DeltaTaken)) + : -1; + + int64_t NumLoadsValid = (BFI->NumLoads.has_value()) + ? static_cast(*(BFI->NumLoads)) + : -1; + + int64_t BasicBlockSize = (BFI->BasicBlockSize.has_value()) + ? static_cast(*(BFI->BasicBlockSize)) + : -1; + + int64_t NumBasicBlocks = (BFI->NumBasicBlocks.has_value()) + ? static_cast(*(BFI->NumBasicBlocks)) + : -1; + + int64_t NumCallsValid = (BFI->NumCalls.has_value()) + ? static_cast(*(BFI->NumCalls)) + : -1; + + int64_t NumIndirectCallsValid = + (BFI->NumIndirectCalls.has_value()) + ? static_cast(*(BFI->NumIndirectCalls)) + : -1; + + int64_t HasIndirectCalls = (NumIndirectCallsValid > 0) ? 1 : 0; + + int32_t Opcode = + (BFI->Opcode.has_value()) ? static_cast(*(BFI->Opcode)) : -1; + + uint64_t fun_exec = Function.getExecutionCount(); + fun_exec = (fun_exec != UINT64_MAX) ? 
fun_exec : 0; + + BBF.setDirection(Direction); + BBF.setDeltaTaken(DeltaTaken); + BBF.setOpcode(Opcode); + BBF.setCmpOpcode(CmpOpcode); + BBF.setOperandRAType(OperandRAType); + BBF.setOperandRBType(OperandRBType); + BBF.setFunExec(fun_exec); + BBF.setTotalLoops(TotalLoopsValid); + BBF.setLoopDepth(LoopDepthValid); + BBF.setLoopNumBlocks(LoopNumBlocksValid); + BBF.setLocalExitingBlock(LocalExitingBlockValid); + BBF.setLocalLatchBlock(LocalLatchBlockValid); + BBF.setLocalLoopHeader(LocalLoopHeaderValid); + BBF.setNumCalls(NumCallsValid); + BBF.setBasicBlockSize(BasicBlockSize); + BBF.setNumBasicBlocks(NumBasicBlocks); + BBF.setNumLoads(NumLoadsValid); + BBF.setHasIndirectCalls(HasIndirectCalls); + BBF.setLoopHeader(LoopHeaderValid); + BBF.setProcedureType(ProcedureType); + BBF.setCount(Count); + BBF.setFallthroughCount(FallthroughCount); + + generateSuccessorFeatures(TrueSuccessor, &BBF); + generateSuccessorFeatures(FalseSuccessor, &BBF); + + FalseSuccessor.reset(); + TrueSuccessor.reset(); + + BBF.setInferenceFeatures(); + BB.setFeatures(BBF); + + BFI.reset(); + } +} + +void FeatureMiner::generateInstFeatures(BinaryContext &BC, BinaryBasicBlock &BB, + BFIPtr const &BFI, int Index) { + + // Holds the branch opcode info. + + BFI->CmpOpcode = 0; + if (Index > -1) { + auto Cmp = BB.begin() + Index; + if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { + // Holding the branch comparison opcode info. 
+ BFI->CmpOpcode = (*Cmp).getOpcode(); + auto getOperandType = [&](const MCOperand &Operand) -> int32_t { + if (Operand.isReg()) + return 0; + else if (Operand.isImm()) + return 1; + else if (Operand.isSFPImm()) + return 2; + else if (Operand.isExpr()) + return 3; + else + return -1; + }; + + const auto InstInfo = BC.MII->get((*Cmp).getOpcode()); + unsigned NumDefs = InstInfo.getNumDefs(); + int32_t NumPrimeOperands = MCPlus::getNumPrimeOperands(*Cmp) - NumDefs; + switch (NumPrimeOperands) { + case 6: { + int32_t RBType = getOperandType((*Cmp).getOperand(NumDefs)); + int32_t RAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); + + if (RBType == 0 && RAType == 0) { + BFI->OperandRBType = RBType; + BFI->OperandRAType = RAType; + } else if (RBType == 0 && (RAType == 1 || RAType == 2)) { + RAType = getOperandType((*Cmp).getOperand(NumPrimeOperands - 1)); + + if (RAType != 1 && RAType != 2) { + RAType = -1; + } + + BFI->OperandRBType = RBType; + BFI->OperandRAType = RAType; + } else { + BFI->OperandRAType = -1; + BFI->OperandRBType = -1; + } + break; + } + case 2: + BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); + BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 1)); + break; + case 3: + BFI->OperandRBType = getOperandType((*Cmp).getOperand(NumDefs)); + BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs + 2)); + break; + case 1: + BFI->OperandRAType = getOperandType((*Cmp).getOperand(NumDefs)); + break; + default: + BFI->OperandRAType = -1; + BFI->OperandRBType = -1; + break; + } + + } else { + Index -= 1; + for (int Idx = Index; Idx > -1; Idx--) { + auto Cmp = BB.begin() + Idx; + if (BC.MII->get((*Cmp).getOpcode()).isCompare()) { + // Holding the branch comparison opcode info. + BFI->CmpOpcode = (*Cmp).getOpcode(); + break; + } + } + } + } +} + +void FeatureMiner::generateSuccessorFeatures(BBIPtr &Successor, + BinaryBasicBlockFeature *BBF) { + + int16_t LoopHeader = (Successor->LoopHeader.has_value()) + ? 
static_cast(*(Successor->LoopHeader)) + : -1; + + int16_t Backedge = (Successor->Backedge.has_value()) + ? static_cast(*(Successor->Backedge)) + : -1; + + int16_t Exit = (Successor->Exit.has_value()) + ? static_cast(*(Successor->Exit)) + : -1; + + int16_t Call = (Successor->Call.has_value()) + ? static_cast(*(Successor->Call)) + : -1; + + int32_t EndOpcode = (Successor->EndOpcode.has_value()) + ? static_cast(*(Successor->EndOpcode)) + : -1; + + int64_t BasicBlockSize = + (Successor->BasicBlockSize.has_value()) + ? static_cast(*(Successor->BasicBlockSize)) + : -1; + + BBF->setEndOpcodeVec(EndOpcode); + BBF->setLoopHeaderVec(LoopHeader); + BBF->setBackedgeVec(Backedge); + BBF->setExitVec(Exit); + BBF->setCallVec(Call); + BBF->setBasicBlockSizeVec(BasicBlockSize); +} + +void FeatureMiner::runOnFunctions(BinaryContext &BC) {} + +void FeatureMiner::inferenceFeatures(BinaryFunction &Function) { + + SBI = std::make_unique(); + + if (Function.empty()) + return; + + if (!Function.isLoopFree()) { + const BinaryLoopInfo &LoopsInfo = Function.getLoopInfo(); + SBI->findLoopEdgesInfo(LoopsInfo); + } + + BinaryContext &BC = Function.getBinaryContext(); + extractFeatures(Function, BC); + + SBI->clear(); +} + +void FeatureMiner::generateProfileFeatures(BinaryBasicBlock *BB, + BinaryBasicBlockFeature *BBF) { + int32_t parentChildNum, parentCount, childParentNum, childCount; + + if (BB->getParentSet().size() == 0) { + parentChildNum = -1; + parentCount = -1; + } else { + parentChildNum = std::numeric_limits::max(); + parentCount = 0; + for (BinaryBasicBlock *parent : BB->getParentSet()) { + if (parent->getChildrenSet().size() < parentChildNum) { + parentChildNum = parent->getChildrenSet().size(); + parentCount = parent->getExecutionCount(); + } else if (parent->getChildrenSet().size() == parentChildNum && + parent->getExecutionCount() > parentCount) { + parentCount = parent->getExecutionCount(); + } + } + } + + if (BB->getChildrenSet().size() == 0) { + childParentNum = -1; + 
childCount = -1; + } else { + childParentNum = std::numeric_limits::max(); + childCount = 0; + for (BinaryBasicBlock *child : BB->getChildrenSet()) { + if (child->getParentSet().size() < childParentNum) { + childParentNum = child->getParentSet().size(); + childCount = child->getExecutionCount(); + } else if (child->getParentSet().size() == childParentNum && + child->getExecutionCount() > childCount) { + childCount = child->getExecutionCount(); + } + } + } + + int64_t parentCountCatch = parentCount > 0 ? 1 : 0; + int64_t childCountCatch = childCount > 0 ? 1 : 0; + + BBF->setParentChildNum(parentChildNum); + BBF->setParentCount(parentCountCatch); + BBF->setChildParentNum(childParentNum); + BBF->setChildCount(childCountCatch); +} + +} // namespace bolt +} // namespace llvm \ No newline at end of file diff --git a/bolt/lib/Passes/StaticBranchInfo.cpp b/bolt/lib/Passes/StaticBranchInfo.cpp new file mode 100644 index 000000000..585dbcae2 --- /dev/null +++ b/bolt/lib/Passes/StaticBranchInfo.cpp @@ -0,0 +1,143 @@ +//===------ Passes/StaticBranchInfo.cpp -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an auxiliary class to the feature miner, static branch probability +// and frequency passes. This class is responsible for finding loop info (loop +// back edges, loop exit edges and loop headers) of a function. It also finds +// basic block info (if a block contains store and call instructions) and if a +// basic block contains a call to the exit. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryLoop.h"
+#include "bolt/Passes/StaticBranchInfo.h"
+
+namespace llvm {
+namespace bolt {
+
+/// Records the headers, back edges and exit edges of every loop in
+/// \p LoopsInfo, including nested loops.
+void StaticBranchInfo::findLoopEdgesInfo(const BinaryLoopInfo &LoopsInfo) {
+  // Traverse discovered loops
+  std::stack<BinaryLoop *> Loops;
+  for (BinaryLoop *BL : LoopsInfo)
+    Loops.push(BL);
+
+  while (!Loops.empty()) {
+    BinaryLoop *Loop = Loops.top();
+    Loops.pop();
+    BinaryBasicBlock *LoopHeader = Loop->getHeader();
+    LoopHeaders.insert(LoopHeader);
+
+    // Add nested loops in the stack.
+    for (BinaryLoop *SubLoop : *Loop)
+      Loops.push(SubLoop);
+
+    SmallVector<BinaryBasicBlock *, 1> Latches;
+    Loop->getLoopLatches(Latches);
+
+    // Find back edges.
+    for (BinaryBasicBlock *Latch : Latches) {
+      for (BinaryBasicBlock *Succ : Latch->successors()) {
+        if (Succ == LoopHeader) {
+          Edge CFGEdge = std::make_pair(Latch->getLabel(), Succ->getLabel());
+          BackEdges.insert(CFGEdge);
+        }
+      }
+    }
+
+    // Find exit edges.
+    SmallVector<BinaryLoop::Edge, 1> AuxExitEdges;
+    Loop->getExitEdges(AuxExitEdges);
+    for (BinaryLoop::Edge &Exit : AuxExitEdges)
+      ExitEdges.insert(Exit);
+  }
+}
+
+void StaticBranchInfo::findBasicBlockInfo(const BinaryFunction &Function,
+                                          BinaryContext &BC) {
+  for (auto &BB : Function) {
+    for (auto &Inst : BB) {
+      if (BC.MIB->isCall(Inst))
+        CallSet.insert(&BB);
+      else if (BC.MIB->isStore(Inst))
+        StoreSet.insert(&BB);
+    }
+  }
+}
+
+bool StaticBranchInfo::isBackEdge(const Edge &CFGEdge) const {
+  return BackEdges.count(CFGEdge);
+}
+
+bool StaticBranchInfo::isBackEdge(const BinaryBasicBlock *SrcBB,
+                                  const BinaryBasicBlock *DstBB) const {
+  const Edge CFGEdge = std::make_pair(SrcBB->getLabel(), DstBB->getLabel());
+  return isBackEdge(CFGEdge);
+}
+
+bool StaticBranchInfo::isExitEdge(const BinaryLoop::Edge &CFGEdge) const {
+  return ExitEdges.count(CFGEdge);
+}
+
+bool StaticBranchInfo::isExitEdge(const BinaryBasicBlock *SrcBB,
+                                  const BinaryBasicBlock *DstBB) const {
+  const BinaryLoop::Edge CFGEdge =
+      std::make_pair(const_cast<BinaryBasicBlock *>(SrcBB),
+                     const_cast<BinaryBasicBlock *>(DstBB));
+  return isExitEdge(CFGEdge);
+}
+
+bool StaticBranchInfo::isLoopHeader(const BinaryBasicBlock *BB) const {
+  return LoopHeaders.count(BB);
+}
+
+bool StaticBranchInfo::hasCallInst(const BinaryBasicBlock *BB) const {
+  return CallSet.count(BB);
+}
+
+bool StaticBranchInfo::hasStoreInst(const BinaryBasicBlock *BB) const {
+  return StoreSet.count(BB);
+}
+
+unsigned StaticBranchInfo::countBackEdges(BinaryBasicBlock *BB) const {
+  unsigned CountEdges = 0;
+
+  for (BinaryBasicBlock *SuccBB : BB->successors()) {
+    const Edge CFGEdge = std::make_pair(BB->getLabel(), SuccBB->getLabel());
+    if (BackEdges.count(CFGEdge))
+      ++CountEdges;
+  }
+
+  return CountEdges;
+}
+
+unsigned StaticBranchInfo::countExitEdges(BinaryBasicBlock *BB) const {
+  unsigned CountEdges = 0;
+
+  for (BinaryBasicBlock *SuccBB : BB->successors()) {
+    const BinaryLoop::Edge CFGEdge = std::make_pair(BB, SuccBB);
+    if (ExitEdges.count(CFGEdge))
+      ++CountEdges;
+  }
+
+  return CountEdges;
+}
+
+void StaticBranchInfo::clear() {
+  LoopHeaders.clear();
+  BackEdges.clear();
+  ExitEdges.clear();
+  CallSet.clear();
+  StoreSet.clear();
+}
+
+} // namespace bolt
+} // namespace llvm
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp
index 0e12e8cb3..447b71fe7 100644
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@@ -12,13 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "bolt/Profile/DataReader.h"
+#include "bolt/Passes/FeatureMiner.h"
 #include "bolt/Core/BinaryFunction.h"
 #include "bolt/Passes/MCF.h"
 #include "bolt/Utils/Utils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Errc.h"
+#include <limits>
 #include <map>
+#include <vector>
 
 #undef DEBUG_TYPE
 #define DEBUG_TYPE "bolt-prof"
@@ -26,15 +29,23 @@
 using namespace llvm;
 
 namespace opts {
-
+extern cl::opt<bool> BlockCorrection;
 extern cl::OptionCategory BoltCategory;
 extern llvm::cl::opt<unsigned> Verbosity;
 
-static cl::opt<bool>
-DumpData("dump-data",
-  cl::desc("dump parsed bolt data for debugging"),
-  cl::Hidden,
-  cl::cat(BoltCategory));
+static cl::opt<std::string> InputModelFilename("model-path",
+    cl::desc("path to the ONNX model used by -block-correction"),
+    cl::Optional,
+    cl::cat(BoltCategory));
+
+static cl::opt<float> AnnotateThreshold(
+    "annotate-threshold",
+    cl::desc("minimum model prediction needed to correct a zero block count"),
+    cl::init(0.85f), cl::Optional, cl::cat(BoltCategory));
+
+static cl::opt<bool> DumpData("dump-data",
+                              cl::desc("dump parsed bolt data for debugging"),
+                              cl::Hidden, cl::cat(BoltCategory));
 
 } // namespace opts
 
@@ -311,6 +322,17 @@ Error DataReader::readProfilePreCFG(BinaryContext &BC) {
 }
 
 Error DataReader::readProfile(BinaryContext &BC) {
+
+  if (opts::BlockCorrection) {
+    if (opts::InputModelFilename.empty()) {
+      errs() << "error: llvm-bolt expected -model-path= option.\n";
+      exit(1);
+    } else {
+      DataReader::initializeONNXRunner(opts::InputModelFilename);
+      DataReader::setThreshold(opts::AnnotateThreshold);
+    }
+  }
+
   for (auto &BFI : BC.getBinaryFunctions()) {
     BinaryFunction &Function = BFI.second;
     readProfile(Function);
@@ -324,6 +346,12 @@ Error DataReader::readProfile(BinaryContext &BC) {
   }
   BC.setNumUnusedProfiledObjects(NumUnused);
 
+  if (opts::BlockCorrection) {
+    uint64_t modified_total = DataReader::getModifiedBBTotal();
+    outs() << "BOLT-INFO: total modified CFG BB count number is "
+           << modified_total << ".\n";
+  }
+
   return Error::success();
 }
 
@@ -555,6 +583,75 @@ float DataReader::evaluateProfileData(BinaryFunction &BF,
   return MatchRatio;
 }
 
+/// Links \p BB with each of its GraphTraits children (children/parent sets).
+void generateChildrenParentCount(BinaryBasicBlock *BB) {
+  typedef GraphTraits<BinaryBasicBlock *> GraphT;
+
+  for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
+                                          E = GraphT::child_end(BB);
+       CI != E; ++CI) {
+    typename GraphT::NodeRef Child = *CI;
+    BB->insertChildrenSet(Child);
+    Child->insertParentSet(BB);
+  }
+}
+
+/// Fills the children/parent sets of every basic block of \p BF.
+void generateChildrenParentCount(BinaryFunction &BF) {
+  for (BinaryBasicBlock &BB : BF)
+    generateChildrenParentCount(&BB);
+}
+
+/// If \p BB has a zero count and the model prediction reaches \p threshold,
+/// gives it the smallest non-zero neighbour count. Returns 1 if changed.
+uint64_t estimateBBCount(DataReader *dataReaderRef, BinaryBasicBlock *BB,
+                         float threshold) {
+  uint64_t modified = 0;
+  if (BB->getExecutionCount() != 0)
+    return modified;
+
+  std::vector<std::string> input_string;
+  std::vector<int64_t> input_int64;
+  std::vector<float> input_float;
+
+  BinaryBasicBlockFeature BBF = BB->getFeatures();
+  input_int64 = BBF.getInferenceFeatures();
+
+  if (input_int64.empty())
+    return 0;
+
+  float model_pred =
+      dataReaderRef->ONNXInference(input_string, input_int64, input_float);
+  if (model_pred >= threshold) {
+    uint64_t min_neighbor_count = std::numeric_limits<uint64_t>::max();
+    for (BinaryBasicBlock *parent : BB->getParentSet()) {
+      if (parent->getExecutionCount() > 0 &&
+          parent->getExecutionCount() < min_neighbor_count)
+        min_neighbor_count = parent->getExecutionCount();
+    }
+    for (BinaryBasicBlock *child : BB->getChildrenSet()) {
+      if (child->getExecutionCount() > 0 &&
+          child->getExecutionCount() < min_neighbor_count)
+        min_neighbor_count = child->getExecutionCount();
+    }
+    if (min_neighbor_count != std::numeric_limits<uint64_t>::max()) {
+      BB->setExecutionCount(min_neighbor_count);
+      modified = 1;
+    }
+  }
+  return modified;
+}
+
+/// Runs the block-level correction over \p BF; returns the number of fixes.
+uint64_t estimateBBCount(DataReader *dataReaderRef, BinaryFunction &BF,
+                         float threshold) {
+  uint64_t modified_total_func = 0;
+  const auto &Order = BF.dfs();
+  for (BinaryBasicBlock *BB : Order)
+    modified_total_func += estimateBBCount(dataReaderRef, BB, threshold);
+  return modified_total_func;
+}
+
 void DataReader::readSampleData(BinaryFunction &BF) {
   FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames());
   if (!SampleDataOrErr)
@@ -600,6 +697,17 @@ void DataReader::readSampleData(BinaryFunction &BF) {
 
   BF.ExecutionCount = TotalEntryCount;
 
+  if (opts::BlockCorrection) {
+    generateChildrenParentCount(BF);
+    std::unique_ptr<FeatureMiner> FM =
+        std::make_unique<FeatureMiner>(opts::BlockCorrection);
+    FM->inferenceFeatures(BF);
+
+    float threshold = DataReader::getThreshold();
+    uint64_t modified_total_func = estimateBBCount(this, BF, threshold);
+    DataReader::addModifiedBBTotal(modified_total_func);
+  }
+
   estimateEdgeCounts(BF);
 }
 
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index c6ea0b009..4191e18bd 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -106,6 +106,12 @@ cl::opt<bool> DumpDotAll(
              "enable '-print-loops' for color-coded blocks"),
     cl::Hidden, cl::cat(BoltCategory));
 
+cl::opt<bool> BlockCorrection(
+    "block-correction",
+    cl::desc("capture features useful for ML model to inference the count on the binary basic block"
+             " and correct them on CFG."),
+    cl::ZeroOrMore, cl::cat(BoltOptCategory));
+
 static cl::list<std::string>
 ForceFunctionNames("funcs",
   cl::CommaSeparated,
-- 
2.39.3 (Apple Git-146)