llvm-bolt/0011-support-D-FOT-addrs-data-parsing-for-optimized-binary.patch

227 lines
7.5 KiB
Diff
Raw Normal View History

From 525a2d44443547c0349198df18286f594d62d557 Mon Sep 17 00:00:00 2001
From: rfwang07 <wangrufeng5@huawei.com>
Date: Tue, 19 Nov 2024 09:48:40 +0800
Subject: [PATCH] support D-FOT addrs data parsing for optimized binary
---
bolt/include/bolt/Profile/DataAggregator.h | 31 ++++++++
bolt/lib/Profile/DataAggregator.cpp | 86 +++++++++++++++++++++-
2 files changed, 113 insertions(+), 4 deletions(-)
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index cc237a6..d352f1b 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -102,6 +102,12 @@ private:
Type EntryType;
};
+ /// One "<addr> <count>" record read from a libkperf input file.
+ struct LibkperfDataEntry {
+ uint64_t Addr; ///< Sampled address; read as a hexadecimal field.
+ uint64_t Count; ///< Sample count that is added to the total for Addr.
+ };
+
struct Trace {
uint64_t From;
uint64_t To;
@@ -300,6 +306,9 @@ private:
/// Parse pre-aggregated LBR samples created by an external tool
ErrorOr<AggregatedLBREntry> parseAggregatedLBREntry();
+ /// Parse one "<hex addr> <count>" libkperf sample line created by D-FOT.
+ ErrorOr<LibkperfDataEntry> parseLibkperfDataEntry();
+
/// Parse either buildid:offset or just offset, representing a location in the
/// binary. Used exclusevely for pre-aggregated LBR samples.
ErrorOr<Location> parseLocationOrOffset();
@@ -417,10 +426,32 @@ private:
/// B 4b196f 4b19e0 2 0
void parsePreAggregated();
+ /// Coordinate reading and parsing of a libkperf file.
+ /// A regular perf2bolt aggregation job reads perf output directly, but in
+ /// the oeaware framework sampling is done by libkperf instead.
+ /// For data collected by sampling the BOLT-optimized binary,
+ /// oeaware can export the sampled addresses together with their counts.
+ /// In perf2bolt, with the help of the BAT section, this data is
+ /// converted into a profile that is usable for the original binary.
+ ///
+ /// File format syntax:
+ /// - first line: <event type>
+ /// - every other line: <hex addr> <count>
+ ///
+ /// Example:
+ /// cycles
+ /// 40f544 1
+ /// 40f750 2
+ /// 40f810 53
+ void parseLibkperfFile();
+
/// Parse the full output of pre-aggregated LBR samples generated by
/// an external tool.
std::error_code parsePreAggregatedLBRSamples();
+ /// Parse the event line and every <addr> <count> sample of a libkperf file.
+ std::error_code parseLibkperfSamples();
+
/// Process parsed pre-aggregated data.
void processPreAggregated();
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 24dbe34..509e7c9 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -85,6 +85,11 @@ cl::opt<bool> ReadPreAggregated(
"pa", cl::desc("skip perf and read data from a pre-aggregated file format"),
cl::cat(AggregatorCategory));
+cl::opt<bool> ReadLibkperfFile(
+ "libkperf", cl::desc("skip perf and read data from a libkperf file format, "
+ "only for continuous optimizing with BAT"),
+ cl::cat(AggregatorCategory)); // when set, perf is not launched
+
static cl::opt<bool>
TimeAggregator("time-aggr",
cl::desc("time BOLT aggregator"),
@@ -157,8 +162,8 @@ void DataAggregator::findPerfExecutable() {
void DataAggregator::start() {
outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
- // Don't launch perf for pre-aggregated files
- if (opts::ReadPreAggregated)
+ // Don't launch perf for pre-aggregated files and libkperf files
+ if (opts::ReadPreAggregated || opts::ReadLibkperfFile)
return;
findPerfExecutable();
@@ -193,7 +198,7 @@ void DataAggregator::start() {
}
void DataAggregator::abort() {
- if (opts::ReadPreAggregated)
+ if (opts::ReadPreAggregated || opts::ReadLibkperfFile)
return;
std::string Error;
@@ -313,6 +318,8 @@ void DataAggregator::processFileBuildID(StringRef FileBuildID) {
bool DataAggregator::checkPerfDataMagic(StringRef FileName) {
if (opts::ReadPreAggregated)
return true;
+ if (opts::ReadLibkperfFile)
+ return true;
Expected<sys::fs::file_t> FD = sys::fs::openNativeFileForRead(FileName);
if (!FD) {
@@ -359,6 +366,27 @@ void DataAggregator::parsePreAggregated() {
}
}
+/// Load the libkperf text file named by Filename and parse all of it.
+/// Exits the process with status 1 if the file cannot be read or parsed.
+void DataAggregator::parseLibkperfFile() {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+      MemoryBuffer::getFileOrSTDIN(Filename);
+  if (std::error_code EC = MB.getError()) {
+    errs() << "PERF2BOLT-ERROR: cannot open " << Filename << ": "
+           << EC.message() << "\n";
+    exit(1);
+  }
+  // Start parsing at the beginning of the freshly loaded buffer.
+  FileBuf = std::move(*MB);
+  ParsingBuf = FileBuf->getBuffer();
+  Col = 0;
+  Line = 0;
+  if (parseLibkperfSamples()) {
+    errs() << "PERF2BOLT: failed to parse libkperf samples\n";
+    exit(1);
+  }
+}
+
std::error_code DataAggregator::writeAutoFDOData(StringRef OutputFilename) {
outs() << "PERF2BOLT: writing data for autofdo tools...\n";
NamedRegionTimer T("writeAutoFDO", "Processing branch events", TimerGroupName,
@@ -502,6 +530,11 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
return Error::success();
}
+ if (opts::ReadLibkperfFile) {
+ parseLibkperfFile();
+ return Error::success();
+ }
+
if (std::optional<StringRef> FileBuildID = BC.getFileBuildID()) {
outs() << "BOLT-INFO: binary build-id is: " << *FileBuildID << "\n";
processFileBuildID(*FileBuildID);
@@ -608,7 +641,7 @@ bool DataAggregator::mayHaveProfileData(const BinaryFunction &Function) {
void DataAggregator::processProfile(BinaryContext &BC) {
if (opts::ReadPreAggregated)
processPreAggregated();
- else if (opts::BasicAggregation)
+ else if (opts::BasicAggregation || opts::ReadLibkperfFile)
processBasicEvents();
else
processBranchEvents();
@@ -1206,6 +1239,28 @@ ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
return Location(true, BuildID.get(), Offset.get());
}
+ErrorOr<DataAggregator::LibkperfDataEntry>
+DataAggregator::parseLibkperfDataEntry() {
+  // Expected line: <hex addr> <count>
+  // Consume field separators for as long as checkAndConsumeFS() reports one.
+  while (checkAndConsumeFS())
+    ;
+  ErrorOr<uint64_t> SampleAddr = parseHexField(FieldSeparator);
+  if (!SampleAddr)
+    return SampleAddr.getError();
+  // Same separator handling between the address and the count.
+  while (checkAndConsumeFS())
+    ;
+  ErrorOr<uint64_t> Hits = parseNumberField(FieldSeparator, true);
+  if (!Hits)
+    return Hits.getError();
+  // The count has to be followed by a line break.
+  if (checkAndConsumeNewLine())
+    return LibkperfDataEntry{*SampleAddr, *Hits};
+  reportError("expected end of line");
+  return make_error_code(llvm::errc::io_error);
+}
+
ErrorOr<DataAggregator::AggregatedLBREntry>
DataAggregator::parseAggregatedLBREntry() {
while (checkAndConsumeFS()) {
@@ -1712,6 +1767,29 @@ void DataAggregator::processMemEvents() {
}
}
+/// Read the event line, then sum each "<addr> <count>" sample by address.
+std::error_code DataAggregator::parseLibkperfSamples() {
+  outs() << "PERF2BOLT: parsing libkperf data...\n";
+  NamedRegionTimer T("parseLibkperfData", "Parsing libkperf data",
+                     TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
+  // The first line names the sampled event; it is not a sample entry.
+  if (hasData()) {
+    ErrorOr<StringRef> Event = parseString('\n');
+    if (std::error_code EC = Event.getError())
+      return EC;
+    EventNames.insert(Event.get());
+  }
+  // Re-test hasData(): an event-only file has no sample entry to parse.
+  while (hasData()) {
+    ErrorOr<LibkperfDataEntry> KperfEntry = parseLibkperfDataEntry();
+    if (std::error_code EC = KperfEntry.getError())
+      return EC;
+    BasicSamples[KperfEntry->Addr] += KperfEntry->Count;
+  }
+
+  return std::error_code();
+}
+
std::error_code DataAggregator::parsePreAggregatedLBRSamples() {
outs() << "PERF2BOLT: parsing pre-aggregated profile...\n";
NamedRegionTimer T("parseAggregated", "Parsing aggregated branch events",
--
2.39.5 (Apple Git-154)