Skip to content

Commit c00c62c

Browse files
authored
[BOLT] Add pseudo probe inline tree to YAML profile
Add probe inline tree information to YAML profile, at function level:
- function GUID,
- checksum,
- parent node id,
- call site in the parent.

This information is used for pseudo probe block matching (#99891).

The encoding adds/changes probe information in multiple levels of YAML profile:
- BinaryProfile: add pseudo_probe_desc with GUIDs and Hashes, which permits deduplication of data:
  - many GUIDs are duplicates as the same callee is commonly inlined into multiple callers,
  - hashes are also very repetitive, especially for functions with low block counts.
- FunctionProfile: add inline tree (see above). Top-level function is included as root of function inline tree, which makes guid and pseudo_probe_desc_hash fields redundant.
- BlockProfile: densely-encoded block probe information:
  - probes reference their containing inline tree node,
  - separate lists for block, call, indirect call probes,
  - block probe encoding is specialized: ids are encoded as bitset in uint64_t. If only block probe with id=1 is present, it's encoded as implicit entry (id=0, omitted).
  - inline tree nodes with identical probes share probe description where node indices are combined into a list.

On top of #107970, profile with new probe encoding has the following characteristics (profile for a large binary):
- Profile without probe information: 33MB, 3.8MB compressed (baseline).
- Profile with inline tree information: 92MB, 14MB compressed.

Profile processing time (YAML parsing, inference, attaching steps):
- profile without pseudo probes: 5s,
- profile with pseudo probes, without pseudo probe matching: 11s,
- with pseudo probe matching: 12.5s.

Test Plan: updated pseudoprobe-decoding-inline.test

Reviewers: wlei-llvm, ayermolo, rafaelauler, dcci, maksfb

Reviewed By: wlei-llvm, rafaelauler

Pull Request: #107137
1 parent dee058f commit c00c62c

File tree

7 files changed

+366
-52
lines changed

7 files changed

+366
-52
lines changed

bolt/include/bolt/Profile/ProfileYAMLMapping.h

+65-16
Original file line numberDiff line numberDiff line change
@@ -95,24 +95,29 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {
9595

9696
namespace bolt {
9797
/// Densely-encoded pseudo probe record attached to a basic block profile.
///
/// A record lists the probes of one block that belong to a single inline tree
/// node (InlineTreeIndex, YAML key "id") or to several nodes sharing the same
/// probe description (InlineTreeNodes, YAML key "ids").
struct PseudoProbeInfo {
  uint32_t InlineTreeIndex = 0;
  uint64_t BlockMask = 0; // bitset with probe indices from 1 to 64
  std::vector<uint64_t> BlockProbes; // block probes with indices above 64
  std::vector<uint64_t> CallProbes;
  std::vector<uint64_t> IndCallProbes;
  std::vector<uint32_t> InlineTreeNodes;

  /// Two records are equal only if every encoded field matches.
  ///
  /// BlockMask and InlineTreeNodes take part in the comparison as well:
  /// block probes with indices 1..64 live only in BlockMask, and a record
  /// shared by several nodes is identified only by InlineTreeNodes, so
  /// leaving either out would make records describing different probes
  /// compare equal.
  bool operator==(const PseudoProbeInfo &Other) const {
    return InlineTreeIndex == Other.InlineTreeIndex &&
           BlockMask == Other.BlockMask &&
           BlockProbes == Other.BlockProbes &&
           CallProbes == Other.CallProbes &&
           IndCallProbes == Other.IndCallProbes &&
           InlineTreeNodes == Other.InlineTreeNodes;
  }
};
109111
} // end namespace bolt
110112

111113
template <> struct MappingTraits<bolt::PseudoProbeInfo> {
112114
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
113-
YamlIO.mapRequired("guid", PI.GUID);
114-
YamlIO.mapRequired("id", PI.Index);
115-
YamlIO.mapRequired("type", PI.Type);
115+
YamlIO.mapOptional("blx", PI.BlockMask, 0);
116+
YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>());
117+
YamlIO.mapOptional("call", PI.CallProbes, std::vector<uint64_t>());
118+
YamlIO.mapOptional("icall", PI.IndCallProbes, std::vector<uint64_t>());
119+
YamlIO.mapOptional("id", PI.InlineTreeIndex, 0);
120+
YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>());
116121
}
117122

118123
static const bool flow = true;
@@ -158,15 +163,35 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
158163
std::vector<bolt::CallSiteInfo>());
159164
YamlIO.mapOptional("succ", BBP.Successors,
160165
std::vector<bolt::SuccessorInfo>());
161-
YamlIO.mapOptional("pseudo_probes", BBP.PseudoProbes,
166+
YamlIO.mapOptional("probes", BBP.PseudoProbes,
162167
std::vector<bolt::PseudoProbeInfo>());
163168
}
164169
};
165170

171+
namespace bolt {
172+
/// One node of a function's pseudo probe inline tree as stored in the YAML
/// profile.
///
/// Members carry default initializers matching the defaults used by the YAML
/// mapping ("p" = 0, "cs" = 0, "g" = UINT32_MAX), so a default-constructed
/// node never holds indeterminate values. The struct remains an aggregate
/// (C++14 and later), so brace initialization with three values still works.
struct InlineTreeNode {
  // NOTE(review): presumably the parent node index encoded as a delta;
  // the exact encoding is defined by the profile writer - confirm there.
  uint32_t ParentIndexDelta = 0;
  // Call site probe in the parent node.
  uint32_t CallSiteProbe = 0;
  // Index in PseudoProbeDesc.GUID, UINT32_MAX for same as previous (omitted)
  uint32_t GUIDIndex = UINT32_MAX;
  // Nodes deliberately never compare equal.
  // NOTE(review): presumably only present so that a vector of nodes can be
  // compared against the empty default in mapOptional - confirm.
  bool operator==(const InlineTreeNode &) const { return false; }
};
179+
} // end namespace bolt
180+
181+
template <> struct MappingTraits<bolt::InlineTreeNode> {
182+
static void mapping(IO &YamlIO, bolt::InlineTreeNode &ITI) {
183+
YamlIO.mapOptional("g", ITI.GUIDIndex, UINT32_MAX);
184+
YamlIO.mapOptional("p", ITI.ParentIndexDelta, 0);
185+
YamlIO.mapOptional("cs", ITI.CallSiteProbe, 0);
186+
}
187+
188+
static const bool flow = true;
189+
};
166190
} // end namespace yaml
167191
} // end namespace llvm
168192

169193
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryBasicBlockProfile)
194+
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::InlineTreeNode)
170195

171196
namespace llvm {
172197
namespace yaml {
@@ -179,8 +204,7 @@ struct BinaryFunctionProfile {
179204
llvm::yaml::Hex64 Hash{0};
180205
uint64_t ExecCount{0};
181206
std::vector<BinaryBasicBlockProfile> Blocks;
182-
llvm::yaml::Hex64 GUID{0};
183-
llvm::yaml::Hex64 PseudoProbeDescHash{0};
207+
std::vector<InlineTreeNode> InlineTree;
184208
bool Used{false};
185209
};
186210
} // end namespace bolt
@@ -194,9 +218,8 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
194218
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
195219
YamlIO.mapOptional("blocks", BFP.Blocks,
196220
std::vector<bolt::BinaryBasicBlockProfile>());
197-
YamlIO.mapOptional("guid", BFP.GUID, (uint64_t)0);
198-
YamlIO.mapOptional("pseudo_probe_desc_hash", BFP.PseudoProbeDescHash,
199-
(uint64_t)0);
221+
YamlIO.mapOptional("inline_tree", BFP.InlineTree,
222+
std::vector<bolt::InlineTreeNode>());
200223
}
201224
};
202225

@@ -246,10 +269,33 @@ template <> struct MappingTraits<bolt::BinaryProfileHeader> {
246269
}
247270
};
248271

272+
namespace bolt {
273+
struct PseudoProbeDesc {
274+
std::vector<Hex64> GUID;
275+
std::vector<Hex64> Hash;
276+
std::vector<uint32_t> GUIDHashIdx; // Index of hash for that GUID in Hash
277+
278+
bool operator==(const PseudoProbeDesc &Other) const {
279+
// Only treat empty Desc as equal
280+
return GUID.empty() && Other.GUID.empty() && Hash.empty() &&
281+
Other.Hash.empty() && GUIDHashIdx.empty() &&
282+
Other.GUIDHashIdx.empty();
283+
}
284+
};
285+
} // end namespace bolt
286+
287+
template <> struct MappingTraits<bolt::PseudoProbeDesc> {
288+
static void mapping(IO &YamlIO, bolt::PseudoProbeDesc &PD) {
289+
YamlIO.mapRequired("gs", PD.GUID);
290+
YamlIO.mapRequired("gh", PD.GUIDHashIdx);
291+
YamlIO.mapRequired("hs", PD.Hash);
292+
}
293+
};
249294
} // end namespace yaml
250295
} // end namespace llvm
251296

252297
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryFunctionProfile)
298+
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeDesc)
253299

254300
namespace llvm {
255301
namespace yaml {
@@ -258,13 +304,16 @@ namespace bolt {
258304
struct BinaryProfile {
259305
BinaryProfileHeader Header;
260306
std::vector<BinaryFunctionProfile> Functions;
307+
PseudoProbeDesc PseudoProbeDesc;
261308
};
262309
} // namespace bolt
263310

264311
template <> struct MappingTraits<bolt::BinaryProfile> {
265312
static void mapping(IO &YamlIO, bolt::BinaryProfile &BP) {
266313
YamlIO.mapRequired("header", BP.Header);
267314
YamlIO.mapRequired("functions", BP.Functions);
315+
YamlIO.mapOptional("pseudo_probe_desc", BP.PseudoProbeDesc,
316+
bolt::PseudoProbeDesc());
268317
}
269318
};
270319

bolt/include/bolt/Profile/YAMLProfileWriter.h

+51-1
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,27 @@ class YAMLProfileWriter {
3232
/// Save execution profile for that instance.
3333
std::error_code writeProfile(const RewriteInstance &RI);
3434

35+
using InlineTreeMapTy =
36+
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>;
37+
struct InlineTreeDesc {
38+
template <typename T> using GUIDMapTy = std::unordered_map<uint64_t, T>;
39+
using GUIDNodeMap = GUIDMapTy<const MCDecodedPseudoProbeInlineTree *>;
40+
using GUIDNumMap = GUIDMapTy<uint32_t>;
41+
GUIDNodeMap TopLevelGUIDToInlineTree;
42+
GUIDNumMap GUIDIdxMap;
43+
GUIDNumMap HashIdxMap;
44+
};
45+
46+
static std::tuple<std::vector<yaml::bolt::InlineTreeNode>, InlineTreeMapTy>
47+
convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
48+
const InlineTreeDesc &InlineTree, uint64_t GUID);
49+
50+
static std::tuple<yaml::bolt::PseudoProbeDesc, InlineTreeDesc>
51+
convertPseudoProbeDesc(const MCPseudoProbeDecoder &PseudoProbeDecoder);
52+
3553
static yaml::bolt::BinaryFunctionProfile
3654
convert(const BinaryFunction &BF, bool UseDFS,
55+
const InlineTreeDesc &InlineTree,
3756
const BoltAddressTranslation *BAT = nullptr);
3857

3958
/// Set CallSiteInfo destination fields from \p Symbol and return a target
@@ -42,8 +61,39 @@ class YAMLProfileWriter {
4261
setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
4362
const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
4463
uint32_t Offset = 0);
45-
};
4664

65+
private:
66+
struct InlineTreeNode {
67+
const MCDecodedPseudoProbeInlineTree *InlineTree;
68+
uint64_t GUID;
69+
uint64_t Hash;
70+
uint32_t ParentId;
71+
uint32_t InlineSite;
72+
};
73+
static std::vector<InlineTreeNode>
74+
collectInlineTree(const MCPseudoProbeDecoder &Decoder,
75+
const MCDecodedPseudoProbeInlineTree &Root);
76+
77+
// 0 - block probe, 1 - indirect call, 2 - direct call
78+
using ProbeList = std::array<SmallVector<uint64_t, 0>, 3>;
79+
using NodeIdToProbes = DenseMap<uint32_t, ProbeList>;
80+
static std::vector<yaml::bolt::PseudoProbeInfo>
81+
convertNodeProbes(NodeIdToProbes &NodeProbes);
82+
83+
public:
84+
template <typename T>
85+
static std::vector<yaml::bolt::PseudoProbeInfo>
86+
writeBlockProbes(T Probes, const InlineTreeMapTy &InlineTreeNodeId) {
87+
NodeIdToProbes NodeProbes;
88+
for (const MCDecodedPseudoProbe &Probe : Probes) {
89+
auto It = InlineTreeNodeId.find(Probe.getInlineTreeNode());
90+
if (It == InlineTreeNodeId.end())
91+
continue;
92+
NodeProbes[It->second][Probe.getType()].emplace_back(Probe.getIndex());
93+
}
94+
return convertNodeProbes(NodeProbes);
95+
}
96+
};
4797
} // namespace bolt
4898
} // namespace llvm
4999

bolt/lib/Profile/DataAggregator.cpp

+24-9
Original file line numberDiff line numberDiff line change
@@ -2321,6 +2321,12 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
23212321
BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
23222322
: BinaryFunction::PF_LBR;
23232323

2324+
// Add probe inline tree nodes.
2325+
YAMLProfileWriter::InlineTreeDesc InlineTree;
2326+
if (PseudoProbeDecoder)
2327+
std::tie(BP.PseudoProbeDesc, InlineTree) =
2328+
YAMLProfileWriter::convertPseudoProbeDesc(*PseudoProbeDecoder);
2329+
23242330
if (!opts::BasicAggregation) {
23252331
// Convert profile for functions not covered by BAT
23262332
for (auto &BFI : BC.getBinaryFunctions()) {
@@ -2329,8 +2335,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
23292335
continue;
23302336
if (BAT->isBATFunction(Function.getAddress()))
23312337
continue;
2332-
BP.Functions.emplace_back(
2333-
YAMLProfileWriter::convert(Function, /*UseDFS=*/false, BAT));
2338+
BP.Functions.emplace_back(YAMLProfileWriter::convert(
2339+
Function, /*UseDFS=*/false, InlineTree, BAT));
23342340
}
23352341

23362342
for (const auto &KV : NamesToBranches) {
@@ -2403,16 +2409,22 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
24032409
YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
24042410
}
24052411
if (PseudoProbeDecoder) {
2406-
if ((YamlBF.GUID = BF->getGUID())) {
2407-
const MCPseudoProbeFuncDesc *FuncDesc =
2408-
PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
2409-
YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
2412+
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
2413+
InlineTreeNodeId;
2414+
if (BF->getGUID()) {
2415+
std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
2416+
YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder,
2417+
InlineTree, BF->getGUID());
24102418
}
24112419
// Fetch probes belonging to all fragments
24122420
const AddressProbesMap &ProbeMap =
24132421
PseudoProbeDecoder->getAddress2ProbesMap();
24142422
BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
24152423
Fragments.insert(BF);
2424+
DenseMap<
2425+
uint32_t,
2426+
std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
2427+
BlockProbes;
24162428
for (const BinaryFunction *F : Fragments) {
24172429
const uint64_t FuncAddr = F->getAddress();
24182430
for (const MCDecodedPseudoProbe &Probe :
@@ -2421,11 +2433,14 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
24212433
const uint32_t InputOffset = BAT->translate(
24222434
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
24232435
const unsigned BlockIndex = getBlock(InputOffset).second;
2424-
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
2425-
yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
2426-
Probe.getType()});
2436+
BlockProbes[BlockIndex].emplace_back(Probe);
24272437
}
24282438
}
2439+
2440+
for (auto &[Block, Probes] : BlockProbes) {
2441+
YamlBF.Blocks[Block].PseudoProbes =
2442+
YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
2443+
}
24292444
}
24302445
// Skip printing if there's no profile data
24312446
llvm::erase_if(

0 commit comments

Comments
 (0)