Skip to content

Commit 290b275

Browse files
committed
coverage: Build the global file table ahead of time
1 parent e961b09 commit 290b275

File tree

4 files changed

+69
-31
lines changed

4 files changed

+69
-31
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -3563,6 +3563,7 @@ version = "0.0.0"
35633563
dependencies = [
35643564
"bitflags 1.3.2",
35653565
"cstr",
3566+
"itertools",
35663567
"libc",
35673568
"measureme",
35683569
"object",

compiler/rustc_codegen_llvm/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ test = false
99
[dependencies]
1010
bitflags = "1.0"
1111
cstr = "0.2"
12+
itertools = "0.10.5"
1213
libc = "0.2"
1314
measureme = "10.0.0"
1415
object = { version = "0.32.0", default-features = false, features = [

compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use rustc_middle::mir::coverage::{
77
CodeRegion, CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, Op,
88
};
99
use rustc_middle::ty::Instance;
10+
use rustc_span::Symbol;
1011

1112
/// Holds all of the coverage mapping data associated with a function instance,
1213
/// collected during traversal of `Coverage` statements in the function's MIR.
@@ -162,7 +163,7 @@ impl<'tcx> FunctionCoverageCollector<'tcx> {
162163
}
163164

164165
pub(crate) fn into_finished(self) -> FunctionCoverage<'tcx> {
165-
FunctionCoverage::new(self)
166+
FunctionCoverage::from_collector(self)
166167
}
167168
}
168169

@@ -175,7 +176,7 @@ pub(crate) struct FunctionCoverage<'tcx> {
175176
}
176177

177178
impl<'tcx> FunctionCoverage<'tcx> {
178-
fn new(collector: FunctionCoverageCollector<'tcx>) -> Self {
179+
fn from_collector(collector: FunctionCoverageCollector<'tcx>) -> Self {
179180
let zero_expressions = collector.identify_zero_expressions();
180181
let FunctionCoverageCollector { function_coverage_info, is_used, counters_seen, .. } =
181182
collector;
@@ -194,6 +195,11 @@ impl<'tcx> FunctionCoverage<'tcx> {
194195
if self.is_used { self.function_coverage_info.function_source_hash } else { 0 }
195196
}
196197

198+
/// Returns an iterator over the filename of each of this function's mappings (one item per mapping, so repeats are possible).
199+
pub(crate) fn all_file_names(&self) -> impl Iterator<Item = Symbol> + Captures<'_> {
200+
self.function_coverage_info.mappings.iter().map(|mapping| mapping.code_region.file_name)
201+
}
202+
197203
/// Convert this function's coverage expression data into a form that can be
198204
/// passed through FFI to LLVM.
199205
pub(crate) fn counter_expressions(

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs

+59-29
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use crate::coverageinfo::ffi::CounterMappingRegion;
44
use crate::coverageinfo::map_data::{FunctionCoverage, FunctionCoverageCollector};
55
use crate::llvm;
66

7+
use itertools::Itertools as _;
78
use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods};
89
use rustc_data_structures::fx::FxIndexSet;
910
use rustc_hir::def::DefKind;
@@ -57,20 +58,26 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
5758
return;
5859
}
5960

60-
let mut global_file_table = GlobalFileTable::new(tcx);
61+
let function_coverage_entries = function_coverage_map
62+
.into_iter()
63+
.map(|(instance, function_coverage)| (instance, function_coverage.into_finished()))
64+
.collect::<Vec<_>>();
65+
66+
let all_file_names =
67+
function_coverage_entries.iter().flat_map(|(_, fn_cov)| fn_cov.all_file_names());
68+
let global_file_table = GlobalFileTable::new(tcx, all_file_names);
6169

6270
// Encode coverage mappings and generate function records
6371
let mut function_data = Vec::new();
64-
for (instance, function_coverage) in function_coverage_map {
65-
let function_coverage = function_coverage.into_finished();
72+
for (instance, function_coverage) in function_coverage_entries {
6673
debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance);
6774

6875
let mangled_function_name = tcx.symbol_name(instance).name;
6976
let source_hash = function_coverage.source_hash();
7077
let is_used = function_coverage.is_used();
7178

7279
let coverage_mapping_buffer =
73-
encode_mappings_for_function(&mut global_file_table, &function_coverage);
80+
encode_mappings_for_function(&global_file_table, &function_coverage);
7481

7582
if coverage_mapping_buffer.is_empty() {
7683
if function_coverage.is_used() {
@@ -88,11 +95,11 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
8895
}
8996

9097
// Encode all filenames referenced by counters/expressions in this module
91-
let filenames_buffer = global_file_table.into_filenames_buffer();
98+
let filenames_buffer = global_file_table.filenames_buffer();
9299

93100
let filenames_size = filenames_buffer.len();
94-
let filenames_val = cx.const_bytes(&filenames_buffer);
95-
let filenames_ref = coverageinfo::hash_bytes(&filenames_buffer);
101+
let filenames_val = cx.const_bytes(filenames_buffer);
102+
let filenames_ref = coverageinfo::hash_bytes(filenames_buffer);
96103

97104
// Generate the LLVM IR representation of the coverage map and store it in a well-known global
98105
let cov_data_val = generate_coverage_map(cx, version, filenames_size, filenames_val);
@@ -140,39 +147,62 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
140147
}
141148

142149
struct GlobalFileTable {
143-
global_file_table: FxIndexSet<Symbol>,
150+
/// This "raw" table doesn't include the working dir, so a filename's
151+
/// global ID is its index in this set **plus one**.
152+
raw_file_table: FxIndexSet<Symbol>,
153+
filenames_buffer: Vec<u8>,
144154
}
145155

146156
impl GlobalFileTable {
147-
fn new(tcx: TyCtxt<'_>) -> Self {
148-
let mut global_file_table = FxIndexSet::default();
157+
fn new(tcx: TyCtxt<'_>, all_file_names: impl IntoIterator<Item = Symbol>) -> Self {
149158
// LLVM Coverage Mapping Format version 6 (zero-based encoded as 5)
150159
// requires setting the first filename to the compilation directory.
151160
// Since rustc generates coverage maps with relative paths, the
152161
// compilation directory can be combined with the relative paths
153162
// to get absolute paths, if needed.
154-
let working_dir = Symbol::intern(
155-
&tcx.sess.opts.working_dir.remapped_path_if_available().to_string_lossy(),
156-
);
157-
global_file_table.insert(working_dir);
158-
Self { global_file_table }
159-
}
160-
161-
fn global_file_id_for_file_name(&mut self, file_name: Symbol) -> u32 {
162-
let (global_file_id, _) = self.global_file_table.insert_full(file_name);
163-
global_file_id as u32
164-
}
163+
let working_dir: &str =
164+
&tcx.sess.opts.working_dir.remapped_path_if_available().to_string_lossy();
165+
166+
// Prepare a map from filename symbols to their underlying strings, so
167+
// that we can sort by the strings.
168+
let mut raw_file_table = FxIndexMap::<Symbol, &str>::default();
169+
// Filenames usually come in contiguous runs, so dedup to save work.
170+
let all_file_names = all_file_names.into_iter().dedup().collect::<Vec<_>>();
171+
for file_name in &all_file_names {
172+
raw_file_table.entry(*file_name).or_insert_with(|| file_name.as_str());
173+
}
165174

166-
fn into_filenames_buffer(self) -> Vec<u8> {
167-
// This method takes `self` so that the caller can't accidentally
168-
// modify the original file table after encoding it into a buffer.
175+
// Sort the file table by its actual string values, not the arbitrary
176+
// ordering of its symbols.
177+
raw_file_table.sort_unstable_by(|_, a, _, b| str::cmp(a, b));
169178

170-
llvm::build_byte_buffer(|buffer| {
179+
// Build the LLVM filenames buffer ahead of time, so that we can discard
180+
// the string references afterwards.
181+
let filenames_buffer = llvm::build_byte_buffer(|buffer| {
171182
coverageinfo::write_filenames_section_to_buffer(
172-
self.global_file_table.iter().map(Symbol::as_str),
183+
// Insert the working dir at index 0, before the other filenames.
184+
std::iter::once(working_dir).chain(raw_file_table.values().copied()),
173185
buffer,
174186
);
175-
})
187+
});
188+
189+
// Discard the string reference values, leaving only a set of symbols.
190+
let raw_file_table = raw_file_table.into_keys().collect::<FxIndexSet<_>>();
191+
192+
Self { raw_file_table, filenames_buffer }
193+
}
194+
195+
fn global_file_id_for_file_name(&self, file_name: Symbol) -> u32 {
196+
let raw_id = self.raw_file_table.get_index_of(&file_name).unwrap_or_else(|| {
197+
bug!("file name not found in prepared global file table: {file_name}");
198+
});
199+
// The raw file table doesn't include an entry for the working dir
200+
// (which has ID 0), so add 1 to get the correct ID.
201+
(raw_id + 1) as u32
202+
}
203+
204+
fn filenames_buffer(&self) -> &[u8] {
205+
&self.filenames_buffer
176206
}
177207
}
178208

@@ -182,7 +212,7 @@ impl GlobalFileTable {
182212
///
183213
/// Every filename used by the function must already be present in the global file table.
184214
fn encode_mappings_for_function(
185-
global_file_table: &mut GlobalFileTable,
215+
global_file_table: &GlobalFileTable,
186216
function_coverage: &FunctionCoverage<'_>,
187217
) -> Vec<u8> {
188218
let mut counter_regions = function_coverage.counter_regions().collect::<Vec<_>>();
@@ -203,7 +233,7 @@ fn encode_mappings_for_function(
203233
for counter_regions_for_file in
204234
counter_regions.group_by(|(_, a), (_, b)| a.file_name == b.file_name)
205235
{
206-
// Look up (or allocate) the global file ID for this filename.
236+
// Look up the global file ID for this filename.
207237
let file_name = counter_regions_for_file[0].1.file_name;
208238
let global_file_id = global_file_table.global_file_id_for_file_name(file_name);
209239

0 commit comments

Comments
 (0)