Skip to content

Commit ee417cb

Browse files
committed
Auto merge of #8349 - ehuss:fix-lto, r=alexcrichton
Some LTO fixes. This reworks the LTO computation a little to address a few issues: - `cargo build` in a project with both a lib and bin would not engage the optimization introduced in #8192 where the lib *should* be compiled with `-C linker-plugin-lto` (bitcode only). This happened because the old code was starting root units as `Lto::None`. The solution here is to conditionally choose the starting Lto for roots. - A project with a dylib dependency would fail to build. It was building the dylib with `-C linker-plugin-lto` which is not valid. - A project with a bin/lib would build the lib differently based on whether or not it was selected. This changes it so that the lib is built the same. See `lto::between_builds`, where the second build the lib is now fresh. - Tests/benchmarks of a `lib` target will now support LTO. - Treats example libs a little more consistently as regular libs. I scattered some comments throughout, hopefully it's not too difficult to follow. Closes #8337
2 parents 1ec223e + 62a61dd commit ee417cb

File tree

5 files changed

+481
-86
lines changed

5 files changed

+481
-86
lines changed

src/cargo/core/compiler/crate_type.rs

+11
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,17 @@ impl CrateType {
2626
}
2727
}
2828

29+
pub fn can_lto(&self) -> bool {
30+
match self {
31+
CrateType::Bin | CrateType::Staticlib | CrateType::Cdylib => true,
32+
CrateType::Lib
33+
| CrateType::Rlib
34+
| CrateType::Dylib
35+
| CrateType::ProcMacro
36+
| CrateType::Other(..) => false,
37+
}
38+
}
39+
2940
pub fn is_linkable(&self) -> bool {
3041
match self {
3142
CrateType::Lib | CrateType::Rlib | CrateType::Dylib | CrateType::ProcMacro => true,

src/cargo/core/compiler/lto.rs

+123-62
Original file line numberDiff line numberDiff line change
@@ -1,131 +1,192 @@
1-
use crate::core::compiler::{Context, Unit};
1+
use crate::core::compiler::{CompileMode, Context, CrateType, Unit};
22
use crate::core::interning::InternedString;
33
use crate::core::profiles;
4-
use crate::core::TargetKind;
4+
55
use crate::util::errors::CargoResult;
66
use std::collections::hash_map::{Entry, HashMap};
77

88
/// Possible ways to run rustc and request various parts of LTO.
9-
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
9+
///
10+
/// Variant | Flag | Object Code | Bitcode
11+
/// -------------------|------------------------|-------------|--------
12+
/// `Run` | `-C lto=foo` | n/a | n/a
13+
/// `Off` | `-C lto=off` | n/a | n/a
14+
/// `OnlyBitcode` | `-C linker-plugin-lto` | | ✓
15+
/// `ObjectAndBitcode` | | ✓ | ✓
16+
/// `OnlyObject` | `-C embed-bitcode=no` | ✓ |
17+
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
1018
pub enum Lto {
11-
/// LTO is run for this rustc, and it's `-Clto=foo` where `foo` is optional.
19+
/// LTO is run for this rustc, and it's `-Clto=foo`. If the given value is
20+
/// None, that corresponds to `-Clto` with no argument, which means do
21+
/// "fat" LTO.
1222
Run(Option<InternedString>),
1323

14-
/// This rustc invocation only needs to produce bitcode, there's no need to
15-
/// produce object files, so we can pass `-Clinker-plugin-lto`
24+
/// LTO has been explicitly listed as "off". This means no thin-local-LTO,
25+
/// no LTO anywhere, I really mean it!
26+
Off,
27+
28+
/// This rustc invocation only needs to produce bitcode (it is *only* used
29+
/// for LTO), there's no need to produce object files, so we can pass
30+
/// `-Clinker-plugin-lto`
1631
OnlyBitcode,
1732

1833
/// This rustc invocation needs to embed bitcode in object files. This means
1934
/// that object files may be used for a normal link, and the crate may be
2035
/// loaded for LTO later, so both are required.
21-
EmbedBitcode,
36+
ObjectAndBitcode,
2237

23-
/// Nothing related to LTO is required of this compilation.
24-
None,
38+
/// This should not include bitcode. This is primarily to reduce disk
39+
/// space usage.
40+
OnlyObject,
2541
}
2642

2743
pub fn generate(cx: &mut Context<'_, '_>) -> CargoResult<()> {
2844
let mut map = HashMap::new();
2945
for unit in cx.bcx.roots.iter() {
30-
calculate(cx, &mut map, unit, Lto::None)?;
46+
let root_lto = match unit.profile.lto {
47+
// LTO not requested, no need for bitcode.
48+
profiles::Lto::Bool(false) | profiles::Lto::Off => Lto::OnlyObject,
49+
_ => {
50+
let crate_types = unit.target.rustc_crate_types();
51+
if unit.target.for_host() {
52+
Lto::OnlyObject
53+
} else if needs_object(&crate_types) {
54+
lto_when_needs_object(&crate_types)
55+
} else {
56+
// This may or may not participate in LTO, let's start
57+
// with the minimum requirements. This may be expanded in
58+
// `calculate` below if necessary.
59+
Lto::OnlyBitcode
60+
}
61+
}
62+
};
63+
calculate(cx, &mut map, unit, root_lto)?;
3164
}
3265
cx.lto = map;
3366
Ok(())
3467
}
3568

69+
/// Whether or not any of these crate types need object code.
70+
fn needs_object(crate_types: &[CrateType]) -> bool {
71+
crate_types.iter().any(|k| k.can_lto() || k.is_dynamic())
72+
}
73+
74+
/// Lto setting to use when this unit needs object code.
75+
fn lto_when_needs_object(crate_types: &[CrateType]) -> Lto {
76+
if crate_types.iter().any(CrateType::can_lto) {
77+
// A mixed rlib/cdylib whose parent is running LTO. This
78+
// needs both, for bitcode in the rlib (for LTO) and the
79+
// cdylib requires object code.
80+
Lto::ObjectAndBitcode
81+
} else {
82+
// A dylib whose parent is running LTO. rustc currently
83+
// doesn't support LTO with dylibs, so bitcode is not
84+
// needed.
85+
Lto::OnlyObject
86+
}
87+
}
88+
3689
fn calculate(
3790
cx: &Context<'_, '_>,
3891
map: &mut HashMap<Unit, Lto>,
3992
unit: &Unit,
40-
lto_for_deps: Lto,
93+
parent_lto: Lto,
4194
) -> CargoResult<()> {
42-
let (lto, lto_for_deps) = if unit.target.for_host() {
95+
let crate_types = match unit.mode {
96+
// Note: Doctest ignores LTO, but for now we'll compute it as-if it is
97+
// a Bin, in case it is ever supported in the future.
98+
CompileMode::Test | CompileMode::Bench | CompileMode::Doctest => vec![CrateType::Bin],
99+
// Notes on other modes:
100+
// - Check: Treat as the underlying type, it doesn't really matter.
101+
// - Doc: LTO is N/A for the Doc unit itself since rustdoc does not
102+
// support codegen flags. We still compute the dependencies, which
103+
// are mostly `Check`.
104+
// - RunCustomBuild is ignored because it is always "for_host".
105+
_ => unit.target.rustc_crate_types(),
106+
};
107+
// LTO can only be performed if *all* of the crate types support it.
108+
// For example, a cdylib/rlib combination won't allow LTO.
109+
let all_lto_types = crate_types.iter().all(CrateType::can_lto);
110+
// Compute the LTO based on the profile, and what our parent requires.
111+
let lto = if unit.target.for_host() {
43112
// Disable LTO for host builds since we only really want to perform LTO
44113
// for the final binary, and LTO on plugins/build scripts/proc macros is
45114
// largely not desired.
46-
(Lto::None, Lto::None)
47-
} else if unit.target.is_linkable() {
48-
// A "linkable" target is one that produces and rlib or dylib in this
49-
// case. In this scenario we cannot pass `-Clto` to the compiler because
50-
// that is an invalid request, this is simply a dependency. What we do,
51-
// however, is respect the request for whatever dependencies need to
52-
// have.
53-
//
54-
// Here if no LTO is requested then we keep it turned off. Otherwise LTO
55-
// is requested in some form, which means ideally we need just what's
56-
// requested, nothing else. It's possible, though, to have libraries
57-
// which are both a cdylib and and rlib, for example, which means that
58-
// object files are getting sent to the linker. That means that we need
59-
// to fully embed bitcode rather than simply generating just bitcode.
60-
let has_non_linkable_lib = match unit.target.kind() {
61-
TargetKind::Lib(kinds) => kinds.iter().any(|k| !k.is_linkable()),
62-
_ => true,
63-
};
64-
match lto_for_deps {
65-
Lto::None => (Lto::None, Lto::None),
66-
_ if has_non_linkable_lib => (Lto::EmbedBitcode, Lto::EmbedBitcode),
67-
other => (other, other),
115+
Lto::OnlyObject
116+
} else if all_lto_types {
117+
// Note that this ignores the `parent_lto` because this isn't a
118+
// linkable crate type; this unit is not being embedded in the parent.
119+
match unit.profile.lto {
120+
profiles::Lto::Named(s) => Lto::Run(Some(s)),
121+
profiles::Lto::Off => Lto::Off,
122+
profiles::Lto::Bool(true) => Lto::Run(None),
123+
profiles::Lto::Bool(false) => Lto::OnlyObject,
68124
}
69125
} else {
70-
// Otherwise this target can perform LTO and we're going to read the
71-
// LTO value out of the profile. Note that we ignore `lto_for_deps`
72-
// here because if a unit depends on another unit than can LTO this
73-
// isn't a rustc-level dependency but rather a Cargo-level dependency.
74-
// For example this is an integration test depending on a binary.
75-
match unit.profile.lto {
76-
profiles::Lto::Named(s) => match s.as_str() {
77-
"n" | "no" | "off" => (Lto::Run(Some(s)), Lto::None),
78-
_ => (Lto::Run(Some(s)), Lto::OnlyBitcode),
79-
},
80-
profiles::Lto::Bool(true) => (Lto::Run(None), Lto::OnlyBitcode),
81-
profiles::Lto::Bool(false) => (Lto::None, Lto::None),
126+
match (parent_lto, needs_object(&crate_types)) {
127+
// An rlib whose parent is running LTO, we only need bitcode.
128+
(Lto::Run(_), false) => Lto::OnlyBitcode,
129+
// LTO when something needs object code.
130+
(Lto::Run(_), true) | (Lto::OnlyBitcode, true) => lto_when_needs_object(&crate_types),
131+
// LTO is disabled, no need for bitcode.
132+
(Lto::Off, _) => Lto::OnlyObject,
133+
// If this doesn't have any requirements, or the requirements are
134+
// already satisfied, then stay with our parent.
135+
(_, false) | (Lto::OnlyObject, true) | (Lto::ObjectAndBitcode, true) => parent_lto,
82136
}
83137
};
84138

85-
match map.entry(unit.clone()) {
139+
// Merge the computed LTO. If this unit appears multiple times in the
140+
// graph, the merge may expand the requirements.
141+
let merged_lto = match map.entry(unit.clone()) {
86142
// If we haven't seen this unit before then insert our value and keep
87143
// going.
88-
Entry::Vacant(v) => {
89-
v.insert(lto);
90-
}
144+
Entry::Vacant(v) => *v.insert(lto),
91145

92146
Entry::Occupied(mut v) => {
93147
let result = match (lto, v.get()) {
148+
// No change in requirements.
149+
(Lto::OnlyBitcode, Lto::OnlyBitcode) => Lto::OnlyBitcode,
150+
(Lto::OnlyObject, Lto::OnlyObject) => Lto::OnlyObject,
151+
94152
// Once we're running LTO we keep running LTO. We should always
95153
// calculate the same thing here each iteration because if we
96154
// see this twice then it means, for example, two unit tests
97155
// depend on a binary, which is normal.
98156
(Lto::Run(s), _) | (_, &Lto::Run(s)) => Lto::Run(s),
99157

100-
// If we calculated the same thing as before then we can bail
101-
// out quickly.
102-
(Lto::OnlyBitcode, Lto::OnlyBitcode) | (Lto::None, Lto::None) => return Ok(()),
158+
// Off means off! This has the same reasoning as `Lto::Run`.
159+
(Lto::Off, _) | (_, Lto::Off) => Lto::Off,
160+
161+
// Once a target has requested both, that's the maximal amount
162+
// of work that can be done, so we just keep doing that work.
163+
(Lto::ObjectAndBitcode, _) | (_, Lto::ObjectAndBitcode) => Lto::ObjectAndBitcode,
103164

165+
// Upgrade so that both requirements can be met.
166+
//
104167
// This is where the trickiness happens. This unit needs
105168
// bitcode and the previously calculated value for this unit
106169
// says it didn't need bitcode (or vice versa). This means that
107170
// we're a shared dependency between some targets which require
108171
// LTO and some which don't. This means that instead of being
109172
// either only-objects or only-bitcode we have to embed both in
110173
// rlibs (used for different compilations), so we switch to
111-
// embedding bitcode.
112-
(Lto::OnlyBitcode, Lto::None) | (Lto::None, Lto::OnlyBitcode) => Lto::EmbedBitcode,
113-
114-
// Once a target has requested bitcode embedding that's the
115-
// maximal amount of work that can be done, so we just keep
116-
// doing that work.
117-
(Lto::EmbedBitcode, _) | (_, Lto::EmbedBitcode) => Lto::EmbedBitcode,
174+
// including both.
175+
(Lto::OnlyObject, Lto::OnlyBitcode) | (Lto::OnlyBitcode, Lto::OnlyObject) => {
176+
Lto::ObjectAndBitcode
177+
}
118178
};
119179
// No need to recurse if we calculated the same value as before.
120180
if result == *v.get() {
121181
return Ok(());
122182
}
123183
v.insert(result);
184+
result
124185
}
125-
}
186+
};
126187

127188
for dep in cx.unit_deps(unit) {
128-
calculate(cx, map, &dep.unit, lto_for_deps)?;
189+
calculate(cx, map, &dep.unit, merged_lto)?;
129190
}
130191
Ok(())
131192
}

src/cargo/core/compiler/mod.rs

+6-2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ pub use self::job::Freshness;
4343
use self::job::{Job, Work};
4444
use self::job_queue::{JobQueue, JobState};
4545
pub(crate) use self::layout::Layout;
46+
pub use self::lto::Lto;
4647
use self::output_depinfo::output_depinfo;
4748
use self::unit_graph::UnitDep;
4849
pub use crate::core::compiler::unit::{Unit, UnitInterner};
@@ -787,7 +788,10 @@ fn build_base_args(
787788
lto::Lto::Run(Some(s)) => {
788789
cmd.arg("-C").arg(format!("lto={}", s));
789790
}
790-
lto::Lto::EmbedBitcode => {} // this is rustc's default
791+
lto::Lto::Off => {
792+
cmd.arg("-C").arg("lto=off");
793+
}
794+
lto::Lto::ObjectAndBitcode => {} // this is rustc's default
791795
lto::Lto::OnlyBitcode => {
792796
// Note that this compiler flag, like the one below, is just an
793797
// optimization in terms of build time. If we don't pass it then
@@ -804,7 +808,7 @@ fn build_base_args(
804808
cmd.arg("-Clinker-plugin-lto");
805809
}
806810
}
807-
lto::Lto::None => {
811+
lto::Lto::OnlyObject => {
808812
if cx
809813
.bcx
810814
.target_data

src/cargo/core/profiles.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,9 @@ fn merge_profile(profile: &mut Profile, toml: &TomlProfile) {
533533
}
534534
match toml.lto {
535535
Some(StringOrBool::Bool(b)) => profile.lto = Lto::Bool(b),
536+
Some(StringOrBool::String(ref n)) if matches!(n.as_str(), "off" | "n" | "no") => {
537+
profile.lto = Lto::Off
538+
}
536539
Some(StringOrBool::String(ref n)) => profile.lto = Lto::Named(InternedString::new(n)),
537540
None => {}
538541
}
@@ -747,8 +750,10 @@ impl Profile {
747750
/// The link-time-optimization setting.
748751
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash, PartialOrd, Ord)]
749752
pub enum Lto {
750-
/// False = no LTO
753+
/// Explicitly no LTO, disables thin-LTO.
754+
Off,
751755
/// True = "Fat" LTO
756+
/// False = rustc default (no args), currently "thin LTO"
752757
Bool(bool),
753758
/// Named LTO settings like "thin".
754759
Named(InternedString),
@@ -760,6 +765,7 @@ impl serde::ser::Serialize for Lto {
760765
S: serde::ser::Serializer,
761766
{
762767
match self {
768+
Lto::Off => "off".serialize(s),
763769
Lto::Bool(b) => b.to_string().serialize(s),
764770
Lto::Named(n) => n.serialize(s),
765771
}

0 commit comments

Comments
 (0)