|
1 |
| -use crate::core::compiler::{Context, Unit}; |
| 1 | +use crate::core::compiler::{CompileMode, Context, CrateType, Unit}; |
2 | 2 | use crate::core::interning::InternedString;
|
3 | 3 | use crate::core::profiles;
|
4 |
| -use crate::core::TargetKind; |
| 4 | + |
5 | 5 | use crate::util::errors::CargoResult;
|
6 | 6 | use std::collections::hash_map::{Entry, HashMap};
|
7 | 7 |
|
8 | 8 | /// Possible ways to run rustc and request various parts of LTO.
|
9 |
| -#[derive(Copy, Clone, PartialEq, Eq, Hash)] |
| 9 | +/// |
| 10 | +/// Variant | Flag | Object Code | Bitcode |
| 11 | +/// -------------------|------------------------|-------------|-------- |
| 12 | +/// `Run` | `-C lto=foo` | n/a | n/a |
| 13 | +/// `Off` | `-C lto=off` | n/a | n/a |
| 14 | +/// `OnlyBitcode` | `-C linker-plugin-lto` | | ✓ |
| 15 | +/// `ObjectAndBitcode` | | ✓ | ✓ |
| 16 | +/// `OnlyObject` | `-C embed-bitcode=no` | ✓ | |
| 17 | +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] |
10 | 18 | pub enum Lto {
|
11 |
| - /// LTO is run for this rustc, and it's `-Clto=foo` where `foo` is optional. |
| 19 | + /// LTO is run for this rustc, and it's `-Clto=foo`. If the given value is |
| 20 | + /// None, that corresponds to `-Clto` with no argument, which means do |
| 21 | + /// "fat" LTO. |
12 | 22 | Run(Option<InternedString>),
|
13 | 23 |
|
14 |
| - /// This rustc invocation only needs to produce bitcode, there's no need to |
15 |
| - /// produce object files, so we can pass `-Clinker-plugin-lto` |
| 24 | + /// LTO has been explicitly listed as "off". This means no thin-local-LTO, |
| 25 | + /// no LTO anywhere, I really mean it! |
| 26 | + Off, |
| 27 | + |
| 28 | + /// This rustc invocation only needs to produce bitcode (it is *only* used |
| 29 | + /// for LTO), there's no need to produce object files, so we can pass |
| 30 | + /// `-Clinker-plugin-lto` |
16 | 31 | OnlyBitcode,
|
17 | 32 |
|
18 | 33 | /// This rustc invocation needs to embed bitcode in object files. This means
|
19 | 34 | /// that object files may be used for a normal link, and the crate may be
|
20 | 35 | /// loaded for LTO later, so both are required.
|
21 |
| - EmbedBitcode, |
| 36 | + ObjectAndBitcode, |
22 | 37 |
|
23 |
| - /// Nothing related to LTO is required of this compilation. |
24 |
| - None, |
| 38 | + /// This should not include bitcode. This is primarily to reduce disk |
| 39 | + /// space usage. |
| 40 | + OnlyObject, |
25 | 41 | }
|
26 | 42 |
|
27 | 43 | pub fn generate(cx: &mut Context<'_, '_>) -> CargoResult<()> {
|
28 | 44 | let mut map = HashMap::new();
|
29 | 45 | for unit in cx.bcx.roots.iter() {
|
30 |
| - calculate(cx, &mut map, unit, Lto::None)?; |
| 46 | + let root_lto = match unit.profile.lto { |
| 47 | + // LTO not requested, no need for bitcode. |
| 48 | + profiles::Lto::Bool(false) | profiles::Lto::Off => Lto::OnlyObject, |
| 49 | + _ => { |
| 50 | + let crate_types = unit.target.rustc_crate_types(); |
| 51 | + if unit.target.for_host() { |
| 52 | + Lto::OnlyObject |
| 53 | + } else if needs_object(&crate_types) { |
| 54 | + lto_when_needs_object(&crate_types) |
| 55 | + } else { |
| 56 | + // This may or may not participate in LTO, let's start |
| 57 | + // with the minimum requirements. This may be expanded in |
| 58 | + // `calculate` below if necessary. |
| 59 | + Lto::OnlyBitcode |
| 60 | + } |
| 61 | + } |
| 62 | + }; |
| 63 | + calculate(cx, &mut map, unit, root_lto)?; |
31 | 64 | }
|
32 | 65 | cx.lto = map;
|
33 | 66 | Ok(())
|
34 | 67 | }
|
35 | 68 |
|
| 69 | +/// Whether or not any of these crate types need object code. |
| 70 | +fn needs_object(crate_types: &[CrateType]) -> bool { |
| 71 | + crate_types.iter().any(|k| k.can_lto() || k.is_dynamic()) |
| 72 | +} |
| 73 | + |
| 74 | +/// Lto setting to use when this unit needs object code. |
| 75 | +fn lto_when_needs_object(crate_types: &[CrateType]) -> Lto { |
| 76 | + if crate_types.iter().any(CrateType::can_lto) { |
| 77 | + // A mixed rlib/cdylib whose parent is running LTO. This |
| 78 | + // needs both, for bitcode in the rlib (for LTO) and the |
| 79 | + // cdylib requires object code. |
| 80 | + Lto::ObjectAndBitcode |
| 81 | + } else { |
| 82 | + // A dylib whose parent is running LTO. rustc currently |
| 83 | + // doesn't support LTO with dylibs, so bitcode is not |
| 84 | + // needed. |
| 85 | + Lto::OnlyObject |
| 86 | + } |
| 87 | +} |
| 88 | + |
36 | 89 | fn calculate(
|
37 | 90 | cx: &Context<'_, '_>,
|
38 | 91 | map: &mut HashMap<Unit, Lto>,
|
39 | 92 | unit: &Unit,
|
40 |
| - lto_for_deps: Lto, |
| 93 | + parent_lto: Lto, |
41 | 94 | ) -> CargoResult<()> {
|
42 |
| - let (lto, lto_for_deps) = if unit.target.for_host() { |
| 95 | + let crate_types = match unit.mode { |
| 96 | + // Note: Doctest ignores LTO, but for now we'll compute it as-if it is |
| 97 | + // a Bin, in case it is ever supported in the future. |
| 98 | + CompileMode::Test | CompileMode::Bench | CompileMode::Doctest => vec![CrateType::Bin], |
| 99 | + // Notes on other modes: |
| 100 | + // - Check: Treat as the underlying type, it doesn't really matter. |
| 101 | + // - Doc: LTO is N/A for the Doc unit itself since rustdoc does not |
| 102 | + // support codegen flags. We still compute the dependencies, which |
| 103 | + // are mostly `Check`. |
| 104 | + // - RunCustomBuild is ignored because it is always "for_host". |
| 105 | + _ => unit.target.rustc_crate_types(), |
| 106 | + }; |
| 107 | + // LTO can only be performed if *all* of the crate types support it. |
| 108 | + // For example, a cdylib/rlib combination won't allow LTO. |
| 109 | + let all_lto_types = crate_types.iter().all(CrateType::can_lto); |
| 110 | + // Compute the LTO based on the profile, and what our parent requires. |
| 111 | + let lto = if unit.target.for_host() { |
43 | 112 | // Disable LTO for host builds since we only really want to perform LTO
|
44 | 113 | // for the final binary, and LTO on plugins/build scripts/proc macros is
|
45 | 114 | // largely not desired.
|
46 |
| - (Lto::None, Lto::None) |
47 |
| - } else if unit.target.is_linkable() { |
48 |
| - // A "linkable" target is one that produces and rlib or dylib in this |
49 |
| - // case. In this scenario we cannot pass `-Clto` to the compiler because |
50 |
| - // that is an invalid request, this is simply a dependency. What we do, |
51 |
| - // however, is respect the request for whatever dependencies need to |
52 |
| - // have. |
53 |
| - // |
54 |
| - // Here if no LTO is requested then we keep it turned off. Otherwise LTO |
55 |
| - // is requested in some form, which means ideally we need just what's |
56 |
| - // requested, nothing else. It's possible, though, to have libraries |
57 |
| - // which are both a cdylib and and rlib, for example, which means that |
58 |
| - // object files are getting sent to the linker. That means that we need |
59 |
| - // to fully embed bitcode rather than simply generating just bitcode. |
60 |
| - let has_non_linkable_lib = match unit.target.kind() { |
61 |
| - TargetKind::Lib(kinds) => kinds.iter().any(|k| !k.is_linkable()), |
62 |
| - _ => true, |
63 |
| - }; |
64 |
| - match lto_for_deps { |
65 |
| - Lto::None => (Lto::None, Lto::None), |
66 |
| - _ if has_non_linkable_lib => (Lto::EmbedBitcode, Lto::EmbedBitcode), |
67 |
| - other => (other, other), |
| 115 | + Lto::OnlyObject |
| 116 | + } else if all_lto_types { |
| 117 | + // Note that this ignores the `parent_lto` because this isn't a |
| 118 | + // linkable crate type; this unit is not being embedded in the parent. |
| 119 | + match unit.profile.lto { |
| 120 | + profiles::Lto::Named(s) => Lto::Run(Some(s)), |
| 121 | + profiles::Lto::Off => Lto::Off, |
| 122 | + profiles::Lto::Bool(true) => Lto::Run(None), |
| 123 | + profiles::Lto::Bool(false) => Lto::OnlyObject, |
68 | 124 | }
|
69 | 125 | } else {
|
70 |
| - // Otherwise this target can perform LTO and we're going to read the |
71 |
| - // LTO value out of the profile. Note that we ignore `lto_for_deps` |
72 |
| - // here because if a unit depends on another unit than can LTO this |
73 |
| - // isn't a rustc-level dependency but rather a Cargo-level dependency. |
74 |
| - // For example this is an integration test depending on a binary. |
75 |
| - match unit.profile.lto { |
76 |
| - profiles::Lto::Named(s) => match s.as_str() { |
77 |
| - "n" | "no" | "off" => (Lto::Run(Some(s)), Lto::None), |
78 |
| - _ => (Lto::Run(Some(s)), Lto::OnlyBitcode), |
79 |
| - }, |
80 |
| - profiles::Lto::Bool(true) => (Lto::Run(None), Lto::OnlyBitcode), |
81 |
| - profiles::Lto::Bool(false) => (Lto::None, Lto::None), |
| 126 | + match (parent_lto, needs_object(&crate_types)) { |
| 127 | + // An rlib whose parent is running LTO, we only need bitcode. |
| 128 | + (Lto::Run(_), false) => Lto::OnlyBitcode, |
| 129 | + // LTO when something needs object code. |
| 130 | + (Lto::Run(_), true) | (Lto::OnlyBitcode, true) => lto_when_needs_object(&crate_types), |
| 131 | + // LTO is disabled, no need for bitcode. |
| 132 | + (Lto::Off, _) => Lto::OnlyObject, |
| 133 | + // If this doesn't have any requirements, or the requirements are |
| 134 | + // already satisfied, then stay with our parent. |
| 135 | + (_, false) | (Lto::OnlyObject, true) | (Lto::ObjectAndBitcode, true) => parent_lto, |
82 | 136 | }
|
83 | 137 | };
|
84 | 138 |
|
85 |
| - match map.entry(unit.clone()) { |
| 139 | + // Merge the computed LTO. If this unit appears multiple times in the |
| 140 | + // graph, the merge may expand the requirements. |
| 141 | + let merged_lto = match map.entry(unit.clone()) { |
86 | 142 | // If we haven't seen this unit before then insert our value and keep
|
87 | 143 | // going.
|
88 |
| - Entry::Vacant(v) => { |
89 |
| - v.insert(lto); |
90 |
| - } |
| 144 | + Entry::Vacant(v) => *v.insert(lto), |
91 | 145 |
|
92 | 146 | Entry::Occupied(mut v) => {
|
93 | 147 | let result = match (lto, v.get()) {
|
| 148 | + // No change in requirements. |
| 149 | + (Lto::OnlyBitcode, Lto::OnlyBitcode) => Lto::OnlyBitcode, |
| 150 | + (Lto::OnlyObject, Lto::OnlyObject) => Lto::OnlyObject, |
| 151 | + |
94 | 152 | // Once we're running LTO we keep running LTO. We should always
|
95 | 153 | // calculate the same thing here each iteration because if we
|
96 | 154 | // see this twice then it means, for example, two unit tests
|
97 | 155 | // depend on a binary, which is normal.
|
98 | 156 | (Lto::Run(s), _) | (_, &Lto::Run(s)) => Lto::Run(s),
|
99 | 157 |
|
100 |
| - // If we calculated the same thing as before then we can bail |
101 |
| - // out quickly. |
102 |
| - (Lto::OnlyBitcode, Lto::OnlyBitcode) | (Lto::None, Lto::None) => return Ok(()), |
| 158 | + // Off means off! This has the same reasoning as `Lto::Run`. |
| 159 | + (Lto::Off, _) | (_, Lto::Off) => Lto::Off, |
| 160 | + |
| 161 | + // Once a target has requested both, that's the maximal amount |
| 162 | + // of work that can be done, so we just keep doing that work. |
| 163 | + (Lto::ObjectAndBitcode, _) | (_, Lto::ObjectAndBitcode) => Lto::ObjectAndBitcode, |
103 | 164 |
|
| 165 | + // Upgrade so that both requirements can be met. |
| 166 | + // |
104 | 167 | // This is where the trickiness happens. This unit needs
|
105 | 168 | // bitcode and the previously calculated value for this unit
|
106 | 169 | // says it didn't need bitcode (or vice versa). This means that
|
107 | 170 | // we're a shared dependency between some targets which require
|
108 | 171 | // LTO and some which don't. This means that instead of being
|
109 | 172 | // either only-objects or only-bitcode we have to embed both in
|
110 | 173 | // rlibs (used for different compilations), so we switch to
|
111 |
| - // embedding bitcode. |
112 |
| - (Lto::OnlyBitcode, Lto::None) | (Lto::None, Lto::OnlyBitcode) => Lto::EmbedBitcode, |
113 |
| - |
114 |
| - // Once a target has requested bitcode embedding that's the |
115 |
| - // maximal amount of work that can be done, so we just keep |
116 |
| - // doing that work. |
117 |
| - (Lto::EmbedBitcode, _) | (_, Lto::EmbedBitcode) => Lto::EmbedBitcode, |
| 174 | + // including both. |
| 175 | + (Lto::OnlyObject, Lto::OnlyBitcode) | (Lto::OnlyBitcode, Lto::OnlyObject) => { |
| 176 | + Lto::ObjectAndBitcode |
| 177 | + } |
118 | 178 | };
|
119 | 179 | // No need to recurse if we calculated the same value as before.
|
120 | 180 | if result == *v.get() {
|
121 | 181 | return Ok(());
|
122 | 182 | }
|
123 | 183 | v.insert(result);
|
| 184 | + result |
124 | 185 | }
|
125 |
| - } |
| 186 | + }; |
126 | 187 |
|
127 | 188 | for dep in cx.unit_deps(unit) {
|
128 |
| - calculate(cx, map, &dep.unit, lto_for_deps)?; |
| 189 | + calculate(cx, map, &dep.unit, merged_lto)?; |
129 | 190 | }
|
130 | 191 | Ok(())
|
131 | 192 | }
|
0 commit comments