Skip to content

Commit e9e1b3d

Browse files
committed
Auto merge of #3122 - RalfJung:intrptrcast-clean, r=saethlin
intptrcast: remove information about dead allocations The discussion in #3103 convinced me we don't have to keep `int_to_ptr_map` around for dead allocations. But we should not make that depend on the GC; instead, we can simply tie it to the moment the allocation gets freed. That means everything still behaves deterministically if anything weird happens (but it shouldn't). r? `@saethlin` Only the first and last commit contain logic changes; the 2nd commit just moves code around a bit.
2 parents 05cd36c + f2cb752 commit e9e1b3d

File tree

3 files changed

+129
-104
lines changed

3 files changed

+129
-104
lines changed

src/intptrcast.rs

+123-98
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,10 @@ pub type GlobalState = RefCell<GlobalStateInner>;
2626

2727
#[derive(Clone, Debug)]
2828
pub struct GlobalStateInner {
29-
/// This is used as a map between the address of each allocation and its `AllocId`.
30-
/// It is always sorted
29+
/// This is used as a map between the address of each allocation and its `AllocId`. It is always
30+
/// sorted. We cannot use a `HashMap` since we can be given an address that is offset from the
31+
/// base address, and we need to find the `AllocId` it belongs to.
32+
/// This is not the *full* inverse of `base_addr`; dead allocations have been removed.
3133
int_to_ptr_map: Vec<(u64, AllocId)>,
3234
/// The base address for each allocation. We cannot put that into
3335
/// `AllocExtra` because function pointers also have a base address, and
@@ -62,10 +64,21 @@ impl GlobalStateInner {
6264
}
6365
}
6466

65-
impl<'mir, 'tcx> GlobalStateInner {
67+
/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
68+
/// of `align` that is larger or equal to `addr`
69+
fn align_addr(addr: u64, align: u64) -> u64 {
70+
match addr % align {
71+
0 => addr,
72+
rem => addr.checked_add(align).unwrap() - rem,
73+
}
74+
}
75+
76+
impl<'mir, 'tcx: 'mir> EvalContextExtPriv<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
77+
trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
6678
// Returns the exposed `AllocId` that corresponds to the specified addr,
6779
// or `None` if the addr is out of bounds
68-
fn alloc_id_from_addr(ecx: &MiriInterpCx<'mir, 'tcx>, addr: u64) -> Option<AllocId> {
80+
fn alloc_id_from_addr(&self, addr: u64) -> Option<AllocId> {
81+
let ecx = self.eval_context_ref();
6982
let global_state = ecx.machine.intptrcast.borrow();
7083
assert!(global_state.provenance_mode != ProvenanceMode::Strict);
7184

@@ -82,94 +95,40 @@ impl<'mir, 'tcx> GlobalStateInner {
8295
let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
8396
// This never overflows because `addr >= glb`
8497
let offset = addr - glb;
85-
// If the offset exceeds the size of the allocation, don't use this `alloc_id`.
98+
// We require this to be strict in-bounds of the allocation. This arm is only
99+
// entered for addresses that are not the base address, so even zero-sized
100+
// allocations will get recognized at their base address -- but all other
101+
// allocations will *not* be recognized at their "end" address.
86102
let size = ecx.get_alloc_info(alloc_id).0;
87-
if offset <= size.bytes() { Some(alloc_id) } else { None }
103+
if offset < size.bytes() { Some(alloc_id) } else { None }
88104
}
89105
}?;
90106

91-
// We only use this provenance if it has been exposed, *and* is still live.
107+
// We only use this provenance if it has been exposed.
92108
if global_state.exposed.contains(&alloc_id) {
93-
let (_size, _align, kind) = ecx.get_alloc_info(alloc_id);
94-
match kind {
95-
AllocKind::LiveData | AllocKind::Function | AllocKind::VTable => {
96-
return Some(alloc_id);
97-
}
98-
AllocKind::Dead => {}
99-
}
100-
}
101-
102-
None
103-
}
104-
105-
pub fn expose_ptr(
106-
ecx: &mut MiriInterpCx<'mir, 'tcx>,
107-
alloc_id: AllocId,
108-
tag: BorTag,
109-
) -> InterpResult<'tcx> {
110-
let global_state = ecx.machine.intptrcast.get_mut();
111-
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
112-
if global_state.provenance_mode != ProvenanceMode::Strict {
113-
trace!("Exposing allocation id {alloc_id:?}");
114-
global_state.exposed.insert(alloc_id);
115-
if ecx.machine.borrow_tracker.is_some() {
116-
ecx.expose_tag(alloc_id, tag)?;
117-
}
118-
}
119-
Ok(())
120-
}
121-
122-
pub fn ptr_from_addr_cast(
123-
ecx: &MiriInterpCx<'mir, 'tcx>,
124-
addr: u64,
125-
) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
126-
trace!("Casting {:#x} to a pointer", addr);
127-
128-
// Potentially emit a warning.
129-
let global_state = ecx.machine.intptrcast.borrow();
130-
match global_state.provenance_mode {
131-
ProvenanceMode::Default => {
132-
// The first time this happens at a particular location, print a warning.
133-
thread_local! {
134-
// `Span` is non-`Send`, so we use a thread-local instead.
135-
static PAST_WARNINGS: RefCell<FxHashSet<Span>> = RefCell::default();
136-
}
137-
PAST_WARNINGS.with_borrow_mut(|past_warnings| {
138-
let first = past_warnings.is_empty();
139-
if past_warnings.insert(ecx.cur_span()) {
140-
// Newly inserted, so first time we see this span.
141-
ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
142-
}
143-
});
144-
}
145-
ProvenanceMode::Strict => {
146-
throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
147-
}
148-
ProvenanceMode::Permissive => {}
109+
// This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
110+
debug_assert!(!matches!(ecx.get_alloc_info(alloc_id).2, AllocKind::Dead));
111+
Some(alloc_id)
112+
} else {
113+
None
149114
}
150-
151-
// We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
152-
// completely legal to do a cast and then `wrapping_offset` to another allocation and only
153-
// *then* do a memory access. So the allocation that the pointer happens to point to on a
154-
// cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
155-
// *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
156-
// allocation it might be referencing.
157-
Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
158115
}
159116

160-
fn alloc_base_addr(
161-
ecx: &MiriInterpCx<'mir, 'tcx>,
162-
alloc_id: AllocId,
163-
) -> InterpResult<'tcx, u64> {
117+
fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
118+
let ecx = self.eval_context_ref();
164119
let mut global_state = ecx.machine.intptrcast.borrow_mut();
165120
let global_state = &mut *global_state;
166121

167122
Ok(match global_state.base_addr.entry(alloc_id) {
168123
Entry::Occupied(entry) => *entry.get(),
169124
Entry::Vacant(entry) => {
170-
// There is nothing wrong with a raw pointer being cast to an integer only after
171-
// it became dangling. Hence we allow dead allocations.
172-
let (size, align, _kind) = ecx.get_alloc_info(alloc_id);
125+
let (size, align, kind) = ecx.get_alloc_info(alloc_id);
126+
// This is either called immediately after allocation (and then cached), or when
127+
// adjusting `tcx` pointers (which never get freed). So assert that we are looking
128+
// at a live allocation. This also ensures that we never re-assign an address to an
129+
// allocation that previously had an address, but then was freed and the address
130+
// information was removed.
131+
assert!(!matches!(kind, AllocKind::Dead));
173132

174133
// This allocation does not have a base address yet, pick one.
175134
// Leave some space to the previous allocation, to give it some chance to be less aligned.
@@ -183,7 +142,7 @@ impl<'mir, 'tcx> GlobalStateInner {
183142
.next_base_addr
184143
.checked_add(slack)
185144
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
186-
let base_addr = Self::align_addr(base_addr, align.bytes());
145+
let base_addr = align_addr(base_addr, align.bytes());
187146
entry.insert(base_addr);
188147
trace!(
189148
"Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
@@ -205,6 +164,7 @@ impl<'mir, 'tcx> GlobalStateInner {
205164
if global_state.next_base_addr > ecx.target_usize_max() {
206165
throw_exhaust!(AddressSpaceFull);
207166
}
167+
// Also maintain the opposite mapping in `int_to_ptr_map`.
208168
// Given that `next_base_addr` increases in each allocation, pushing the
209169
// corresponding tuple keeps `int_to_ptr_map` sorted
210170
global_state.int_to_ptr_map.push((base_addr, alloc_id));
@@ -213,15 +173,71 @@ impl<'mir, 'tcx> GlobalStateInner {
213173
}
214174
})
215175
}
176+
}
177+
178+
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
179+
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
180+
fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
181+
let ecx = self.eval_context_mut();
182+
let global_state = ecx.machine.intptrcast.get_mut();
183+
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
184+
if global_state.provenance_mode != ProvenanceMode::Strict {
185+
trace!("Exposing allocation id {alloc_id:?}");
186+
global_state.exposed.insert(alloc_id);
187+
if ecx.machine.borrow_tracker.is_some() {
188+
ecx.expose_tag(alloc_id, tag)?;
189+
}
190+
}
191+
Ok(())
192+
}
193+
194+
fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
195+
trace!("Casting {:#x} to a pointer", addr);
196+
197+
let ecx = self.eval_context_ref();
198+
let global_state = ecx.machine.intptrcast.borrow();
199+
200+
// Potentially emit a warning.
201+
match global_state.provenance_mode {
202+
ProvenanceMode::Default => {
203+
// The first time this happens at a particular location, print a warning.
204+
thread_local! {
205+
// `Span` is non-`Send`, so we use a thread-local instead.
206+
static PAST_WARNINGS: RefCell<FxHashSet<Span>> = RefCell::default();
207+
}
208+
PAST_WARNINGS.with_borrow_mut(|past_warnings| {
209+
let first = past_warnings.is_empty();
210+
if past_warnings.insert(ecx.cur_span()) {
211+
// Newly inserted, so first time we see this span.
212+
ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
213+
}
214+
});
215+
}
216+
ProvenanceMode::Strict => {
217+
throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
218+
}
219+
ProvenanceMode::Permissive => {}
220+
}
221+
222+
// We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
223+
// completely legal to do a cast and then `wrapping_offset` to another allocation and only
224+
// *then* do a memory access. So the allocation that the pointer happens to point to on a
225+
// cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
226+
// *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
227+
// allocation it might be referencing.
228+
Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
229+
}
216230

217231
/// Convert a relative (tcx) pointer to a Miri pointer.
218-
pub fn ptr_from_rel_ptr(
219-
ecx: &MiriInterpCx<'mir, 'tcx>,
232+
fn ptr_from_rel_ptr(
233+
&self,
220234
ptr: Pointer<AllocId>,
221235
tag: BorTag,
222236
) -> InterpResult<'tcx, Pointer<Provenance>> {
237+
let ecx = self.eval_context_ref();
238+
223239
let (alloc_id, offset) = ptr.into_parts(); // offset is relative (AllocId provenance)
224-
let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id)?;
240+
let base_addr = ecx.addr_from_alloc_id(alloc_id)?;
225241

226242
// Add offset with the right kind of pointer-overflowing arithmetic.
227243
let dl = ecx.data_layout();
@@ -231,22 +247,21 @@ impl<'mir, 'tcx> GlobalStateInner {
231247

232248
/// When a pointer is used for a memory access, this computes where in which allocation the
233249
/// access is going.
234-
pub fn ptr_get_alloc(
235-
ecx: &MiriInterpCx<'mir, 'tcx>,
236-
ptr: Pointer<Provenance>,
237-
) -> Option<(AllocId, Size)> {
250+
fn ptr_get_alloc(&self, ptr: Pointer<Provenance>) -> Option<(AllocId, Size)> {
251+
let ecx = self.eval_context_ref();
252+
238253
let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance)
239254

240255
let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
241256
alloc_id
242257
} else {
243258
// A wildcard pointer.
244-
GlobalStateInner::alloc_id_from_addr(ecx, addr.bytes())?
259+
ecx.alloc_id_from_addr(addr.bytes())?
245260
};
246261

247262
// This cannot fail: since we already have a pointer with that provenance, rel_ptr_to_addr
248-
// must have been called in the past.
249-
let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id).unwrap();
263+
// must have been called in the past, so we can just look up the address in the map.
264+
let base_addr = ecx.addr_from_alloc_id(alloc_id).unwrap();
250265

251266
// Wrapping "addr - base_addr"
252267
#[allow(clippy::cast_possible_wrap)] // we want to wrap here
@@ -256,14 +271,24 @@ impl<'mir, 'tcx> GlobalStateInner {
256271
Size::from_bytes(ecx.overflowing_signed_offset(addr.bytes(), neg_base_addr).0),
257272
))
258273
}
274+
}
259275

260-
/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
261-
/// of `align` that is larger or equal to `addr`
262-
fn align_addr(addr: u64, align: u64) -> u64 {
263-
match addr % align {
264-
0 => addr,
265-
rem => addr.checked_add(align).unwrap() - rem,
266-
}
276+
impl GlobalStateInner {
277+
pub fn free_alloc_id(&mut self, dead_id: AllocId) {
278+
// We can *not* remove this from `base_addr`, since the interpreter design requires that we
279+
// be able to retrieve an AllocId + offset for any memory access *before* we check if the
280+
// access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
281+
// access to determine the allocation ID and offset -- and there can still be pointers with
282+
// `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return
283+
// `None` only if the pointer truly has no provenance (this ensures consistent error
284+
// messages).
285+
// However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
286+
// can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
287+
// returns a dead allocation.
288+
self.int_to_ptr_map.retain(|&(_, id)| id != dead_id);
289+
// We can also remove it from `exposed`, since this allocation can anyway not be returned by
290+
// `alloc_id_from_addr` any more.
291+
self.exposed.remove(&dead_id);
267292
}
268293
}
269294

@@ -273,7 +298,7 @@ mod tests {
273298

274299
#[test]
275300
fn test_align_addr() {
276-
assert_eq!(GlobalStateInner::align_addr(37, 4), 40);
277-
assert_eq!(GlobalStateInner::align_addr(44, 4), 44);
301+
assert_eq!(align_addr(37, 4), 40);
302+
assert_eq!(align_addr(44, 4), 44);
278303
}
279304
}

src/lib.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ pub use crate::eval::{
117117
create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith,
118118
};
119119
pub use crate::helpers::EvalContextExt as _;
120-
pub use crate::intptrcast::ProvenanceMode;
120+
pub use crate::intptrcast::{EvalContextExt as _, ProvenanceMode};
121121
pub use crate::machine::{
122122
AllocExtra, FrameExtra, MiriInterpCx, MiriInterpCxExt, MiriMachine, MiriMemoryKind,
123123
PrimitiveLayouts, Provenance, ProvenanceExtra,

src/machine.rs

+5-5
Original file line number | Diff line number | Diff line change
@@ -1149,7 +1149,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
11491149
// Value does not matter, SB is disabled
11501150
BorTag::default()
11511151
};
1152-
intptrcast::GlobalStateInner::ptr_from_rel_ptr(ecx, ptr, tag)
1152+
ecx.ptr_from_rel_ptr(ptr, tag)
11531153
}
11541154

11551155
/// Called on `usize as ptr` casts.
@@ -1158,7 +1158,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
11581158
ecx: &MiriInterpCx<'mir, 'tcx>,
11591159
addr: u64,
11601160
) -> InterpResult<'tcx, Pointer<Option<Self::Provenance>>> {
1161-
intptrcast::GlobalStateInner::ptr_from_addr_cast(ecx, addr)
1161+
ecx.ptr_from_addr_cast(addr)
11621162
}
11631163

11641164
/// Called on `ptr as usize` casts.
@@ -1169,8 +1169,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
11691169
ptr: Pointer<Self::Provenance>,
11701170
) -> InterpResult<'tcx> {
11711171
match ptr.provenance {
1172-
Provenance::Concrete { alloc_id, tag } =>
1173-
intptrcast::GlobalStateInner::expose_ptr(ecx, alloc_id, tag),
1172+
Provenance::Concrete { alloc_id, tag } => ecx.expose_ptr(alloc_id, tag),
11741173
Provenance::Wildcard => {
11751174
// No need to do anything for wildcard pointers as
11761175
// their provenances have already been previously exposed.
@@ -1191,7 +1190,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
11911190
ecx: &MiriInterpCx<'mir, 'tcx>,
11921191
ptr: Pointer<Self::Provenance>,
11931192
) -> Option<(AllocId, Size, Self::ProvenanceExtra)> {
1194-
let rel = intptrcast::GlobalStateInner::ptr_get_alloc(ecx, ptr);
1193+
let rel = ecx.ptr_get_alloc(ptr);
11951194

11961195
rel.map(|(alloc_id, size)| {
11971196
let tag = match ptr.provenance {
@@ -1263,6 +1262,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
12631262
{
12641263
*deallocated_at = Some(machine.current_span());
12651264
}
1265+
machine.intptrcast.get_mut().free_alloc_id(alloc_id);
12661266
Ok(())
12671267
}
12681268

0 commit comments

Comments
 (0)