Skip to content

Commit

Permalink
Align the start of stackslots instead of the end
Browse files Browse the repository at this point in the history
The alignment for stackslots was commputed wrongly, because the
end of the slot was being aligned instead of the beginning.
This effectively meant that the _next_ stackslot was being aligned
on the alignment of the current slot.
  • Loading branch information
tertsdiepraam committed Sep 19, 2024
1 parent fed24ee commit 5d7b1dc
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 20 deletions.
58 changes: 42 additions & 16 deletions cranelift/codegen/src/machinst/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1152,42 +1152,59 @@ impl<M: ABIMachineSpec> Callee<M> {
);

// Compute sized stackslot locations and total stackslot size.
let mut sized_stack_offset: u32 = 0;
let mut end_offset: u32 = 0;
let mut sized_stackslots = PrimaryMap::new();

for (stackslot, data) in f.sized_stack_slots.iter() {
let off = sized_stack_offset;
sized_stack_offset = sized_stack_offset
.checked_add(data.size)
.ok_or(CodegenError::ImplLimitExceeded)?;
// Always at least machine-word-align slots, but also
// We start our computation possibly unaligned where the previous
// stackslot left off.
let unaligned_start_offset = end_offset;

// The start of the stackslot must be aligned.
//
// We always at least machine-word-align slots, but also
// satisfy the user's requested alignment.
debug_assert!(data.align_shift < 32);
let align = std::cmp::max(M::word_bytes(), 1u32 << data.align_shift);
let mask = align - 1;
sized_stack_offset = checked_round_up(sized_stack_offset, mask)
let start_offset = checked_round_up(unaligned_start_offset, mask)
.ok_or(CodegenError::ImplLimitExceeded)?;

// The end offset is the the start offset increased by the size
end_offset = start_offset
.checked_add(data.size)
.ok_or(CodegenError::ImplLimitExceeded)?;

debug_assert_eq!(stackslot.as_u32() as usize, sized_stackslots.len());
sized_stackslots.push(off);
sized_stackslots.push(start_offset);
}

// Compute dynamic stackslot locations and total stackslot size.
let mut dynamic_stackslots = PrimaryMap::new();
let mut dynamic_stack_offset: u32 = sized_stack_offset;
for (stackslot, data) in f.dynamic_stack_slots.iter() {
debug_assert_eq!(stackslot.as_u32() as usize, dynamic_stackslots.len());
let off = dynamic_stack_offset;

// This computation is similar to the stackslots above
let unaligned_start_offset = end_offset;

let mask = M::word_bytes() - 1;
let start_offset = checked_round_up(unaligned_start_offset, mask)
.ok_or(CodegenError::ImplLimitExceeded)?;

let ty = f.get_concrete_dynamic_ty(data.dyn_ty).ok_or_else(|| {
CodegenError::Unsupported(format!("invalid dynamic vector type: {}", data.dyn_ty))
})?;
dynamic_stack_offset = dynamic_stack_offset

end_offset = start_offset
.checked_add(isa.dynamic_vector_bytes(ty))
.ok_or(CodegenError::ImplLimitExceeded)?;
let mask = M::word_bytes() - 1;
dynamic_stack_offset = checked_round_up(dynamic_stack_offset, mask)
.ok_or(CodegenError::ImplLimitExceeded)?;
dynamic_stackslots.push(off);

dynamic_stackslots.push(start_offset);
}
let stackslots_size = dynamic_stack_offset;

// The size of the stackslots needs to be word aligned
let stackslots_size = checked_round_up(end_offset, M::word_bytes() - 1)
.ok_or(CodegenError::ImplLimitExceeded)?;

let mut dynamic_type_sizes = HashMap::with_capacity(f.dfg.dynamic_types.len());
for (dyn_ty, _data) in f.dfg.dynamic_types.iter() {
Expand Down Expand Up @@ -1731,6 +1748,15 @@ impl<M: ABIMachineSpec> Callee<M> {
let bytes = M::word_bytes();
let total_stacksize = self.stackslots_size + bytes * spillslots as u32;
let mask = M::stack_align(self.call_conv) - 1;
if total_stacksize.checked_add(mask).is_none() {
dbg!(
total_stacksize,
mask,
!mask,
self.stackslots_size,
spillslots
);
}
let total_stacksize = (total_stacksize + mask) & !mask; // 16-align the stack.
self.frame_layout = Some(M::compute_frame_layout(
self.call_conv,
Expand Down
8 changes: 4 additions & 4 deletions cranelift/filetests/filetests/isa/x64/stackslot.clif
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ block0(v0: i64):
; block0:
; lea rsp(0 + virtual offset), %rax
; lea rsp(16 + virtual offset), %rdx
; lea rsp(32 + virtual offset), %r8
; lea rsp(40 + virtual offset), %r9
; lea rsp(24 + virtual offset), %r8
; lea rsp(32 + virtual offset), %r9
; movq %r8, 0(%rdi)
; movq %r9, 8(%rdi)
; addq %rsp, $48, %rsp
Expand All @@ -49,8 +49,8 @@ block0(v0: i64):
; block1: ; offset 0x18
; leaq (%rsp), %rax
; leaq 0x10(%rsp), %rdx
; leaq 0x20(%rsp), %r8
; leaq 0x28(%rsp), %r9
; leaq 0x18(%rsp), %r8
; leaq 0x20(%rsp), %r9
; movq %r8, (%rdi)
; movq %r9, 8(%rdi)
; addq $0x30, %rsp
Expand Down
48 changes: 48 additions & 0 deletions cranelift/filetests/filetests/isa/x64/stackslot_alignment.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
test compile precise-output
target x86_64

function %f0(i64 vmctx) -> i64, i64 {
gv0 = vmctx
stack_limit = gv0
ss0 = explicit_slot 8
ss1 = explicit_slot 32, align=16

block0(v0: i64):
v1 = stack_addr.i64 ss0
v2 = stack_addr.i64 ss1
return v1, v2
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; movq %rdi, %r10
; addq %r10, $48, %r10
; cmpq %rsp, %r10
; jnbe #trap=stk_ovf
; subq %rsp, $48, %rsp
; block0:
; lea rsp(0 + virtual offset), %rax
; lea rsp(16 + virtual offset), %rdx
; addq %rsp, $48, %rsp
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; movq %rdi, %r10
; addq $0x30, %r10
; cmpq %rsp, %r10
; ja 0x2a
; subq $0x30, %rsp
; block1: ; offset 0x18
; leaq (%rsp), %rax
; leaq 0x10(%rsp), %rdx
; addq $0x30, %rsp
; movq %rbp, %rsp
; popq %rbp
; retq
; ud2 ; trap: stk_ovf

0 comments on commit 5d7b1dc

Please # to comment.