Description
Disclaimer: I don't know much about the RISC-V ISA, so this may just be my poor
understanding.
Some things I observed while working with the hifive1 board.
NOTE: wherever the (compilation) target is not specified,
riscv32imac-unknown-none-elf
was used
jalr
instead of jal
All direct function calls seem to use some sort of relocatable form. Example
below:
#[no_mangle]
unsafe fn _start() -> ! {
foo();
loop {}
}
#[inline(never)]
#[no_mangle]
unsafe fn foo() {
asm!("" : : : "memory" : "volatile")
}
$ cargo objdump --bin app --release -- -C -d -no-show-raw-insn
0000000000011000 _start:
11000: addi sp, sp, -16
11002: sw ra, 12(sp)
11004: sw s0, 8(sp)
11006: addi s0, sp, 16
11008: auipc ra, 0
1100c: jalr ra, ra, 10
11010: j 0
0000000000011012 foo:
11012: addi sp, sp, -16
11014: sw ra, 12(sp)
11016: sw s0, 8(sp)
11018: addi s0, sp, 16
1101a: lw s0, 8(sp)
1101c: lw ra, 12(sp)
1101e: addi sp, sp, 16
11020: ret
That auipc
, jalr
combination looks like relocatable code to me (not that I
know much about relocatable code). I was expecting to see jal
function calls
of this form:
global_asm!(r#"
.global _start
_start:
addi sp, sp, -16
sw ra, 12(sp)
sw s0, 8(sp)
addi s0, sp, 16
jal foo
j 0
"#);
#[inline(never)]
#[no_mangle]
unsafe fn foo() {
asm!("" : : : "memory" : "volatile")
}
$ cargo objdump --bin app --release -- -C -d -no-show-raw-insn
0000000000011000 _start:
11000: addi sp, sp, -16
11002: sw ra, 12(sp)
11004: sw s0, 8(sp)
11006: addi s0, sp, 16
11008: jal 6
1100c: j 0
000000000001100e foo:
1100e: addi sp, sp, -16
11010: sw ra, 12(sp)
11012: sw s0, 8(sp)
11014: addi s0, sp, 16
11016: lw s0, 8(sp)
11018: lw ra, 12(sp)
1101a: addi sp, sp, 16
1101c: ret
I expected the shorter form because the target specification says relocation-model: "static"
:
$ rustc -Z unstable-options --print target-spec-json --target riscv32imac-unknown-none-elf
(..)
"relocation-model": "static",
(..)
Using a single jal instead of the auipc + jalr pair would save 4 bytes of .text
per function call.
atomic::compiler_fence
produces an instruction
This program shows that atomic::compiler_fence
produces a fence
instruction
#[no_mangle]
unsafe fn _start() -> ! {
atomic::compiler_fence(Ordering::SeqCst);
loop {}
}
$ cargo objdump --bin app --release -- -C -d -no-show-raw-insn
0000000000011000 _start:
11000: addi sp, sp, -16
11002: sw ra, 12(sp)
11004: sw s0, 8(sp)
11006: addi s0, sp, 16
11008: fence rw, rw
1100c: j 0
This happens even though the documentation of the function states that "compiler_fence
does
not emit any machine code".
As it is, both atomic::fence
and atomic::compiler_fence
generate the same
machine code.
#[no_mangle]
unsafe fn _start() -> ! {
atomic::fence(Ordering::SeqCst);
loop {}
}
$ cargo objdump --bin app --release -- -C -d -no-show-raw-insn
0000000000011000 _start:
11000: addi sp, sp, -16
11002: sw ra, 12(sp)
11004: sw s0, 8(sp)
11006: addi s0, sp, 16
11008: fence rw, rw
1100c: j 0
You don't see this behavior with the ARM Cortex-M backend:
#[no_mangle]
unsafe fn _start() -> ! {
atomic::compiler_fence(Ordering::SeqCst);
loop {}
}
$ cargo objdump --target thumbv7m-none-eabi --bin app --release -- -C -d -no-show-raw-insn
0000000000011000 _start:
11000: b #-4 <_start>
intrinsics::abort
!= UNIMP
Calling intrinsics::abort
produces a function call to the abort
symbol even
though the UNIMP
instruction exists.
#[no_mangle]
unsafe fn _start() -> ! {
core::intrinsics::abort()
}
global_asm!(r#"
.global abort
abort:
UNIMP
"#);
$ cargo objdump --bin app --release -- -C -d -no-show-raw-insn
0000000000011000 abort:
11000: unimp
0000000000011002 _start:
11002: addi sp, sp, -16
11004: sw ra, 12(sp)
11006: sw s0, 8(sp)
11008: addi s0, sp, 16
1100a: auipc ra, 0
1100e: jalr ra, ra, -10
11012: auipc ra, 0
11016: jalr ra, ra, -18
I was actually expecting something like this to be generated:
global_asm!(r#"
.global _start
_start:
UNIMP
"#);
$ cargo objdump --bin app --release -- -C -d -no-show-raw-insn
0000000000011000 _start:
11000: unimp
noreturn nounwind
& divergent functions
The riscv32imac-unknown-none-elf
target uses panic-strategy: "abort"
; that
tells the backend that functions never unwind so divergent functions fn() -> !
, which never return and are marked as noreturn nounwind
in LLVM IR, should
not need to preserve the callee-saved registers. However, one observes register
stacking in the following program:
#[no_mangle]
unsafe fn _start() -> ! {
// (RISC-V has so many registers)
asm!("" : :
"r"(0) "r"(1) "r"(2) "r"(3) "r"(4) "r"(5) "r"(6) "r"(7) "r"(8) "r"(9)
"r"(10) "r"(11) "r"(12) "r"(13) "r"(14) "r"(15) "r"(16)
: : "volatile");
loop {}
}
$ cargo objdump --bin app --release -- -C -d -no-show-raw-insn
0000000000011000 _start:
11000: addi sp, sp, -16
11002: sw ra, 12(sp)
11004: sw s0, 8(sp)
11006: sw s1, 4(sp)
11008: addi s0, sp, 16
1100a: addi a6, zero, 1
1100c: addi a7, zero, 2
1100e: addi t0, zero, 3
11010: addi t1, zero, 4
11012: addi t2, zero, 5
11014: addi t3, zero, 6
11016: addi t4, zero, 7
11018: addi t5, zero, 8
1101a: addi t6, zero, 9
1101c: addi a3, zero, 10
1101e: addi a4, zero, 11
11020: addi a5, zero, 12
11022: addi a0, zero, 13
11024: addi a1, zero, 14
11026: addi a2, zero, 15
11028: addi s1, zero, 16
1102a: j 0
s0
and s1
are pushed onto the stack but never popped. Is that required by
the ISA / C ABI?
(off-topic: why is ra
also being pushed onto the stack when _start
performs
no function call?)
Compare the previous program to this ARM Cortex-M program:
#[no_mangle]
unsafe fn _start() -> ! {
asm!("" : :
"r"(0) "r"(1) "r"(2) "r"(3) "r"(4) "r"(5) "r"(6) "r"(7) "r"(8) "r"(9)
"r"(10) "r"(11) "r"(12) "r"(13)
: : "volatile");
loop {}
}
$ cargo objdump --target thumbv7m-none-eabi --bin app --release -- -C -d -no-show-raw-insn
0000000000011000 _start:
11000: mov.w r12, #0
11004: mov.w lr, #1
11008: mov.w r8, #2
1100c: mov.w r9, #3
11010: mov.w r10, #4
11014: mov.w r11, #5
11018: movs r4, #6
1101a: movs r5, #7
1101c: movs r6, #8
1101e: movs r7, #9
11020: movs r2, #10
11022: movs r3, #11
11024: movs r0, #12
11026: movs r1, #13
11028: b #-4 <_start+0x28>
Registers are never pushed onto the stack.
Compare that machine code to the machine code generated for a non-divergent
function:
#[no_mangle]
unsafe fn _start() {
asm!("" : :
"r"(0) "r"(1) "r"(2) "r"(3) "r"(4) "r"(5) "r"(6) "r"(7) "r"(8) "r"(9)
"r"(10) "r"(11) "r"(12) "r"(13)
: : "volatile");
}
$ cargo objdump --target thumbv7m-none-eabi --bin app --release -- -C -d -no-show-raw-insn
0000000000011000 _start:
11000: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
11004: mov.w r12, #0
11008: mov.w lr, #1
1100c: mov.w r8, #2
11010: mov.w r9, #3
11014: mov.w r10, #4
11018: mov.w r11, #5
1101c: movs r4, #6
1101e: movs r5, #7
11020: movs r6, #8
11022: movs r7, #9
11024: movs r2, #10
11026: movs r3, #11
11028: movs r0, #12
1102a: movs r1, #13
1102c: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
Registers are pushed in the prologue and then popped in the epilogue of the
function.
It would be good to check if these backend issues have been fixed in the latest
version of LLVM (using llc
) because rustc
is using a several-months-old
LLVM. If a recent commit shows the same issues then we may want to submit bug
reports to the LLVM project.
Metadata
$ rustc -Vv
rustc 1.37.0-nightly (04a3dd8a8 2019-06-18)
binary: rustc
commit-hash: 04a3dd8a872633ca1e4c217d11f741cc35cb19a5
commit-date: 2019-06-18
host: x86_64-unknown-linux-gnu
release: 1.37.0-nightly
LLVM version: 8.0
cc @rust-embedded/riscv @Disasm