Skip to content

Commit f0003fd

Browse files
committed
Export kernel descriptor for amdgpu kernels
The host runtime (HIP or HSA) expects a kernel descriptor object for each kernel in the ELF file. The amdgpu LLVM backend generates the object. It is created as a symbol with the name of the kernel plus a `.kd` suffix. Add it to the exported symbols in the linker script, so that it can be found.
1 parent cd805f0 commit f0003fd

File tree

4 files changed

+92
-32
lines changed

4 files changed

+92
-32
lines changed

compiler/rustc_codegen_ssa/src/back/linker.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1776,6 +1776,7 @@ fn exported_symbols_for_non_proc_macro(tcx: TyCtxt<'_>, crate_type: CrateType) -
17761776
symbols.push(symbol_export::exporting_symbol_name_for_instance_in_crate(
17771777
tcx, symbol, cnum,
17781778
));
1779+
symbol_export::extend_exported_symbols(&mut symbols, tcx, symbol, cnum);
17791780
}
17801781
});
17811782

compiler/rustc_codegen_ssa/src/back/symbol_export.rs

+60-32
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@ use rustc_middle::middle::exported_symbols::{
1010
ExportedSymbol, SymbolExportInfo, SymbolExportKind, SymbolExportLevel, metadata_symbol_name,
1111
};
1212
use rustc_middle::query::LocalCrate;
13-
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, TyCtxt};
13+
use rustc_middle::ty::{self, GenericArgKind, GenericArgsRef, Instance, SymbolName, Ty, TyCtxt};
1414
use rustc_middle::util::Providers;
1515
use rustc_session::config::{CrateType, OomStrategy};
16+
use rustc_target::callconv::Conv;
1617
use rustc_target::spec::{SanitizerSet, TlsModel};
1718
use tracing::debug;
1819

@@ -551,6 +552,42 @@ pub(crate) fn symbol_name_for_instance_in_crate<'tcx>(
551552
}
552553
}
553554

555+
/// Returns the calling convention and the argument ABIs of an exported symbol.
///
/// For non-static `NonGeneric` and `Generic` symbols an `Instance` is built and its
/// function ABI is queried via `fn_abi_of_instance`; a failing query is reported with `bug!`.
/// For every other symbol kind (statics, drop glue, async drop glue constructor shims,
/// symbols without a `DefId`, thread-local shims) no instance is looked up and the result
/// falls back to `(Conv::Rust, &[])`.
fn calling_convention_for_symbol<'tcx>(
556+
tcx: TyCtxt<'tcx>,
557+
symbol: ExportedSymbol<'tcx>,
558+
) -> (Conv, &'tcx [rustc_target::callconv::ArgAbi<'tcx, Ty<'tcx>>]) {
559+
let instance = match symbol {
560+
// Statics get no instance and therefore take the fallback at the end of the function.
ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
561+
if tcx.is_static(def_id) =>
562+
{
563+
None
564+
}
565+
ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
566+
ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
567+
// DropGlue always uses the Rust calling convention and thus follows the target's default
568+
// symbol decoration scheme.
569+
ExportedSymbol::DropGlue(..) => None,
570+
// AsyncDropGlueCtorShim always uses the Rust calling convention and thus follows the
571+
// target's default symbol decoration scheme.
572+
ExportedSymbol::AsyncDropGlueCtorShim(..) => None,
573+
// NoDefId always follows the target's default symbol decoration scheme.
574+
ExportedSymbol::NoDefId(..) => None,
575+
// ThreadLocalShim always follows the target's default symbol decoration scheme.
576+
ExportedSymbol::ThreadLocalShim(..) => None,
577+
};
578+
579+
instance
580+
.map(|i| {
581+
tcx.fn_abi_of_instance(
582+
ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())),
583+
)
584+
.unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed"))
585+
})
586+
// Keep only the convention and a borrowed slice of the argument ABIs.
.map(|fnabi| (fnabi.conv, &fnabi.args[..]))
587+
// FIXME(workingjubilee): why don't we know the convention here?
588+
// Symbols without an instance are reported as Rust-convention with no arguments.
.unwrap_or((Conv::Rust, &[]))
589+
}
590+
554591
/// This is the symbol name of the given instance as seen by the linker.
555592
///
556593
/// On 32-bit Windows symbols are decorated according to their calling conventions.
@@ -559,8 +596,6 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>(
559596
symbol: ExportedSymbol<'tcx>,
560597
instantiating_crate: CrateNum,
561598
) -> String {
562-
use rustc_target::callconv::Conv;
563-
564599
let mut undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate);
565600

566601
// thread local will not be a function call,
@@ -584,35 +619,7 @@ pub(crate) fn linking_symbol_name_for_instance_in_crate<'tcx>(
584619
_ => return undecorated,
585620
};
586621

587-
let instance = match symbol {
588-
ExportedSymbol::NonGeneric(def_id) | ExportedSymbol::Generic(def_id, _)
589-
if tcx.is_static(def_id) =>
590-
{
591-
None
592-
}
593-
ExportedSymbol::NonGeneric(def_id) => Some(Instance::mono(tcx, def_id)),
594-
ExportedSymbol::Generic(def_id, args) => Some(Instance::new(def_id, args)),
595-
// DropGlue always use the Rust calling convention and thus follow the target's default
596-
// symbol decoration scheme.
597-
ExportedSymbol::DropGlue(..) => None,
598-
// AsyncDropGlueCtorShim always use the Rust calling convention and thus follow the
599-
// target's default symbol decoration scheme.
600-
ExportedSymbol::AsyncDropGlueCtorShim(..) => None,
601-
// NoDefId always follow the target's default symbol decoration scheme.
602-
ExportedSymbol::NoDefId(..) => None,
603-
// ThreadLocalShim always follow the target's default symbol decoration scheme.
604-
ExportedSymbol::ThreadLocalShim(..) => None,
605-
};
606-
607-
let (conv, args) = instance
608-
.map(|i| {
609-
tcx.fn_abi_of_instance(
610-
ty::TypingEnv::fully_monomorphized().as_query_input((i, ty::List::empty())),
611-
)
612-
.unwrap_or_else(|_| bug!("fn_abi_of_instance({i:?}) failed"))
613-
})
614-
.map(|fnabi| (fnabi.conv, &fnabi.args[..]))
615-
.unwrap_or((Conv::Rust, &[]));
622+
let (conv, args) = calling_convention_for_symbol(tcx, symbol);
616623

617624
// Decorate symbols with prefixes, suffixes and total number of bytes of arguments.
618625
// Reference: https://docs.microsoft.com/en-us/cpp/build/reference/decorated-names?view=msvc-170
@@ -644,6 +651,27 @@ pub(crate) fn exporting_symbol_name_for_instance_in_crate<'tcx>(
644651
maybe_emutls_symbol_name(tcx, symbol, &undecorated).unwrap_or(undecorated)
645652
}
646653

654+
/// On amdhsa, `gpu-kernel` functions have an associated metadata object with a `.kd` suffix.
655+
/// Add it to the symbols list for all kernel functions, so that it is exported in the linked
656+
/// object.
///
/// `symbols` is only appended to, and only when `symbol` uses the `GpuKernel` calling
/// convention and the session's target OS is `"amdhsa"`; otherwise it is left untouched.
/// The pushed name is the symbol name of `symbol` in `instantiating_crate` followed by `.kd`.
657+
pub(crate) fn extend_exported_symbols<'tcx>(
658+
symbols: &mut Vec<String>,
659+
tcx: TyCtxt<'tcx>,
660+
symbol: ExportedSymbol<'tcx>,
661+
instantiating_crate: CrateNum,
662+
) {
663+
// Only the calling convention matters here; the argument ABIs are ignored.
let (conv, _) = calling_convention_for_symbol(tcx, symbol);
664+
665+
// Nothing to add unless this is a GPU kernel on an amdhsa target.
if conv != Conv::GpuKernel || tcx.sess.target.os != "amdhsa" {
666+
return;
667+
}
668+
669+
let undecorated = symbol_name_for_instance_in_crate(tcx, symbol, instantiating_crate);
670+
671+
// Add the symbol for the kernel descriptor (with .kd suffix)
672+
symbols.push(format!("{undecorated}.kd"));
673+
}
674+
647675
fn maybe_emutls_symbol_name<'tcx>(
648676
tcx: TyCtxt<'tcx>,
649677
symbol: ExportedSymbol<'tcx>,

tests/run-make/amdgpu-kd/foo.rs

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#![allow(internal_features)]
2+
#![feature(no_core, lang_items, abi_gpu_kernel)]
3+
#![no_core]
4+
#![no_std]
5+
6+
// This is needed because of #![no_core]: with no core library in scope, the `sized` lang
// item has to be declared by this crate itself.
7+
#[lang = "sized"]
8+
trait Sized {}
9+
10+
// Empty function using the `gpu-kernel` ABI. It is left unmangled so that its symbol is
// literally `kernel`; the accompanying run-make test looks for `kernel.kd` in the output.
#[no_mangle]
11+
extern "gpu-kernel" fn kernel() {}

tests/run-make/amdgpu-kd/rmake.rs

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// On the amdhsa OS, the host runtime (HIP or HSA) expects a kernel descriptor object for each
2+
// kernel in the ELF file. The amdgpu LLVM backend generates the object. It is created as a symbol
3+
// with the name of the kernel plus a .kd suffix.
4+
// Check that the produced object has the .kd symbol exported.
5+
6+
//@ needs-llvm-components: amdgpu
7+
//@ needs-rust-lld
8+
9+
use run_make_support::{llvm_readobj, rustc};
10+
11+
// Compiles `foo.rs` for the amdgcn target and checks that the kernel descriptor symbol
// shows up in the symbol table of the produced file.
fn main() {
12+
// Build the test crate as a cdylib for `amdgcn-amd-amdhsa`, targeting the gfx900 CPU.
rustc()
13+
.crate_name("foo")
14+
.target("amdgcn-amd-amdhsa")
15+
.arg("-Ctarget-cpu=gfx900")
16+
.crate_type("cdylib")
17+
.input("foo.rs")
18+
.run();
19+
// Dump the symbols of `foo.elf` (presumably the cdylib emitted above -- confirm the output
// name for this target) and require that `kernel.kd` appears in the listing.
llvm_readobj().input("foo.elf").symbols().run().assert_stdout_contains("kernel.kd");
20+
}

0 commit comments

Comments
 (0)