Skip to content

riscv: P extension intrinsics for packed SIMD (part 1) #1332

New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Merged
merged 2 commits into from
Sep 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion crates/core_arch/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
doc_cfg,
tbm_target_feature,
sse4a_target_feature,
riscv_target_feature,
arm_target_feature,
cmpxchg16b_target_feature,
avx512_target_feature,
Expand All @@ -31,7 +32,8 @@
allow_internal_unstable,
decl_macro,
bench_black_box,
asm_const
asm_const,
target_feature_11
)]
#![cfg_attr(test, feature(test, abi_vectorcall))]
#![deny(clippy::missing_inline_in_public_items)]
Expand Down
7 changes: 3 additions & 4 deletions crates/core_arch/src/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
#[macro_use]
mod macros;

#[cfg(any(target_arch = "riscv32", target_arch = "riscv64", doc))]
mod riscv_shared;

#[cfg(any(target_arch = "arm", target_arch = "aarch64", doc))]
mod arm_shared;

Expand Down Expand Up @@ -276,10 +279,6 @@ mod aarch64;
#[doc(cfg(any(target_arch = "arm")))]
mod arm;

#[cfg(any(target_arch = "riscv32", target_arch = "riscv64", doc))]
#[doc(cfg(any(target_arch = "riscv32", target_arch = "riscv64")))]
mod riscv_shared;

#[cfg(any(target_arch = "riscv64", doc))]
#[doc(cfg(any(target_arch = "riscv64")))]
mod riscv64;
Expand Down
83 changes: 36 additions & 47 deletions crates/core_arch/src/riscv_shared/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
//! Shared RISC-V intrinsics
mod p;

pub use p::*;

use crate::arch::asm;

Expand Down Expand Up @@ -469,6 +472,17 @@ pub unsafe fn hinval_gvma_vmid(vmid: usize) {
asm!(".insn r 0x73, 0, 0x33, x0, x0, {}", in(reg) vmid, options(nostack))
}

/// Invalidate hypervisor translation cache for all virtual machines and guest physical addresses
///
/// This instruction invalidates any address-translation cache entries that an
/// `HFENCE.GVMA` instruction with the same values of `gaddr` and `vmid` would invalidate.
///
/// This fence specifies all guest physical addresses and all virtual machines.
#[inline]
pub unsafe fn hinval_gvma_all() {
asm!(".insn r 0x73, 0, 0x33, x0, x0, x0", options(nostack))
}

/// Reads the floating-point control and status register `fcsr`
///
/// Register `fcsr` is a 32-bit read/write register that selects the dynamic rounding mode
Expand Down Expand Up @@ -574,17 +588,6 @@ pub fn fsflags(value: u32) -> u32 {
original
}

/// Invalidate hypervisor translation cache for all virtual machines and guest physical addresses
///
/// This instruction invalidates any address-translation cache entries that an
/// `HFENCE.GVMA` instruction with the same values of `gaddr` and `vmid` would invalidate.
///
/// This fence specifies all guest physical addresses and all virtual machines.
#[inline]
pub unsafe fn hinval_gvma_all() {
asm!(".insn r 0x73, 0, 0x33, x0, x0, x0", options(nostack))
}

/// `P0` transformation function as is used in the SM3 hash algorithm
///
/// This function is included in `Zksh` extension. It's defined as:
Expand All @@ -602,12 +605,10 @@ pub unsafe fn hinval_gvma_all() {
/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
/// this instruction must always be independent from the data it operates on.
#[inline]
#[target_feature(enable = "zksh")]
pub fn sm3p0(x: u32) -> u32 {
let ans: u32;
unsafe {
// asm!("sm3p0 {}, {}", out(reg) ans, in(reg) x, options(nomem, nostack))
asm!(".insn i 0x13, 0x1, {}, {}, 0x108", out(reg) ans, in(reg) x, options(nomem, nostack))
};
unsafe { asm!("sm3p0 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) };
ans
}

Expand All @@ -634,12 +635,10 @@ pub fn sm3p0(x: u32) -> u32 {
/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
/// this instruction must always be independent from the data it operates on.
#[inline]
#[target_feature(enable = "zksh")]
pub fn sm3p1(x: u32) -> u32 {
let ans: u32;
unsafe {
// asm!("sm3p1 {}, {}", out(reg) ans, in(reg) x, options(nomem, nostack))
asm!(".insn i 0x13, 0x1, {}, {}, 0x109", out(reg) ans, in(reg) x, options(nomem, nostack))
};
unsafe { asm!("sm3p1 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) };
ans
}

Expand Down Expand Up @@ -674,33 +673,28 @@ pub fn sm3p1(x: u32) -> u32 {
/// It can be implemented by `sm4ed` instruction like:
///
/// ```no_run
/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 {
/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed;
/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed;
/// let a = x1 ^ x2 ^ x3 ^ rk;
/// let c0 = sm4ed::<0>(x0, a);
/// let c1 = sm4ed::<1>(c0, a); // c1 represents c[0..=1], etc.
/// let c2 = sm4ed::<2>(c1, a);
/// let c3 = sm4ed::<3>(c2, a);
/// return c3; // c3 represents c[0..=3]
/// # }
/// ```
///
/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
/// this instruction must always be independent from the data it operates on.
#[inline]
#[target_feature(enable = "zksed")]
pub fn sm4ed<const BS: u8>(x: u32, a: u32) -> u32 {
static_assert!(BS: u8 where BS <= 3);
let ans: u32;
match BS {
0 => unsafe {
asm!(".insn r 0x33, 0, 0x18, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack))
},
1 => unsafe {
asm!(".insn r 0x33, 0, 0x38, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack))
},
2 => unsafe {
asm!(".insn r 0x33, 0, 0x58, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack))
},
3 => unsafe {
asm!(".insn r 0x33, 0, 0x78, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack))
},
_ => unreachable!(),
unsafe {
asm!("sm4ed {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) a, const BS, options(pure, nomem, nostack))
};
ans
}
Expand Down Expand Up @@ -739,33 +733,28 @@ pub fn sm4ed<const BS: u8>(x: u32, a: u32) -> u32 {
/// Hence, the key schedule operation can be implemented by `sm4ks` instruction like:
///
/// ```no_run
/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
/// # fn key_schedule(k0: u32, k1: u32, k2: u32, k3: u32, ck_i: u32) -> u32 {
/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ks;
/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ks;
/// let k = k1 ^ k2 ^ k3 ^ ck_i;
/// let c0 = sm4ks::<0>(k0, k);
/// let c1 = sm4ks::<1>(c0, k); // c1 represents c[0..=1], etc.
/// let c2 = sm4ks::<2>(c1, k);
/// let c3 = sm4ks::<3>(c2, k);
/// return c3; // c3 represents c[0..=3]
/// # }
/// ```
///
/// According to RISC-V Cryptography Extensions, Volume I, the execution latency of
/// this instruction must always be independent from the data it operates on.
#[inline]
#[target_feature(enable = "zksed")]
pub fn sm4ks<const BS: u8>(x: u32, k: u32) -> u32 {
static_assert!(BS: u8 where BS <= 3);
let ans: u32;
match BS {
0 => unsafe {
asm!(".insn r 0x33, 0, 0x1A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack))
},
1 => unsafe {
asm!(".insn r 0x33, 0, 0x3A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack))
},
2 => unsafe {
asm!(".insn r 0x33, 0, 0x5A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack))
},
3 => unsafe {
asm!(".insn r 0x33, 0, 0x7A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack))
},
_ => unreachable!(),
unsafe {
asm!("sm4ks {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) k, const BS, options(pure, nomem, nostack))
};
ans
}
Loading