Skip to content

Commit

Permalink
ARM DSP: Halving parallel add/sub and multiply add/sub (#535)
Browse files Browse the repository at this point in the history
* ARM DSP: Add signed halving parallel sub.

Add:

- `shsub8`: Signed halving parallel byte-wise subtraction.
- `shsub16`: Signed halving parallel halfword-wise subtraction.

* ARM DSP: Signed halving parallel additions.

- `shadd8`: Signed halving parallel byte-wise add.
- `shadd16`: Signed halving parallel halfword-wise add.

* ARM DSP: Signed Dual Multiply Add and Signed Dual Multiply Sub.

- `SMUAD`: Signed Dual Multiply Add.
- `SMUADX`: Signed Dual Multiply Add Reversed.
- `SMUSD`: Signed Dual Multiply Subtract.
- `SMUSDX`: Signed Dual Multiply Subtract Reversed.

* ARM DSP: Restrict to Cortex-A and Cortex-R

Restrict everything to Cortex-A/R until we find a better way to manage
thumb* targets.

Add 'dox' to generate docs.

* ARM DSP: fix Markdown documentation

Quote '[' and ']' where they are not part of the Markdown syntax.
  • Loading branch information
paoloteti authored and alexcrichton committed Jul 23, 2018
1 parent bd6254f commit 2760409
Show file tree
Hide file tree
Showing 2 changed files with 214 additions and 3 deletions.
213 changes: 212 additions & 1 deletion coresimd/arm/dsp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,32 @@ extern "C" {
#[link_name = "llvm.arm.sasx"]
fn arm_sasx(a: i32, b: i32) -> i32;

#[cfg_attr(not(target_feature = "mclass"), link_name = "llvm.arm.sel")]
#[link_name = "llvm.arm.sel"]
fn arm_sel(a: i32, b: i32) -> i32;

#[link_name = "llvm.arm.shadd8"]
fn arm_shadd8(a: i32, b: i32) -> i32;

#[link_name = "llvm.arm.shadd16"]
fn arm_shadd16(a: i32, b: i32) -> i32;

#[link_name = "llvm.arm.shsub8"]
fn arm_shsub8(a: i32, b: i32) -> i32;

#[link_name = "llvm.arm.shsub16"]
fn arm_shsub16(a: i32, b: i32) -> i32;

#[link_name = "llvm.arm.smuad"]
fn arm_smuad(a: i32, b: i32) -> i32;

#[link_name = "llvm.arm.smuadx"]
fn arm_smuadx(a: i32, b: i32) -> i32;

#[link_name = "llvm.arm.smusd"]
fn arm_smusd(a: i32, b: i32) -> i32;

#[link_name = "llvm.arm.smusdx"]
fn arm_smusdx(a: i32, b: i32) -> i32;
}

/// Signed saturating addition
Expand Down Expand Up @@ -201,6 +225,109 @@ pub unsafe fn sel(a: int8x4_t, b: int8x4_t) -> int8x4_t {
dsp_call!(arm_sel, a, b)
}

/// Signed halving parallel byte-wise addition.
///
/// Treats each operand as four packed signed bytes and returns the 8-bit
/// signed equivalent of
///
/// res\[0\] = (a\[0\] + b\[0\]) / 2
/// res\[1\] = (a\[1\] + b\[1\]) / 2
/// res\[2\] = (a\[2\] + b\[2\]) / 2
/// res\[3\] = (a\[3\] + b\[3\]) / 2
///
/// Because each sum is halved before being written back, the result always
/// fits in 8 bits — no saturation is needed.
#[inline]
#[cfg_attr(test, assert_instr(shadd8))]
pub unsafe fn shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
// dsp_call! transmutes the packed vectors to/from the i32 the LLVM
// intrinsic expects.
dsp_call!(arm_shadd8, a, b)
}

/// Signed halving parallel halfword-wise addition.
///
/// Treats each operand as two packed signed halfwords and returns the
/// 16-bit signed equivalent of
///
/// res\[0\] = (a\[0\] + b\[0\]) / 2
/// res\[1\] = (a\[1\] + b\[1\]) / 2
///
/// Because each sum is halved before being written back, the result always
/// fits in 16 bits — no saturation is needed.
#[inline]
#[cfg_attr(test, assert_instr(shadd16))]
pub unsafe fn shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
// dsp_call! transmutes the packed vectors to/from the i32 the LLVM
// intrinsic expects.
dsp_call!(arm_shadd16, a, b)
}

/// Signed halving parallel byte-wise subtraction.
///
/// Treats each operand as four packed signed bytes and returns the 8-bit
/// signed equivalent of
///
/// res\[0\] = (a\[0\] - b\[0\]) / 2
/// res\[1\] = (a\[1\] - b\[1\]) / 2
/// res\[2\] = (a\[2\] - b\[2\]) / 2
/// res\[3\] = (a\[3\] - b\[3\]) / 2
///
/// Because each difference is halved before being written back, the result
/// always fits in 8 bits — no saturation is needed.
#[inline]
#[cfg_attr(test, assert_instr(shsub8))]
pub unsafe fn shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t {
// dsp_call! transmutes the packed vectors to/from the i32 the LLVM
// intrinsic expects.
dsp_call!(arm_shsub8, a, b)
}

/// Signed halving parallel halfword-wise subtraction.
///
/// Treats each operand as two packed signed halfwords and returns the
/// 16-bit signed equivalent of
///
/// res\[0\] = (a\[0\] - b\[0\]) / 2
/// res\[1\] = (a\[1\] - b\[1\]) / 2
///
/// Because each difference is halved before being written back, the result
/// always fits in 16 bits — no saturation is needed.
#[inline]
#[cfg_attr(test, assert_instr(shsub16))]
pub unsafe fn shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t {
// dsp_call! transmutes the packed vectors to/from the i32 the LLVM
// intrinsic expects.
dsp_call!(arm_shsub16, a, b)
}

/// Signed Dual Multiply Add.
///
/// Returns the equivalent of
///
/// res = a\[0\] * b\[0\] + a\[1\] * b\[1\]
///
/// and sets the Q flag if overflow occurs on the addition.
#[inline] // was missing; every other intrinsic wrapper in this file is #[inline]
#[cfg_attr(test, assert_instr(smuad))]
pub unsafe fn smuad(a: int16x2_t, b: int16x2_t) -> i32 {
// The intrinsic takes plain i32s holding the packed halfwords.
arm_smuad(::mem::transmute(a), ::mem::transmute(b))
}

/// Signed Dual Multiply Add Reversed.
///
/// Returns the equivalent of
///
/// res = a\[0\] * b\[1\] + a\[1\] * b\[0\]
///
/// and sets the Q flag if overflow occurs on the addition.
#[inline]
#[cfg_attr(test, assert_instr(smuadx))]
pub unsafe fn smuadx(a: int16x2_t, b: int16x2_t) -> i32 {
// Reinterpret each packed halfword pair as the i32 the intrinsic expects.
let lhs: i32 = ::mem::transmute(a);
let rhs: i32 = ::mem::transmute(b);
arm_smuadx(lhs, rhs)
}

/// Signed Dual Multiply Subtract.
///
/// Returns the equivalent of
///
/// res = a\[0\] * b\[0\] - a\[1\] * b\[1\]
///
/// The Q flag is not affected: the difference of two products of 16-bit
/// signed values always fits in 32 bits, so the subtraction cannot
/// overflow. (The previous doc claimed Q was set "on the addition" — a
/// copy-paste from the SMUAD docs.)
#[inline]
#[cfg_attr(test, assert_instr(smusd))]
pub unsafe fn smusd(a: int16x2_t, b: int16x2_t) -> i32 {
// The intrinsic takes plain i32s holding the packed halfwords.
arm_smusd(::mem::transmute(a), ::mem::transmute(b))
}

/// Signed Dual Multiply Subtract Reversed.
///
/// Returns the equivalent of
///
/// res = a\[0\] * b\[1\] - a\[1\] * b\[0\]
///
/// The Q flag is not affected: the difference of two products of 16-bit
/// signed values always fits in 32 bits, so the subtraction cannot
/// overflow. (The previous doc claimed Q was set "on the addition" — a
/// copy-paste from the SMUAD docs.)
#[inline]
#[cfg_attr(test, assert_instr(smusdx))]
pub unsafe fn smusdx(a: int16x2_t, b: int16x2_t) -> i32 {
// The intrinsic takes plain i32s holding the packed halfwords.
arm_smusdx(::mem::transmute(a), ::mem::transmute(b))
}

#[cfg(test)]
mod tests {
use coresimd::arm::*;
Expand Down Expand Up @@ -337,4 +464,88 @@ mod tests {
assert_eq!(r, c);
}
}

#[test]
fn shadd8() {
    unsafe {
        let x = i8x4::new(1, 2, 3, 4);
        let y = i8x4::new(5, 4, 3, 2);
        // Every lane sums to 6, so each halved result is 3.
        let expected = i8x4::new(3, 3, 3, 3);
        let result: i8x4 = dsp_call!(dsp::shadd8, x, y);
        assert_eq!(result, expected);
    }
}

#[test]
fn shadd16() {
    unsafe {
        let x = i16x2::new(1, 2);
        let y = i16x2::new(5, 4);
        // Both lanes sum to 6, so each halved result is 3.
        let expected = i16x2::new(3, 3);
        let result: i16x2 = dsp_call!(dsp::shadd16, x, y);
        assert_eq!(result, expected);
    }
}

#[test]
fn shsub8() {
    unsafe {
        let x = i8x4::new(1, 2, 3, 4);
        let y = i8x4::new(5, 4, 3, 2);
        // Lane-wise halved differences: (1-5)/2, (2-4)/2, (3-3)/2, (4-2)/2.
        let expected = i8x4::new(-2, -1, 0, 1);
        let result: i8x4 = dsp_call!(dsp::shsub8, x, y);
        assert_eq!(result, expected);
    }
}

#[test]
fn shsub16() {
    unsafe {
        let x = i16x2::new(1, 2);
        let y = i16x2::new(5, 4);
        // Lane-wise halved differences: (1-5)/2 and (2-4)/2.
        let expected = i16x2::new(-2, -1);
        let result: i16x2 = dsp_call!(dsp::shsub16, x, y);
        assert_eq!(result, expected);
    }
}

#[test]
fn smuad() {
    unsafe {
        let x = i16x2::new(1, 2);
        let y = i16x2::new(5, 4);
        // 1*5 + 2*4 == 13
        let result = dsp::smuad(::mem::transmute(x), ::mem::transmute(y));
        assert_eq!(result, 13);
    }
}

#[test]
fn smuadx() {
    unsafe {
        let x = i16x2::new(1, 2);
        let y = i16x2::new(5, 4);
        // Cross products: 1*4 + 2*5 == 14
        let result = dsp::smuadx(::mem::transmute(x), ::mem::transmute(y));
        assert_eq!(result, 14);
    }
}

#[test]
fn smusd() {
    unsafe {
        let x = i16x2::new(1, 2);
        let y = i16x2::new(5, 4);
        // 1*5 - 2*4 == -3
        let result = dsp::smusd(::mem::transmute(x), ::mem::transmute(y));
        assert_eq!(result, -3);
    }
}

#[test]
fn smusdx() {
    unsafe {
        let x = i16x2::new(1, 2);
        let y = i16x2::new(5, 4);
        // Cross products: 1*4 - 2*5 == -6
        let result = dsp::smusdx(::mem::transmute(x), ::mem::transmute(y));
        assert_eq!(result, -6);
    }
}
}
4 changes: 2 additions & 2 deletions coresimd/arm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ mod v7;
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
pub use self::v7::*;

#[cfg(all(target_arch = "arm", target_feature = "v7"))]
#[cfg(all(target_feature = "v7", not(target_feature = "mclass")))]
mod dsp;
#[cfg(all(target_arch = "arm", target_feature = "v7"))]
#[cfg(all(target_feature = "v7", not(target_feature = "mclass")))]
pub use self::dsp::*;

// NEON is supported on AArch64, and on ARM when built with the v7 and neon
Expand Down

0 comments on commit 2760409

Please # to comment.