Skip to content

LIBM - sin, cos, ln, exp and friends. #126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
177 changes: 177 additions & 0 deletions crates/std_float/benches/bench_libm.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#![feature(test)]
#![feature(portable_simd)]
#![feature(concat_idents)]

extern crate test;
use std_float::StdLibm;

use test::{black_box, Bencher};

use core_simd::{f32x16, f32x4, f64x4, f64x8};

/// Number of scalar elements in each benchmark buffer; the SIMD `init_*`
/// helpers divide it by their vector's lane count (e.g. `N / 4` for `f32x4`).
const N: usize = 1024;

/// Builds a buffer of `N / 4` four-lane `f32` vectors with every lane set to 0.5.
///
/// The fill value is passed through `black_box` before being splatted.
fn init_f32x4() -> Vec<f32x4> {
    let fill = f32x4::splat(black_box(0.5));
    let len = N / 4;
    vec![fill; len]
}

/// Builds a buffer of `N / 16` sixteen-lane `f32` vectors with every lane set to 0.5.
///
/// The fill value is passed through `black_box` before being splatted.
fn init_f32x16() -> Vec<f32x16> {
    let fill = f32x16::splat(black_box(0.5));
    let len = N / 16;
    vec![fill; len]
}

/// Builds a buffer of `N` scalar `f32` values, each equal to 0.5.
///
/// The fill value is passed through `black_box` before the buffer is built.
fn init_f32() -> Vec<f32> {
    let fill: f32 = black_box(0.5);
    vec![fill; N]
}

/// Builds a buffer of `N / 4` four-lane `f64` vectors with every lane set to 0.5.
///
/// The fill value is passed through `black_box` before being splatted.
fn init_f64x4() -> Vec<f64x4> {
    let fill = f64x4::splat(black_box(0.5));
    let len = N / 4;
    vec![fill; len]
}

/// Builds a buffer of `N / 8` eight-lane `f64` vectors with every lane set to 0.5.
///
/// The fill value is passed through `black_box` before being splatted.
fn init_f64x8() -> Vec<f64x8> {
    let fill = f64x8::splat(black_box(0.5));
    let len = N / 8;
    vec![fill; len]
}

/// Builds a buffer of `N` scalar `f64` values, each equal to 0.5.
///
/// Uses the same 0.5 fill value as the other `init_*` helpers so that the
/// scalar `f64` benchmarks run on the same input as the `f64x4`/`f64x8`
/// ones. The value is passed through `black_box` before the buffer is built.
fn init_f64() -> Vec<f64> {
    let fill: f64 = black_box(0.5);
    vec![fill; N]
}

// These functions are not inlined to make it easier to check the asm.
//
// Build with:
//
// RUSTFLAGS="-C target-cpu=native --emit asm" cargo bench

/// Generates one `#[bench]` function per `name, closure, initialiser` entry.
///
/// For each entry the generated function evaluates the initialiser
/// expression twice (once for the input buffer, once for the output buffer)
/// and, on every benchmark iteration, applies the closure to each input
/// element and stores the result in the matching output element.
macro_rules! benchmark_libm {
    (
        functions ($(
            $names : ident,
            $functions : expr,
            $init : expr
        )*)
    ) => {

        $(
            #[bench]
            #[inline(never)]
            fn $names(b: &mut Bencher) {
                let x = $init;
                let mut y = $init;
                b.iter(|| {
                    // Hide the input buffer from the optimiser on every
                    // iteration so the loop-invariant computation cannot be
                    // hoisted out of the timed closure.
                    for (x, y) in black_box(&x).iter().zip(y.iter_mut()) {
                        *y = ($functions)(*x);
                    }
                    // Treat the output buffer as observed so the stores
                    // (and the math producing them) are not dead code.
                    black_box(&mut y);
                })
            }
        )*
    }
}

// `sin` benchmarks for the SIMD vector types and the scalar `f32`/`f64` types.
benchmark_libm! {
    functions (
        sin_f32x4,  |v: f32x4|  v.sin(), init_f32x4()
        sin_f32x16, |v: f32x16| v.sin(), init_f32x16()
        sin_f32,    |v: f32|    v.sin(), init_f32()
        sin_f64x4,  |v: f64x4|  v.sin(), init_f64x4()
        sin_f64x8,  |v: f64x8|  v.sin(), init_f64x8()
        sin_f64,    |v: f64|    v.sin(), init_f64()
    )
}

// `cos` benchmarks for the SIMD vector types and the scalar `f32`/`f64` types.
benchmark_libm! {
    functions (
        cos_f32x4,  |v: f32x4|  v.cos(), init_f32x4()
        cos_f32x16, |v: f32x16| v.cos(), init_f32x16()
        cos_f32,    |v: f32|    v.cos(), init_f32()
        cos_f64x4,  |v: f64x4|  v.cos(), init_f64x4()
        cos_f64x8,  |v: f64x8|  v.cos(), init_f64x8()
        cos_f64,    |v: f64|    v.cos(), init_f64()
    )
}

// `tan` benchmarks for the SIMD vector types and the scalar `f32`/`f64` types.
benchmark_libm! {
    functions (
        tan_f32x4,  |v: f32x4|  v.tan(), init_f32x4()
        tan_f32x16, |v: f32x16| v.tan(), init_f32x16()
        tan_f32,    |v: f32|    v.tan(), init_f32()
        tan_f64x4,  |v: f64x4|  v.tan(), init_f64x4()
        tan_f64x8,  |v: f64x8|  v.tan(), init_f64x8()
        tan_f64,    |v: f64|    v.tan(), init_f64()
    )
}

// `asin` benchmarks for the SIMD vector types and the scalar `f32`/`f64` types.
benchmark_libm! {
    functions (
        asin_f32x4,  |v: f32x4|  v.asin(), init_f32x4()
        asin_f32x16, |v: f32x16| v.asin(), init_f32x16()
        asin_f32,    |v: f32|    v.asin(), init_f32()
        asin_f64x4,  |v: f64x4|  v.asin(), init_f64x4()
        asin_f64x8,  |v: f64x8|  v.asin(), init_f64x8()
        asin_f64,    |v: f64|    v.asin(), init_f64()
    )
}

// `acos` benchmarks for the SIMD vector types and the scalar `f32`/`f64` types.
benchmark_libm! {
    functions (
        acos_f32x4,  |v: f32x4|  v.acos(), init_f32x4()
        acos_f32x16, |v: f32x16| v.acos(), init_f32x16()
        acos_f32,    |v: f32|    v.acos(), init_f32()
        acos_f64x4,  |v: f64x4|  v.acos(), init_f64x4()
        acos_f64x8,  |v: f64x8|  v.acos(), init_f64x8()
        acos_f64,    |v: f64|    v.acos(), init_f64()
    )
}

// `atan` benchmarks for the SIMD vector types and the scalar `f32`/`f64` types.
benchmark_libm! {
    functions (
        atan_f32x4,  |v: f32x4|  v.atan(), init_f32x4()
        atan_f32x16, |v: f32x16| v.atan(), init_f32x16()
        atan_f32,    |v: f32|    v.atan(), init_f32()
        atan_f64x4,  |v: f64x4|  v.atan(), init_f64x4()
        atan_f64x8,  |v: f64x8|  v.atan(), init_f64x8()
        atan_f64,    |v: f64|    v.atan(), init_f64()
    )
}

// `exp2` benchmarks for the SIMD vector types and the scalar `f32`/`f64` types.
benchmark_libm! {
    functions (
        exp2_f32x4,  |v: f32x4|  v.exp2(), init_f32x4()
        exp2_f32x16, |v: f32x16| v.exp2(), init_f32x16()
        exp2_f32,    |v: f32|    v.exp2(), init_f32()
        exp2_f64x4,  |v: f64x4|  v.exp2(), init_f64x4()
        exp2_f64x8,  |v: f64x8|  v.exp2(), init_f64x8()
        exp2_f64,    |v: f64|    v.exp2(), init_f64()
    )
}

// `exp` benchmarks for the SIMD vector types and the scalar `f32`/`f64` types.
benchmark_libm! {
    functions (
        exp_f32x4,  |v: f32x4|  v.exp(), init_f32x4()
        exp_f32x16, |v: f32x16| v.exp(), init_f32x16()
        exp_f32,    |v: f32|    v.exp(), init_f32()
        exp_f64x4,  |v: f64x4|  v.exp(), init_f64x4()
        exp_f64x8,  |v: f64x8|  v.exp(), init_f64x8()
        exp_f64,    |v: f64|    v.exp(), init_f64()
    )
}

// `log2` benchmarks for the SIMD vector types and the scalar `f32`/`f64` types.
benchmark_libm! {
    functions (
        log2_f32x4,  |v: f32x4|  v.log2(), init_f32x4()
        log2_f32x16, |v: f32x16| v.log2(), init_f32x16()
        log2_f32,    |v: f32|    v.log2(), init_f32()
        log2_f64x4,  |v: f64x4|  v.log2(), init_f64x4()
        log2_f64x8,  |v: f64x8|  v.log2(), init_f64x8()
        log2_f64,    |v: f64|    v.log2(), init_f64()
    )
}

// `ln` benchmarks for the SIMD vector types and the scalar `f32`/`f64` types.
benchmark_libm! {
    functions (
        ln_f32x4,  |v: f32x4|  v.ln(), init_f32x4()
        ln_f32x16, |v: f32x16| v.ln(), init_f32x16()
        ln_f32,    |v: f32|    v.ln(), init_f32()
        ln_f64x4,  |v: f64x4|  v.ln(), init_f64x4()
        ln_f64x8,  |v: f64x8|  v.ln(), init_f64x8()
        ln_f64,    |v: f64|    v.ln(), init_f64()
    )
}
103 changes: 103 additions & 0 deletions crates/std_float/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ use core_simd::simd;

use simd::{LaneCount, Simd, SupportedLaneCount};

mod libm32;
mod libm64;

#[cfg(test)]
mod test_libm;

#[cfg(feature = "as_crate")]
mod experimental {
pub trait Sealed {}
Expand Down Expand Up @@ -117,6 +123,102 @@ pub trait StdFloat: Sealed + Sized {
fn fract(self) -> Self;
}

/// Elementary math functions (trigonometric, exponential, logarithmic,
/// power, hyperbolic and root functions) for types that implement
/// [`StdFloat`].
///
/// Every method takes `self` by value and returns `Self`.
pub trait StdLibm: StdFloat {
/// Signed integer type with the same number of bits as this floating point type.
///
/// Used as the exponent type of [`StdLibm::powi`].
type IntType;

/// Unsigned integer type with the same number of bits as this floating point type.
type UintType;

/// Computes the sine of a number (in radians).
fn sin(self) -> Self;

/// Computes the cosine of a number (in radians).
fn cos(self) -> Self;

/// Computes the tangent of a number (in radians).
fn tan(self) -> Self;

/// Computes the arcsine of a number. Return value is in radians in
/// the range `[-pi/2, pi/2]` or NaN if the number is outside the range
/// `[-1, 1]`.
fn asin(self) -> Self;

/// Computes the arccosine of a number. Return value is in radians in
/// the range `[0, pi]` or NaN if the number is outside the range
/// `[-1, 1]`.
fn acos(self) -> Self;

/// Computes the arctangent of a number. Return value is in radians in the
/// range `[-pi/2, pi/2]`.
fn atan(self) -> Self;

/// Computes the four quadrant arctangent of `self` (`y`) and `x` in radians.
///
/// * `x = 0`, `y = 0`: `0`
/// * `x >= 0`: `arctan(y/x)` -> `[-pi/2, pi/2]`
/// * `y >= 0`: `arctan(y/x) + pi` -> `(pi/2, pi]`
/// * `y < 0`: `arctan(y/x) - pi` -> `(-pi, -pi/2)`
fn atan2(self, x: Self) -> Self;

/// Returns `2^(self)`.
fn exp2(self) -> Self;

/// Returns `e^(self)`, (the exponential function).
fn exp(self) -> Self;

/// Returns `e^(self) - 1` in a way that is accurate even if the
/// number is close to zero.
fn exp_m1(self) -> Self;

/// Returns the base 2 logarithm of the number.
fn log2(self) -> Self;

/// Returns `ln(1 + self)` (natural logarithm) more accurately than if
/// the operations were performed separately.
fn ln_1p(self) -> Self;

/// Returns the natural logarithm of the number.
fn ln(self) -> Self;

/// Returns the base 10 logarithm of the number.
fn log10(self) -> Self;

/// Returns the logarithm of the number with respect to an arbitrary base.
fn log(self, base: Self) -> Self;

/// Raises a number to a floating point power.
fn powf(self, y: Self) -> Self;

/// Raises a number to an integer power.
fn powi(self, y: Self::IntType) -> Self;

/// Hyperbolic sine function.
fn sinh(self) -> Self;

/// Hyperbolic cosine function.
fn cosh(self) -> Self;

/// Hyperbolic tangent function.
fn tanh(self) -> Self;

/// Inverse hyperbolic sine function.
fn asinh(self) -> Self;

/// Inverse hyperbolic cosine function.
fn acosh(self) -> Self;

/// Inverse hyperbolic tangent function.
fn atanh(self) -> Self;

/// Returns the cube root of a number.
fn cbrt(self) -> Self;

/// Calculates the length of the hypotenuse of a right-angle triangle given
/// legs of length `self` and `other`.
fn hypot(self, other: Self) -> Self;
}

// Implement `Sealed` (a supertrait of `StdFloat`) for `f32` and `f64` SIMD
// vectors of every supported lane count.
impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}

Expand Down Expand Up @@ -161,5 +263,6 @@ mod tests {
let _xfma = x.mul_add(x, x);
let _xsqrt = x.sqrt();
let _ = x2.abs() * x2;
let _ = x.sin();
}
}
Loading