Skip to content

Commit

Permalink
Finish removing the BigInts from * for FD{Int128}!
Browse files Browse the repository at this point in the history
Finally implements the fast-multiplication optimization from
#45, but this
time for 128-bit FixedDecimals! :)

This is a follow-up to
#93, which
introduces an Int256 type for widemul. However, the fldmod still
required 2 BigInt allocations.

Now, this PR uses a custom implementation of the LLVM div-by-const
optimization for (U)Int256, which briefly widens to Int512 (😅) to
perform the fldmod by the constant 10^f coefficient.

This brings 128-bit FD multiply to the same performance as 64-bit. :)
  • Loading branch information
NHDaly committed Jun 13, 2024
1 parent a245651 commit 4cbd64a
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 14 deletions.
24 changes: 11 additions & 13 deletions src/FixedPointDecimals.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ export checked_abs, checked_add, checked_cld, checked_div, checked_fld,

using Base: decompose, BitInteger

import BitIntegers # For 128-bit _widemul / _widen
using BitIntegers: BitIntegers, UInt256, Int256
import Parsers

include("fldmod-by-const.jl")

# Floating-point types that support `fma` (fused multiply-add) and are roughly IEEE-like,
# collected into a single Union so they can be referred to by one name.
const FMAFloat = Union{Float16, Float32, Float64, BigFloat}

Expand Down Expand Up @@ -129,8 +131,10 @@ _widemul(x::Unsigned,y::Signed) = signed(_widen(x)) * _widen(y)

# Custom widen implementation to avoid the cost of widening to BigInt.
# FD{Int128} operations should widen to 256 bits internally, rather than to a BigInt.
# Each signature is defined exactly once; repeating a signature would overwrite the
# earlier method.
_widen(::Type{Int128}) = Int256
_widen(::Type{UInt128}) = UInt256
# The 256-bit types widen once more to 512 bits, again staying clear of BigInt.
_widen(::Type{Int256}) = BitIntegers.Int512
_widen(::Type{UInt256}) = BitIntegers.UInt512
# Every other type falls back to Base's `widen`.
_widen(t::Type) = widen(t)
# Value form: convert `x` to the widened counterpart of its own type.
_widen(x::T) where {T} = (_widen(T))(x)

Expand Down Expand Up @@ -196,18 +200,12 @@ function _round_to_nearest(quotient::T,
end
# Fallback for arguments of differing types: promote the quotient, remainder and divisor
# with `promote` and call `_round_to_nearest` again with the promoted values.
# The rounding mode `m` defaults to `RoundNearest` and is passed through unchanged.
function _round_to_nearest(q, r, d, m=RoundNearest)
    promoted = promote(q, r, d)
    return _round_to_nearest(promoted..., m)
end

# Many callers pass a constant divisor `y` (the coefficient, 10^f). When `fldmod` is not
# inlined, the optimizer cannot see that constant and has to emit a real division.
# This always-inlined variant lets the compiler replace the expensive
# divide-by-power-of-ten with the cheaper multiply-by-inverse-coefficient.
#
# Returns the tuple `(fld(x, y), mod(x, y))`.
@inline function fldmodinline(x, y)
    floored = fld(x, y)
    rest = mod(x, y)
    return (floored, rest)
end

# multiplication rounds to nearest even representation
# TODO: can we use floating point to speed this up? after we build a
# correctness test suite.
#
# Steps:
#   1. `_widemul` multiplies the raw integer fields `x.i` and `y.i` in a wider type.
#   2. `fldmod_by_const` splits that product into quotient and remainder with respect to
#      the coefficient `powt`, which is passed as `Val(powt)` so the divisor is a
#      compile-time constant.
#   3. `_round_to_nearest` turns quotient/remainder into the rounded integer, which is
#      reinterpreted as an `FD{T, f}`.
# The quotient/remainder pair is computed once only; a preceding `fldmodinline` call
# whose result was immediately overwritten has been dropped.
function Base.:*(x::FD{T, f}, y::FD{T, f}) where {T, f}
    powt = coefficient(FD{T, f})
    quotient, remainder = fldmod_by_const(_widemul(x.i, y.i), Val(powt))
    return reinterpret(FD{T, f}, _round_to_nearest(quotient, remainder, powt))
end

Expand All @@ -234,12 +232,12 @@ function Base.round(x::FD{T, f},
RoundingMode{:NearestTiesUp},
RoundingMode{:NearestTiesAway}}=RoundNearest) where {T, f}
powt = coefficient(FD{T, f})
quotient, remainder = fldmodinline(x.i, powt)
quotient, remainder = fldmod_by_const(x.i, Val(powt))
FD{T, f}(_round_to_nearest(quotient, remainder, powt, m))
end
function Base.ceil(x::FD{T, f}) where {T, f}
powt = coefficient(FD{T, f})
quotient, remainder = fldmodinline(x.i, powt)
quotient, remainder = fldmod_by_const(x.i, Val(powt))
if remainder > 0
FD{T, f}(quotient + one(quotient))
else
Expand Down Expand Up @@ -435,7 +433,7 @@ function Base.checked_sub(x::T, y::T) where {T<:FD}
end
function Base.checked_mul(x::FD{T,f}, y::FD{T,f}) where {T<:Integer,f}
powt = coefficient(FD{T, f})
quotient, remainder = fldmodinline(_widemul(x.i, y.i), powt)
quotient, remainder = fldmod_by_const(_widemul(x.i, y.i), Val(powt))
v = _round_to_nearest(quotient, remainder, powt)
typemin(T) <= v <= typemax(T) || Base.Checked.throw_overflowerr_binaryop(:*, x, y)
return reinterpret(FD{T, f}, T(v))
Expand Down
7 changes: 6 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,9 @@ include(joinpath(pkg_path, "test", "utils.jl"))

# Main FixedDecimal test suite.
@testset "FixedPointDecimals" begin
    include("FixedDecimal.jl")
end

# Tests for the fldmod-by-constant implementation.
@testset "FixedPointDecimals" begin
    include("fldmod-by-const_tests.jl")
end

0 comments on commit 4cbd64a

Please sign in to comment.