diff --git a/Project.toml b/Project.toml index 26c8db88..e7382d34 100644 --- a/Project.toml +++ b/Project.toml @@ -18,7 +18,6 @@ MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" MappedArrays = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900" NamedTupleTools = "d9ec5142-1e00-5aa0-9d6a-321866360f50" NestedTuples = "a734d2a7-8d68-409b-9419-626914d4061d" -PositiveFactorizations = "85a6dd25-e78a-55b7-8502-1745935b8125" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" @@ -39,7 +38,6 @@ MacroTools = "0.5" MappedArrays = "0.3, 0.4" NamedTupleTools = "0.13" NestedTuples = "0.3" -PositiveFactorizations = "0.2" SpecialFunctions = "0.10, 1" StatsFuns = "0.9" TransformVariables = "0.4" diff --git a/src/MeasureTheory.jl b/src/MeasureTheory.jl index 7b9a7a02..9546a780 100644 --- a/src/MeasureTheory.jl +++ b/src/MeasureTheory.jl @@ -91,11 +91,11 @@ include("parameterized/bernoulli.jl") include("parameterized/poisson.jl") include("parameterized/binomial.jl") include("parameterized/multinomial.jl") -include("parameterized/lkj-cholesky.jl") +# include("parameterized/lkj-cholesky.jl") include("parameterized/negativebinomial.jl") -include("transforms/corrcholesky.jl") -include("transforms/ordered.jl") +# include("transforms/corrcholesky.jl") +# include("transforms/ordered.jl") include("density.jl") # include("pushforward.jl") diff --git a/src/parameterized/lkj-cholesky.jl b/src/parameterized/lkj-cholesky.jl deleted file mode 100644 index 2ae2ee1e..00000000 --- a/src/parameterized/lkj-cholesky.jl +++ /dev/null @@ -1,109 +0,0 @@ -# Modified from -# https://github.com/tpapp/AltDistributions.jl - -# See also the Stan manual (the "Stanual", though nobody calls it that) -# https://mc-stan.org/docs/2_27/reference-manual/cholesky-factors-of-correlation-matrices-1.html - -export LKJCholesky -using PositiveFactorizations - - -const CorrCholeskyUpper = TV.CorrCholeskyFactor - -@doc """ - LKJCholesky(k=3, η=1.0) - LKJCholesky(k=3, logη=0.0) - -`LKJCholesky(k, ...)` gives the `k×k` LKJ distribution (Lewandowski et al 2009) -expressed as a Cholesky decomposition. As a special case, for -`C = rand(LKJCholesky(k=K, η=1.0))` (or equivalently -`C=rand(LKJCholesky{k}(k=K, logη=0.0))`), `C.L * C.U` is uniform over the set of -all `K×K` correlation matrices. Note, however, that in this case `C.L` and `C.U` -are **not** sampled uniformly (because the multiplication is nonlinear). - -The `logdensity` method for this measure applies for `LowerTriangular`, -`UpperTriangular`, or `Diagonal` matrices, and will "do the right thing". The -`logdensity` **does not check if `L*U` yields a valid correlation matrix**. - -Valid values are ``η > 0``. When ``η > 1``, the distribution is unimodal with a -peak at `I`, while ``0 < η < 1`` yields a trough. ``η = 2`` is recommended as a vague prior. - -Adapted from -https://github.com/tpapp/AltDistributions.jl -""" LKJCholesky - -@parameterized LKJCholesky(k,η) - - -@kwstruct LKJCholesky(k,η) -@kwstruct LKJCholesky(k,logη) - -LKJCholesky(k::Integer) = LKJCholesky(k, 1.0) - -asparams(::Type{<:LKJCholesky}, ::Val{:η}) = asℝ₊ -asparams(::Type{<:LKJCholesky}, ::Val{:logη}) = asℝ - -# Modified from -# https://github.com/tpapp/AltDistributions.jl - - -using LinearAlgebra - -logdensity(d::LKJCholesky, C::Cholesky) = logdensity(d, C.UL) - -function logdensity(d::LKJCholesky{(:k, :η,)}, L::Union{LinearAlgebra.AbstractTriangular, Diagonal}) - η = d.η - # z = diag(L) - # sum(log.(z) .* ((k:-1:1) .+ 2*(η-1))) - - # Note: https://github.com/cscherrer/MeasureTheory.jl/issues/100#issuecomment-852428192 - c = d.k + 2(η - 1) - n = size(L,1) - s = sum(1:n) do i - (c - i) * @inbounds log(L[i,i]) - end - return s -end - -function logdensity(d::LKJCholesky{(:k, :logη)}, L::Union{LinearAlgebra.AbstractTriangular, Diagonal}) - c = d.k + 2 * expm1(d.logη) - n = size(L,1) - s = sum(1:n) do i - (c - i) * @inbounds log(L[i,i]) - end - return s -end - - -TV.as(d::LKJCholesky) = CorrCholesky(d.k) - -function basemeasure(μ::LKJCholesky{(:k,:η)}) - t = as(μ) - f(par) = Dists.lkj_logc0(par.k, par.η) - base = Pushforward(t, Lebesgue(ℝ)^dimension(t), false) - ParamWeightedMeasure(f, (k= μ.k, η= μ.η), base) -end - -function basemeasure(μ::LKJCholesky{(:k,:logη)}) - t = as(μ) - η = exp(μ.logη) - f(par) = Dists.lkj_logc0(par.k, exp(par.logη)) - base = Pushforward(t, Lebesgue(ℝ)^dimension(t), false) - ParamWeightedMeasure(f, (k= μ.k, logη= logη), base) -end - -# Note (from @sethaxen): this can be done without the cholesky decomposition, by randomly -# sampling the canonical partial correlations, as in the LKJ paper, and then -# mapping them to the cholesky factor instead of the correlation matrix. Stan -# implements* this. But that can be for a future PR. -# -# * https://github.com/stan-dev/math/blob/develop/stan/math/prim/prob/lkj_corr_cholesky_rng.hpp -function Base.rand(rng::AbstractRNG, ::Type, d::LKJCholesky{(:k, :η,)}) - return cholesky(Positive, rand(rng, Dists.LKJ(d.k, d.η))) -end; - -function Base.rand(rng::AbstractRNG, ::Type, d::LKJCholesky{(:k, :logη)}) - return cholesky(Positive, rand(rng, Dists.LKJ(d.k, exp(d.logη)))) -end; - -ConstructionBase.constructorof(::Type{L}) where {L<:LKJCholesky} = LKJCholesky diff --git a/src/parameterized/mvnormal.jl b/src/parameterized/mvnormal.jl index 93672aed..a8bd95d4 100644 --- a/src/parameterized/mvnormal.jl +++ b/src/parameterized/mvnormal.jl @@ -8,55 +8,18 @@ using Random import Base -struct MvNormal{N, T, I, J} <: ParameterizedMeasure{N} - par::NamedTuple{N, T} -end +@parameterized MvNormal(μ, Σ) +@kwstruct MvNormal(Σ) -function MvNormal(nt::NamedTuple{N,T}) where {N,T} - I,J = mvnormaldims(nt) +@kwstruct MvNormal(μ, Σ) - cache = Vector{Float64}(undef, max(I,J)) - MvNormal{N,T,I,J}(cache, nt) -end +@kwstruct MvNormal(σ) -function Base.size(d::MvNormal{N, T, I, J}) where {N,T,I,J} - return (I,) -end +@kwstruct MvNormal(μ, σ) -mvnormaldims(nt::NamedTuple{(:A, :b)}) = size(nt.A) +@kwstruct MvNormal(μ, Ω) -function MeasureTheory.basemeasure(μ::MvNormal{N, T, I,I}) where {N, T, I} - return (1 / sqrt2π)^I * Lebesgue(ℝ)^I -end +@kwstruct MvNormal(Ω) -sampletype(d::MvNormal{N, T, I, J}) where {N,T,I,J} = Vector{Float64} - -MvNormal(; kwargs...) = begin - MvNormal((; kwargs...)) -end - - - -function Random.rand!(rng::AbstractRNG, d::MvNormal{(:A, :b),T,I,J}, x::AbstractArray) where {T,I,J} - z = getcache(d, J) - rand!(rng, Normal()^J, z) - copyto!(x, d.b) - mul!(x, d.A, z, 1.0, 1.0) - return x -end - -function logdensity(d::MvNormal{(:A,:b)}, x) - cache = getcache(d, size(d)) - z = d.A \ (x - d.b) - return logdensity(MvNormal(), z) - logabsdet(d.A) -end - -≪(::MvNormal, ::Lebesgue{ℝ}) = true - -# function logdensity(d::MvNormal{(:Σ⁻¹,)}, x) -# @tullio ℓ = -0.5 * x[i] * d.Σ⁻¹[i,j] * x[j] -# return ℓ -# end - -mvnormaldims(nt::NamedTuple{(:Σ⁻¹,)}) = size(nt.Σ⁻¹) +@kwstruct MvNormal(ω) diff --git a/src/transforms/corrcholesky.jl b/src/transforms/corrcholesky.jl deleted file mode 100644 index 8f66bc0c..00000000 --- a/src/transforms/corrcholesky.jl +++ /dev/null @@ -1,46 +0,0 @@ -# Adapted from https://github.com/tpapp/TransformVariables.jl/blob/master/src/special_arrays.jl - -export CorrCholesky - - -""" - CorrCholesky(n) -Cholesky factor of a correlation matrix of size `n`. -Transforms ``n(n-1)/2`` real numbers to an ``n×n`` lower-triangular matrix `L`, such that -`L*L'` is a correlation matrix (positive definite, with unit diagonal). - -# Notes -If -- `z` is a vector of `n` IID standard normal variates, -- `σ` is an `n`-element vector of standard deviations, -- `C` is obtained from `CorrCholesky(n)`, -then `Diagonal(σ) * C.L * z` is a zero-centered multivariate normal variate with the standard deviations `σ` and -correlation matrix `C.L * C.U`. -""" -struct CorrCholesky <: TV.VectorTransform - n::Int -end - -TV.dimension(t::CorrCholesky) = TV.dimension(CorrCholeskyUpper(t.n)) - -# TODO: For now we just transpose the CorrCholeskyUpper result. We should -# consider whether it can help performance to implement this directly for the -# lower triangular case -function TV.transform_with(flag::TV.LogJacFlag, t::CorrCholesky, x::AbstractVector, index) - U, ℓ, index = TV.transform_with(flag, CorrCholeskyUpper(t.n), x, index) - return Cholesky(U, 'U', 0), ℓ, index -end - -TV.inverse_eltype(t::CorrCholesky, x::AbstractMatrix) = TV.extended_eltype(x) - -function TV.inverse_at!(x::AbstractVector, index, t::CorrCholesky, L::LowerTriangular) - return TV.inverse_at!(x, index, CorrCholeskyUpper(t.n), L') -end - -function TV.inverse_at!(x::AbstractVector, index, t::CorrCholesky, U::UpperTriangular) - return TV.inverse_at!(x, index, CorrCholeskyUpper(t.n), U) -end - -function TV.inverse_at!(x::AbstractVector, index, t::CorrCholesky, C::Cholesky) - return TV.inverse_at!(x, index, t, C.UL) -end diff --git a/src/transforms/corrcholeskylower.jl b/src/transforms/corrcholeskylower.jl deleted file mode 100644 index 772ee49e..00000000 --- a/src/transforms/corrcholeskylower.jl +++ /dev/null @@ -1,40 +0,0 @@ -# Adapted from https://github.com/tpapp/TransformVariables.jl/blob/master/src/special_arrays.jl - -using LinearAlgebra -const CorrCholeskyUpper = CorrCholeskyFactor - -export CorrCholeskyLower - -""" - CorrCholeskyLower(n) -Lower Cholesky factor of a correlation matrix of size `n`. -Transforms ``n(n-1)/2`` real numbers to an ``n×n`` lower-triangular matrix `L`, such that -`L*L'` is a correlation matrix (positive definite, with unit diagonal). - -# Notes -If -- `z` is a vector of `n` IID standard normal variates, -- `σ` is an `n`-element vector of standard deviations, -- `L` is obtained from `CorrCholeskyLower(n)`, -then `Diagonal(σ) * L * z` is a zero-centered multivariate normal variate with the standard deviations `σ` and -correlation matrix `L * L'`. -""" -struct CorrCholeskyLower <: TV.VectorTransform - n::Int -end - -TV.dimension(t::CorrCholeskyLower) = TV.dimension(CorrCholeskyUpper(t.n)) - -# TODO: For now we just transpose the CorrCholeskyUpper result. We should -# consider whether it can help performance to implement this directly for the -# lower triangular case -function TV.transform_with(flag::TV.LogJacFlag, t::CorrCholeskyLower, x::AbstractVector, index) - U, ℓ, index = TV.transform_with(flag, CorrCholeskyUpper(t.n), x, index) - return U', ℓ, index -end - -TV.inverse_eltype(t::CorrCholeskyLower, L::LowerTriangular) = TV.extended_eltype(L) - -function TV.inverse_at!(x::AbstractVector, index, t::CorrCholeskyLower, L::LowerTriangular) - return TV.inverse_at!(x, index, CorrCholeskyUpper(t.n), L') -end diff --git a/src/transforms/ordered.jl b/src/transforms/ordered.jl deleted file mode 100644 index 9eb44f43..00000000 --- a/src/transforms/ordered.jl +++ /dev/null @@ -1,107 +0,0 @@ -# abstract type AbstractOrderedVector{T} <: AbstractVector{T} end - -# struct OrderedVector{T} <: AbstractOrderedVector{T} -# data::Vector{T} -# end - -# as(::Type{OrderedVector}, transformation::TV.AbstractTransform, dim::Int) = -# Ordered(transformation, dim) - -using MeasureTheory - -export Ordered - -using TransformVariables -const TV = TransformVariables - -struct Ordered{T <: TV.AbstractTransform} <: TV.VectorTransform - transformation::T - dim::Int -end - -TV.dimension(t::Ordered) = t.dim - -addlogjac(ℓ, Δℓ) = ℓ + Δℓ -addlogjac(::TV.NoLogJac, Δℓ) = TV.NoLogJac() - -using MappedArrays - -bounds(t::TV.ShiftedExp{true}) = (t.shift, TV.∞) -bounds(t::TV.ShiftedExp{false}) = (-TV.∞, t.shift) -bounds(t::TV.ScaledShiftedLogistic) = (t.shift, t.scale + t.shift) -bounds(::TV.Identity) = (-TV.∞, TV.∞) - -const OrderedΔx = -8.0 - -# See https://mc-stan.org/docs/2_27/reference-manual/ordered-vector.html -function TV.transform_with(flag::TV.LogJacFlag, t::Ordered, x, index::T) where {T} - transformation, len = (t.transformation, t.dim) - @assert dimension(transformation) == 1 - y = similar(x, len) - - (lo,hi) = bounds(transformation) - - - x = mappedarray(xj -> xj + OrderedΔx, x) - - @inbounds (y[1], ℓ, _) = TV.transform_with(flag, as(Real, lo, hi), x, index) - index += 1 - - @inbounds for i in 2:len - (y[i], Δℓ, _) = TV.transform_with(flag, as(Real, y[i-1], hi), x, index) - ℓ = addlogjac(ℓ, Δℓ) - index += 1 - end - - return (y, ℓ, index) -end - -TV.inverse_eltype(t::Ordered, y::AbstractVector) = TV.extended_eltype(y) - -Ordered(n::Int) = Ordered(asℝ, n) - -function TV.inverse_at!(x::AbstractVector, index, t::Ordered, y::AbstractVector) - (lo,hi) = bounds(t.transformation) - - @inbounds x[index] = inverse(as(Real, lo, hi), y[1]) - OrderedΔx - index += 1 - - @inbounds for i in 2:length(y) - x[index] = inverse(as(Real, y[i-1], hi), y[i]) - OrderedΔx - index += 1 - end - return index -end - -export Sorted -struct Sorted{M} <: AbstractMeasure - μ::M - n::Int -end - -logdensity(s::Sorted, x) = logdensity(s.μ ^ s.n, x) - -TV.as(s::Sorted) = Ordered(as(s.μ), s.n) - -function Random.rand!(rng::AbstractRNG, d::Sorted, x::AbstractArray) - rand!(rng, d.μ ^ d.n, x) - sort!(x) - return x -end - -function Base.rand(rng::AbstractRNG, T::Type, d::Sorted) - # TODO: Use `sampletype` for this - elT = typeof(rand(rng, T, d.μ)) - x = Vector{elT}(undef, d.n) - rand!(rng, d, x) -end - -# logdensity(d, rand(d)) - - - -# TV.transform_with(TV.LogJac(), Ordered(asℝ, 4), zeros(4), 1) -# TV.transform_with(TV.LogJac(), Ordered(asℝ, 4), randn(4), 1) - -# d = Pushforward(Ordered(10), Normal()^10, false) -# logdensity(Lebesgue(ℝ)^10, rand(d)) diff --git a/src/utils.jl b/src/utils.jl index 8bcdd295..ce565a90 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -55,3 +55,4 @@ struct NonIterable end isiterable(::Type{T}) where T = static_hasmethod(iterate, Tuple{T}) ? Iterable() : NonIterable() functioninstance(::Type{F}) where {F<:Function} = F.instance + diff --git a/test/runtests.jl b/test/runtests.jl index 32ee80f8..3d44e937 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -85,20 +85,6 @@ end @test ℓ ≈ logdensity(Normal(;μ,logσ), y) end - @testset "LKJCholesky" begin - D = LKJCholesky{(:k,:η)} - par = transform(asparams(D, (k=4,)), randn(1)) - d = D(merge((k=4,),par)) - # @test params(d) == par - - η = par.η - logη = log(η) - - y = rand(d) - η = par.η - ℓ = logdensity(LKJCholesky(4,η), y) - @test ℓ ≈ logdensity(LKJCholesky(k=4,logη=logη), y) - end end @testset "Kernel" begin @@ -268,9 +254,6 @@ end @test repro(Laplace, (:μ,:σ)) end - @testset "LKJCholesky" begin - @test repro(LKJCholesky, (:k,:η,), (k=3,)) - end @testset "Multinomial" begin @test_broken repro(Multinomial, (:n,:p,))