From d28db09db2973ac33f8b9f6f80723ef839a0098f Mon Sep 17 00:00:00 2001 From: Art Wild Date: Thu, 20 Jan 2022 15:51:15 -0500 Subject: [PATCH] Added docstrings, and updated `ICAGDeriv` as in #122 --- src/ica.jl | 205 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 142 insertions(+), 63 deletions(-) diff --git a/src/ica.jl b/src/ica.jl index eb702b5..e64ffc9 100644 --- a/src/ica.jl +++ b/src/ica.jl @@ -1,65 +1,122 @@ # Independent Component Analysis -#### FastICA type +""" +This type contains ICA model parameters: mean and component matrix ``W``. -struct ICA{T<:Real} +**Note:** Each column of the component matrix ``W`` corresponds to an independent component. +""" +struct ICA{T<:Real} <: LinearDimensionalityReduction mean::Vector{T} # mean vector, of length m (or empty to indicate zero mean) W::Matrix{T} # component coefficient matrix, of size (m, k) end +function show(io::IO, M::ICA) + indim, outdim = size(M) + print("ICA(indim=$indim, outdim=$outdim)") +end + +""" + size(M::ICA) + +Returns a tuple with the input dimension, *i.e* the number of observed mixtures, and +the output dimension, *i.e* the number of independent components. +""" +size(M::ICA) = size(M.W) + +""" + mean(M::ICA) -indim(M::ICA) = size(M.W, 1) -outdim(M::ICA) = size(M.W, 2) -mean(M::ICA) = fullmean(indim(M), M.mean) +Returns the mean vector. +""" +mean(M::ICA) = fullmean(size(M,1), M.mean) -transform(M::ICA, x::AbstractVecOrMat{<:Real}) = transpose(M.W) * centralize(x, M.mean) +""" + predict(M::ICA, x) + +Transform `x` to the output space to extract independent components, +as ``\\mathbf{W}^T (\\mathbf{x} - \\boldsymbol{\\mu})`, given the model `M`. +""" +predict(M::ICA, x::AbstractVecOrMat{<:Real}) = transpose(M.W) * centralize(x, M.mean) #### core algorithm -# the abstract type for all g functions: -# -# Let f be an instance of such type, then -# -# evaluate(f, x) --> (v, d) -# -# It returns a function value v, and derivative d -# -abstract type ICAGDeriv{T<:Real} end - -struct Tanh{T} <: ICAGDeriv{T} +""" +The abstract type for all `g` (derivative) functions. + +Let `g` be an instance of such type, then `update!(g, U, E)` given + +- `U = w'x` + +returns updated in-place `U` and `E`, s.t. + +- `g(w'x) --> U` and `E{g'(w'x)} --> E` + +""" +abstract type ICAGDeriv end + +""" +Derivative for ``(1/a_1)\\log\\cosh a_1 u`` +""" +struct Tanh{T} <: ICAGDeriv a::T end +function update!(f::Tanh{T}, U::AbstractMatrix{T}, E::AbstractVector{T}) where {T} + n,k = size(U) + a = f.a + @inbounds for j in 1:k + _s = zero(T) + @fastmath for i in 1:n + t = tanh(a * U[i,j]) + U[i,j] = t + _s += a * (1 - t^2) + end + E[j] = _s / n + end +end -evaluate(f::Tanh{T}, x::T) where T<:Real = (a = f.a; t = tanh(a * x); (t, a * (1 - t * t))) +""" +Derivative for ``-e^{-u^2/2)`` +""" +struct Gaus <: ICAGDeriv end +function update!(f::Gaus, U::AbstractMatrix{T}, E::AbstractVector{T}) where {T} + n,k = size(U) + @inbounds for j in 1:k + _s = zero(T) + for i in 1:n + u = U[i,j] + u2 = u^2 + e = exp(-u2/2) + U[i,j] = u * e + _s += (1 - u2) * e + end + E[j] = _s / n + end +end -struct Gaus{T} <: ICAGDeriv{T} end -evaluate(f::Gaus{T}, x::T) where T<:Real = (x2 = x * x; e = exp(-x2/2); (x * e, (1 - x2) * e)) -## a function to get a g-fun +# Fast ICA -icagfun(fname::Symbol, ::Type{T} = Float64) where T<:Real= - fname == :tanh ? Tanh{T}(1.0) : - fname == :gaus ? Gaus{T}() : - error("Unknown gfun $(fname)") +""" + fastica!(W, X, fun, maxiter, tol, verbose) -icagfun(fname::Symbol, a::T) where T<:Real = - fname == :tanh ? Tanh(a) : - fname == :gaus ? error("The gfun $(fname) has no parameters") : - error("Unknown gfun $(fname)") +Invoke the Fast ICA algorithm[^1]. -# Fast ICA -# -# Reference: -# -# Aapo Hyvarinen and Erkki Oja -# Independent Component Analysis: Algorithms and Applications. -# Neural Network 13(4-5), 2000. -# -function fastica!(W::DenseMatrix{T}, # initialized component matrix, size (m, k) - X::DenseMatrix{T}, # (whitened) observation sample matrix, size(m, n) - fun::ICAGDeriv{T}, # approximate neg-entropy functor - maxiter::Int, # maximum number of iterations - tol::Real) where T<:Real# convergence tolerance +**Parameters:** +- `W`: The initial un-mixing matrix, of size ``(m, k)``. The function updates this matrix inplace. +- `X`: The data matrix, of size ``(m, n)``. This matrix is input only, and won't be modified. +- `fun`: The approximate neg-entropy functor of type [`ICAGDeriv`](@ref). +- `maxiter`: Maximum number of iterations. +- `tol`: Tolerable change of `W` at convergence. + +Returns the updated `W`. + +**Note:** The number of components is inferred from `W` as `size(W, 2)`. +""" +function fastica!(W::DenseMatrix{T}, # initialized component matrix, size (m, k) + X::DenseMatrix{T}, # (whitened) observation sample matrix, size(m, n) + fun::ICAGDeriv, # approximate neg-entropy functor + maxiter::Int, # maximum number of iterations + tol::Real) where {T<:Real} # convergence tolerance # argument checking m = size(W, 1) @@ -71,7 +128,7 @@ function fastica!(W::DenseMatrix{T}, # initialized component matrix, size ( @debug "FastICA Algorithm" m=m n=n k=k # pre-allocated storage - Wp = Matrix{T}(undef, m, k) # to store previous version of W + Wp = similar(W) # to store previous version of W U = Matrix{T}(undef, n, k) # to store w'x & g(w'x) Y = Matrix{T}(undef, m, k) # to store E{x g(w'x)} for components E1 = Vector{T}(undef, k) # store E{g'(w'x)} for components @@ -83,7 +140,7 @@ function fastica!(W::DenseMatrix{T}, # initialized component matrix, size ( end # main loop - chg = NaN + chg = T(NaN) t = 0 converged = false while !converged && t < maxiter @@ -94,34 +151,24 @@ function fastica!(W::DenseMatrix{T}, # initialized component matrix, size ( mul!(U, transpose(X), W) # u <- w'x # compute g(w'x) --> U and E{g'(w'x)} --> E1 - _s = 0.0 - for j = 1:k - for i = 1:n - u, v = evaluate(fun, U[i,j]) - U[i,j] = u - _s += v - end - E1[j] = _s / n - end + update!(fun, U, E1) # compute E{x g(w'x)} --> Y - rmul!(mul!(Y, X, U), 1.0 / n) + rmul!(mul!(Y, X, U), 1 / n) - # update w: E{x g(w'x)} - E{g'(w'x)} w := y - e1 * w + # update w: E{x g(w'x)} - E{g'(w'x)}w, i.e. w := y - e1 * w for j = 1:k w = view(W,:,j) y = view(Y,:,j) e1 = E1[j] - for i = 1:m - w[i] = y[i] - e1 * w[i] - end + @. w = y - e1 * w end # symmetric decorrelation: W <- W * (W'W)^{-1/2} copyto!(W, W * _invsqrtm!(W'W)) # compare with Wp to evaluate a conversion change - chg = maximum(abs.(abs.(diag(W*Wp')) .- 1.0)) + chg = maximum(abs.(abs.(diag(W*Wp')) .- 1)) converged = (chg < tol) @debug "Iteration $t" change=chg tolerance=tol @@ -131,16 +178,47 @@ function fastica!(W::DenseMatrix{T}, # initialized component matrix, size ( end #### interface function - -function fit(::Type{ICA}, X::AbstractMatrix{T}, # sample matrix, size (m, n) +""" + fit(ICA, X, k; ...) + +Perform ICA over the data set given in `X`. + +**Parameters:** +-`X`: The data matrix, of size ``(m, n)``. Each row corresponds to a mixed signal, +while each column corresponds to an observation (*e.g* all signal value at a particular time step). +-`k`: The number of independent components to recover. + +**Keyword Arguments:** +- `alg`: The choice of algorithm (*default* `:fastica`) +- `fun`: The approx neg-entropy functor (*default* [`Tanh`](@ref)) +- `do_whiten`: Whether to perform pre-whitening (*default* `true`) +- `maxiter`: Maximum number of iterations (*default* `100`) +- `tol`: Tolerable change of ``W`` at convergence (*default* `1.0e-6`) +- `mean`: The mean vector, which can be either of: + - `0`: the input data has already been centralized + - `nothing`: this function will compute the mean (*default*) + - a pre-computed mean vector +- `winit`: Initial guess of ``W``, which should be either of: + - empty matrix: the function will perform random initialization (*default*) + - a matrix of size ``(k, k)`` (when `do_whiten`) + - a matrix of size ``(m, k)`` (when `!do_whiten`) + +Returns the resultant ICA model, an instance of type [`ICA`](@ref). + +**Note:** If `do_whiten` is `true`, the return `W` satisfies +``\\mathbf{W}^T \\mathbf{C} \\mathbf{W} = \\mathbf{I}``, +otherwise ``W`` is orthonormal, *i.e* ``\\mathbf{W}^T \\mathbf{W} = \\mathbf{I}``. +""" +function fit(::Type{ICA}, X::AbstractMatrix{T}, # sample matrix, size (m, n) k::Int; # number of independent components alg::Symbol=:fastica, # choice of algorithm - fun::ICAGDeriv=icagfun(:tanh, T), # approx neg-entropy functor + fun::ICAGDeriv=Tanh(one(T)), # approx neg-entropy functor do_whiten::Bool=true, # whether to perform pre-whitening maxiter::Integer=100, # maximum number of iterations tol::Real=1.0e-6, # convergence tolerance mean=nothing, # pre-computed mean - winit::Matrix{T}=zeros(T,0,0)) where T<:Real # init guess of W, size (m, k) + winit::Matrix{T}=zeros(T,0,0) # init guess of W, size (m, k) + ) where {T<:Real} # check input arguments m, n = size(X) @@ -177,3 +255,4 @@ function fit(::Type{ICA}, X::AbstractMatrix{T}, # sample matrix, end return ICA(mv, W) end +