diff --git a/docs/source/lreg.rst b/docs/source/lreg.rst index 614d117..858427c 100644 --- a/docs/source/lreg.rst +++ b/docs/source/lreg.rst @@ -31,7 +31,7 @@ The package provides ``llsq`` to solve these problems: This function accepts two keyword arguments: - - ``trans``: whether to use the transposed form. (default is ``false``) + - ``dims``: whether input observations are stored as rows (``1``) or columns (``2``). (default is ``1``) - ``bias``: whether to include the bias term ``b``. (default is ``true``) The function results the solution ``a``. @@ -43,7 +43,7 @@ For a single response vector ``y`` (without using bias): .. code-block:: julia - using MultivariateStats + using Statistics, MultivariateStats # prepare data X = rand(1000, 3) # feature matrix @@ -57,7 +57,7 @@ For a single response vector ``y`` (without using bias): yp = X * a # measure the error - rmse = sqrt(mean(abs2(y - yp))) + rmse = sqrt(mean(abs2.(y - yp))) print("rmse = $rmse") For a single response vector ``y`` (using bias): @@ -67,7 +67,7 @@ For a single response vector ``y`` (using bias): # prepare data X = rand(1000, 3) a0, b0 = rand(3), rand() - y = X * a0 + b0 + 0.1 * randn(1000) + y = X * a0 .+ b0 .+ 0.1 * randn(1000) # solve using llsq sol = llsq(X, y) @@ -76,25 +76,25 @@ For a single response vector ``y`` (using bias): a, b = sol[1:end-1], sol[end] # do prediction - yp = X * a + b' + yp = X * a .+ b' -For a matrix ``Y`` comprised of multiple columns: +For a matrix of column-stored regressors ``X`` and a matrix comprised of multiple columns of dependent variables ``Y``: .. 
code-block:: julia # prepare data - X = rand(1000, 3) + X = rand(3, 1000) A0, b0 = rand(3, 5), rand(1, 5) - Y = (X * A0 .+ b0) + 0.1 * randn(1000, 5) + Y = (X' * A0 .+ b0) + 0.1 * randn(1000, 5) # solve using llsq - sol = llsq(X, Y) + sol = llsq(X, Y, dims=2) # extract results A, b = sol[1:end-1,:], sol[end,:] # do prediction - Yp = X * A .+ b' + Yp = X'*A .+ b' Ridge Regression @@ -132,7 +132,7 @@ The package provides ``ridge`` to solve these problems: This function accepts two keyword arguments: - - ``trans``: whether to use the transposed form. (default is ``false``) + - ``dims``: whether input observations are stored as rows (``1``) or columns (``2``). (default is ``1``) - ``bias``: whether to include the bias term ``b``. (default is ``true``) The function results the solution ``a``. diff --git a/src/lreg.jl b/src/lreg.jl index 2cddefa..b157a8b 100644 --- a/src/lreg.jl +++ b/src/lreg.jl @@ -20,8 +20,13 @@ _haug(X::AbstractMatrix{T}) where T = hcat(X, ones(T, size(X,1), 1))::Matrix{T} ## linear least square function llsq(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T}; - trans::Bool=false, bias::Bool=true) where {T<:Real} + trans::Bool=false, bias::Bool=true, + dims::Union{Integer,Nothing}=nothing) where {T<:Real} + if dims === nothing && trans + Base.depwarn("`trans` argument is deprecated, use llsq(X, Y, dims=d) instead.", :trans) + dims = 2 + end + if dims == 2 mX, nX = size(X) size(Y, 1) == nX || throw(DimensionMismatch("Dimensions of X and Y mismatch.")) mX <= nX || error("mX <= nX is required when trans is false.") @@ -30,30 +35,28 @@ function llsq(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T}; size(Y, 1) == mX || throw(DimensionMismatch("Dimensions of X and Y mismatch.")) mX >= nX || error("mX >= nX is required when trans is false.") end - _ridge(X, Y, zero(T), trans, bias) + _ridge(X, Y, zero(T), dims == 2, bias) end ## ridge regression -function ridge(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T}, r::Real; - trans::Bool=false, bias::Bool=true) 
where {T<:Real} - lreg_chkdims(X, Y, trans) - r >= zero(r) || error("r must be non-negative.") - _ridge(X, Y, convert(T, r), trans, bias) -end - -function ridge(X::AbstractMatrix, Y::AbstractVecOrMat, r::AbstractVector; - trans::Bool=false, bias::Bool=true) - d = lreg_chkdims(X, Y, trans) - length(r) == d || throw(DimensionMismatch("Incorrect length of r.")) - _ridge(X, Y, r, trans, bias) -end - -function ridge(X::AbstractMatrix, Y::AbstractVecOrMat, r::AbstractMatrix; - trans::Bool=false, bias::Bool=true) - d = lreg_chkdims(X, Y, trans) - size(r) == (d, d) || throw(DimensionMismatch("Incorrect size of r.")) - _ridge(X, Y, r, trans, bias) +function ridge(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T}, r::Union{Real, AbstractVecOrMat}; + trans::Bool=false, bias::Bool=true, + dims::Union{Integer,Nothing}=nothing) where {T<:Real} + if dims === nothing && trans + Base.depwarn("`trans` argument is deprecated, use ridge(X, Y, r, dims=d) instead.", :trans) + dims = 2 + end + d = lreg_chkdims(X, Y, dims == 2) + if isa(r, Real) + r >= zero(r) || error("r must be non-negative.") + r = convert(T, r) + elseif isa(r, AbstractVector) + length(r) == d || throw(DimensionMismatch("Incorrect length of r.")) + elseif isa(r, AbstractMatrix) + size(r) == (d, d) || throw(DimensionMismatch("Incorrect size of r.")) + end + _ridge(X, Y, r, dims == 2, bias) end ## implementation diff --git a/test/lreg.jl b/test/lreg.jl index c9652e7..ae161d6 100644 --- a/test/lreg.jl +++ b/test/lreg.jl @@ -28,110 +28,122 @@ import Random ## llsq - A = llsq(X, Y0; trans=false, bias=false) + A = llsq(X, Y0; dims=1, bias=false) A_r = copy(A) @test size(A) == (n, n2) @test X'Y0 ≈ X'X * A - a = llsq(X, y0; trans=false, bias=false) + a = llsq(X, y0; dims=1, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - A = llsq(Xt, Y0; trans=true, bias=false) + A = llsq(Xt, Y0; dims=2, bias=false) @test size(A) == (n, n2) @test A ≈ A_r - a = llsq(Xt, y0; trans=true, bias=false) + a = llsq(Xt, y0; dims=2, bias=false) @test 
size(a) == (n,) @test a ≈ A[:,1] - Aa = llsq(X, Y1; trans=false, bias=true) + Aa = llsq(X, Y1; dims=1, bias=true) Aa_r = copy(Aa) @test size(Aa) == (n+1, n2) A, b = Aa[1:end-1,:], Aa[end:end,:] @test X' * (Y1 .- b) ≈ X'X * A - aa = llsq(X, y1; trans=false, bias=true) + aa = llsq(X, y1; dims=1, bias=true) @test aa ≈ Aa[:,1] - Aa = llsq(Xt, Y1; trans=true, bias=true) + Aa = llsq(Xt, Y1; dims=2, bias=true) @test Aa ≈ Aa_r - aa = llsq(Xt, y1; trans=true, bias=true) + aa = llsq(Xt, y1; dims=2, bias=true) @test aa ≈ Aa[:,1] + # test default dims=1 argument + aa = llsq(X, y1) + @test aa ≈ Aa[:,1] + @test_throws DimensionMismatch llsq(X, y1; dims=2) + @test_throws DimensionMismatch llsq(Xt, y1) + ## ridge (with Real r) r = 0.1 - A = ridge(X, Y0, r; trans=false, bias=false) + A = ridge(X, Y0, r; dims=1, bias=false) A_r = copy(A) @test size(A) == (n, n2) @test X'Y0 ≈ (X'X + r * I) * A - a = ridge(X, y0, r; trans=false, bias=false) + a = ridge(X, y0, r; dims=1, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - A = ridge(Xt, Y0, r; trans=true, bias=false) + A = ridge(Xt, Y0, r; dims=2, bias=false) @test size(A) == (n, n2) @test A ≈ A_r - a = ridge(Xt, y0, r; trans=true, bias=false) + a = ridge(Xt, y0, r; dims=2, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - Aa = ridge(X, Y1, r; trans=false, bias=true) + Aa = ridge(X, Y1, r; dims=1, bias=true) Aa_r = copy(Aa) @test size(Aa) == (n+1, n2) A, b = Aa[1:end-1,:], Aa[end:end,:] @test X' * (Y1 .- b) ≈ (X'X + r * I) * A - aa = ridge(X, y1, r; trans=false, bias=true) + aa = ridge(X, y1, r; dims=1, bias=true) @test aa ≈ Aa[:,1] - Aa = ridge(Xt, Y1, r; trans=true, bias=true) + Aa = ridge(Xt, Y1, r; dims=2, bias=true) @test Aa ≈ Aa_r - aa = ridge(Xt, y1, r; trans=true, bias=true) + aa = ridge(Xt, y1, r; dims=2, bias=true) + @test aa ≈ Aa[:,1] + + # test default dims=1 argument + aa = ridge(X, y1, r) @test aa ≈ Aa[:,1] + @test_throws DimensionMismatch ridge(X, y1, r; dims=2) + @test_throws DimensionMismatch ridge(Xt, y1, r) ## 
ridge (with diagonal r) r = 0.05 .+ 0.1 .* rand(n) - A = ridge(X, Y0, r; trans=false, bias=false) + A = ridge(X, Y0, r; dims=1, bias=false) A_r = copy(A) @test size(A) == (n, n2) @test X'Y0 ≈ (X'X + diagm(0=>r)) * A - a = ridge(X, y0, r; trans=false, bias=false) + a = ridge(X, y0, r; dims=1, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - A = ridge(Xt, Y0, r; trans=true, bias=false) + A = ridge(Xt, Y0, r; dims=2, bias=false) @test size(A) == (n, n2) @test A ≈ A_r - a = ridge(Xt, y0, r; trans=true, bias=false) + a = ridge(Xt, y0, r; dims=2, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - Aa = ridge(X, Y1, r; trans=false, bias=true) + Aa = ridge(X, Y1, r; dims=1, bias=true) Aa_r = copy(Aa) @test size(Aa) == (n+1, n2) A, b = Aa[1:end-1,:], Aa[end:end,:] @test X' * (Y1 .- b) ≈ (X'X + diagm(0=>r)) * A - aa = ridge(X, y1, r; trans=false, bias=true) + aa = ridge(X, y1, r; dims=1, bias=true) @test aa ≈ Aa[:,1] - Aa = ridge(Xt, Y1, r; trans=true, bias=true) + Aa = ridge(Xt, Y1, r; dims=2, bias=true) @test Aa ≈ Aa_r - aa = ridge(Xt, y1, r; trans=true, bias=true) + aa = ridge(Xt, y1, r; dims=2, bias=true) @test aa ≈ Aa[:,1] @@ -140,36 +152,36 @@ import Random Q = qr(randn(n, n)).Q r = Q' * diagm(0=>r) * Q - A = ridge(X, Y0, r; trans=false, bias=false) + A = ridge(X, Y0, r; dims=1, bias=false) A_r = copy(A) @test size(A) == (n, n2) @test X'Y0 ≈ (X'X + r) * A - a = ridge(X, y0, r; trans=false, bias=false) + a = ridge(X, y0, r; dims=1, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - A = ridge(Xt, Y0, r; trans=true, bias=false) + A = ridge(Xt, Y0, r; dims=2, bias=false) @test size(A) == (n, n2) @test A ≈ A_r - a = ridge(Xt, y0, r; trans=true, bias=false) + a = ridge(Xt, y0, r; dims=2, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - Aa = ridge(X, Y1, r; trans=false, bias=true) + Aa = ridge(X, Y1, r; dims=1, bias=true) Aa_r = copy(Aa) @test size(Aa) == (n+1, n2) A, b = Aa[1:end-1,:], Aa[end:end,:] @test X' * (Y1 .- b) ≈ (X'X + r) * A - aa = ridge(X, y1, r; 
trans=false, bias=true) + aa = ridge(X, y1, r; dims=1, bias=true) @test aa ≈ Aa[:,1] - Aa = ridge(Xt, Y1, r; trans=true, bias=true) + Aa = ridge(Xt, Y1, r; dims=2, bias=true) @test Aa ≈ Aa_r - aa = ridge(Xt, y1, r; trans=true, bias=true) + aa = ridge(Xt, y1, r; dims=2, bias=true) @test aa ≈ Aa[:,1] end