LREG: deprecate trans argument #100

Merged · 5 commits · Oct 10, 2019
22 changes: 11 additions & 11 deletions docs/source/lreg.rst
@@ -31,7 +31,7 @@ The package provides ``llsq`` to solve these problems:

This function accepts two keyword arguments:

- ``trans``: whether to use the transposed form. (default is ``false``)
- ``dims``: whether input observations are stored as rows (``1``) or columns (``2``). (default is ``1``)
- ``bias``: whether to include the bias term ``b``. (default is ``true``)

The function returns the solution ``a``.
@@ -43,7 +43,7 @@ For a single response vector ``y`` (without using bias):

.. code-block:: julia

using MultivariateStats
using Statistics, MultivariateStats

# prepare data
X = rand(1000, 3) # feature matrix
@@ -57,7 +57,7 @@ For a single response vector ``y`` (without using bias):
yp = X * a

# measure the error
rmse = sqrt(mean(abs2(y - yp)))
rmse = sqrt(mean(abs2.(y - yp)))
print("rmse = $rmse")

For a single response vector ``y`` (using bias):
@@ -67,7 +67,7 @@ For a single response vector ``y`` (using bias):
# prepare data
X = rand(1000, 3)
a0, b0 = rand(3), rand()
y = X * a0 + b0 + 0.1 * randn(1000)
y = X * a0 .+ b0 .+ 0.1 * randn(1000)

# solve using llsq
sol = llsq(X, y)
@@ -76,25 +76,25 @@ For a single response vector ``y`` (using bias):
a, b = sol[1:end-1], sol[end]

# do prediction
yp = X * a + b'
yp = X * a .+ b'

For a matrix ``Y`` comprised of multiple columns:
For a matrix of column-stored regressors ``X`` and a matrix comprised of multiple columns of dependent variables ``Y``:

.. code-block:: julia

# prepare data
X = rand(1000, 3)
X = rand(3, 1000)
A0, b0 = rand(3, 5), rand(1, 5)
Y = (X * A0 .+ b0) + 0.1 * randn(1000, 5)
Y = (X' * A0 .+ b0) + 0.1 * randn(1000, 5)

# solve using llsq
sol = llsq(X, Y)
sol = llsq(X, Y, dims=2)

# extract results
A, b = sol[1:end-1,:], sol[end,:]

# do prediction
Yp = X * A .+ b'
Yp = X'*A .+ b'
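
As a cross-check of the new ``dims`` semantics, the column-stored and row-stored calls should agree. This is a minimal sketch with made-up data, assuming the ``dims`` keyword introduced in this pull request:

.. code-block:: julia

    using MultivariateStats

    Xc = rand(3, 100)            # 3 features, observations stored as columns
    Xr = Matrix(Xc')             # the same data, observations stored as rows
    y  = Xc' * rand(3) .+ 0.1 .* randn(100)

    a_cols = llsq(Xc, y; dims=2, bias=false)
    a_rows = llsq(Xr, y; dims=1, bias=false)
    a_cols ≈ a_rows              # expected to hold up to floating-point error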


Ridge Regression
@@ -132,7 +132,7 @@ The package provides ``ridge`` to solve these problems:

This function accepts two keyword arguments:

- ``trans``: whether to use the transposed form. (default is ``false``)
- ``dims``: whether input observations are stored as rows (``1``) or columns (``2``). (default is ``1``)
- ``bias``: whether to include the bias term ``b``. (default is ``true``)

The function returns the solution ``a``.
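
For reference, a minimal ``ridge`` call using the new keyword (made-up data; the regularizer value 0.5 is arbitrary):

.. code-block:: julia

    X = rand(50, 4)                       # 50 row-stored observations, 4 features
    y = X * rand(4) .+ randn(50)

    sol = ridge(X, y, 0.5; dims=1, bias=true)
    a, b = sol[1:end-1], sol[end]         # coefficients and bias term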
47 changes: 25 additions & 22 deletions src/lreg.jl
@@ -20,8 +20,13 @@ _haug(X::AbstractMatrix{T}) where T = hcat(X, ones(T, size(X,1), 1))::Matrix{T}
## linear least square

function llsq(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T};
trans::Bool=false, bias::Bool=true) where {T<:Real}
if trans
trans::Bool=false, bias::Bool=true,
dims::Union{Integer,Nothing}=nothing) where {T<:Real}
if dims === nothing && trans
Base.depwarn("`trans` argument is deprecated, use llsq(X, Y, dims=d) instead.", :trans)
Review comment (Member):
AFAICT this warning will be printed even if the caller didn't set trans, in which case it will be confusing. I guess it should say that dims is required, and only mention trans if it's true, right?

Also, the default for dims should be 1 until we remove the deprecation, or that will break code.

dims = 1
end
if dims == 2
mX, nX = size(X)
size(Y, 1) == nX || throw(DimensionMismatch("Dimensions of X and Y mismatch."))
mX <= nX || error("mX <= nX is required when trans is false.")
@@ -30,30 +35,28 @@ function llsq(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T};
size(Y, 1) == mX || throw(DimensionMismatch("Dimensions of X and Y mismatch."))
mX >= nX || error("mX >= nX is required when trans is false.")
end
_ridge(X, Y, zero(T), trans, bias)
_ridge(X, Y, zero(T), dims == 2, bias)
end
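
To make the deprecation path concrete, and to illustrate the reviewer's suggestion above, here is a hypothetical sketch of the guard logic — illustrative only, not the code merged in this PR. It warns only when the caller actually passed ``trans=true``, maps the old ``trans=true`` onto ``dims=2`` (column-stored observations), and keeps ``dims=1`` as the default during the deprecation period.

# Hypothetical helper, for illustration only — not part of this PR.
function _resolve_dims(trans::Bool, dims::Union{Integer,Nothing})
    if dims === nothing
        if trans
            Base.depwarn("`trans=true` is deprecated; pass `dims=2` instead.", :trans)
            return 2      # old trans=true meant column-stored observations
        end
        return 1          # keep dims=1 as the default while `trans` is deprecated
    end
    return dims
end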

## ridge regression

function ridge(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T}, r::Real;
trans::Bool=false, bias::Bool=true) where {T<:Real}
lreg_chkdims(X, Y, trans)
r >= zero(r) || error("r must be non-negative.")
_ridge(X, Y, convert(T, r), trans, bias)
end

function ridge(X::AbstractMatrix, Y::AbstractVecOrMat, r::AbstractVector;
trans::Bool=false, bias::Bool=true)
d = lreg_chkdims(X, Y, trans)
length(r) == d || throw(DimensionMismatch("Incorrect length of r."))
_ridge(X, Y, r, trans, bias)
end

function ridge(X::AbstractMatrix, Y::AbstractVecOrMat, r::AbstractMatrix;
trans::Bool=false, bias::Bool=true)
d = lreg_chkdims(X, Y, trans)
size(r) == (d, d) || throw(DimensionMismatch("Incorrect size of r."))
_ridge(X, Y, r, trans, bias)
function ridge(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T}, r::Union{Real, AbstractVecOrMat};
trans::Bool=false, bias::Bool=true,
dims::Union{Integer,Nothing}=nothing) where {T<:Real}
if dims === nothing && trans
Base.depwarn("`trans` argument is deprecated, use ridge(X, Y, r, dims=d) instead.", :trans)
dims = 1
end
d = lreg_chkdims(X, Y, dims == 2)
if isa(r, Real)
r >= zero(r) || error("r must be non-negative.")
r = convert(T, r)
elseif isa(r, AbstractVector)
length(r) == d || throw(DimensionMismatch("Incorrect length of r."))
elseif isa(r, AbstractMatrix)
size(r) == (d, d) || throw(DimensionMismatch("Incorrect size of r."))
end
_ridge(X, Y, r, dims == 2, bias)
end
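
The three separate ``ridge`` methods are consolidated above into one method that checks the type of ``r`` at run time. A minimal sketch of the three regularizer forms it accepts (made-up data; assumes the ``dims`` keyword from this pull request):

using LinearAlgebra, MultivariateStats

X = rand(100, 3)                          # row-stored observations
y = X * rand(3) .+ 0.1 .* randn(100)

a1 = ridge(X, y, 0.1; dims=1)                       # Real: one shared penalty
a2 = ridge(X, y, fill(0.1, 3); dims=1)              # Vector: one penalty per coefficient
a3 = ridge(X, y, diagm(0 => fill(0.1, 3)); dims=1)  # Matrix: full quadratic penalty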

## implementation
76 changes: 44 additions & 32 deletions test/lreg.jl
@@ -28,110 +28,122 @@ import Random

## llsq

A = llsq(X, Y0; trans=false, bias=false)
A = llsq(X, Y0; dims=1, bias=false)
A_r = copy(A)
@test size(A) == (n, n2)
@test X'Y0 ≈ X'X * A

a = llsq(X, y0; trans=false, bias=false)
a = llsq(X, y0; dims=1, bias=false)
@test size(a) == (n,)
@test a ≈ A[:,1]

A = llsq(Xt, Y0; trans=true, bias=false)
A = llsq(Xt, Y0; dims=2, bias=false)
@test size(A) == (n, n2)
@test A ≈ A_r

a = llsq(Xt, y0; trans=true, bias=false)
a = llsq(Xt, y0; dims=2, bias=false)
@test size(a) == (n,)
@test a ≈ A[:,1]

Aa = llsq(X, Y1; trans=false, bias=true)
Aa = llsq(X, Y1; dims=1, bias=true)
Aa_r = copy(Aa)
@test size(Aa) == (n+1, n2)
A, b = Aa[1:end-1,:], Aa[end:end,:]
@test X' * (Y1 .- b) ≈ X'X * A

aa = llsq(X, y1; trans=false, bias=true)
aa = llsq(X, y1; dims=1, bias=true)
@test aa ≈ Aa[:,1]

Aa = llsq(Xt, Y1; trans=true, bias=true)
Aa = llsq(Xt, Y1; dims=2, bias=true)
@test Aa ≈ Aa_r

aa = llsq(Xt, y1; trans=true, bias=true)
aa = llsq(Xt, y1; dims=2, bias=true)
@test aa ≈ Aa[:,1]

# test the default value of the dims argument
aa = llsq(X, y1)
@test aa ≈ Aa[:,1]
@test_throws DimensionMismatch llsq(X, y1; dims=2)
@test_throws DimensionMismatch llsq(Xt, y1)


## ridge (with Real r)

r = 0.1

A = ridge(X, Y0, r; trans=false, bias=false)
A = ridge(X, Y0, r; dims=1, bias=false)
A_r = copy(A)
@test size(A) == (n, n2)
@test X'Y0 ≈ (X'X + r * I) * A

a = ridge(X, y0, r; trans=false, bias=false)
a = ridge(X, y0, r; dims=1, bias=false)
@test size(a) == (n,)
@test a ≈ A[:,1]

A = ridge(Xt, Y0, r; trans=true, bias=false)
A = ridge(Xt, Y0, r; dims=2, bias=false)
@test size(A) == (n, n2)
@test A ≈ A_r

a = ridge(Xt, y0, r; trans=true, bias=false)
a = ridge(Xt, y0, r; dims=2, bias=false)
@test size(a) == (n,)
@test a ≈ A[:,1]

Aa = ridge(X, Y1, r; trans=false, bias=true)
Aa = ridge(X, Y1, r; dims=1, bias=true)
Aa_r = copy(Aa)
@test size(Aa) == (n+1, n2)
A, b = Aa[1:end-1,:], Aa[end:end,:]
@test X' * (Y1 .- b) ≈ (X'X + r * I) * A

aa = ridge(X, y1, r; trans=false, bias=true)
aa = ridge(X, y1, r; dims=1, bias=true)
@test aa ≈ Aa[:,1]

Aa = ridge(Xt, Y1, r; trans=true, bias=true)
Aa = ridge(Xt, Y1, r; dims=2, bias=true)
@test Aa ≈ Aa_r

aa = ridge(Xt, y1, r; trans=true, bias=true)
aa = ridge(Xt, y1, r; dims=2, bias=true)
@test aa ≈ Aa[:,1]

# test the default value of the dims argument
aa = ridge(X, y1, r)
@test aa ≈ Aa[:,1]
@test_throws DimensionMismatch ridge(X, y1, r; dims=2)
@test_throws DimensionMismatch ridge(Xt, y1, r)


## ridge (with diagonal r)

r = 0.05 .+ 0.1 .* rand(n)

A = ridge(X, Y0, r; trans=false, bias=false)
A = ridge(X, Y0, r; dims=1, bias=false)
A_r = copy(A)
@test size(A) == (n, n2)
@test X'Y0 ≈ (X'X + diagm(0=>r)) * A

a = ridge(X, y0, r; trans=false, bias=false)
a = ridge(X, y0, r; dims=1, bias=false)
@test size(a) == (n,)
@test a ≈ A[:,1]

A = ridge(Xt, Y0, r; trans=true, bias=false)
A = ridge(Xt, Y0, r; dims=2, bias=false)
@test size(A) == (n, n2)
@test A ≈ A_r

a = ridge(Xt, y0, r; trans=true, bias=false)
a = ridge(Xt, y0, r; dims=2, bias=false)
@test size(a) == (n,)
@test a ≈ A[:,1]

Aa = ridge(X, Y1, r; trans=false, bias=true)
Aa = ridge(X, Y1, r; dims=1, bias=true)
Aa_r = copy(Aa)
@test size(Aa) == (n+1, n2)
A, b = Aa[1:end-1,:], Aa[end:end,:]
@test X' * (Y1 .- b) ≈ (X'X + diagm(0=>r)) * A

aa = ridge(X, y1, r; trans=false, bias=true)
aa = ridge(X, y1, r; dims=1, bias=true)
@test aa ≈ Aa[:,1]

Aa = ridge(Xt, Y1, r; trans=true, bias=true)
Aa = ridge(Xt, Y1, r; dims=2, bias=true)
@test Aa ≈ Aa_r

aa = ridge(Xt, y1, r; trans=true, bias=true)
aa = ridge(Xt, y1, r; dims=2, bias=true)
@test aa ≈ Aa[:,1]


@@ -140,36 +152,36 @@
Q = qr(randn(n, n)).Q
r = Q' * diagm(0=>r) * Q

A = ridge(X, Y0, r; trans=false, bias=false)
A = ridge(X, Y0, r; dims=1, bias=false)
A_r = copy(A)
@test size(A) == (n, n2)
@test X'Y0 ≈ (X'X + r) * A

a = ridge(X, y0, r; trans=false, bias=false)
a = ridge(X, y0, r; dims=1, bias=false)
@test size(a) == (n,)
@test a ≈ A[:,1]

A = ridge(Xt, Y0, r; trans=true, bias=false)
A = ridge(Xt, Y0, r; dims=2, bias=false)
@test size(A) == (n, n2)
@test A ≈ A_r

a = ridge(Xt, y0, r; trans=true, bias=false)
a = ridge(Xt, y0, r; dims=2, bias=false)
@test size(a) == (n,)
@test a ≈ A[:,1]

Aa = ridge(X, Y1, r; trans=false, bias=true)
Aa = ridge(X, Y1, r; dims=1, bias=true)
Aa_r = copy(Aa)
@test size(Aa) == (n+1, n2)
A, b = Aa[1:end-1,:], Aa[end:end,:]
@test X' * (Y1 .- b) ≈ (X'X + r) * A

aa = ridge(X, y1, r; trans=false, bias=true)
aa = ridge(X, y1, r; dims=1, bias=true)
@test aa ≈ Aa[:,1]

Aa = ridge(Xt, Y1, r; trans=true, bias=true)
Aa = ridge(Xt, Y1, r; dims=2, bias=true)
@test Aa ≈ Aa_r

aa = ridge(Xt, y1, r; trans=true, bias=true)
aa = ridge(Xt, y1, r; dims=2, bias=true)
@test aa ≈ Aa[:,1]

end