diff --git a/docs/source/lreg.rst b/docs/source/lreg.rst index 614d117..858427c 100644 --- a/docs/source/lreg.rst +++ b/docs/source/lreg.rst @@ -31,7 +31,7 @@ The package provides ``llsq`` to solve these problems: This function accepts two keyword arguments: - - ``trans``: whether to use the transposed form. (default is ``false``) + - ``dims``: whether input observations are stored as rows (``1``) or columns (``2``). (default is ``1``) - ``bias``: whether to include the bias term ``b``. (default is ``true``) The function results the solution ``a``. @@ -43,7 +43,7 @@ For a single response vector ``y`` (without using bias): .. code-block:: julia - using MultivariateStats + using Statistics, MultivariateStats # prepare data X = rand(1000, 3) # feature matrix @@ -57,7 +57,7 @@ For a single response vector ``y`` (without using bias): yp = X * a # measure the error - rmse = sqrt(mean(abs2(y - yp))) + rmse = sqrt(mean(abs2.(y - yp))) print("rmse = $rmse") For a single response vector ``y`` (using bias): @@ -67,7 +67,7 @@ For a single response vector ``y`` (using bias): # prepare data X = rand(1000, 3) a0, b0 = rand(3), rand() - y = X * a0 + b0 + 0.1 * randn(1000) + y = X * a0 .+ b0 .+ 0.1 * randn(1000) # solve using llsq sol = llsq(X, y) @@ -76,25 +76,25 @@ For a single response vector ``y`` (using bias): a, b = sol[1:end-1], sol[end] # do prediction - yp = X * a + b' + yp = X * a .+ b' -For a matrix ``Y`` comprised of multiple columns: +For a matrix of column-stored regressors ``X`` and a matrix comprised of multiple columns of dependent variables ``Y``: .. 
code-block:: julia # prepare data - X = rand(1000, 3) + X = rand(3, 1000) A0, b0 = rand(3, 5), rand(1, 5) - Y = (X * A0 .+ b0) + 0.1 * randn(1000, 5) + Y = (X' * A0 .+ b0) + 0.1 * randn(1000, 5) # solve using llsq - sol = llsq(X, Y) + sol = llsq(X, Y, dims=2) # extract results A, b = sol[1:end-1,:], sol[end,:] # do prediction - Yp = X * A .+ b' + Yp = X'*A .+ b' Ridge Regression @@ -132,7 +132,7 @@ The package provides ``ridge`` to solve these problems: This function accepts two keyword arguments: - - ``trans``: whether to use the transposed form. (default is ``false``) + - ``dims``: whether input observations are stored as rows (``1``) or columns (``2``). (default is ``1``) - ``bias``: whether to include the bias term ``b``. (default is ``true``) The function results the solution ``a``. diff --git a/src/lreg.jl b/src/lreg.jl index 2cddefa..b157a8b 100644 --- a/src/lreg.jl +++ b/src/lreg.jl @@ -20,8 +20,13 @@ _haug(X::AbstractMatrix{T}) where T = hcat(X, ones(T, size(X,1), 1))::Matrix{T} ## linear least square function llsq(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T}; - trans::Bool=false, bias::Bool=true) where {T<:Real} + trans::Bool=false, bias::Bool=true, + dims::Union{Integer,Nothing}=nothing) where {T<:Real} + if dims === nothing && trans + Base.depwarn("`trans` argument is deprecated, use llsq(X, Y, dims=d) instead.", :trans) + dims = 2 + end + if dims == 2 mX, nX = size(X) size(Y, 1) == nX || throw(DimensionMismatch("Dimensions of X and Y mismatch.")) mX <= nX || error("mX <= nX is required when trans is false.") @@ -30,30 +35,28 @@ function llsq(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T}; size(Y, 1) == mX || throw(DimensionMismatch("Dimensions of X and Y mismatch.")) mX >= nX || error("mX >= nX is required when trans is false.") end - _ridge(X, Y, zero(T), trans, bias) + _ridge(X, Y, zero(T), dims == 2, bias) end ## ridge regression -function ridge(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T}, r::Real; - trans::Bool=false, bias::Bool=true) 
where {T<:Real} - lreg_chkdims(X, Y, trans) - r >= zero(r) || error("r must be non-negative.") - _ridge(X, Y, convert(T, r), trans, bias) -end - -function ridge(X::AbstractMatrix, Y::AbstractVecOrMat, r::AbstractVector; - trans::Bool=false, bias::Bool=true) - d = lreg_chkdims(X, Y, trans) - length(r) == d || throw(DimensionMismatch("Incorrect length of r.")) - _ridge(X, Y, r, trans, bias) -end - -function ridge(X::AbstractMatrix, Y::AbstractVecOrMat, r::AbstractMatrix; - trans::Bool=false, bias::Bool=true) - d = lreg_chkdims(X, Y, trans) - size(r) == (d, d) || throw(DimensionMismatch("Incorrect size of r.")) - _ridge(X, Y, r, trans, bias) +function ridge(X::AbstractMatrix{T}, Y::AbstractVecOrMat{T}, r::Union{Real, AbstractVecOrMat}; + trans::Bool=false, bias::Bool=true, + dims::Union{Integer,Nothing}=nothing) where {T<:Real} + if dims === nothing && trans + Base.depwarn("`trans` argument is deprecated, use ridge(X, Y, r, dims=d) instead.", :trans) + dims = 2 + end + d = lreg_chkdims(X, Y, dims == 2) + if isa(r, Real) + r >= zero(r) || error("r must be non-negative.") + r = convert(T, r) + elseif isa(r, AbstractVector) + length(r) == d || throw(DimensionMismatch("Incorrect length of r.")) + elseif isa(r, AbstractMatrix) + size(r) == (d, d) || throw(DimensionMismatch("Incorrect size of r.")) + end + _ridge(X, Y, r, dims == 2, bias) end ## implementation diff --git a/test/lreg.jl b/test/lreg.jl index c9652e7..ae161d6 100644 --- a/test/lreg.jl +++ b/test/lreg.jl @@ -28,110 +28,122 @@ import Random ## llsq - A = llsq(X, Y0; trans=false, bias=false) + A = llsq(X, Y0; dims=1, bias=false) A_r = copy(A) @test size(A) == (n, n2) @test X'Y0 ≈ X'X * A - a = llsq(X, y0; trans=false, bias=false) + a = llsq(X, y0; dims=1, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - A = llsq(Xt, Y0; trans=true, bias=false) + A = llsq(Xt, Y0; dims=2, bias=false) @test size(A) == (n, n2) @test A ≈ A_r - a = llsq(Xt, y0; trans=true, bias=false) + a = llsq(Xt, y0; dims=2, bias=false) @test 
size(a) == (n,) @test a ≈ A[:,1] - Aa = llsq(X, Y1; trans=false, bias=true) + Aa = llsq(X, Y1; dims=1, bias=true) Aa_r = copy(Aa) @test size(Aa) == (n+1, n2) A, b = Aa[1:end-1,:], Aa[end:end,:] @test X' * (Y1 .- b) ≈ X'X * A - aa = llsq(X, y1; trans=false, bias=true) + aa = llsq(X, y1; dims=1, bias=true) @test aa ≈ Aa[:,1] - Aa = llsq(Xt, Y1; trans=true, bias=true) + Aa = llsq(Xt, Y1; dims=2, bias=true) @test Aa ≈ Aa_r - aa = llsq(Xt, y1; trans=true, bias=true) + aa = llsq(Xt, y1; dims=2, bias=true) @test aa ≈ Aa[:,1] + # test default dims=1 argument + aa = llsq(X, y1) + @test aa ≈ Aa[:,1] + @test_throws DimensionMismatch llsq(X, y1; dims=2) + @test_throws DimensionMismatch llsq(Xt, y1) + ## ridge (with Real r) r = 0.1 - A = ridge(X, Y0, r; trans=false, bias=false) + A = ridge(X, Y0, r; dims=1, bias=false) A_r = copy(A) @test size(A) == (n, n2) @test X'Y0 ≈ (X'X + r * I) * A - a = ridge(X, y0, r; trans=false, bias=false) + a = ridge(X, y0, r; dims=1, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - A = ridge(Xt, Y0, r; trans=true, bias=false) + A = ridge(Xt, Y0, r; dims=2, bias=false) @test size(A) == (n, n2) @test A ≈ A_r - a = ridge(Xt, y0, r; trans=true, bias=false) + a = ridge(Xt, y0, r; dims=2, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - Aa = ridge(X, Y1, r; trans=false, bias=true) + Aa = ridge(X, Y1, r; dims=1, bias=true) Aa_r = copy(Aa) @test size(Aa) == (n+1, n2) A, b = Aa[1:end-1,:], Aa[end:end,:] @test X' * (Y1 .- b) ≈ (X'X + r * I) * A - aa = ridge(X, y1, r; trans=false, bias=true) + aa = ridge(X, y1, r; dims=1, bias=true) @test aa ≈ Aa[:,1] - Aa = ridge(Xt, Y1, r; trans=true, bias=true) + Aa = ridge(Xt, Y1, r; dims=2, bias=true) @test Aa ≈ Aa_r - aa = ridge(Xt, y1, r; trans=true, bias=true) + aa = ridge(Xt, y1, r; dims=2, bias=true) + @test aa ≈ Aa[:,1] + + # test default dims=1 argument + aa = ridge(X, y1, r) @test aa ≈ Aa[:,1] + @test_throws DimensionMismatch ridge(X, y1, r; dims=2) + @test_throws DimensionMismatch ridge(Xt, y1, r) ## 
ridge (with diagonal r) r = 0.05 .+ 0.1 .* rand(n) - A = ridge(X, Y0, r; trans=false, bias=false) + A = ridge(X, Y0, r; dims=1, bias=false) A_r = copy(A) @test size(A) == (n, n2) @test X'Y0 ≈ (X'X + diagm(0=>r)) * A - a = ridge(X, y0, r; trans=false, bias=false) + a = ridge(X, y0, r; dims=1, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - A = ridge(Xt, Y0, r; trans=true, bias=false) + A = ridge(Xt, Y0, r; dims=2, bias=false) @test size(A) == (n, n2) @test A ≈ A_r - a = ridge(Xt, y0, r; trans=true, bias=false) + a = ridge(Xt, y0, r; dims=2, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - Aa = ridge(X, Y1, r; trans=false, bias=true) + Aa = ridge(X, Y1, r; dims=1, bias=true) Aa_r = copy(Aa) @test size(Aa) == (n+1, n2) A, b = Aa[1:end-1,:], Aa[end:end,:] @test X' * (Y1 .- b) ≈ (X'X + diagm(0=>r)) * A - aa = ridge(X, y1, r; trans=false, bias=true) + aa = ridge(X, y1, r; dims=1, bias=true) @test aa ≈ Aa[:,1] - Aa = ridge(Xt, Y1, r; trans=true, bias=true) + Aa = ridge(Xt, Y1, r; dims=2, bias=true) @test Aa ≈ Aa_r - aa = ridge(Xt, y1, r; trans=true, bias=true) + aa = ridge(Xt, y1, r; dims=2, bias=true) @test aa ≈ Aa[:,1] @@ -140,36 +152,36 @@ import Random Q = qr(randn(n, n)).Q r = Q' * diagm(0=>r) * Q - A = ridge(X, Y0, r; trans=false, bias=false) + A = ridge(X, Y0, r; dims=1, bias=false) A_r = copy(A) @test size(A) == (n, n2) @test X'Y0 ≈ (X'X + r) * A - a = ridge(X, y0, r; trans=false, bias=false) + a = ridge(X, y0, r; dims=1, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - A = ridge(Xt, Y0, r; trans=true, bias=false) + A = ridge(Xt, Y0, r; dims=2, bias=false) @test size(A) == (n, n2) @test A ≈ A_r - a = ridge(Xt, y0, r; trans=true, bias=false) + a = ridge(Xt, y0, r; dims=2, bias=false) @test size(a) == (n,) @test a ≈ A[:,1] - Aa = ridge(X, Y1, r; trans=false, bias=true) + Aa = ridge(X, Y1, r; dims=1, bias=true) Aa_r = copy(Aa) @test size(Aa) == (n+1, n2) A, b = Aa[1:end-1,:], Aa[end:end,:] @test X' * (Y1 .- b) ≈ (X'X + r) * A - aa = ridge(X, y1, r; 
trans=false, bias=true) + aa = ridge(X, y1, r; dims=1, bias=true) @test aa ≈ Aa[:,1] - Aa = ridge(Xt, Y1, r; trans=true, bias=true) + Aa = ridge(Xt, Y1, r; dims=2, bias=true) @test Aa ≈ Aa_r - aa = ridge(Xt, y1, r; trans=true, bias=true) + aa = ridge(Xt, y1, r; dims=2, bias=true) @test aa ≈ Aa[:,1] end