diff --git a/README.md b/README.md
index 1ed1407..7e7e6cf 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,8 @@ A Julia package for multivariate statistics and data analysis (e.g. dimensionali
 [![Coverage Status](https://coveralls.io/repos/JuliaStats/MultivariateStats.jl/badge.svg?branch=master)](https://coveralls.io/r/JuliaStats/MultivariateStats.jl?branch=master)
 [![Build Status](https://travis-ci.org/JuliaStats/MultivariateStats.jl.svg?branch=master)](https://travis-ci.org/JuliaStats/MultivariateStats.jl)
 [![CI](https://github.com/JuliaStats/MultivariateStats.jl/actions/workflows/ci.yml/badge.svg)](https://github.com/JuliaStats/MultivariateStats.jl/actions/workflows/ci.yml)
+[![](https://img.shields.io/badge/docs-stable-blue.svg)](https://juliastats.org/MultivariateStats.jl/stable)
+[![](https://img.shields.io/badge/docs-dev-blue.svg)](https://juliastats.org/MultivariateStats.jl/dev)
 
 -------
 
diff --git a/docs/make.jl b/docs/make.jl
index a22af8d..d4cdf4b 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -7,7 +7,7 @@ end
 makedocs(
     sitename = "MultivariateStats.jl",
     modules = [MultivariateStats],
-    pages = ["Home"=>"index.md", "lda.md", "Development"=>"api.md"]
+    pages = ["Home"=>"index.md", "whiten.md", "lda.md", "Development"=>"api.md"]
 )
 
 deploydocs(
diff --git a/docs/src/api.md b/docs/src/api.md
index 4566df4..ca70af7 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -46,7 +46,6 @@ Note: `?` refers to a possible implementation that is missing or called differen
 |length            |  +  |     |  x  |      |      |     |     |     |     |     |     |
 |size              |  +  |     |     |      |      |     |     |     |     |     |     |
 |                  |     |     |     |      |      |     |     |     |     |     |     |
-|eee               |     |     |     |      |      |     |     |     |     |     |     |
 
 - StatsBase.AbstractDataTransform
     - Whitening
diff --git a/docs/src/index.md b/docs/src/index.md
index 0eb8a4f..47f7ffd 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -10,9 +10,8 @@ end
 
 [MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl) is a Julia package for multivariate statistical analysis. It provides a rich set of useful analysis techniques, such as PCA, CCA, LDA, ICA, etc.
 
-
 ```@contents
-Pages = ["lda.md", "api.md"]
+Pages = ["whiten.md", "lda.md", "api.md"]
 Depth = 2
 ```
 
diff --git a/docs/src/whiten.md b/docs/src/whiten.md
new file mode 100644
index 0000000..e655971
--- /dev/null
+++ b/docs/src/whiten.md
@@ -0,0 +1,35 @@
+# Data Transformation
+
+## Whitening
+
+A [whitening transformation](http://en.wikipedia.org/wiki/Whitening_transformation>) is a decorrelation transformation that transforms a set of random variables into a set of new random variables with identity covariance (uncorrelated with unit variances).
+
+In particular, suppose a random vector has covariance ``\mathbf{C}``, then a whitening transform ``\mathbf{W}`` is one that satisfy:
+
+```math
+   \mathbf{W}^T \mathbf{C} \mathbf{W} = \mathbf{I}
+```
+
+Note that ``\mathbf{W}`` is generally not unique. In particular, if ``\mathbf{W}`` is a whitening transform, so is any of its rotation ``\mathbf{W} \mathbf{R}`` with ``\mathbf{R}^T \mathbf{R} = \mathbf{I}``.
+
+The package uses [`Whitening`](@ref) to represent a whitening transform.
+
+```@docs
+Whitening
+```
+
+Whitening transformation can be fitted to data using the `fit` method.
+
+```@docs
+fit(::Type{Whitening}, X::AbstractMatrix{T}; kwargs...) where {T<:Real}
+transform(::Whitening, ::AbstractVecOrMat)
+length(::Whitening)
+mean(::Whitening)
+size(::Whitening)
+```
+
+Additional methods
+```@docs
+cov_whitening
+cov_whitening!
+```
diff --git a/src/MultivariateStats.jl b/src/MultivariateStats.jl
index 34372e8..31f6ae5 100644
--- a/src/MultivariateStats.jl
+++ b/src/MultivariateStats.jl
@@ -1,9 +1,10 @@
 module MultivariateStats
     using LinearAlgebra
-    using StatsBase: SimpleCovariance, CovarianceEstimator, pairwise, pairwise!
+    using StatsBase: SimpleCovariance, CovarianceEstimator, RegressionModel,
+                     AbstractDataTransform, pairwise!
     import Statistics: mean, var, cov, covm
     import Base: length, size, show, dump
-    import StatsBase: RegressionModel, fit, predict, ConvergenceException, dof, coef, weights, pairwise
+    import StatsBase: fit, predict, predict!, ConvergenceException, dof_residual, coef, weights, dof, pairwise
     import SparseArrays
     import LinearAlgebra: eigvals
 
@@ -111,7 +112,6 @@ module MultivariateStats
     faem,                   # Maximum likelihood probabilistic PCA
     facm                    # EM algorithm for probabilistic PCA
 
-
     ## source files
     include("common.jl")
     include("lreg.jl")
@@ -125,4 +125,10 @@ module MultivariateStats
     include("ica.jl")
     include("fa.jl")
 
+    ## deprecations
+    @deprecate indim(f::Whitening) length(f::Whitening)
+    @deprecate outdim(f::Whitening) length(f::Whitening)
+    # @deprecate transform(m, x; kwargs...) predict(m, x; kwargs...) #ex=false
+    # @deprecate transform(m; kwargs...) predict(m; kwargs...) #ex=false
+
 end # module
diff --git a/src/common.jl b/src/common.jl
index b54b8eb..51becac 100644
--- a/src/common.jl
+++ b/src/common.jl
@@ -20,10 +20,10 @@ decentralize(x::AbstractMatrix, m::AbstractVector) = (isempty(m) ? x : x .+ m)
 
 # get a full mean vector
 
-fullmean(d::Int, mv::Vector{T}) where T = (isempty(mv) ? zeros(T, d) : mv)
+fullmean(d::Int, mv::AbstractVector{T}) where T = (isempty(mv) ? zeros(T, d) : mv)
 
-preprocess_mean(X::AbstractMatrix{T}, m) where T<:Real =
-    (m === nothing ? vec(mean(X, dims=2)) : m == 0 ? T[] :  m)
+preprocess_mean(X::AbstractMatrix{T}, m; dims=2) where T<:Real =
+    (m === nothing ? vec(mean(X, dims=dims)) : m == 0 ? T[] :  m)
 
 # choose the first k values and columns
 #
diff --git a/src/whiten.jl b/src/whiten.jl
index 51037be..f2c4bdb 100644
--- a/src/whiten.jl
+++ b/src/whiten.jl
@@ -1,27 +1,51 @@
 # Whitening
 
-## Solve whitening based on covariance
-#
-# finds W, such that W'CW = I
-#
+"""
+    cov_whitening(C)
+
+Derive the whitening transform coefficient matrix `W` given the covariance matrix `C`. Here, `C` can be either a square matrix, or an instance of `Cholesky`.
+
+Internally, this function solves the whitening transform using Cholesky factorization. The rationale is as follows: let ``\\mathbf{C} = \\mathbf{U}^T \\mathbf{U}`` and ``\\mathbf{W} = \\mathbf{U}^{-1}``, then ``\\mathbf{W}^T \\mathbf{C} \\mathbf{W} = \\mathbf{I}``.
+
+**Note:** The return matrix `W` is an upper triangular matrix.
+"""
 function cov_whitening(C::Cholesky{T}) where {T<:Real}
     cf = C.UL
     Matrix{T}(inv(istriu(cf) ? cf : cf'))
 end
 
-cov_whitening!(C::DenseMatrix{<:Real}) = cov_whitening(cholesky!(Hermitian(C, :U)))
-cov_whitening(C::DenseMatrix{<:Real}) = cov_whitening!(copy(C))
+"""
+    cov_whitening!(C)
+
+In-place version of `cov_whitening(C)`, in which the input matrix `C` will be overwritten during computation. This can be more efficient when `C` is no longer used.
+"""
+cov_whitening!(C::AbstractMatrix{<:Real}) = cov_whitening(cholesky!(Hermitian(C, :U)))
+cov_whitening(C::AbstractMatrix{<:Real}) = cov_whitening!(copy(C))
+
+"""
+    cov_whitening!(C, regcoef)
+
+In-place version of `cov_whitening(C, regcoef)`, in which the input matrix `C` will be overwritten during computation. This can be more efficient when `C` is no longer used.
+"""
+cov_whitening!(C::AbstractMatrix{<:Real}, regcoef::Real) = cov_whitening!(regularize_symmat!(C, regcoef))
+
+"""
+    cov_whitening(C, regcoef)
 
-cov_whitening!(C::DenseMatrix{<:Real}, regcoef::Real) = cov_whitening!(regularize_symmat!(C, regcoef))
-cov_whitening(C::DenseMatrix{<:Real}, regcoef::Real) = cov_whitening!(copy(C), regcoef)
+Derive a whitening transform based on a regularized covariance, as `C + (eigmax(C) * regcoef) * eye(d)`.
+"""
+cov_whitening(C::AbstractMatrix{<:Real}, regcoef::Real) = cov_whitening!(copy(C), regcoef)
 
 ## Whitening type
 
-struct Whitening{T<:Real}
-    mean::Vector{T}
-    W::Matrix{T}
+"""
+A whitening transform representation.
+"""
+struct Whitening{T<:Real} <: AbstractDataTransform
+    mean::AbstractVector{T}
+    W::AbstractMatrix{T}
 
-    function Whitening{T}(mean::Vector{T}, W::Matrix{T}) where {T<:Real}
+    function Whitening{T}(mean::AbstractVector{T}, W::AbstractMatrix{T}) where {T<:Real}
         d, d2 = size(W)
         d == d2 || error("W must be a square matrix.")
         isempty(mean) || length(mean) == d ||
@@ -29,29 +53,99 @@ struct Whitening{T<:Real}
         return new(mean, W)
     end
 end
-Whitening(mean::Vector{T}, W::Matrix{T}) where {T<:Real} = Whitening{T}(mean, W)
+Whitening(mean::AbstractVector{T}, W::AbstractMatrix{T}) where {T<:Real} = Whitening{T}(mean, W)
+
+"""
+    length(f)
+
+Get the dimension of the  whitening transform `f`.
+"""
+length(f::Whitening) = size(f.W, 1)
+
+"""
+    size(f)
+
+Dimensions of the coefficient matrix of the whitening transform `f`.
+"""
+size(f::Whitening) = size(f.W)
+
+"""
+    mean(f)
+
+Get the mean vector of the whitening transformation `f`.
 
-indim(f::Whitening) = size(f.W, 1)
-outdim(f::Whitening) = size(f.W, 2)
+**Note:** if mean is empty, this function returns a zero vector of length [`outdim`](@ref) .
+"""
 mean(f::Whitening) = fullmean(indim(f), f.mean)
 
-transform(f::Whitening, x::AbstractVecOrMat{<:Real}) = transpose(f.W) * centralize(x, f.mean)
 
-## Fit whitening to data
+"""
+    transform(f, x)
 
-function fit(::Type{Whitening}, X::DenseMatrix{T};
+Apply the whitening transform `f` to a vector or a matrix `x` with samples in columns, as ``\\mathbf{W}^T (\\mathbf{x} - \\boldsymbol{\\mu})``.
+"""
+function transform(f::Whitening, x::AbstractVecOrMat{<:Real})
+    s = size(x)
+    Z, dims = if length(s) == 1
+        length(f.mean) == s[1] || throw(DimensionMismatch("Inconsistent dimensions."))
+        x - f.mean, 2
+    else
+        dims = (s[1] == length(f.mean)) + 1
+        length(f.mean) == s[3-dims] || throw(DimensionMismatch("Inconsistent dimensions."))
+        x .- (dims == 2 ? f.mean : transpose(f.mean)), dims
+    end
+    if dims == 2
+        transpose(f.W) * Z
+    else
+        Z * f.W
+    end
+end
+
+"""
+    fit(::Type{Whitening},  X::AbstractMatrix{T}; kwargs...)
+
+Estimate a whitening transform from the data given in `X`.
+
+This function returns an instance of [`Whitening`](@ref)
+
+**Keyword Arguments:**
+- `regcoef`: The regularization coefficient. The covariance will be regularized as follows when `regcoef` is positive `C + (eigmax(C) * regcoef) * eye(d)`. Default values is `zero(T)`.
+
+- `dims`: if `1` the transformation calculated from the row samples. fit standardization parameters in column-wise fashion;
+  if `2` the transformation calculated from the column samples. The default is `nothing`, which is equivalent to `dims=2` with a deprecation warning.
+
+- `mean`: The mean vector, which can be either of:
+    - `0`: the input data has already been centralized
+    - `nothing`: this function will compute the mean (**default**)
+    - a pre-computed mean vector
+
+**Note:** This function internally relies on [`cov_whitening`](@ref) to derive the transformation `W`.
+"""
+function fit(::Type{Whitening}, X::AbstractMatrix{T};
+             dims::Union{Integer,Nothing}=nothing,
              mean=nothing, regcoef::Real=zero(T)) where {T<:Real}
-    n = size(X, 2)
-    n > 1 || error("X must contain more than one sample.")
-    mv = preprocess_mean(X, mean)
-    Z = centralize(X, mv)
+    if dims === nothing
+        Base.depwarn("fit(Whitening, x) is deprecated: use fit(Whitening, x, dims=2) instead", :fit)
+        dims = 2
+    end
+    if dims == 1
+        n = size(X,1)
+        n >= 2 || error("X must contain at least two rows.")
+    elseif dims == 2
+        n = size(X, 2)
+        n >= 2 || error("X must contain at least two columns.")
+    else
+        throw(DomainError(dims, "fit only accept dims to be 1 or 2."))
+    end
+    mv = preprocess_mean(X, mean; dims=dims)
+    Z = centralize((dims==1 ? transpose(X) : X), mv)
     C = rmul!(Z * transpose(Z), one(T) / (n - 1))
     return Whitening(mv, cov_whitening!(C, regcoef))
 end
 
 # invsqrtm
 
-function _invsqrtm!(C::Matrix{<:Real})
+function _invsqrtm!(C::AbstractMatrix{<:Real})
     n = size(C, 1)
     size(C, 2) == n || error("C must be a square matrix.")
     E = eigen!(Symmetric(C))
@@ -64,4 +158,9 @@ function _invsqrtm!(C::Matrix{<:Real})
     return U * transpose(U)
 end
 
-invsqrtm(C::DenseMatrix{<:Real}) = _invsqrtm!(copy(C))
+"""
+    invsqrtm(C)
+
+Compute `inv(sqrtm(C))` through symmetric eigenvalue decomposition.
+"""
+invsqrtm(C::AbstractMatrix{<:Real}) = _invsqrtm!(copy(C))
diff --git a/test/whiten.jl b/test/whiten.jl
index 69f296e..fb6209c 100644
--- a/test/whiten.jl
+++ b/test/whiten.jl
@@ -1,5 +1,5 @@
 using MultivariateStats
-using LinearAlgebra
+using LinearAlgebra, StatsBase, SparseArrays
 using Test
 import Statistics: mean, cov
 import Random
@@ -55,6 +55,8 @@ import Random
     W = f.W
     @test isa(f, Whitening{Float64})
     @test mean(f) === f.mean
+    @test length(f) == d
+    @test size(f) == (d,d)
     @test istriu(W)
     @test W'C * W ≈ Matrix(I, d, d)
     @test transform(f, X) ≈ W' * (X .- f.mean)
@@ -92,4 +94,25 @@ import Random
     # type consistency
     @test eltype(mean(M)) == Float64
     @test eltype(mean(MM)) == Float32
+
+    # sparse arrays
+    SX = sprand(Float32, d, n, 0.75)
+    SM = fit(Whitening, SX; mean=sprand(Float32, 3, 0.75))
+    Y = transform(SM, SX)
+    @test eltype(Y) == Float32
+
+    # different dimensions
+    @test_throws DomainError fit(Whitening, X'; dims=3)
+    M1 = fit(Whitening, X'; dims=1)
+    M2 = fit(Whitening, X; dims=2)
+    @test M1.W == M2.W
+    @test_throws DimensionMismatch transform(M1, rand(6,4))
+    @test_throws DimensionMismatch transform(M2, rand(4,6))
+    Y1 = transform(M1,X')
+    Y2 = transform(M2,X)
+    @test Y1' == Y2
+    @test_throws DimensionMismatch transform(M1, rand(7))
+    V1 = transform(M1,X[:,1])
+    V2 = transform(M2,X[:,1])
+    @test V1 == V2
 end