diff --git a/src/array.jl b/src/array.jl
index 40b34998..ffbf66b8 100644
--- a/src/array.jl
+++ b/src/array.jl
@@ -753,14 +753,29 @@ end
 leveltype(::Type{T}) where {T <: CategoricalArray} = leveltype(nonmissingtype(eltype(T)))
 
 """
-    levels(x::CategoricalArray)
+    levels(x::CategoricalArray; skipmissing=true)
     levels(x::CategoricalValue)
 
 Return the levels of categorical array or value `x`.
 This may include levels which do not actually appear in the data
 (see [`droplevels!`](@ref)).
+`missing` will be included only if it appears in the data and
+`skipmissing=false` is passed.
+
+The returned vector is an internal field of `x` which must not be mutated
+as doing so would corrupt it.
 """
-DataAPI.levels(A::CategoricalArray) = levels(A.pool)
+@inline function DataAPI.levels(A::CatArrOrSub{T}; skipmissing::Bool=true) where T
+    if eltype(A) >: Missing && !skipmissing
+        if any(==(0), refs(A))
+            T[levels(pool(A)); missing]
+        else
+            convert(Vector{T}, levels(pool(A)))
+        end
+    else
+        levels(pool(A))
+    end
+end
 
 """
     levels!(A::CategoricalArray, newlevels::Vector; allowmissing::Bool=false)
diff --git a/src/subarray.jl b/src/subarray.jl
index 00b38480..3e5f3f39 100644
--- a/src/subarray.jl
+++ b/src/subarray.jl
@@ -1,6 +1,5 @@
 # delegate methods for SubArrays to support view
 
-DataAPI.levels(sa::SubArray{T,N,P}) where {T,N,P<:CategoricalArray} = levels(parent(sa))
 isordered(sa::SubArray{T,N,P}) where {T,N,P<:CategoricalArray} = isordered(parent(sa))
 # This method cannot support allowmissing=true since that would modify the parent
 levels!(sa::SubArray{T,N,P}, newlevels::Vector) where {T,N,P<:CategoricalArray} =
diff --git a/test/13_arraycommon.jl b/test/13_arraycommon.jl
index aed6ad86..20d61ef0 100644
--- a/test/13_arraycommon.jl
+++ b/test/13_arraycommon.jl
@@ -2260,4 +2260,32 @@ end
         Vector{CategoricalVector{<:Any, <:Integer, <:Any, <:Any, Union{}}}
 end
 
+@testset "levels with skipmissing argument" begin
+    for x in (categorical(["a", "b", "a"], levels=["b", "c", "a"]),
+              view(categorical(["c", "b", "a"], levels=["b", "c", "a"]), 2:3))
+        @test @inferred(levels(x)) == ["b", "c", "a"]
+        @test @inferred(levels(x, skipmissing=true)) == ["b", "c", "a"]
+        @test @inferred(levels(x, skipmissing=false)) == ["b", "c", "a"]
+    end
+
+    for x in (categorical(Union{String, Missing}["a", "b", "a"], levels=["b", "c", "a"]),
+              view(categorical(Union{String, Missing}["c", "b", "a"], levels=["b", "c", "a"]), 2:3),
+              view(categorical(Union{String, Missing}[missing, "b", "a"], levels=["b", "c", "a"]), 2:3))
+        @test @inferred(levels(x)) == ["b", "c", "a"]
+        @test levels(x, skipmissing=true) == ["b", "c", "a"]
+        @test levels(x, skipmissing=true) isa Vector{String}
+        @test levels(x, skipmissing=false) == ["b", "c", "a"]
+        @test levels(x, skipmissing=false) isa Vector{Union{String, Missing}}
+    end
+
+    for x in (categorical(Union{String, Missing}["a", "b", missing], levels=["b", "c", "a"]),
+              view(categorical(Union{String, Missing}["c", "b", missing], levels=["b", "c", "a"]), 2:3))
+        @test @inferred(levels(x)) == ["b", "c", "a"]
+        @test levels(x, skipmissing=true) == ["b", "c", "a"]
+        @test levels(x, skipmissing=true) isa Vector{String}
+        @test levels(x, skipmissing=false) ≅ ["b", "c", "a", missing]
+        @test levels(x, skipmissing=false) isa Vector{Union{String, Missing}}
+    end
+end
+
 end