Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Add skipmissing argument to levels #46

Merged
merged 6 commits into from
Apr 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 53 additions & 15 deletions src/DataAPI.jl
Original file line number Diff line number Diff line change
Expand Up @@ -105,29 +105,67 @@ definition.
"""
function describe end

# Sentinel type needed to make `levels` inferrable
struct _Default end

"""
levels(x)
levels(x; skipmissing=true)

Return a vector of unique values which occur or could occur in collection `x`.
`missing` values are skipped unless `skipmissing=false` is passed.

Return a vector of unique values which occur or could occur in collection `x`,
omitting `missing` even if present. Values are returned in the preferred order
for the collection, with the result of [`sort`](@ref) as a default.
Values are returned in the preferred order for the collection,
with the result of [`sort`](@ref) as a default.
If the collection is not sortable then the order of levels is unspecified.

Contrary to [`unique`](@ref), this function may return values which do not
actually occur in the data, and does not preserve their order of appearance in `x`.
"""
function levels(x)
T = Base.nonmissingtype(eltype(x))
u = unique(x)
# unique returns its input with copying for ranges
# (and possibly for other types guaranteed to hold unique values)
nmu = (u === x || Base.mightalias(u, x)) ? filter(!ismissing, u) : filter!(!ismissing, u)
levs = convert(AbstractArray{T}, nmu)
try
sort!(levs)
catch
@inline levels(x; skipmissing::Union{Bool, _Default}=_Default()) =
skipmissing isa _Default || skipmissing ?
_levels_skipmissing(x) : _levels_missing(x)

# The `which` check is here for backward compatibility:
# if a type implements a custom `levels` method but does not support
# keyword arguments, `levels(x, skipmissing=true/false)` will dispatch
# to the fallback methods here, and we take care of calling that custom method
function _levels_skipmissing(x)
if which(DataAPI.levels, Tuple{typeof(x)}) === which(DataAPI.levels, Tuple{Any})
T = Base.nonmissingtype(eltype(x))
u = unique(x)
# unique returns its input with copying for ranges
# (and possibly for other types guaranteed to hold unique values)
nmu = (u isa AbstractRange || u === x || Base.mightalias(u, x)) ?
filter(!ismissing, u) : filter!(!ismissing, u)
levs = convert(AbstractArray{T}, nmu)
try
sort!(levs)
catch
end
return levs
else
return levels(x)
end
end

function _levels_missing(x)
if which(DataAPI.levels, Tuple{typeof(x)}) === which(DataAPI.levels, Tuple{Any})
u = convert(AbstractArray{eltype(x)}, unique(x))
# unique returns its input with copying for ranges
# (and possibly for other types guaranteed to hold unique values)
levs = (x isa AbstractRange || u === x || Base.mightalias(u, x)) ?
Base.copymutable(u) : u
try
sort!(levs)
catch
end
return levs
# This is a suboptimal fallback since it does a second pass over the data
elseif any(ismissing, x)
return [levels(x); missing]
else
return convert(AbstractArray{eltype(x)}, levels(x))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this line seems not to be covered by tests

end
levs
end

"""
Expand Down
85 changes: 77 additions & 8 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
using Test, DataAPI

const ≅ = isequal

# For `levels` tests
struct TestArray{T} <: AbstractVector{T}
x::Vector{T}
end
Base.size(x::TestArray) = size(x.x)
Base.getindex(x::TestArray, i) = x.x[i]
DataAPI.levels(x::TestArray) = reverse(DataAPI.levels(x.x))

@testset "DataAPI" begin

@testset "defaultarray" begin
Expand Down Expand Up @@ -29,15 +39,15 @@ end

@testset "levels" begin

@test DataAPI.levels(1:1) ==
DataAPI.levels([1]) ==
DataAPI.levels([1, missing]) ==
DataAPI.levels([missing, 1]) ==
@test @inferred(DataAPI.levels(1:1)) ==
@inferred(DataAPI.levels([1])) ==
@inferred(DataAPI.levels([1, missing])) ==
@inferred(DataAPI.levels([missing, 1])) ==
[1]
@test DataAPI.levels(2:-1:1) ==
DataAPI.levels([2, 1]) ==
DataAPI.levels(Any[2, 1]) ==
DataAPI.levels([2, missing, 1]) ==
@test @inferred(DataAPI.levels(2:-1:1)) ==
@inferred(DataAPI.levels([2, 1])) ==
@inferred(DataAPI.levels(Any[2, 1])) ==
@inferred(DataAPI.levels([2, missing, 1])) ==
[1, 2]
@test DataAPI.levels([missing, "a", "c", missing, "b"]) == ["a", "b", "c"]
@test DataAPI.levels([Complex(0, 1), Complex(1, 0), missing]) ==
Expand All @@ -55,6 +65,65 @@ end
@test isempty(DataAPI.levels([missing]))
@test isempty(DataAPI.levels([]))

levels_missing(x) = DataAPI.levels(x, skipmissing=false)
@test @inferred(levels_missing(1:1)) ≅
@inferred(levels_missing([1])) ≅
[1]
if VERSION >= v"1.6.0"
@test @inferred(levels_missing([1, missing])) ≅
@inferred(levels_missing([missing, 1])) ≅
[1, missing]
else
@test levels_missing([1, missing]) ≅
levels_missing([missing, 1]) ≅
[1, missing]
end
@test @inferred(levels_missing(2:-1:1)) ≅
@inferred(levels_missing([2, 1])) ≅
[1, 2]
@test @inferred(levels_missing(Any[2, 1])) ≅
[1, 2]
@test DataAPI.levels([2, missing, 1], skipmissing=false) ≅
[1, 2, missing]
@test DataAPI.levels([missing, "a", "c", missing, "b"], skipmissing=false) ≅
["a", "b", "c", missing]
@test DataAPI.levels([Complex(0, 1), Complex(1, 0), missing], skipmissing=false) ≅
[Complex(0, 1), Complex(1, 0), missing]
@test typeof(DataAPI.levels([1], skipmissing=false)) === Vector{Int}
@test typeof(DataAPI.levels([1, missing], skipmissing=false)) ===
Vector{Union{Int, Missing}}
@test typeof(DataAPI.levels(["a"], skipmissing=false)) === Vector{String}
@test typeof(DataAPI.levels(["a", missing], skipmissing=false)) ===
Vector{Union{String, Missing}}
@test typeof(DataAPI.levels(Real[1], skipmissing=false)) === Vector{Real}
@test typeof(DataAPI.levels(Union{Real,Missing}[1, missing], skipmissing=false)) ===
Vector{Union{Real, Missing}}
@test typeof(DataAPI.levels(trues(1), skipmissing=false)) === Vector{Bool}
@test DataAPI.levels([missing], skipmissing=false) ≅ [missing]
@test DataAPI.levels([missing], skipmissing=false) isa Vector{Missing}
@test typeof(DataAPI.levels(Union{Int,Missing}[missing], skipmissing=false)) ===
Vector{Union{Int,Missing}}
@test isempty(DataAPI.levels([], skipmissing=false))
@test typeof(DataAPI.levels(Int[], skipmissing=false)) === Vector{Int}

# Backward compatibility test:
# check that an array type which implements a `levels` method
# which does not accept keyword arguments works thanks to fallbacks
@test DataAPI.levels(TestArray([1, 2])) ==
DataAPI.levels(TestArray([1, 2]), skipmissing=true) ==
DataAPI.levels(TestArray([1, 2]), skipmissing=false) == [2, 1]
@test DataAPI.levels(TestArray([1, 2])) isa Vector{Int}
@test DataAPI.levels(TestArray([1, 2]), skipmissing=true) isa Vector{Int}
@test DataAPI.levels(TestArray([1, 2]), skipmissing=false) isa Vector{Int}
@test DataAPI.levels(TestArray([missing, 1, 2])) ==
DataAPI.levels(TestArray([missing, 1, 2]), skipmissing=true) == [2, 1]
@test DataAPI.levels(TestArray([missing, 1, 2]), skipmissing=false) ≅
[2, 1, missing]
@test DataAPI.levels(TestArray([missing, 1, 2])) isa Vector{Int}
@test DataAPI.levels(TestArray([missing, 1, 2]), skipmissing=true) isa
Vector{Int}
@test DataAPI.levels(TestArray([missing, 1, 2]), skipmissing=false) isa
Vector{Union{Int, Missing}}
end

@testset "Between" begin
Expand Down