From 91d04b855a37aa116b6f871c8be0a1b4ab770434 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Thu, 2 Jan 2025 11:08:24 +0100 Subject: [PATCH] fix: Fix global cat unique (#20524) --- .../src/chunked_array/logical/categorical/ops/unique.rs | 7 ++++--- py-polars/tests/unit/datatypes/test_categorical.py | 7 +++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/crates/polars-core/src/chunked_array/logical/categorical/ops/unique.rs b/crates/polars-core/src/chunked_array/logical/categorical/ops/unique.rs index 6d337e3570e3..076099a9c33e 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/ops/unique.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/ops/unique.rs @@ -31,10 +31,11 @@ impl CategoricalChunked { Ok(out) } } else { + let has_nulls = (self.null_count() > 0) as u32; let mut state = match cat_map.as_ref() { RevMapping::Global(map, values, _) => { if self.is_enum() { - PrimitiveRangedUniqueState::new(0, values.len() as u32 + 1) + PrimitiveRangedUniqueState::new(0, values.len() as u32 + has_nulls) } else { let mut min = u32::MAX; let mut max = 0u32; @@ -44,11 +45,11 @@ impl CategoricalChunked { max = max.max(v); } - PrimitiveRangedUniqueState::new(min, max) + PrimitiveRangedUniqueState::new(min, max + has_nulls) } }, RevMapping::Local(values, _) => { - PrimitiveRangedUniqueState::new(0, values.len() as u32 + 1) + PrimitiveRangedUniqueState::new(0, values.len() as u32 + has_nulls) }, }; diff --git a/py-polars/tests/unit/datatypes/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py index 505986422c55..64b789281a21 100644 --- a/py-polars/tests/unit/datatypes/test_categorical.py +++ b/py-polars/tests/unit/datatypes/test_categorical.py @@ -898,3 +898,10 @@ def test_perfect_group_by_19950() -> None: "y": ["b"], "x": ["a"], } + + +@StringCache() +def test_categorical_unique() -> None: + s = pl.Series(["a", "b", None], dtype=pl.Categorical) + assert s.n_unique() == 3 + assert s.unique().to_list() == ["a", "b", None]