Skip to content

Commit

Permalink
Add sort parameter to points model (#672)
Browse files Browse the repository at this point in the history
add sort parameter to points model
  • Loading branch information
LucaMarconato authored Aug 12, 2024
1 parent 2e1347d commit cf16027
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions src/spatialdata/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,31 +656,35 @@ def _(
)
ndim = len(coordinates)
axes = [X, Y, Z][:ndim]
index_monotonically_increasing = data.index.is_monotonic_increasing
if not isinstance(index_monotonically_increasing, bool):
index_monotonically_increasing = index_monotonically_increasing.compute()
if not index_monotonically_increasing:
if "sort" not in kwargs:
index_monotonically_increasing = data.index.is_monotonic_increasing
if not isinstance(index_monotonically_increasing, bool):
index_monotonically_increasing = index_monotonically_increasing.compute()
sort = index_monotonically_increasing
else:
sort = kwargs["sort"]
if not sort:
warnings.warn(
"The index of the dataframe is not monotonic increasing. It is recommended to sort the data to "
"adjust the order of the index before calling .parse() to avoid possible problems due to unknown "
"divisions",
"adjust the order of the index before calling .parse() (or call `parse(sort=True)`) to avoid possible "
"problems due to unknown divisions.",
UserWarning,
stacklevel=2,
)
if isinstance(data, pd.DataFrame):
table: DaskDataFrame = dd.from_pandas( # type: ignore[attr-defined]
pd.DataFrame(data[[coordinates[ax] for ax in axes]].to_numpy(), columns=axes, index=data.index),
# we need to pass sort=True also when the index is sorted to ensure that the divisions are computed
sort=index_monotonically_increasing,
sort=sort,
**kwargs,
)
# we cannot compute the divisions when the index is not monotonically increasing and npartitions > 1
if not table.known_divisions and (index_monotonically_increasing or table.npartitions == 1):
if not table.known_divisions and (sort or table.npartitions == 1):
table.divisions = table.compute_current_divisions()
if feature_key is not None:
feature_categ = dd.from_pandas(
data[feature_key].astype(str).astype("category"),
sort=index_monotonically_increasing,
sort=sort,
**kwargs,
) # type: ignore[attr-defined]
table[feature_key] = feature_categ
Expand Down

0 comments on commit cf16027

Please sign in to comment.