Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Box Plot Chart throws an error when the average (AVG) / SUM is being calculated on the Metrics #20235

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion superset/utils/pandas_postprocessing/boxplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import numpy as np
from flask_babel import gettext as _
from pandas import DataFrame, Series
from pandas import DataFrame, Series, to_numeric

from superset.exceptions import InvalidPostProcessingError
from superset.utils.core import PostProcessingBoxplotWhiskerType
Expand Down Expand Up @@ -122,4 +122,11 @@ def outliers(series: Series) -> Set[float]:
for operator_name, operator in operators.items()
for metric in metrics
}

# nanpercentile needs numeric values, otherwise the isnan function
# that's used in the underlying function will fail
for column in metrics:
if df.dtypes[column] == np.object:
df[column] = to_numeric(df[column], errors="coerce")

return aggregate(df, groupby=groupby, aggregates=aggregates)
25 changes: 25 additions & 0 deletions tests/unit_tests/pandas_postprocessing/test_boxplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,28 @@ def test_boxplot_percentile_incorrect_params():
metrics=["cars"],
percentiles=[10, 90, 10],
)


def test_boxplot_type_coercion():
    """
    Check that ``boxplot`` accepts a metric column holding numeric strings.

    The ``cars`` metric is cast to ``str`` before the call, so the operation
    must still produce the full set of boxplot aggregate columns and one row
    per ``region`` group.
    """
    # Work on a copy: ``names_df`` is defined outside this test, and assigning
    # the string-cast column onto it directly would mutate that object for
    # every other test that reads it (making results order-dependent).
    df = names_df.copy()
    df["cars"] = df["cars"].astype(str)

    df = boxplot(
        df=df,
        groupby=["region"],
        whisker_type=PostProcessingBoxplotWhiskerType.TUKEY,
        metrics=["cars"],
    )

    # Every aggregate for the metric, plus the groupby column, must be present.
    assert set(df.columns) == {
        "cars__mean",
        "cars__median",
        "cars__q1",
        "cars__q3",
        "cars__max",
        "cars__min",
        "cars__count",
        "cars__outliers",
        "region",
    }
    assert len(df) == 4