fix(api): Return most native type for metrics in EstimatorReport (#1283)
closes #1275 

Make the metric methods of `EstimatorReport` return the most native type
possible: a `float`, a `dict`, or a numpy array.
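
For illustration, a minimal sketch of the intended behaviour after this change, assuming a small toy classification task (the estimator, data, and the values in the comments are placeholders, and the exact shape of the `dict` output is inferred from the description above):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from skore import EstimatorReport

# Toy binary classification problem, purely for demonstration.
X, y = make_classification(random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

report = EstimatorReport(
    LogisticRegression(),
    X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
)

# A single-valued metric now comes back as a plain Python float ...
accuracy = report.metrics.accuracy()  # e.g. 0.92 (hypothetical value)

# ... while a per-class metric such as precision is expected to come back as a
# dict keyed by class label (values below are hypothetical).
precision = report.metrics.precision()  # e.g. {0: 0.91, 1: 0.93}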
glemaitre authored Feb 10, 2025
1 parent 0a2b82a commit 24b2bc9
Showing 8 changed files with 263 additions and 269 deletions.
43 changes: 15 additions & 28 deletions examples/model_evaluation/plot_estimator_report.py
@@ -13,8 +13,9 @@
# Loading our dataset and defining our estimator
# ==============================================
#
# First, we load a dataset from skrub. Our goal is to predict if a company paid a physician. The ultimate goal is to
# detect potential conflict of interest when it comes to the actual problem that we want to solve.
# First, we load a dataset from skrub. Our goal is to predict if a company paid a
# physician. The ultimate goal is to detect potential conflict of interest when it comes
# to the actual problem that we want to solve.

# %%
from skrub.datasets import fetch_open_payments
@@ -79,15 +80,12 @@
report = EstimatorReport(
estimator, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test
)
report

# %%
#
# Once the report is created, we get some information regarding the available tools
# allowing us to get some insights from our specific model on our specific task.
#
# We can get a similar information if we call the :meth:`~skore.EstimatorReport.help`
# method.
# allowing us to get some insights from our specific model on our specific task by
# calling the :meth:`~skore.EstimatorReport.help` method.
report.help()

# %%
@@ -257,10 +255,7 @@ def operational_decision_cost(y_true, y_pred, amount):
# We can now compute the cost of our operational decision.
start = time.time()
cost = report.metrics.custom_metric(
metric_function=operational_decision_cost,
metric_name="Operational Decision Cost",
response_method="predict",
amount=amount,
metric_function=operational_decision_cost, response_method="predict", amount=amount
)
end = time.time()
cost
@@ -276,10 +271,7 @@ def operational_decision_cost(y_true, y_pred, amount):
# %%
start = time.time()
cost = report.metrics.custom_metric(
metric_function=operational_decision_cost,
metric_name="Operational Decision Cost",
response_method="predict",
amount=amount,
metric_function=operational_decision_cost, response_method="predict", amount=amount
)
end = time.time()
cost
@@ -294,12 +286,9 @@ def operational_decision_cost(y_true, y_pred, amount):
# the predictions.
report.metrics.report_metrics(
scoring=["precision", "recall", operational_decision_cost],
scoring_names=["Precision", "Recall", "Operational Decision Cost"],
pos_label=pos_label,
scoring_kwargs={
"amount": amount,
"response_method": "predict",
"metric_name": "Operational Decision Cost",
},
scoring_kwargs={"amount": amount, "response_method": "predict"},
)

# %%
@@ -310,16 +299,14 @@ def operational_decision_cost(y_true, y_pred, amount):
# function.
from sklearn.metrics import make_scorer, f1_score

f1_scorer = make_scorer(
f1_score, response_method="predict", metric_name="F1 Score", pos_label=pos_label
)
f1_scorer = make_scorer(f1_score, response_method="predict", pos_label=pos_label)
operational_decision_cost_scorer = make_scorer(
operational_decision_cost,
response_method="predict",
metric_name="Operational Decision Cost",
amount=amount,
operational_decision_cost, response_method="predict", amount=amount
)
report.metrics.report_metrics(
scoring=[f1_scorer, operational_decision_cost_scorer],
scoring_names=["F1 Score", "Operational Decision Cost"],
)
report.metrics.report_metrics(scoring=[f1_scorer, operational_decision_cost_scorer])

# %%
#
1 change: 1 addition & 0 deletions skore/src/skore/persistence/view/view.py
@@ -12,6 +12,7 @@ class View:
Examples
--------
>>> from skore.persistence.view import View
>>> View(layout=["a", "b"])
View(...)
"""
105 changes: 55 additions & 50 deletions skore/src/skore/sklearn/_cross_validation/metrics_accessor.py
@@ -1,6 +1,7 @@
import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import make_scorer
from sklearn.utils.metaestimators import available_if

from skore.externals._pandas_accessors import DirNamesMixin
@@ -20,17 +21,17 @@ class _MetricsAccessor(_BaseAccessor, DirNamesMixin):
You can access this accessor using the `metrics` attribute.
"""

_SCORE_OR_LOSS_ICONS = {
"accuracy": "(↗︎)",
"precision": "(↗︎)",
"recall": "(↗︎)",
"brier_score": "(↘︎)",
"roc_auc": "(↗︎)",
"log_loss": "(↘︎)",
"r2": "(↗︎)",
"rmse": "(↘︎)",
"report_metrics": "",
"custom_metric": "",
_SCORE_OR_LOSS_INFO = {
"accuracy": {"name": "Accuracy", "icon": "(↗︎)"},
"precision": {"name": "Precision", "icon": "(↗︎)"},
"recall": {"name": "Recall", "icon": "(↗︎)"},
"brier_score": {"name": "Brier score", "icon": "(↘︎)"},
"roc_auc": {"name": "ROC AUC", "icon": "(↗︎)"},
"log_loss": {"name": "Log loss", "icon": "(↘︎)"},
"r2": {"name": "R²", "icon": "(↗︎)"},
"rmse": {"name": "RMSE", "icon": "(↘︎)"},
"custom_metric": {"name": "Custom metric", "icon": ""},
"report_metrics": {"name": "Report metrics", "icon": ""},
}

def __init__(self, parent):
@@ -210,8 +211,8 @@ def accuracy(self, *, data_source="test", aggregate=None):
LogisticRegression Split #0 0.94...
Split #1 0.94...
"""
return self._compute_metric_scores(
report_metric_name="accuracy",
return self.report_metrics(
scoring=["accuracy"],
data_source=data_source,
aggregate=aggregate,
)
@@ -285,16 +286,16 @@ def precision(
>>> report = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2)
>>> report.metrics.precision()
Metric Precision (↗︎)
Class label 0 1
Label / Average 0 1
LogisticRegression Split #0 0.96... 0.93...
Split #1 0.90... 0.96...
"""
return self._compute_metric_scores(
report_metric_name="precision",
return self.report_metrics(
scoring=["precision"],
data_source=data_source,
aggregate=aggregate,
average=average,
pos_label=pos_label,
scoring_kwargs={"average": average},
)

@available_if(
@@ -367,16 +368,16 @@ def recall(
>>> report = CrossValidationReport(classifier, X=X, y=y, cv_splitter=2)
>>> report.metrics.recall()
Metric Recall (↗︎)
Class label 0 1
Label / Average 0 1
LogisticRegression Split #0 0.87... 0.98...
Split #1 0.94... 0.94...
"""
return self._compute_metric_scores(
report_metric_name="recall",
return self.report_metrics(
scoring=["recall"],
data_source=data_source,
aggregate=aggregate,
average=average,
pos_label=pos_label,
scoring_kwargs={"average": average},
)

@available_if(
@@ -414,8 +415,8 @@ def brier_score(self, *, data_source="test", aggregate=None):
LogisticRegression Split #0 0.04...
Split #1 0.04...
"""
return self._compute_metric_scores(
report_metric_name="brier_score",
return self.report_metrics(
scoring=["brier_score"],
data_source=data_source,
aggregate=aggregate,
)
@@ -443,8 +444,7 @@ def roc_auc(
- "test" : use the test set provided when creating the report.
- "train" : use the train set provided when creating the report.
average : {"auto", "macro", "micro", "weighted", "samples"}, \
default=None
average : {"macro", "micro", "weighted", "samples"}, default=None
Average to compute the ROC AUC score in a multiclass setting. By default,
no average is computed. Otherwise, this determines the type of averaging
performed on the data.
@@ -498,12 +498,11 @@
LogisticRegression Split #0 0.99...
Split #1 0.98...
"""
return self._compute_metric_scores(
report_metric_name="roc_auc",
return self.report_metrics(
scoring=["roc_auc"],
data_source=data_source,
aggregate=aggregate,
average=average,
multi_class=multi_class,
scoring_kwargs={"average": average, "multi_class": multi_class},
)

@available_if(
Expand Down Expand Up @@ -543,8 +542,8 @@ def log_loss(self, *, data_source="test", aggregate=None):
LogisticRegression Split #0 0.1...
Split #1 0.1...
"""
return self._compute_metric_scores(
report_metric_name="log_loss",
return self.report_metrics(
scoring=["log_loss"],
data_source=data_source,
aggregate=aggregate,
)
@@ -598,11 +597,11 @@ def r2(
Ridge Split #0 0.36...
Split #1 0.39...
"""
return self._compute_metric_scores(
report_metric_name="r2",
return self.report_metrics(
scoring=["r2"],
data_source=data_source,
aggregate=aggregate,
multioutput=multioutput,
scoring_kwargs={"multioutput": multioutput},
)

@available_if(_check_supported_ml_task(supported_ml_tasks=["regression"]))
Expand Down Expand Up @@ -654,11 +653,11 @@ def rmse(
Ridge Split #0 59.9...
Split #1 61.4...
"""
return self._compute_metric_scores(
report_metric_name="rmse",
return self.report_metrics(
scoring=["rmse"],
data_source=data_source,
aggregate=aggregate,
multioutput=multioutput,
scoring_kwargs={"multioutput": multioutput},
)

def custom_metric(
Expand Down Expand Up @@ -732,15 +731,20 @@ def custom_metric(
Ridge Split #0 50.1...
Split #1 52.6...
"""
return self._compute_metric_scores(
report_metric_name="custom_metric",
data_source=data_source,
aggregate=aggregate,
metric_function=metric_function,
# create a scorer with `greater_is_better=True` to not alter the output of
# `metric_function`
scorer = make_scorer(
metric_function,
greater_is_better=True,
response_method=response_method,
metric_name=metric_name,
**kwargs,
)
return self.report_metrics(
scoring=[scorer],
data_source=data_source,
aggregate=aggregate,
scoring_names=[metric_name],
)

####################################################################################
# Methods related to the help tree
@@ -768,15 +772,16 @@ def _format_method_name(self, name):
"""Override format method for metrics-specific naming."""
method_name = f"{name}(...)"
method_name = method_name.ljust(22)
if name in self._SCORE_OR_LOSS_ICONS and self._SCORE_OR_LOSS_ICONS[name] in (
"(↗︎)",
"(↘︎)",
):
if self._SCORE_OR_LOSS_ICONS[name] == "(↗︎)":
method_name += f"[cyan]{self._SCORE_OR_LOSS_ICONS[name]}[/cyan]"
if name in self._SCORE_OR_LOSS_INFO and self._SCORE_OR_LOSS_INFO[name][
"icon"
] in ("(↗︎)", "(↘︎)"):
if self._SCORE_OR_LOSS_INFO[name]["icon"] == "(↗︎)":
method_name += f"[cyan]{self._SCORE_OR_LOSS_INFO[name]['name']}[/cyan]"
return method_name.ljust(43)
else: # (↘︎)
method_name += f"[orange1]{self._SCORE_OR_LOSS_ICONS[name]}[/orange1]"
method_name += (
f"[orange1]{self._SCORE_OR_LOSS_INFO[name]['name']}[/orange1]"
)
return method_name.ljust(49)
else:
return method_name.ljust(29)