25 changes: 25 additions & 0 deletions skore/src/skore/_sklearn/_comparison/metrics_accessor.py
@@ -187,6 +187,8 @@ def _compute_metric_scores(
aggregate: Aggregate | None = ("mean", "std"),
**metric_kwargs: Any,
):
assert report_metric_name == "summarize"

# build the cache key components to finally create a tuple that will be used
# to check if the metric has already been computed
cache_key_parts: list[Any] = [
@@ -232,11 +234,34 @@ def _compute_metric_scores(
data_source=data_source,
X=X,
y=y,
# To avoid crashing if one of the sub-reports does not support a metric
on_unavailable_metric="nan",
**metric_kwargs,
)
if self._parent._reports_type == "CrossValidationReport":
kwargs["aggregate"] = None

# Check whether metrics are available in sub-reports
# FIXME: These checks are already done by the sub-reports

if isinstance(kwargs["scoring"], str):
scorings = [kwargs["scoring"]]
elif kwargs["scoring"] is None or callable(kwargs["scoring"]):
scorings = [] # No processing needed
else:
scorings = kwargs["scoring"]

for scoring in scorings:
if isinstance(scoring, str):
if any(
hasattr(report.metrics, scoring)
for report in self._parent.reports_
):
continue
raise ValueError(
f"None of the compared reports support metric '{scoring}'"
)

generator = parallel(
joblib.delayed(getattr(report.metrics, report_metric_name))(**kwargs)
for report in self._parent.reports_
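The hunk above normalizes `scoring` into a list and raises early when a string metric is supported by none of the compared sub-reports. A minimal, self-contained sketch of that check, with `reports` standing in for `self._parent.reports_` (the names here are illustrative, not skore's API):

from typing import Any


def check_scoring_available(scoring: Any, reports: list[Any]) -> None:
    """Raise if a string metric is supported by none of the given reports."""
    # Normalize `scoring` into a list of entries to inspect.
    if isinstance(scoring, str):
        scorings = [scoring]
    elif scoring is None or callable(scoring):
        scorings = []  # nothing to check for None or a callable scorer
    else:
        scorings = list(scoring)

    for entry in scorings:
        # Only string entries map to named metric methods on the sub-reports;
        # callables inside a list are left for the sub-reports to validate.
        if isinstance(entry, str) and not any(
            hasattr(report.metrics, entry) for report in reports
        ):
            raise ValueError(
                f"None of the compared reports support metric '{entry}'"
            )
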
4 changes: 4 additions & 0 deletions skore/src/skore/_sklearn/_comparison/utils.py
@@ -328,7 +328,11 @@ def sort_by_split(df: pd.DataFrame) -> pd.DataFrame:
values="Value",
aggfunc=aggregate,
sort=False,
dropna=False,
)

# Remove rows that are entirely NaN
df = df.dropna(axis="index", how="all")
else:
df = sort_by_split(df)
df = df.set_index(list(df.columns.drop("Value")))
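A small illustration of the pivot pattern changed above, using toy data rather than skore's internal frame: `dropna=False` keeps all-NaN rows and columns instead of silently dropping them, and the explicit `dropna(axis="index", how="all")` afterwards removes only the rows where every column is NaN.

import numpy as np
import pandas as pd

# Toy long-format scores: LinearSVC has no predict_proba, so its Brier score
# is NaN (as produced by on_unavailable_metric="nan").
df = pd.DataFrame(
    {
        "Metric": ["Accuracy", "Accuracy", "Brier score", "Brier score"],
        "Estimator": ["DummyClassifier", "LinearSVC", "DummyClassifier", "LinearSVC"],
        "Value": [0.5, 0.9, 0.25, np.nan],
    }
)

table = df.pivot_table(
    index="Metric",
    columns="Estimator",
    values="Value",
    aggfunc="mean",
    sort=False,
    dropna=False,  # keep all-NaN rows/columns instead of dropping them
)

# Only rows where every estimator is NaN are removed; the partially supported
# "Brier score" row is kept with a NaN cell for LinearSVC.
table = table.dropna(axis="index", how="all")
print(table)
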
14 changes: 13 additions & 1 deletion skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py
@@ -62,6 +62,7 @@ def summarize(
indicator_favorability: bool = False,
flat_index: bool = False,
aggregate: Aggregate | None = ("mean", "std"),
on_unavailable_metric: Literal["raise", "nan"] = "raise",
) -> MetricsSummaryDisplay:
"""Report a set of metrics for our estimator.

@@ -125,6 +126,11 @@ class is set to the one provided when creating the report. If `None`,
Function to aggregate the scores across the cross-validation splits.
None will return the scores for each split.

on_unavailable_metric : {"raise", "nan"}, default="raise"
Whether to raise or return `numpy.nan` when the metric cannot be computed.
For example, "brier_score" cannot be computed for estimators without a
`predict_proba` method.

Returns
-------
MetricsSummaryDisplay
@@ -163,6 +169,7 @@ class is set to the one provided when creating the report. If `None`,
scoring_kwargs=scoring_kwargs,
scoring_names=scoring_names,
indicator_favorability=indicator_favorability,
on_unavailable_metric=on_unavailable_metric,
)
if flat_index:
if isinstance(results.columns, pd.MultiIndex):
@@ -184,6 +191,7 @@ def _compute_metric_scores(
X: ArrayLike | None = None,
y: ArrayLike | None = None,
aggregate: Aggregate | None = None,
on_unavailable_metric: Literal["raise", "nan"] = "raise",
**metric_kwargs: Any,
) -> pd.DataFrame:
if data_source == "X_y":
@@ -231,7 +239,11 @@
)
generator = parallel(
delayed(getattr(report.metrics, report_metric_name))(
data_source=data_source, X=X, y=y, **metric_kwargs
data_source=data_source,
X=X,
y=y,
on_unavailable_metric=on_unavailable_metric,
**metric_kwargs,
)
for report in self._parent.estimator_reports_
)
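A hedged usage sketch of the new `on_unavailable_metric` parameter at the cross-validation level. The dataset and estimator choices are illustrative, and `scoring` as a list of metric names is assumed from the comparison code above; the intent is that "brier_score" for LinearSVC, which has no `predict_proba`, comes back as NaN instead of raising:

from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

from skore import CrossValidationReport

X, y = make_classification(n_samples=200, random_state=0)
report = CrossValidationReport(LinearSVC(), X, y)

# With the default "raise", an unsupported metric raises; with "nan" it is
# reported as NaN for every split.
summary = report.metrics.summarize(
    scoring=["accuracy", "brier_score"],
    on_unavailable_metric="nan",
)
print(summary.frame())
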
25 changes: 24 additions & 1 deletion skore/src/skore/_sklearn/_estimator/metrics_accessor.py
@@ -44,6 +44,14 @@
DataSource = Literal["test", "train", "X_y"]


def _nan(*args, **kwargs):
"""Return NaN.

Used when a metric is unavailable and `on_unavailable_metric` is set to "nan".
"""
return np.nan


class _MetricsAccessor(
_BaseMetricsAccessor, _BaseAccessor["EstimatorReport"], DirNamesMixin
):
@@ -72,6 +80,7 @@ def summarize(
pos_label: PositiveLabel | None = _DEFAULT,
indicator_favorability: bool = False,
flat_index: bool = False,
on_unavailable_metric: Literal["raise", "nan"] = "raise",
) -> MetricsSummaryDisplay:
"""Report a set of metrics for our estimator.

@@ -130,6 +139,11 @@ class is set to the one provided when creating the report. If `None`,
Whether to flatten the multi-index columns. Flat index will always be lower
case, will not include spaces and will remove the hash symbol to ease indexing.

on_unavailable_metric : {"raise", "nan"}, default="raise"
Whether to raise or return `numpy.nan` when the metric cannot be computed.
For example, "brier_score" cannot be computed for estimators without a
`predict_proba` method.

Returns
-------
MetricsSummaryDisplay
@@ -288,7 +302,16 @@ class is set to the one provided when creating the report. If `None`,

# Handle built-in metrics (without underscore prefix)
elif metric in self._score_or_loss_info:
metric_fn = getattr(self, f"_{metric}")
try:
metric_fn = getattr(self, f"_{metric}")
except AttributeError as e:
if on_unavailable_metric == "nan":
metric_fn = _nan # type: ignore
else:
raise AttributeError(
f"This estimator does not support metric '{metric}'"
) from e

metrics_kwargs = {"data_source_hash": data_source_hash}
if metric_name is None:
metric_name = f"{self._score_or_loss_info[metric]['name']}"
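The change above swaps in a NaN-returning callable when the accessor does not expose the requested metric. A minimal, self-contained sketch of that fallback pattern, with generic names rather than skore's internals:

import numpy as np


def _nan(*args, **kwargs):
    """Stand-in metric that always returns NaN."""
    return np.nan


def resolve_metric(accessor, metric, on_unavailable_metric="raise"):
    """Return the private metric method, or a NaN fallback when allowed."""
    try:
        return getattr(accessor, f"_{metric}")
    except AttributeError as exc:
        if on_unavailable_metric == "nan":
            return _nan
        raise AttributeError(
            f"This estimator does not support metric '{metric}'"
        ) from exc


# Example: an accessor-like object without a `_brier_score` method.
class _Accessor:
    def _accuracy(self):
        return 0.9


acc = _Accessor()
print(resolve_metric(acc, "accuracy")())            # 0.9
print(resolve_metric(acc, "brier_score", "nan")())  # nan
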
157 changes: 157 additions & 0 deletions skore/tests/unit/reports/comparison/test_metrics.py
@@ -2,8 +2,10 @@
Common test for the metrics accessor of a ComparisonReport.
"""

import numpy as np
import pytest
from sklearn.datasets import make_classification
from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from skore import ComparisonReport, CrossValidationReport, EstimatorReport
@@ -46,3 +48,158 @@ def test_favorability_undefined_metrics(report):
expected_values = {"(↗︎)", "(↘︎)"}
actual_values = set(metrics_df["Favorability"].to_numpy())
assert actual_values.issubset(expected_values)


class TestUnsupportedMetric:
"""Test the behaviour of ComparisonReport metrics when some or none of the compared
reports support the requested metric.

Originates from <https://github.com/probabl-ai/skore/issues/1473>
"""

@pytest.fixture
def estimator_reports_no_brier_score(self, binary_classification_train_test_split):
"""Reports where no estimator supports Brier score."""
X_train, X_test, y_train, y_test = binary_classification_train_test_split
report_1 = EstimatorReport(
LinearSVC(),
X_train=X_train,
y_train=y_train,
X_test=X_test,
y_test=y_test,
)
report_2 = EstimatorReport(
LinearSVC(),
X_train=X_train,
y_train=y_train,
X_test=X_test,
y_test=y_test,
)
return report_1, report_2

@pytest.fixture
def estimator_reports_some_brier_score(
self, binary_classification_train_test_split
):
"""Reports where some estimator supports Brier score."""
X_train, X_test, y_train, y_test = binary_classification_train_test_split
report_1 = EstimatorReport(
DummyClassifier(strategy="uniform", random_state=0),
X_train=X_train,
y_train=y_train,
X_test=X_test,
y_test=y_test,
)
report_2 = EstimatorReport(
LinearSVC(),
X_train=X_train,
y_train=y_train,
X_test=X_test,
y_test=y_test,
)
return report_1, report_2

@pytest.fixture
def cv_reports_no_brier_score(self, binary_classification_data):
"""Reports where no estimator supports Brier score."""
X, y = binary_classification_data
report_1 = CrossValidationReport(LinearSVC(), X, y)
report_2 = CrossValidationReport(LinearSVC(), X, y)
return report_1, report_2

@pytest.fixture
def cv_reports_some_brier_score(self, binary_classification_data):
"""Reports where some estimator supports Brier score."""
X, y = binary_classification_data
report_1 = CrossValidationReport(
DummyClassifier(strategy="uniform", random_state=0), X, y
)
report_2 = CrossValidationReport(LinearSVC(), X, y)
return report_1, report_2

@pytest.mark.parametrize(
"compared_report", [EstimatorReport, CrossValidationReport]
)
def test_no_report_supports_metric(
self,
compared_report,
estimator_reports_no_brier_score,
cv_reports_no_brier_score,
):
"""If you call Brier score and none of the sub-reports support it,
you should get an AttributeError."""
if compared_report is EstimatorReport:
report_1, report_2 = estimator_reports_no_brier_score
else:
report_1, report_2 = cv_reports_no_brier_score
report = ComparisonReport([report_1, report_2])

with pytest.raises(AttributeError):
report.metrics.brier_score()

@pytest.mark.parametrize(
"compared_report", [EstimatorReport, CrossValidationReport]
)
def test_summarize_no_report_supports_metric(
self,
compared_report,
estimator_reports_no_brier_score,
cv_reports_no_brier_score,
):
"""If you call `summarize` with Brier score and none of the sub-reports support
it, you should get an AttributeError"""
if compared_report is EstimatorReport:
report_1, report_2 = estimator_reports_no_brier_score
else:
report_1, report_2 = cv_reports_no_brier_score
report = ComparisonReport([report_1, report_2])

with pytest.raises(ValueError):
report.metrics.summarize(scoring="brier_score")

@pytest.mark.parametrize(
"compared_report", [EstimatorReport, CrossValidationReport]
)
def test_some_reports_support_metric(
self,
compared_report,
estimator_reports_some_brier_score,
cv_reports_some_brier_score,
):
"""If you call `brier_score` and some of the sub-reports support it,
you should get a dataframe with NaN"""
if compared_report is EstimatorReport:
report_1, report_2 = estimator_reports_some_brier_score
else:
report_1, report_2 = cv_reports_some_brier_score
report = ComparisonReport([report_1, report_2])
summary = report.metrics.brier_score()

if compared_report is EstimatorReport:
assert np.isnan(summary.loc["Brier score"]["LinearSVC"])
else:
assert np.isnan(summary.loc["Brier score"][("mean", "LinearSVC")])

@pytest.mark.parametrize(
"compared_report", [EstimatorReport, CrossValidationReport]
)
def test_summarize_some_reports_support_metric(
self,
compared_report,
estimator_reports_some_brier_score,
cv_reports_some_brier_score,
):
"""If you call `summarize` with Brier score and some of the sub-reports
support it, you should get a dataframe with NaN"""
if compared_report is EstimatorReport:
report_1, report_2 = estimator_reports_some_brier_score
else:
report_1, report_2 = cv_reports_some_brier_score

report = ComparisonReport([report_1, report_2])
summary = report.metrics.summarize(scoring="brier_score")

if compared_report is EstimatorReport:
assert np.isnan(summary.frame().loc["Brier score"]["LinearSVC"])
else:
assert np.isnan(summary.frame().loc["Brier score"][("mean", "LinearSVC")])
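For reference, the end-user effect exercised by these tests, as a hedged sketch with the fixture data replaced by a toy split: comparing an estimator with `predict_proba` against one without should yield a Brier score row containing a NaN cell rather than an error.

from sklearn.datasets import make_classification
from sklearn.dummy import DummyClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

from skore import ComparisonReport, EstimatorReport

X, y = make_classification(n_samples=200, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

report_1 = EstimatorReport(
    DummyClassifier(strategy="uniform", random_state=0),
    X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
)
report_2 = EstimatorReport(
    LinearSVC(),
    X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
)

comparison = ComparisonReport([report_1, report_2])

# LinearSVC has no predict_proba, so its Brier score comes back as NaN
# instead of the whole call raising.
print(comparison.metrics.brier_score())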