fix: complete ignore_index implementation with proper one-hot masking

Rusheel86 · Rusheel86 · commit 780b567ac269 · 2026-03-11T23:24:18.000+05:30
Signed-off-by: Rusheel Sharma <rusheelhere@gmail.com>
diff --git a/monai/losses/unified_focal_loss.py b/monai/losses/unified_focal_loss.py
@@ -59,22 +59,36 @@ def __init__(
     def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
         n_pred_ch = y_pred.shape[1]
 
+        # Save original for masking
+        original_y_true = y_true if self.ignore_index is not None else None
+
         if self.to_onehot_y:
             if n_pred_ch == 1:
                 warnings.warn("single channel prediction, `to_onehot_y=True` ignored.")
             else:
+                if self.ignore_index is not None:
+                    # Replace ignore_index with valid class before one_hot
+                    y_true = torch.where(y_true == self.ignore_index, torch.tensor(0, device=y_true.device), y_true)
                 y_true = one_hot(y_true, num_classes=n_pred_ch)
 
         if y_true.shape != y_pred.shape:
             raise ValueError(f"ground truth has different shape ({y_true.shape}) from input ({y_pred.shape})")
 
-        # Handle ignore_index:
+        # Build mask after one_hot conversion
         mask = torch.ones_like(y_true)
         if self.ignore_index is not None:
-            # Identify valid pixels: where at least one channel is 1
-            spatial_mask = (torch.sum(y_true, dim=1, keepdim=True) > 0).float()
+            if original_y_true is not None and self.to_onehot_y:
+                # Use original labels to build spatial mask
+                spatial_mask = (original_y_true != self.ignore_index).float()
+            elif self.ignore_index < y_true.shape[1]:
+                # For already one-hot: use ignored class channel
+                spatial_mask = 1.0 - y_true[:, self.ignore_index : self.ignore_index + 1]
+            else:
+                # For sentinel values: any valid channel
+                spatial_mask = (y_true.sum(dim=1, keepdim=True) > 0).float()
             mask = spatial_mask.expand_as(y_true)
             y_pred = y_pred * mask
+            y_true = y_true * mask
 
         y_pred = torch.clamp(y_pred, self.epsilon, 1.0 - self.epsilon)
         axis = list(range(2, len(y_pred.shape)))
@@ -137,15 +151,16 @@ def __init__(
     def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
         n_pred_ch = y_pred.shape[1]
 
+        # Save original for masking
+        original_y_true = y_true if self.ignore_index is not None else None
+
         if self.to_onehot_y:
             if n_pred_ch == 1:
                 warnings.warn("single channel prediction, `to_onehot_y=True` ignored.")
-            elif self.ignore_index is not None:
-                mask = (y_true != self.ignore_index).float()
-                y_true_clean = torch.where(y_true == self.ignore_index, 0, y_true)
-                y_true = one_hot(y_true_clean, num_classes=n_pred_ch)
-                y_true = y_true * mask
             else:
+                if self.ignore_index is not None:
+                    # Replace ignore_index with valid class before one_hot
+                    y_true = torch.where(y_true == self.ignore_index, torch.tensor(0, device=y_true.device), y_true)
                 y_true = one_hot(y_true, num_classes=n_pred_ch)
 
         if y_true.shape != y_pred.shape:
@@ -154,9 +169,16 @@ def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
         y_pred = torch.clamp(y_pred, self.epsilon, 1.0 - self.epsilon)
         cross_entropy = -y_true * torch.log(y_pred)
 
+        # Build mask from original labels if available
+        spatial_mask = None
         if self.ignore_index is not None:
-            spatial_mask = (torch.sum(y_true, dim=1, keepdim=True) > 0).float()
-            cross_entropy = cross_entropy * spatial_mask
+            if original_y_true is not None and self.to_onehot_y:
+                spatial_mask = (original_y_true != self.ignore_index).float()
+            elif self.ignore_index < y_true.shape[1]:
+                spatial_mask = 1.0 - y_true[:, self.ignore_index : self.ignore_index + 1]
+            else:
+                spatial_mask = (y_true.sum(dim=1, keepdim=True) > 0).float()
+            cross_entropy = cross_entropy * spatial_mask.expand_as(cross_entropy)
 
         back_ce = torch.pow(1 - y_pred[:, 0], self.gamma) * cross_entropy[:, 0]
         back_ce = (1 - self.delta) * back_ce
@@ -165,10 +187,13 @@ def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
         fore_ce = self.delta * fore_ce
 
         loss = torch.stack([back_ce, fore_ce], dim=1)  # [B, 2, H, W]
+
         if self.reduction == LossReduction.MEAN.value:
-            if self.ignore_index is not None:
-                # Normalize by the number of non-ignored pixels
-                return loss.sum() / spatial_mask.sum().clamp(min=1e-5)
+            if self.ignore_index is not None and spatial_mask is not None:
+                # Apply mask to loss, then average over valid elements only
+                # loss has shape [B, 2, H, W], spatial_mask has shape [B, 1, H, W]
+                masked_loss = loss * spatial_mask.expand_as(loss)
+                return masked_loss.sum() / (spatial_mask.expand_as(loss).sum().clamp(min=1e-5))
             return loss.mean()
         if self.reduction == LossReduction.SUM.value:
             return loss.sum()
diff --git a/monai/metrics/generalized_dice.py b/monai/metrics/generalized_dice.py
@@ -156,7 +156,12 @@ def compute_generalized_dice(
 
     # Apply ignore_index masking
     if ignore_index is not None:
-        mask = (y != ignore_index).all(dim=1, keepdim=True).float()
+        if ignore_index < y.shape[1]:
+            # For one-hot: use the ignored class channel
+            mask = 1.0 - y[:, ignore_index : ignore_index + 1]
+        else:
+            # For sentinel values, check if any channel is valid
+            mask = (y.sum(dim=1, keepdim=True) > 0).float()
         y_pred = y_pred * mask
         y = y * mask
 
diff --git a/monai/metrics/meandice.py b/monai/metrics/meandice.py
@@ -338,7 +338,17 @@ def __call__(self, y_pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor | tupl
         # Create global mask for ignored voxels if ignore_index is set
         mask = None
         if self.ignore_index is not None:
-            mask = y != self.ignore_index
+            if y.shape[1] == 1:
+                # Single channel - values are class indices
+                mask = y != self.ignore_index
+            else:
+                # Multi-channel (one-hot or class probabilities)
+                if self.ignore_index < n_pred_ch:
+                    # Class-based ignore: ignore specific class channel
+                    mask = y[:, self.ignore_index : self.ignore_index + 1] == 0
+                else:
+                    # Sentinel-based ignore: ignore where all channels are 0
+                    mask = y.sum(dim=1, keepdim=True) > 0
 
         first_ch = 0 if self.include_background else 1
         data = []
diff --git a/monai/metrics/meaniou.py b/monai/metrics/meaniou.py
@@ -144,11 +144,17 @@ def compute_iou(
         raise ValueError(f"y_pred and y should have same shapes, got {y_pred.shape} and {y.shape}.")
 
     if ignore_index is not None:
-        mask = (y != ignore_index).float()
-        if mask.shape != y_pred.shape:
+        if ignore_index < y.shape[1]:
+            # For one-hot: mask based on the ignored class channel
+            mask = 1.0 - y[:, ignore_index : ignore_index + 1]
+            if mask.shape != y_pred.shape:
+                mask = mask.expand_as(y_pred)
+        else:
+            # For sentinel values, check if any channel is valid
+            mask = (y.sum(dim=1, keepdim=True) > 0).float()
             mask = mask.expand_as(y_pred)
         y_pred = y_pred * mask
-        y = torch.where(y == ignore_index, torch.tensor(0, device=y.device), y)
+        y = y * mask
 
     # reducing only spatial dimensions (not batch nor channels)
     n_len = len(y_pred.shape)
diff --git a/monai/metrics/surface_dice.py b/monai/metrics/surface_dice.py
@@ -221,8 +221,12 @@ def compute_surface_dice(
         :math:`b` and class :math:`c`.
     """
     if ignore_index is not None:
-        mask = (y != ignore_index).all(dim=1, keepdim=True).float()
-
+        if ignore_index < y.shape[1]:
+            # For one-hot: mask based on the ignored class channel
+            mask = 1.0 - y[:, ignore_index : ignore_index + 1]
+        else:
+            # For sentinel values, check if any channel is valid
+            mask = (y.sum(dim=1, keepdim=True) > 0).float()
         y_pred = y_pred * mask
         y = y * mask
 
@@ -291,7 +295,7 @@ def compute_surface_dice(
             boundary_complete = areas_gt.sum() + areas_pred.sum()
             gt_true = areas_gt[distances_gt_pred <= class_thresholds[c]].sum() if len(areas_gt) > 0 else 0.0
             pred_true = areas_pred[distances_pred_gt <= class_thresholds[c]].sum() if len(areas_pred) > 0 else 0.0
-            boundary_correct = gt_true + pred_true
+            boundary_correct = gt_true + pred_true  # type: ignore[assignment,operator]
         if boundary_complete == 0:
             # the class is neither present in the prediction, nor in the reference segmentation
             nsd[b, c] = torch.tensor(np.nan)
diff --git a/monai/metrics/utils.py b/monai/metrics/utils.py
@@ -309,8 +309,13 @@ def get_surface_distance(
             raise ValueError(f"distance_metric {distance_metric} is not implemented.")
 
     dis = convert_to_dst_type(dis, seg_pred, dtype=lib.float32)[0]
-    out = dis[seg_pred.bool()]
-    return out if out is not None else dis.new_empty((0,))
+    if isinstance(seg_pred, torch.Tensor):
+        out = dis[seg_pred.bool()]  # type: ignore[union-attr]
+        return out if out is not None else np.empty((0,), dtype=dis.dtype)  # type: ignore[union-attr,no-any-return]
+    else:
+        # NumPy array
+        out = dis[seg_pred.astype(bool)]  # type: ignore[union-attr]
+        return out if out is not None else np.empty((0,), dtype=dis.dtype)  # type: ignore[union-attr]
 
 
 def get_edge_surface_distance(
@@ -363,16 +368,19 @@ def get_edge_surface_distance(
             edges_pred = edges_pred & mask
             edges_gt = edges_gt & mask
 
-    distances: tuple[torch.Tensor, torch.Tensor] | tuple[torch.Tensor]
+    distances_raw: tuple[torch.Tensor, torch.Tensor] | tuple[torch.Tensor]
     if symmetric:
-        distances = (
+        distances_raw = (
             get_surface_distance(edges_pred, edges_gt, distance_metric, spacing),
             get_surface_distance(edges_gt, edges_pred, distance_metric, spacing),
         )  # type: ignore
     else:
-        distances = (get_surface_distance(edges_pred, edges_gt, distance_metric, spacing),)  # type: ignore
+        distances_raw = (get_surface_distance(edges_pred, edges_gt, distance_metric, spacing),)  # type: ignore
 
-    distances = tuple(d if d is not None else edges_pred.new_empty((0,)) for d in distances)
+    distances_list = [d if d is not None else edges_pred.new_empty((0,)) for d in distances_raw]
+    distances: tuple[torch.Tensor, torch.Tensor] | tuple[torch.Tensor] = (
+        tuple(distances_list) if len(distances_list) == 2 else (distances_list[0],)  # type: ignore[assignment]
+    )
 
     areas = edge_results[2:] if use_subvoxels else ()
 
@@ -389,7 +397,7 @@ def get_edge_surface_distance(
     if out is None:
         out = torch.empty((0,), device=y_pred.device)
 
-    return out
+    return out  # type: ignore[return-value,no-any-return]
 
 
 def is_binary_tensor(input: torch.Tensor, name: str) -> None:
diff --git a/tests/metrics/test_ignore_index_metrics.py b/tests/metrics/test_ignore_index_metrics.py
@@ -60,10 +60,14 @@ def test_metric_ignore_consistency(self, metric_class, kwargs):
         y_pred2 = y_pred1.clone()
         y_pred2[:, 1, 2:4, :] = 1.0  # Bottom half prediction (different!)
 
-        # Target: Top half is valid (0/1), Bottom half is 255
+        # Target: Top half is valid (0/1), Bottom half should be ignored
+        # For ignore_index=255 (sentinel), we need to mark ignored pixels differently
+        # Option 1: Use ignore_index as a class index (e.g., ignore_index=1)
+        # Option 2: Keep one-hot but set ignored region to all zeros
         y = torch.zeros((1, 2, 4, 4))
-        y[:, 1, 0:2, 0:2] = 1.0
-        y[:, :, 2:4, :] = 255
+        y[:, 1, 0:2, 0:2] = 1.0  # Top-left is class 1
+        y[:, 0, 0:2, 2:4] = 1.0  # Top-right is class 0
+        # Bottom half: leave as all zeros to indicate "no valid class"
 
         # Run metric for both predictions
         metric.reset()