Skip to content

Commit b6f346b

Browse files
tp5uiuc and claude committed
fix(rtx): add WAR to fall back grouped 3D deconvolutions to PyTorch
Grouped 3D transposed convolutions (ConvTranspose3d with groups > 1) crash on TensorRT-RTX. This adds a convolution_capability_validator that detects these ops and rejects them from TRT conversion, causing the partitioner to keep them in PyTorch while other ops remain on TRT. Also renames depthwise_bf16_validator to convolution_capability_validator to reflect its broader scope, and removes the blanket skip on all 3D deconv tests — non-grouped cases now run through TRT on RTX. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent df261d5 commit b6f346b

3 files changed

Lines changed: 84 additions & 17 deletions

File tree

py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py

Lines changed: 30 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -2754,39 +2754,55 @@ def aten_ops_le(
27542754
)
27552755

27562756

2757-
def depthwise_bf16_validator(
2757+
def convolution_capability_validator(
27582758
node: Node, settings: Optional[CompilationSettings] = None
27592759
) -> bool:
2760-
"""Reject depthwise conv/deconv with BF16 on TensorRT-RTX.
2760+
"""Reject unsupported convolution variants on TensorRT-RTX.
27612761
2762-
TensorRT-RTX does not support depthwise convolutions in BF16. Returning
2763-
False causes the partitioner to fall back to PyTorch for these specific
2764-
nodes, while all other convolutions remain on TRT.
2762+
Falls back to PyTorch for:
2763+
1. Depthwise convolutions in BF16 (no kernel support on TRT-RTX).
2764+
2. Grouped 3D deconvolutions (crash on TRT-RTX).
27652765
"""
27662766
if not ENABLED_FEATURES.tensorrt_rtx:
27672767
return True
2768-
# Check if the input tensor is BF16 (via FX node metadata)
2769-
input_node = node.args[0]
2770-
input_meta = getattr(input_node, "meta", {}).get("tensor_meta")
2771-
if input_meta is None or input_meta.dtype != torch.bfloat16:
2768+
2769+
if (input_meta := getattr(node.args[0], "meta", {}).get("tensor_meta")) is None:
27722770
return True
2771+
27732772
groups = args_bounds_check(node.args, 8)
2774-
if groups is not None and groups > 1:
2775-
weight_node = node.args[1]
2776-
weight_meta = getattr(weight_node, "meta", {}).get("tensor_meta")
2777-
if weight_meta is not None and groups == weight_meta.shape[0]:
2773+
is_grouped = groups is not None and groups > 1
2774+
is_transposed = bool(args_bounds_check(node.args, 6))
2775+
is_3d = input_meta.shape is not None and len(input_meta.shape) == 5
2776+
is_bf16 = input_meta.dtype == torch.bfloat16
2777+
2778+
# WAR: Grouped 3D deconvolutions crash on TRT-RTX (any dtype).
2779+
if is_transposed and is_grouped and is_3d:
2780+
_LOGGER.debug(
2781+
"Grouped 3D deconvolution '%s' (groups=%d) is not supported on "
2782+
"TensorRT-RTX. Falling back to PyTorch for this layer.",
2783+
node.name,
2784+
groups,
2785+
)
2786+
return False
2787+
2788+
# WAR: Depthwise convolutions in BF16 are not supported on TRT-RTX.
2789+
if is_bf16 and is_grouped:
2790+
if (
2791+
weight_meta := getattr(node.args[1], "meta", {}).get("tensor_meta")
2792+
) is not None and groups == weight_meta.shape[0]:
27782793
_LOGGER.debug(
27792794
"Depthwise convolution '%s' with BF16 is not supported on "
27802795
"TensorRT-RTX. Falling back to PyTorch for this layer.",
27812796
node.name,
27822797
)
27832798
return False
2799+
27842800
return True
27852801

27862802

27872803
@dynamo_tensorrt_converter(
27882804
torch.ops.aten.convolution.default,
2789-
capability_validator=depthwise_bf16_validator,
2805+
capability_validator=convolution_capability_validator,
27902806
supports_dynamic_shapes=True,
27912807
)
27922808
@enforce_tensor_types(

tests/py/dynamo/conversion/test_deconvolution_aten.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -227,9 +227,6 @@ def forward(self, x):
227227
),
228228
]
229229
)
230-
@unittest.skipIf(
231-
torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx, "TensorRT-RTX has bug on deconv3d"
232-
)
233230
def test_deconv3d(
234231
self,
235232
_,
@@ -241,6 +238,9 @@ def test_deconv3d(
241238
bias=True,
242239
output_padding=0,
243240
):
241+
if groups > 1 and torch_tensorrt.ENABLED_FEATURES.tensorrt_rtx:
242+
self.skipTest("Grouped 3D deconvolutions fall back to PyTorch on TRT-RTX")
243+
244244
class TestModule(torch.nn.Module):
245245
def __init__(self):
246246
super().__init__()

tests/py/dynamo/models/test_models.py

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -609,3 +609,54 @@ def forward(self, x):
609609

610610
# Clean up model env
611611
torch._dynamo.reset()
612+
613+
614+
@pytest.mark.unit
615+
@unittest.skipIf(
616+
not torchtrt.ENABLED_FEATURES.tensorrt_rtx,
617+
"Grouped 3D deconv fallback WAR is TensorRT-RTX specific",
618+
)
619+
def test_grouped_deconv3d_fallback(ir):
620+
"""Grouped 3D deconvolutions fall back to PyTorch on TRT-RTX.
621+
622+
The convolution_capability_validator rejects grouped ConvTranspose3d ops
623+
so that the partitioner keeps them in PyTorch while other ops run on TRT.
624+
"""
625+
626+
class MyModule(torch.nn.Module):
627+
def __init__(self):
628+
super().__init__()
629+
self.conv = torch.nn.Conv3d(3, 16, 3, padding=1)
630+
self.relu = torch.nn.ReLU()
631+
self.deconv = torch.nn.ConvTranspose3d(16, 16, 3, padding=1, groups=16)
632+
633+
def forward(self, x):
634+
out = self.conv(x)
635+
out = self.relu(out)
636+
out = self.deconv(out)
637+
return out
638+
639+
model = MyModule().eval().cuda()
640+
input = torch.randn((1, 3, 16, 16, 16), device="cuda")
641+
642+
compile_spec = {
643+
"inputs": [torchtrt.Input(input.shape, dtype=torch.float32)],
644+
"device": torchtrt.Device("cuda:0"),
645+
"enabled_precisions": {torch.float32},
646+
"ir": ir,
647+
"pass_through_build_failures": True,
648+
"min_block_size": 1,
649+
"cache_built_engines": False,
650+
"reuse_cached_engines": False,
651+
}
652+
653+
trt_mod = torchtrt.compile(model, **compile_spec)
654+
cos_sim = cosine_similarity(model(input), trt_mod(input))
655+
656+
assertions.assertTrue(
657+
cos_sim > COSINE_THRESHOLD,
658+
msg=f"Grouped 3D deconv fallback model TRT outputs don't match with the original model. Cosine sim score: {cos_sim} Threshold: {COSINE_THRESHOLD}",
659+
)
660+
661+
# Clean up model env
662+
torch._dynamo.reset()

0 commit comments

Comments (0)