pytorch · lanluo-nvidia · Apr 7, 2026 · Mar 31, 2026 · Apr 3, 2026
diff --git a/MODULE.bazel b/MODULE.bazel
@@ -142,7 +142,7 @@ http_archive(
     build_file = "@//third_party/tensorrt_rtx/archive:BUILD",
     strip_prefix = "TensorRT-RTX-1.4.0.76",
     urls = [
-        "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/tensorrt-rtx-1.4.0.76-linux-x86_64-cuda-13.2-release-external.tar.gz",
+        "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/TensorRT-RTX-1.4.0.76-Linux-x86_64-cuda-13.2-Release-external.tar.gz",
     ],
 )
 
@@ -178,6 +178,6 @@ http_archive(
     build_file = "@//third_party/tensorrt_rtx/archive:BUILD",
     strip_prefix = "TensorRT-RTX-1.4.0.76",
     urls = [
-        "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/tensorrt-rtx-1.4.0.76-win10-amd64-cuda-13.2-release-external.zip",
+        "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/TensorRT-RTX-1.4.0.76-Windows-amd64-cuda-13.2-Release-external.zip",
     ],
 )
diff --git a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py
@@ -6,7 +6,6 @@
 from tensorrt import ITensor as TRTTensor
 from torch.fx.experimental.proxy_tensor import unset_fake_temporarily
 from torch.fx.node import Target
-from torch_tensorrt import ENABLED_FEATURES
 from torch_tensorrt.dynamo._SourceIR import SourceIR
 from torch_tensorrt.dynamo.conversion import impl
 from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext
@@ -35,19 +34,6 @@ def get_ir(target: Target) -> SourceIR:
     return SourceIR.UNKNOWN
 
 
-def validate_int8_activation_quantization(name: str, dtype: trt.DataType) -> None:
-    if (
-        dtype == trt.DataType.INT8
-        and ".input_quantizer" in name
-        and ENABLED_FEATURES.tensorrt_rtx
-    ):
-        # RTX does not support int8 activation quantization
-        # TODO: lan to remove this once rtx team has added the support for int8 activation quantization
-        raise NotImplementedError(
-            "TensorRT-RTX does not support int8 activation quantization, only support int8 weight quantization"
-        )
-
-
 def quantize(
     ctx: ConversionContext,
     target: Target,
@@ -91,8 +77,6 @@ def quantize(
             dtype = trt.DataType.FP8
             max_bound = 448
 
-        validate_int8_activation_quantization(name, dtype)
-
         axis = None
         # int8 weight quantization is per-channel quantization(it can have one or multiple amax values)
         if dtype == trt.DataType.INT8 and amax.numel() > 1:

diff --git a/tests/py/dynamo/models/test_models_export.py b/tests/py/dynamo/models/test_models_export.py
@@ -413,9 +413,6 @@ def test_base_int8(ir, dtype):
     import modelopt.torch.quantization as mtq
     from modelopt.torch.quantization.utils import export_torch_mode
 
-    if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16:
-        pytest.skip("TensorRT-RTX does not support bfloat16")
-
     class SimpleNetwork(torch.nn.Module):
         def __init__(self):
             super(SimpleNetwork, self).__init__()
@@ -435,9 +432,6 @@ def calibrate_loop(model):
     input_tensor = torch.randn(1, 10).cuda().to(dtype)
     model = SimpleNetwork().eval().cuda().to(dtype)
     quant_cfg = mtq.INT8_DEFAULT_CFG
-    # RTX does not support INT8 default quantization(weights+activations), only support INT8 weights only quantization
-    if torchtrt.ENABLED_FEATURES.tensorrt_rtx:
-        quant_cfg["quant_cfg"]["*input_quantizer"] = {"enable": False}
     mtq.quantize(model, quant_cfg, forward_loop=calibrate_loop)
     # model has INT8 qdq nodes at this point
     output_pyt = model(input_tensor)
@@ -474,9 +468,6 @@ def test_base_int8_dynamic_shape(ir, dtype):
     import modelopt.torch.quantization as mtq
     from modelopt.torch.quantization.utils import export_torch_mode
 
-    if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16:
-        pytest.skip("TensorRT-RTX does not support bfloat16")
-
     class SimpleNetwork(torch.nn.Module):
         def __init__(self):
             super(SimpleNetwork, self).__init__()

diff --git a/tests/py/dynamo/runtime/test_004_weight_streaming.py b/tests/py/dynamo/runtime/test_004_weight_streaming.py
@@ -292,9 +292,6 @@ def test_weight_streaming_cudagraphs(self, _, use_python_runtime):
             ("cpp_runtime", False),
         ]
     )
-    @unittest.skipIf(
-        torchtrt.ENABLED_FEATURES.tensorrt_rtx, "TensorRT-RTX has bug on cudagraphs"
-    )
     @unittest.skipIf(
         is_orin(), "There is a bug on Orin platform, skip for now until bug is fixed"
     )

diff --git a/toolchains/ci_workspaces/MODULE.bazel.tmpl b/toolchains/ci_workspaces/MODULE.bazel.tmpl
@@ -86,7 +86,7 @@ http_archive(
     build_file = "@//third_party/tensorrt_rtx/archive:BUILD",
     strip_prefix = "TensorRT-RTX-1.4.0.76",
     urls = [
-        "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/tensorrt-rtx-1.4.0.76-linux-x86_64-cuda-${CU_UPPERBOUND}-release-external.tar.gz",
+        "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/TensorRT-RTX-1.4.0.76-Linux-x86_64-cuda-${CU_UPPERBOUND}-Release-external.tar.gz",
     ],
 )
 
@@ -122,7 +122,7 @@ http_archive(
     build_file = "@//third_party/tensorrt_rtx/archive:BUILD",
     strip_prefix = "TensorRT-RTX-1.4.0.76",
     urls = [
-        "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/tensorrt-rtx-1.4.0.76-win10-amd64-cuda-${CU_UPPERBOUND}-release-external.zip",
+        "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/TensorRT-RTX-1.4.0.76-Windows-amd64-cuda-${CU_UPPERBOUND}-Release-external.zip",
     ],
 )