Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ http_archive(
build_file = "@//third_party/tensorrt_rtx/archive:BUILD",
strip_prefix = "TensorRT-RTX-1.4.0.76",
urls = [
"https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/tensorrt-rtx-1.4.0.76-linux-x86_64-cuda-13.2-release-external.tar.gz",
"https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/TensorRT-RTX-1.4.0.76-Linux-x86_64-cuda-13.2-Release-external.tar.gz",
],
)

Expand Down Expand Up @@ -178,6 +178,6 @@ http_archive(
build_file = "@//third_party/tensorrt_rtx/archive:BUILD",
strip_prefix = "TensorRT-RTX-1.4.0.76",
urls = [
"https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/tensorrt-rtx-1.4.0.76-win10-amd64-cuda-13.2-release-external.zip",
"https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/TensorRT-RTX-1.4.0.76-Windows-amd64-cuda-13.2-Release-external.zip",
],
)
16 changes: 0 additions & 16 deletions py/torch_tensorrt/dynamo/conversion/impl/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from tensorrt import ITensor as TRTTensor
from torch.fx.experimental.proxy_tensor import unset_fake_temporarily
from torch.fx.node import Target
from torch_tensorrt import ENABLED_FEATURES
from torch_tensorrt.dynamo._SourceIR import SourceIR
from torch_tensorrt.dynamo.conversion import impl
from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext
Expand Down Expand Up @@ -35,19 +34,6 @@ def get_ir(target: Target) -> SourceIR:
return SourceIR.UNKNOWN


def validate_int8_activation_quantization(name: str, dtype: trt.DataType) -> None:
if (
dtype == trt.DataType.INT8
and ".input_quantizer" in name
and ENABLED_FEATURES.tensorrt_rtx
):
# RTX does not support int8 activation quantization
# TODO: lan to remove this once rtx team has added the support for int8 activation quantization
raise NotImplementedError(
"TensorRT-RTX does not support int8 activation quantization, only support int8 weight quantization"
)


def quantize(
ctx: ConversionContext,
target: Target,
Expand Down Expand Up @@ -91,8 +77,6 @@ def quantize(
dtype = trt.DataType.FP8
max_bound = 448

validate_int8_activation_quantization(name, dtype)

axis = None
# int8 weight quantization is per-channel quantization(it can have one or multiple amax values)
if dtype == trt.DataType.INT8 and amax.numel() > 1:
Expand Down
9 changes: 0 additions & 9 deletions tests/py/dynamo/models/test_models_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,9 +413,6 @@ def test_base_int8(ir, dtype):
import modelopt.torch.quantization as mtq
from modelopt.torch.quantization.utils import export_torch_mode

if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16:
pytest.skip("TensorRT-RTX does not support bfloat16")

class SimpleNetwork(torch.nn.Module):
def __init__(self):
super(SimpleNetwork, self).__init__()
Expand All @@ -435,9 +432,6 @@ def calibrate_loop(model):
input_tensor = torch.randn(1, 10).cuda().to(dtype)
model = SimpleNetwork().eval().cuda().to(dtype)
quant_cfg = mtq.INT8_DEFAULT_CFG
# RTX does not support INT8 default quantization(weights+activations), only support INT8 weights only quantization
if torchtrt.ENABLED_FEATURES.tensorrt_rtx:
quant_cfg["quant_cfg"]["*input_quantizer"] = {"enable": False}
mtq.quantize(model, quant_cfg, forward_loop=calibrate_loop)
# model has INT8 qdq nodes at this point
output_pyt = model(input_tensor)
Expand Down Expand Up @@ -474,9 +468,6 @@ def test_base_int8_dynamic_shape(ir, dtype):
import modelopt.torch.quantization as mtq
from modelopt.torch.quantization.utils import export_torch_mode

if torchtrt.ENABLED_FEATURES.tensorrt_rtx and dtype == torch.bfloat16:
pytest.skip("TensorRT-RTX does not support bfloat16")

class SimpleNetwork(torch.nn.Module):
def __init__(self):
super(SimpleNetwork, self).__init__()
Expand Down
3 changes: 0 additions & 3 deletions tests/py/dynamo/runtime/test_004_weight_streaming.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,9 +292,6 @@ def test_weight_streaming_cudagraphs(self, _, use_python_runtime):
("cpp_runtime", False),
]
)
@unittest.skipIf(
torchtrt.ENABLED_FEATURES.tensorrt_rtx, "TensorRT-RTX has bug on cudagraphs"
)
@unittest.skipIf(
is_orin(), "There is a bug on Orin platform, skip for now until bug is fixed"
)
Expand Down
4 changes: 2 additions & 2 deletions toolchains/ci_workspaces/MODULE.bazel.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ http_archive(
build_file = "@//third_party/tensorrt_rtx/archive:BUILD",
strip_prefix = "TensorRT-RTX-1.4.0.76",
urls = [
"https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/tensorrt-rtx-1.4.0.76-linux-x86_64-cuda-${CU_UPPERBOUND}-release-external.tar.gz",
"https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/TensorRT-RTX-1.4.0.76-Linux-x86_64-cuda-${CU_UPPERBOUND}-Release-external.tar.gz",
],
)

Expand Down Expand Up @@ -122,7 +122,7 @@ http_archive(
build_file = "@//third_party/tensorrt_rtx/archive:BUILD",
strip_prefix = "TensorRT-RTX-1.4.0.76",
urls = [
"https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/tensorrt-rtx-1.4.0.76-win10-amd64-cuda-${CU_UPPERBOUND}-release-external.zip",
"https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.4/TensorRT-RTX-1.4.0.76-Windows-amd64-cuda-${CU_UPPERBOUND}-Release-external.zip",
],
)

Expand Down
Loading