From d4e15ed654b46c01664d9c6e07318364019e4fec Mon Sep 17 00:00:00 2001 From: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> Date: Sun, 28 Dec 2025 02:19:10 -0800 Subject: [PATCH 1/6] fix referencerunner for external data Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> --- modelopt/onnx/autocast/referencerunner.py | 64 +++++++++++++++++++++-- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/modelopt/onnx/autocast/referencerunner.py b/modelopt/onnx/autocast/referencerunner.py index 8dc91ff08..896066a73 100644 --- a/modelopt/onnx/autocast/referencerunner.py +++ b/modelopt/onnx/autocast/referencerunner.py @@ -24,11 +24,13 @@ import copy import io import sys +import tempfile from collections import OrderedDict import numpy as np import onnx +from modelopt.onnx import utils as onnx_utils from modelopt.onnx.autocast.logging_config import configure_logging, logger from modelopt.onnx.quantization.ort_utils import _prepare_ep_list @@ -118,13 +120,65 @@ def _load_inputs(self, inputs): return data_loader + def _get_ort_runner(self, model): + import onnxruntime as ort + from polygraphy.backend.onnx import BytesFromOnnx + from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx + + # Check if model has external data by checking: + # 1. If any initializer has data_location set to EXTERNAL (even if data is loaded) + # 2. 
If model size would exceed 2GB (indicating need for external data) + has_external_data = any( + init.HasField("data_location") and init.data_location == onnx.TensorProto.EXTERNAL + for init in self.model.graph.initializer + ) + + # Also check if model would be too large (>2GB) for SerializeToString + # This handles cases where model was loaded with external data already loaded + if not has_external_data: + try: + # Try to estimate size by serializing the model + # If it fails or exceeds 2GB, we need file-based approach + model_size = len(self.model.SerializeToString()) + if model_size > 2 * (1024**3): # 2GB threshold + has_external_data = True + logger.debug( + f"Model size ({model_size / (1024**3):.2f} GB) exceeds 2GB, using file-based approach" + ) + except (ValueError, AttributeError) as e: + # SerializeToString failed (likely >2GB limit), use file-based approach + if "exceeds maximum protobuf size" in str(e) or "2GB" in str(e): + has_external_data = True + logger.debug("Model exceeds protobuf 2GB limit, using file-based approach") + + if has_external_data: + logger.debug("Model has external data, using file-based approach") + # Get the actual ONNX ModelProto from ModifyOutputs wrapper + modified_model = model() + + # Use a persistent temp file to handle external data files properly + tmp_file = tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) + tmp_file.close() + tmp_file_path = tmp_file.name + onnx_utils.save_onnx(modified_model, tmp_file_path, save_as_external_data=True) + logger.debug(f"Model with all outputs saved to {tmp_file_path}") + session = ort.InferenceSession(tmp_file_path, providers=self.providers) + runners = [OnnxrtRunner(lambda: session)] + + else: + # For models without external data, use the original BytesFromOnnx approach (no tmp files) + logger.debug("Model has no external data, using BytesFromOnnx approach") + serialize_onnx = BytesFromOnnx(model) + build_onnxrt_session = SessionFromOnnx(serialize_onnx, providers=self.providers) + 
runners = [OnnxrtRunner(build_onnxrt_session)] + + return runners + def run(self, inputs=None): """Run FP32 inference with provided or random inputs.""" import onnxruntime as ort from polygraphy import constants - from polygraphy.backend.onnx import BytesFromOnnx from polygraphy.backend.onnx import ModifyOutputs as ModifyOnnxOutputs - from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx from polygraphy.comparator import Comparator logger.info("Running ONNX Runtime to obtain reference outputs (this may take a while)...") @@ -133,9 +187,9 @@ def run(self, inputs=None): model_copy = copy.deepcopy(self.model) modify_outputs = ModifyOnnxOutputs(model_copy, outputs=constants.MARK_ALL) - serialize_onnx = BytesFromOnnx(modify_outputs) - build_onnxrt_session = SessionFromOnnx(serialize_onnx, providers=self.providers) - runners = [OnnxrtRunner(build_onnxrt_session)] + + # Load the modified model and create an inference session + runners = self._get_ort_runner(modify_outputs) # Comparator is used despite the fact that we are using ONNXRuntime # because it provides the ability to generate random inputs using DataLoader From 65325227d6dd0f9743580e0f3bec157c7af54943 Mon Sep 17 00:00:00 2001 From: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> Date: Sun, 28 Dec 2025 02:25:26 -0800 Subject: [PATCH 2/6] draft: skip model checker for models with external data Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> --- modelopt/onnx/autocast/precisionconverter.py | 8 ++++++- modelopt/onnx/utils.py | 22 ++++++++++---------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/modelopt/onnx/autocast/precisionconverter.py b/modelopt/onnx/autocast/precisionconverter.py index 5b13ff080..056cd9cd0 100644 --- a/modelopt/onnx/autocast/precisionconverter.py +++ b/modelopt/onnx/autocast/precisionconverter.py @@ -82,6 +82,10 @@ class PrecisionConverter: Public Methods: convert: Convert specified nodes to FP16/BF16 precision while keeping 
others in FP32. """ + def print_byte_size(self, label: str): + model_proto = self.model.SerializeToString() + model_size = len(model_proto) + print(f"GAGAM {label} ByteSize: {model_size}") def __init__( self, @@ -175,7 +179,7 @@ def convert( onnx.ModelProto: The converted mixed precision model. """ try: - self.model = onnx_utils.check_model(self.model) + onnx_utils.check_model(self.model) except onnx.checker.ValidationError as e: logger.error(f"Internal error: onnx.checker failed on input model {e}") raise Exception( @@ -1294,7 +1298,9 @@ def _fix_network_output_names(self): def _sanity_check(self): sanity_ok = True try: + self.print_byte_size("before check_model") onnx_utils.check_model(self.model) + self.print_byte_size("after check_model") except onnx.checker.ValidationError as e: logger.error(f"Internal error: onnx.checker failed: {e}") sanity_ok = False diff --git a/modelopt/onnx/utils.py b/modelopt/onnx/utils.py index 02306792a..ad077dc05 100644 --- a/modelopt/onnx/utils.py +++ b/modelopt/onnx/utils.py @@ -552,19 +552,19 @@ def _get_unique_name(old_name): return onnx_model, is_modified -def check_model(model: onnx.ModelProto) -> onnx.ModelProto: +def check_model(model: onnx.ModelProto) -> None: """Checks if the given model is valid.""" if model.ByteSize() > (2 * (1024**3)): # 2GB limit - with tempfile.TemporaryDirectory() as temp_dir: - # ONNX also looks in CWD, so we need to use a unique id - unique_id = str(uuid.uuid4())[:8] - onnx_tmp_path = os.path.join(temp_dir, f"model_{unique_id}.onnx") - save_onnx(model, onnx_tmp_path, save_as_external_data=True) - onnx.checker.check_model(onnx_tmp_path) - return onnx.load(onnx_tmp_path) + logger.warning("Model exceeds 2GB limit, skipping check_model") + # with tempfile.TemporaryDirectory() as temp_dir: + # # ONNX also looks in CWD, so we need to use a unique id + # unique_id = str(uuid.uuid4())[:8] + # onnx_tmp_path = os.path.join(temp_dir, f"model_{unique_id}.onnx") + # save_onnx(model, onnx_tmp_path, 
save_as_external_data=True) + # onnx.checker.check_model(onnx_tmp_path) + else: onnx.checker.check_model(model) - return model def find_lowest_common_ancestor(node1: Node, node2: Node) -> tuple[str | None, int, int]: @@ -644,7 +644,7 @@ def save_onnx(model: onnx.ModelProto, onnx_path: str, save_as_external_data: boo model_proto = model.SerializeToString() model_size = len(model_proto) save_as_external_data = save_as_external_data or model_size > size_threshold - logger.debug( + logger.warning( f"Model size: {model_size} bytes, using external data: {save_as_external_data}" ) @@ -658,7 +658,7 @@ def save_onnx(model: onnx.ModelProto, onnx_path: str, save_as_external_data: boo # Set ir_version to 10, remove it once ORT supports ir_version 11 model.ir_version = 10 - + save_as_external_data = True # GAGAM: for debug if save_as_external_data: external_data_path = os.path.basename(onnx_path) + "_data" if os.path.exists(external_data_path): From 7a2d91a563e8b20415bf6a055556cbc5ff588a1f Mon Sep 17 00:00:00 2001 From: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> Date: Mon, 19 Jan 2026 11:21:20 -0800 Subject: [PATCH 3/6] fix check_model for external data Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> --- modelopt/onnx/utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/modelopt/onnx/utils.py b/modelopt/onnx/utils.py index ad077dc05..8ebeaff19 100644 --- a/modelopt/onnx/utils.py +++ b/modelopt/onnx/utils.py @@ -15,6 +15,7 @@ """Utility functions related to onnx.""" +import copy import io import os import tempfile @@ -555,14 +556,13 @@ def _get_unique_name(old_name): def check_model(model: onnx.ModelProto) -> None: """Checks if the given model is valid.""" if model.ByteSize() > (2 * (1024**3)): # 2GB limit - logger.warning("Model exceeds 2GB limit, skipping check_model") - # with tempfile.TemporaryDirectory() as temp_dir: - # # ONNX also looks in CWD, so we need to use a unique id - # unique_id = 
str(uuid.uuid4())[:8] - # onnx_tmp_path = os.path.join(temp_dir, f"model_{unique_id}.onnx") - # save_onnx(model, onnx_tmp_path, save_as_external_data=True) - # onnx.checker.check_model(onnx_tmp_path) - + with tempfile.TemporaryDirectory() as temp_dir: + # ONNX also looks in CWD, so we need to use a unique id + unique_id = str(uuid.uuid4())[:8] + onnx_tmp_path = os.path.join(temp_dir, f"model_{unique_id}.onnx") + model_copy = copy.deepcopy(model) + save_onnx(model_copy, onnx_tmp_path, save_as_external_data=True) + onnx.checker.check_model(onnx_tmp_path) else: onnx.checker.check_model(model) From 581d686bb065faea62a5e20181acc6bded7ef0d5 Mon Sep 17 00:00:00 2001 From: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> Date: Mon, 19 Jan 2026 11:35:58 -0800 Subject: [PATCH 4/6] cleanup debug prints Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> --- modelopt/onnx/autocast/precisionconverter.py | 6 ------ modelopt/onnx/utils.py | 3 +-- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/modelopt/onnx/autocast/precisionconverter.py b/modelopt/onnx/autocast/precisionconverter.py index 056cd9cd0..278486c4b 100644 --- a/modelopt/onnx/autocast/precisionconverter.py +++ b/modelopt/onnx/autocast/precisionconverter.py @@ -82,10 +82,6 @@ class PrecisionConverter: Public Methods: convert: Convert specified nodes to FP16/BF16 precision while keeping others in FP32. 
""" - def print_byte_size(self, label: str): - model_proto = self.model.SerializeToString() - model_size = len(model_proto) - print(f"GAGAM {label} ByteSize: {model_size}") def __init__( self, @@ -1298,9 +1294,7 @@ def _fix_network_output_names(self): def _sanity_check(self): sanity_ok = True try: - self.print_byte_size("before check_model") onnx_utils.check_model(self.model) - self.print_byte_size("after check_model") except onnx.checker.ValidationError as e: logger.error(f"Internal error: onnx.checker failed: {e}") sanity_ok = False diff --git a/modelopt/onnx/utils.py b/modelopt/onnx/utils.py index 8ebeaff19..e5eb1c33b 100644 --- a/modelopt/onnx/utils.py +++ b/modelopt/onnx/utils.py @@ -644,7 +644,7 @@ def save_onnx(model: onnx.ModelProto, onnx_path: str, save_as_external_data: boo model_proto = model.SerializeToString() model_size = len(model_proto) save_as_external_data = save_as_external_data or model_size > size_threshold - logger.warning( + logger.debug( f"Model size: {model_size} bytes, using external data: {save_as_external_data}" ) @@ -658,7 +658,6 @@ def save_onnx(model: onnx.ModelProto, onnx_path: str, save_as_external_data: boo # Set ir_version to 10, remove it once ORT supports ir_version 11 model.ir_version = 10 - save_as_external_data = True # GAGAM: for debug if save_as_external_data: external_data_path = os.path.basename(onnx_path) + "_data" if os.path.exists(external_data_path): From cd96fa5af480a6dd1f015287e3a88db9ef48026b Mon Sep 17 00:00:00 2001 From: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> Date: Mon, 19 Jan 2026 12:36:38 -0800 Subject: [PATCH 5/6] move deepcopy to save_onnx Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> --- modelopt/onnx/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modelopt/onnx/utils.py b/modelopt/onnx/utils.py index e5eb1c33b..5c5dfddbd 100644 --- a/modelopt/onnx/utils.py +++ b/modelopt/onnx/utils.py @@ -560,8 +560,7 @@ def 
check_model(model: onnx.ModelProto) -> None: # ONNX also looks in CWD, so we need to use a unique id unique_id = str(uuid.uuid4())[:8] onnx_tmp_path = os.path.join(temp_dir, f"model_{unique_id}.onnx") - model_copy = copy.deepcopy(model) - save_onnx(model_copy, onnx_tmp_path, save_as_external_data=True) + save_onnx(model, onnx_tmp_path, save_as_external_data=True) onnx.checker.check_model(onnx_tmp_path) else: onnx.checker.check_model(model) @@ -664,8 +663,10 @@ def save_onnx(model: onnx.ModelProto, onnx_path: str, save_as_external_data: boo logger.warning(f"Removing existing external data file: {external_data_path}") os.remove(external_data_path) + # Copy so the onnx.ModelProto object will not be modified + model_copy = copy.deepcopy(model) onnx.save_model( - model, + model_copy, onnx_path, save_as_external_data=True, all_tensors_to_one_file=True, From 00ea80ca522dc6f42f3262f1c0f32d50a957d7ea Mon Sep 17 00:00:00 2001 From: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> Date: Tue, 20 Jan 2026 07:01:34 -0800 Subject: [PATCH 6/6] code de-duplication Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> --- modelopt/onnx/autocast/referencerunner.py | 5 +---- modelopt/onnx/utils.py | 15 +++++++++++++++ modelopt/torch/_deploy/utils/onnx_utils.py | 11 ----------- modelopt/torch/_deploy/utils/torch_onnx.py | 2 +- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/modelopt/onnx/autocast/referencerunner.py b/modelopt/onnx/autocast/referencerunner.py index 896066a73..2831f211d 100644 --- a/modelopt/onnx/autocast/referencerunner.py +++ b/modelopt/onnx/autocast/referencerunner.py @@ -128,10 +128,7 @@ def _get_ort_runner(self, model): # Check if model has external data by checking: # 1. If any initializer has data_location set to EXTERNAL (even if data is loaded) # 2. 
If model size would exceed 2GB (indicating need for external data) - has_external_data = any( - init.HasField("data_location") and init.data_location == onnx.TensorProto.EXTERNAL - for init in self.model.graph.initializer - ) + has_external_data = onnx_utils.check_model_uses_external_data(self.model) # Also check if model would be too large (>2GB) for SerializeToString # This handles cases where model was loaded with external data already loaded diff --git a/modelopt/onnx/utils.py b/modelopt/onnx/utils.py index 5c5dfddbd..f2b020b06 100644 --- a/modelopt/onnx/utils.py +++ b/modelopt/onnx/utils.py @@ -696,6 +696,21 @@ def get_opset_version(model: onnx.ModelProto) -> int: return ai_onnx_domain[0].version +def check_model_uses_external_data(model: onnx.ModelProto) -> bool: + """Checks if the model uses external data. + + Args: + model: Loaded in-memory onnx ModelProto. + + Returns: + True if any initializer tensor has data_location set to EXTERNAL. + """ + return any( + init.HasField("data_location") and init.data_location == onnx.TensorProto.EXTERNAL + for init in model.graph.initializer + ) + + def bfloat16_to_float32(bf16_array): """Converts a bfloat16 array (as raw data) to a float32 array.""" uint32_array = bf16_array.astype(np.uint32) << 16 diff --git a/modelopt/torch/_deploy/utils/onnx_utils.py b/modelopt/torch/_deploy/utils/onnx_utils.py index a377afcb6..9120eb73a 100644 --- a/modelopt/torch/_deploy/utils/onnx_utils.py +++ b/modelopt/torch/_deploy/utils/onnx_utils.py @@ -45,14 +45,3 @@ def _get_onnx_external_data_tensors(model: onnx.ModelProto) -> list[str]: if tensor.HasField("data_location") and tensor.data_location == onnx.TensorProto.EXTERNAL ] return model_tensors_ext - - -def check_model_uses_external_data(model: onnx.ModelProto) -> bool: - """ - Checks if the model uses external data. 
- """ - model_tensors = _get_initializer_tensors(model) - return any( - tensor.HasField("data_location") and tensor.data_location == onnx.TensorProto.EXTERNAL - for tensor in model_tensors - ) diff --git a/modelopt/torch/_deploy/utils/torch_onnx.py b/modelopt/torch/_deploy/utils/torch_onnx.py index 26a5781ed..304fb8ec7 100644 --- a/modelopt/torch/_deploy/utils/torch_onnx.py +++ b/modelopt/torch/_deploy/utils/torch_onnx.py @@ -42,6 +42,7 @@ ) from modelopt.onnx.quantization.qdq_utils import qdq_to_dq, replace_zero_scale_with_smallest_nonzero from modelopt.onnx.utils import ( + check_model_uses_external_data, get_input_names, get_input_shapes, get_node_names, @@ -55,7 +56,6 @@ from modelopt.torch.utils._pytree import TreeSpec from ..utils.onnx_optimizer import Optimizer -from .onnx_utils import check_model_uses_external_data ModelMetadata = dict[str, Any] ModelType = Any