From d4e15ed654b46c01664d9c6e07318364019e4fec Mon Sep 17 00:00:00 2001 From: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> Date: Sun, 28 Dec 2025 02:19:10 -0800 Subject: [PATCH 1/6] fix referencerunner for external data Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> --- modelopt/onnx/autocast/referencerunner.py | 64 +++++++++++++++++++++-- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/modelopt/onnx/autocast/referencerunner.py b/modelopt/onnx/autocast/referencerunner.py index 8dc91ff08..896066a73 100644 --- a/modelopt/onnx/autocast/referencerunner.py +++ b/modelopt/onnx/autocast/referencerunner.py @@ -24,11 +24,13 @@ import copy import io import sys +import tempfile from collections import OrderedDict import numpy as np import onnx +from modelopt.onnx import utils as onnx_utils from modelopt.onnx.autocast.logging_config import configure_logging, logger from modelopt.onnx.quantization.ort_utils import _prepare_ep_list @@ -118,13 +120,65 @@ def _load_inputs(self, inputs): return data_loader + def _get_ort_runner(self, model): + import onnxruntime as ort + from polygraphy.backend.onnx import BytesFromOnnx + from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx + + # Check if model has external data by checking: + # 1. If any initializer has data_location set to EXTERNAL (even if data is loaded) + # 2. 
If model size would exceed 2GB (indicating need for external data) + has_external_data = any( + init.HasField("data_location") and init.data_location == onnx.TensorProto.EXTERNAL + for init in self.model.graph.initializer + ) + + # Also check if model would be too large (>2GB) for SerializeToString + # This handles cases where model was loaded with external data already loaded + if not has_external_data: + try: + # Try to estimate size by serializing the model + # If it fails or exceeds 2GB, we need file-based approach + model_size = len(self.model.SerializeToString()) + if model_size > 2 * (1024**3): # 2GB threshold + has_external_data = True + logger.debug( + f"Model size ({model_size / (1024**3):.2f} GB) exceeds 2GB, using file-based approach" + ) + except (ValueError, AttributeError) as e: + # SerializeToString failed (likely >2GB limit), use file-based approach + if "exceeds maximum protobuf size" in str(e) or "2GB" in str(e): + has_external_data = True + logger.debug("Model exceeds protobuf 2GB limit, using file-based approach") + + if has_external_data: + logger.debug("Model has external data, using file-based approach") + # Get the actual ONNX ModelProto from ModifyOutputs wrapper + modified_model = model() + + # Use a persistent temp file to handle external data files properly + tmp_file = tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) + tmp_file.close() + tmp_file_path = tmp_file.name + onnx_utils.save_onnx(modified_model, tmp_file_path, save_as_external_data=True) + logger.debug(f"Model with all outputs saved to {tmp_file_path}") + session = ort.InferenceSession(tmp_file_path, providers=self.providers) + runners = [OnnxrtRunner(lambda: session)] + + else: + # For models without external data, use the original BytesFromOnnx approach (no tmp files) + logger.debug("Model has no external data, using BytesFromOnnx approach") + serialize_onnx = BytesFromOnnx(model) + build_onnxrt_session = SessionFromOnnx(serialize_onnx, providers=self.providers) + 
runners = [OnnxrtRunner(build_onnxrt_session)] + + return runners + def run(self, inputs=None): """Run FP32 inference with provided or random inputs.""" import onnxruntime as ort from polygraphy import constants - from polygraphy.backend.onnx import BytesFromOnnx from polygraphy.backend.onnx import ModifyOutputs as ModifyOnnxOutputs - from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx from polygraphy.comparator import Comparator logger.info("Running ONNX Runtime to obtain reference outputs (this may take a while)...") @@ -133,9 +187,9 @@ def run(self, inputs=None): model_copy = copy.deepcopy(self.model) modify_outputs = ModifyOnnxOutputs(model_copy, outputs=constants.MARK_ALL) - serialize_onnx = BytesFromOnnx(modify_outputs) - build_onnxrt_session = SessionFromOnnx(serialize_onnx, providers=self.providers) - runners = [OnnxrtRunner(build_onnxrt_session)] + + # Load the modified model and create an inference session + runners = self._get_ort_runner(modify_outputs) # Comparator is used despite the fact that we are using ONNXRuntime # because it provides the ability to generate random inputs using DataLoader From 65325227d6dd0f9743580e0f3bec157c7af54943 Mon Sep 17 00:00:00 2001 From: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> Date: Sun, 28 Dec 2025 02:25:26 -0800 Subject: [PATCH 2/6] draft: skip model checker for models with external data Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> --- modelopt/onnx/autocast/precisionconverter.py | 8 ++++++- modelopt/onnx/utils.py | 22 ++++++++++---------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/modelopt/onnx/autocast/precisionconverter.py b/modelopt/onnx/autocast/precisionconverter.py index 5b13ff080..056cd9cd0 100644 --- a/modelopt/onnx/autocast/precisionconverter.py +++ b/modelopt/onnx/autocast/precisionconverter.py @@ -82,6 +82,10 @@ class PrecisionConverter: Public Methods: convert: Convert specified nodes to FP16/BF16 precision while keeping 
others in FP32. """ + def print_byte_size(self, label: str): + model_proto = self.model.SerializeToString() + model_size = len(model_proto) + print(f"GAGAM {label} ByteSize: {model_size}") def __init__( self, @@ -175,7 +179,7 @@ def convert( onnx.ModelProto: The converted mixed precision model. """ try: - self.model = onnx_utils.check_model(self.model) + onnx_utils.check_model(self.model) except onnx.checker.ValidationError as e: logger.error(f"Internal error: onnx.checker failed on input model {e}") raise Exception( @@ -1294,7 +1298,9 @@ def _fix_network_output_names(self): def _sanity_check(self): sanity_ok = True try: + self.print_byte_size("before check_model") onnx_utils.check_model(self.model) + self.print_byte_size("after check_model") except onnx.checker.ValidationError as e: logger.error(f"Internal error: onnx.checker failed: {e}") sanity_ok = False diff --git a/modelopt/onnx/utils.py b/modelopt/onnx/utils.py index 02306792a..ad077dc05 100644 --- a/modelopt/onnx/utils.py +++ b/modelopt/onnx/utils.py @@ -552,19 +552,19 @@ def _get_unique_name(old_name): return onnx_model, is_modified -def check_model(model: onnx.ModelProto) -> onnx.ModelProto: +def check_model(model: onnx.ModelProto) -> None: """Checks if the given model is valid.""" if model.ByteSize() > (2 * (1024**3)): # 2GB limit - with tempfile.TemporaryDirectory() as temp_dir: - # ONNX also looks in CWD, so we need to use a unique id - unique_id = str(uuid.uuid4())[:8] - onnx_tmp_path = os.path.join(temp_dir, f"model_{unique_id}.onnx") - save_onnx(model, onnx_tmp_path, save_as_external_data=True) - onnx.checker.check_model(onnx_tmp_path) - return onnx.load(onnx_tmp_path) + logger.warning("Model exceeds 2GB limit, skipping check_model") + # with tempfile.TemporaryDirectory() as temp_dir: + # # ONNX also looks in CWD, so we need to use a unique id + # unique_id = str(uuid.uuid4())[:8] + # onnx_tmp_path = os.path.join(temp_dir, f"model_{unique_id}.onnx") + # save_onnx(model, onnx_tmp_path, 
save_as_external_data=True) + # onnx.checker.check_model(onnx_tmp_path) + else: onnx.checker.check_model(model) - return model def find_lowest_common_ancestor(node1: Node, node2: Node) -> tuple[str | None, int, int]: @@ -644,7 +644,7 @@ def save_onnx(model: onnx.ModelProto, onnx_path: str, save_as_external_data: boo model_proto = model.SerializeToString() model_size = len(model_proto) save_as_external_data = save_as_external_data or model_size > size_threshold - logger.debug( + logger.warning( f"Model size: {model_size} bytes, using external data: {save_as_external_data}" ) @@ -658,7 +658,7 @@ def save_onnx(model: onnx.ModelProto, onnx_path: str, save_as_external_data: boo # Set ir_version to 10, remove it once ORT supports ir_version 11 model.ir_version = 10 - + save_as_external_data = True # GAGAM: for debug if save_as_external_data: external_data_path = os.path.basename(onnx_path) + "_data" if os.path.exists(external_data_path): From 7a2d91a563e8b20415bf6a055556cbc5ff588a1f Mon Sep 17 00:00:00 2001 From: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> Date: Mon, 19 Jan 2026 11:21:20 -0800 Subject: [PATCH 3/6] fix check_model for external data Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> --- modelopt/onnx/utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/modelopt/onnx/utils.py b/modelopt/onnx/utils.py index ad077dc05..8ebeaff19 100644 --- a/modelopt/onnx/utils.py +++ b/modelopt/onnx/utils.py @@ -15,6 +15,7 @@ """Utility functions related to onnx.""" +import copy import io import os import tempfile @@ -555,14 +556,13 @@ def _get_unique_name(old_name): def check_model(model: onnx.ModelProto) -> None: """Checks if the given model is valid.""" if model.ByteSize() > (2 * (1024**3)): # 2GB limit - logger.warning("Model exceeds 2GB limit, skipping check_model") - # with tempfile.TemporaryDirectory() as temp_dir: - # # ONNX also looks in CWD, so we need to use a unique id - # unique_id = 
str(uuid.uuid4())[:8] - # onnx_tmp_path = os.path.join(temp_dir, f"model_{unique_id}.onnx") - # save_onnx(model, onnx_tmp_path, save_as_external_data=True) - # onnx.checker.check_model(onnx_tmp_path) - + with tempfile.TemporaryDirectory() as temp_dir: + # ONNX also looks in CWD, so we need to use a unique id + unique_id = str(uuid.uuid4())[:8] + onnx_tmp_path = os.path.join(temp_dir, f"model_{unique_id}.onnx") + model_copy = copy.deepcopy(model) + save_onnx(model_copy, onnx_tmp_path, save_as_external_data=True) + onnx.checker.check_model(onnx_tmp_path) else: onnx.checker.check_model(model) From 581d686bb065faea62a5e20181acc6bded7ef0d5 Mon Sep 17 00:00:00 2001 From: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> Date: Mon, 19 Jan 2026 11:35:58 -0800 Subject: [PATCH 4/6] cleanup debug prints Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> --- modelopt/onnx/autocast/precisionconverter.py | 6 ------ modelopt/onnx/utils.py | 3 +-- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/modelopt/onnx/autocast/precisionconverter.py b/modelopt/onnx/autocast/precisionconverter.py index 056cd9cd0..278486c4b 100644 --- a/modelopt/onnx/autocast/precisionconverter.py +++ b/modelopt/onnx/autocast/precisionconverter.py @@ -82,10 +82,6 @@ class PrecisionConverter: Public Methods: convert: Convert specified nodes to FP16/BF16 precision while keeping others in FP32. 
""" - def print_byte_size(self, label: str): - model_proto = self.model.SerializeToString() - model_size = len(model_proto) - print(f"GAGAM {label} ByteSize: {model_size}") def __init__( self, @@ -1298,9 +1294,7 @@ def _fix_network_output_names(self): def _sanity_check(self): sanity_ok = True try: - self.print_byte_size("before check_model") onnx_utils.check_model(self.model) - self.print_byte_size("after check_model") except onnx.checker.ValidationError as e: logger.error(f"Internal error: onnx.checker failed: {e}") sanity_ok = False diff --git a/modelopt/onnx/utils.py b/modelopt/onnx/utils.py index 8ebeaff19..e5eb1c33b 100644 --- a/modelopt/onnx/utils.py +++ b/modelopt/onnx/utils.py @@ -644,7 +644,7 @@ def save_onnx(model: onnx.ModelProto, onnx_path: str, save_as_external_data: boo model_proto = model.SerializeToString() model_size = len(model_proto) save_as_external_data = save_as_external_data or model_size > size_threshold - logger.warning( + logger.debug( f"Model size: {model_size} bytes, using external data: {save_as_external_data}" ) @@ -658,7 +658,6 @@ def save_onnx(model: onnx.ModelProto, onnx_path: str, save_as_external_data: boo # Set ir_version to 10, remove it once ORT supports ir_version 11 model.ir_version = 10 - save_as_external_data = True # GAGAM: for debug if save_as_external_data: external_data_path = os.path.basename(onnx_path) + "_data" if os.path.exists(external_data_path): From cd96fa5af480a6dd1f015287e3a88db9ef48026b Mon Sep 17 00:00:00 2001 From: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> Date: Mon, 19 Jan 2026 12:36:38 -0800 Subject: [PATCH 5/6] move deepcopy to save_onnx Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> --- modelopt/onnx/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modelopt/onnx/utils.py b/modelopt/onnx/utils.py index e5eb1c33b..5c5dfddbd 100644 --- a/modelopt/onnx/utils.py +++ b/modelopt/onnx/utils.py @@ -560,8 +560,7 @@ def 
check_model(model: onnx.ModelProto) -> None: # ONNX also looks in CWD, so we need to use a unique id unique_id = str(uuid.uuid4())[:8] onnx_tmp_path = os.path.join(temp_dir, f"model_{unique_id}.onnx") - model_copy = copy.deepcopy(model) - save_onnx(model_copy, onnx_tmp_path, save_as_external_data=True) + save_onnx(model, onnx_tmp_path, save_as_external_data=True) onnx.checker.check_model(onnx_tmp_path) else: onnx.checker.check_model(model) @@ -664,8 +663,10 @@ def save_onnx(model: onnx.ModelProto, onnx_path: str, save_as_external_data: boo logger.warning(f"Removing existing external data file: {external_data_path}") os.remove(external_data_path) + # Copy so the onnx.ModelProto object will not be modified + model_copy = copy.deepcopy(model) onnx.save_model( - model, + model_copy, onnx_path, save_as_external_data=True, all_tensors_to_one_file=True, From 00ea80ca522dc6f42f3262f1c0f32d50a957d7ea Mon Sep 17 00:00:00 2001 From: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> Date: Tue, 20 Jan 2026 07:01:34 -0800 Subject: [PATCH 6/6] code de-duplication Signed-off-by: Gal Hubara Agam <96368689+galagam@users.noreply.github.com> --- modelopt/onnx/autocast/referencerunner.py | 5 +---- modelopt/onnx/utils.py | 15 +++++++++++++++ modelopt/torch/_deploy/utils/onnx_utils.py | 11 ----------- modelopt/torch/_deploy/utils/torch_onnx.py | 2 +- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/modelopt/onnx/autocast/referencerunner.py b/modelopt/onnx/autocast/referencerunner.py index 896066a73..2831f211d 100644 --- a/modelopt/onnx/autocast/referencerunner.py +++ b/modelopt/onnx/autocast/referencerunner.py @@ -128,10 +128,7 @@ def _get_ort_runner(self, model): # Check if model has external data by checking: # 1. If any initializer has data_location set to EXTERNAL (even if data is loaded) # 2. 
If model size would exceed 2GB (indicating need for external data) - has_external_data = any( - init.HasField("data_location") and init.data_location == onnx.TensorProto.EXTERNAL - for init in self.model.graph.initializer - ) + has_external_data = onnx_utils.check_model_uses_external_data(self.model) # Also check if model would be too large (>2GB) for SerializeToString # This handles cases where model was loaded with external data already loaded diff --git a/modelopt/onnx/utils.py b/modelopt/onnx/utils.py index 5c5dfddbd..f2b020b06 100644 --- a/modelopt/onnx/utils.py +++ b/modelopt/onnx/utils.py @@ -696,6 +696,21 @@ def get_opset_version(model: onnx.ModelProto) -> int: return ai_onnx_domain[0].version +def check_model_uses_external_data(model: onnx.ModelProto) -> bool: + """Checks if the model uses external data. + + Args: + model: Loaded in-memory onnx ModelProto. + + Returns: + True if any initializer tensor has data_location set to EXTERNAL. + """ + return any( + init.HasField("data_location") and init.data_location == onnx.TensorProto.EXTERNAL + for init in model.graph.initializer + ) + + def bfloat16_to_float32(bf16_array): """Converts a bfloat16 array (as raw data) to a float32 array.""" uint32_array = bf16_array.astype(np.uint32) << 16 diff --git a/modelopt/torch/_deploy/utils/onnx_utils.py b/modelopt/torch/_deploy/utils/onnx_utils.py index a377afcb6..9120eb73a 100644 --- a/modelopt/torch/_deploy/utils/onnx_utils.py +++ b/modelopt/torch/_deploy/utils/onnx_utils.py @@ -45,14 +45,3 @@ def _get_onnx_external_data_tensors(model: onnx.ModelProto) -> list[str]: if tensor.HasField("data_location") and tensor.data_location == onnx.TensorProto.EXTERNAL ] return model_tensors_ext - - -def check_model_uses_external_data(model: onnx.ModelProto) -> bool: - """ - Checks if the model uses external data. 
- """ - model_tensors = _get_initializer_tensors(model) - return any( - tensor.HasField("data_location") and tensor.data_location == onnx.TensorProto.EXTERNAL - for tensor in model_tensors - ) diff --git a/modelopt/torch/_deploy/utils/torch_onnx.py b/modelopt/torch/_deploy/utils/torch_onnx.py index 26a5781ed..304fb8ec7 100644 --- a/modelopt/torch/_deploy/utils/torch_onnx.py +++ b/modelopt/torch/_deploy/utils/torch_onnx.py @@ -42,6 +42,7 @@ ) from modelopt.onnx.quantization.qdq_utils import qdq_to_dq, replace_zero_scale_with_smallest_nonzero from modelopt.onnx.utils import ( + check_model_uses_external_data, get_input_names, get_input_shapes, get_node_names, @@ -55,7 +56,6 @@ from modelopt.torch.utils._pytree import TreeSpec from ..utils.onnx_optimizer import Optimizer -from .onnx_utils import check_model_uses_external_data ModelMetadata = dict[str, Any] ModelType = Any