From 30c61108ce7b9cb3a76cbc669bccf770d56999d8 Mon Sep 17 00:00:00 2001
From: Zhiyu Cheng
Date: Thu, 8 Jan 2026 17:41:33 -0800
Subject: [PATCH 1/2] remove quantization_config in config.json from original
 deepseek models

Signed-off-by: Zhiyu Cheng
---
 examples/deepseek/quantize_to_nvfp4.py | 35 ++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/examples/deepseek/quantize_to_nvfp4.py b/examples/deepseek/quantize_to_nvfp4.py
index d94f48fce..484b67e2d 100644
--- a/examples/deepseek/quantize_to_nvfp4.py
+++ b/examples/deepseek/quantize_to_nvfp4.py
@@ -82,6 +82,39 @@ def _remap_key(key_dict: dict[str, Any]):
     key_dict.update(new_dict)
 
 
+def remove_quantization_config_from_original_config(export_dir: str) -> None:
+    """Remove `quantization_config` from exported HF `config.json` if present.
+
+    DeepSeek original checkpoints may include a `quantization_config` field in `config.json`
+    (describing the source checkpoint's quantization). When we export ModelOpt quantization
+    configs to `hf_quant_config.json`, leaving the original `quantization_config` in place can
+    be confusing. This function performs an in-place, best-effort cleanup in the exported
+    checkpoint directory.
+    """
+    config_path = os.path.join(export_dir, "config.json")
+    if not os.path.exists(config_path):
+        return
+
+    try:
+        with open(config_path) as f:
+            cfg = json.load(f)
+    except Exception as e:
+        print(f"Warning: Failed to read {config_path}: {e}")
+        return
+
+    if not isinstance(cfg, dict) or "quantization_config" not in cfg:
+        return
+
+    cfg.pop("quantization_config", None)
+    try:
+        with open(config_path, "w") as f:
+            json.dump(cfg, f, indent=2, sort_keys=True)
+            f.write("\n")
+    except Exception as e:
+        print(f"Warning: Failed to write {config_path}: {e}")
+        return
+
+
 def load_and_preprocess_state_dict(modelopt_state_root, world_size=8):
     state_dict_list = [
         torch.load(f"{modelopt_state_root}/amax_dict_rank{rank}-mp{world_size}.pt")
@@ -302,3 +335,5 @@ def get_tensor(tensor_name):
         save_root=args.fp4_path,
         per_layer_quant_config=per_layer_quant_config,
     )
+
+    remove_quantization_config_from_original_config(args.fp4_path)

From 52a2230e46407af35dc7fea495f228b67e1a2549 Mon Sep 17 00:00:00 2001
From: Zhiyu Cheng
Date: Thu, 8 Jan 2026 17:56:54 -0800
Subject: [PATCH 2/2] simplify

Signed-off-by: Zhiyu Cheng
---
 examples/deepseek/quantize_to_nvfp4.py | 35 ++++++--------------------
 1 file changed, 8 insertions(+), 27 deletions(-)

diff --git a/examples/deepseek/quantize_to_nvfp4.py b/examples/deepseek/quantize_to_nvfp4.py
index 484b67e2d..a18cbbc16 100644
--- a/examples/deepseek/quantize_to_nvfp4.py
+++ b/examples/deepseek/quantize_to_nvfp4.py
@@ -83,36 +83,17 @@ def _remap_key(key_dict: dict[str, Any]):
 
 
 def remove_quantization_config_from_original_config(export_dir: str) -> None:
-    """Remove `quantization_config` from exported HF `config.json` if present.
+    """Remove `quantization_config` from exported HF `config.json`.
 
-    DeepSeek original checkpoints may include a `quantization_config` field in `config.json`
-    (describing the source checkpoint's quantization). When we export ModelOpt quantization
-    configs to `hf_quant_config.json`, leaving the original `quantization_config` in place can
-    be confusing. This function performs an in-place, best-effort cleanup in the exported
-    checkpoint directory.
+    Assumes the exported checkpoint directory has a `config.json` containing `quantization_config`.
     """
     config_path = os.path.join(export_dir, "config.json")
-    if not os.path.exists(config_path):
-        return
-
-    try:
-        with open(config_path) as f:
-            cfg = json.load(f)
-    except Exception as e:
-        print(f"Warning: Failed to read {config_path}: {e}")
-        return
-
-    if not isinstance(cfg, dict) or "quantization_config" not in cfg:
-        return
-
-    cfg.pop("quantization_config", None)
-    try:
-        with open(config_path, "w") as f:
-            json.dump(cfg, f, indent=2, sort_keys=True)
-            f.write("\n")
-    except Exception as e:
-        print(f"Warning: Failed to write {config_path}: {e}")
-        return
+    with open(config_path) as f:
+        cfg = json.load(f)
+    del cfg["quantization_config"]
+    with open(config_path, "w") as f:
+        json.dump(cfg, f, indent=2, sort_keys=True)
+        f.write("\n")
 
 
 def load_and_preprocess_state_dict(modelopt_state_root, world_size=8):