Skip to content

Commit 1523a74

Browse files
Move all insecure modelopt state loading to single function
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
1 parent 08d35a7 commit 1523a74

File tree

8 files changed

+45
-27
lines changed

8 files changed

+45
-27
lines changed

docs/source/guides/2_save_load.rst

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,7 @@ Here is the example workflow of restoring the ModelOpt-modified model architectu
129129
model = ...
130130
131131
# Restore the model architecture using the saved `modelopt_state`
132-
# Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
133-
modelopt_state = torch.load("modelopt_state.pth", weights_only=False)
134-
model = mto.restore_from_modelopt_state(model, modelopt_state)
132+
model = mto.restore_from_modelopt_state(model, modelopt_state_path="modelopt_state.pth")
135133
136134
# Load the model weights separately after restoring the model architecture
137135
custom_method_to_load_model_weights(model)

examples/diffusers/distillation/distillation_trainer.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -591,10 +591,9 @@ def _apply_modelopt_quantization(self) -> None:
591591
f"Resuming: restoring quantization architecture from "
592592
f"{modelopt_state_path} (weights loaded later by accelerator)"
593593
)
594-
# Security NOTE: weights_only=False is used on ModelOpt-generated state,
595-
# not on untrusted user input.
596-
state = torch.load(modelopt_state_path, weights_only=False, map_location="cpu")
597-
self._transformer = mto.restore_from_modelopt_state(self._transformer, state)
594+
self._transformer = mto.restore_from_modelopt_state(
595+
self._transformer, modelopt_state_path=modelopt_state_path
596+
)
598597
logger.info("Quantization architecture restored for resume")
599598
return
600599
else:

examples/llm_qat/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ torch.save(mto.modelopt_state(model), "modelopt_quantizer_states.pt")
8181

8282
# To resume training from a checkpoint or load the final QAT model for evaluation,
8383
# load the quantizer states before loading the model weights
84-
# mto.restore_from_modelopt_state(model, torch.load("modelopt_quantizer_states.pt", weights_only=False))
84+
# mto.restore_from_modelopt_state(model, modelopt_state_path="modelopt_quantizer_states.pt")
8585
# After loading the quantizer states, load the model weights
8686
# model.load_state_dict(state_dict_from_last_checkpoint)
8787

examples/llm_qat/export.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
import warnings
1919
from pathlib import Path
2020

21-
import torch
2221
from transformers import AutoModelForCausalLM, AutoTokenizer
2322

2423
import modelopt.torch.opt as mto
@@ -51,8 +50,7 @@ def get_model(
5150

5251
# Restore modelopt state for LoRA models. For QAT/QAD models from_pretrained call handles this
5352
if hasattr(model, "peft_config"):
54-
# Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
55-
modelopt_state = torch.load(f"{ckpt_path}/modelopt_state_train.pth", weights_only=False)
53+
modelopt_state = mto.load_modelopt_state(f"{ckpt_path}/modelopt_state_train.pth")
5654
restore_from_modelopt_state(model, modelopt_state)
5755
print_rank_0("Restored modelopt state")
5856

modelopt/torch/opt/conversion.py

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
__all__ = [
5252
"ModeloptStateManager",
5353
"apply_mode",
54+
"load_modelopt_state",
5455
"modelopt_state",
5556
"restore",
5657
"restore_from_modelopt_state",
@@ -512,7 +513,29 @@ def save(model: nn.Module, f: str | os.PathLike | BinaryIO, **kwargs) -> None:
512513
torch.save(ckpt_dict, f, **kwargs)
513514

514515

515-
def restore_from_modelopt_state(model: ModelLike, modelopt_state: dict[str, Any]) -> nn.Module:
516+
def load_modelopt_state(modelopt_state_path: str | os.PathLike, **kwargs) -> dict[str, Any]:
517+
"""Load the modelopt state from a file.
518+
519+
Args:
520+
modelopt_state_path: Path to the saved modelopt state file to load from.
521+
**kwargs: additional args for ``torch.load()``.
522+
523+
Returns:
524+
A modelopt state dictionary describing the modifications to the model.
525+
"""
526+
# Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
527+
kwargs.setdefault("weights_only", False)
528+
kwargs.setdefault("map_location", "cpu")
529+
# TODO: Add some validation to ensure the file is a valid modelopt state file.
530+
modelopt_state = torch.load(modelopt_state_path, **kwargs)
531+
return modelopt_state
532+
533+
534+
def restore_from_modelopt_state(
535+
model: ModelLike,
536+
modelopt_state: dict[str, Any] | None = None,
537+
modelopt_state_path: str | os.PathLike | None = None,
538+
) -> nn.Module:
516539
"""Restore the model architecture from the modelopt state dictionary based on the user-provided model.
517540
518541
This method does not restore the model parameters such as weights, biases and quantization scales.
@@ -526,10 +549,7 @@ def restore_from_modelopt_state(model: ModelLike, modelopt_state: dict[str, Any]
526549
model = ... # Create the model-like object
527550
528551
# Restore the previously saved modelopt state followed by model weights
529-
# Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
530-
mto.restore_from_modelopt_state(
531-
model, torch.load("modelopt_state.pt", weights_only=False)
532-
) # Restore modelopt state
552+
mto.restore_from_modelopt_state(model, modelopt_state_path="modelopt_state.pt")
533553
model.load_state_dict(torch.load("model_weights.pt"), ...) # Load the model weights
534554
535555
If you want to restore the model weights and the modelopt state with saved scales, please use
@@ -543,11 +563,21 @@ def restore_from_modelopt_state(model: ModelLike, modelopt_state: dict[str, Any]
543563
modelopt_state: The modelopt state dict describing the modelopt modifications to the model. The
544564
``modelopt_state`` can be generated via
545565
:meth:`mto.modelopt_state()<modelopt.torch.opt.conversion.modelopt_state>`.
566+
Cannot be used with modelopt_state_path.
567+
modelopt_state_path: The path to the modelopt state file.
568+
Cannot be used with modelopt_state.
546569
547570
Returns:
548571
A modified model architecture based on the restored modifications with the unmodified
549572
weights as stored in the provided ``model`` argument.
550573
"""
574+
assert (modelopt_state is not None) != (modelopt_state_path is not None), (
575+
"Either modelopt_state or modelopt_state_path must be provided, but not both."
576+
)
577+
if modelopt_state_path is not None:
578+
modelopt_state = load_modelopt_state(modelopt_state_path)
579+
assert modelopt_state, "modelopt_state is required!"
580+
551581
# initialize ModelLikeModule if needed.
552582
model = model if isinstance(model, nn.Module) else ModelLikeModule(model)
553583

modelopt/torch/opt/plugins/huggingface.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,8 @@ def new_init_fn(self, *args, **kwargs):
7979
modelopt_state_path = _get_modelopt_state_path(model_path)
8080
_original__init__(self, *args, **kwargs)
8181
if os.path.isfile(modelopt_state_path):
82-
# Security NOTE: weights_only=False is used on ModelOpt-generated state_dict, not on untrusted user input
83-
modelopt_state = torch.load(modelopt_state_path, map_location="cpu", weights_only=False)
8482
with extra_context() if extra_context else nullcontext():
85-
restore_from_modelopt_state(self, modelopt_state)
83+
restore_from_modelopt_state(self, modelopt_state_path=modelopt_state_path)
8684

8785
print_rank_0(f"Restored ModelOpt state from {modelopt_state_path}")
8886

modelopt/torch/opt/plugins/peft.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,7 @@ def _new_load_adapter(self, model_id, adapter_name, *args, **kwargs):
7272
assert adapter_name in self.peft_config, (
7373
f"ModelOpt modified model should have adapter_name={adapter_name} in peft_config"
7474
)
75-
# Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
76-
restore_from_modelopt_state(
77-
self, torch.load(modelopt_state_path, map_location="cpu", weights_only=False)
78-
)
75+
restore_from_modelopt_state(self, modelopt_state_path=modelopt_state_path)
7976

8077
outputs = self._modelopt_cache["load_adapter"](self, model_id, adapter_name, *args, **kwargs)
8178

modelopt/torch/quantization/plugins/transformers_trainer.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
import modelopt.torch.opt as mto
2929
import modelopt.torch.quantization as mtq
3030
from modelopt.torch.distill.plugins.huggingface import KDTrainer
31-
from modelopt.torch.opt.conversion import restore_from_modelopt_state
3231
from modelopt.torch.opt.plugins import ModelOptHFTrainer
3332
from modelopt.torch.utils import print_rank_0
3433

@@ -233,10 +232,9 @@ def _save_modelopt_state_with_weights(self):
233232
print_rank_0(f"Saved modelopt state to {self._modelopt_state_path}")
234233

235234
def _restore_modelopt_state_with_weights(self):
236-
# Security NOTE: weights_only=False is used here on ModelOpt-generated state_dict, not on untrusted user input
237-
modelopt_state = torch.load(self._modelopt_state_path, weights_only=False)
235+
modelopt_state = mto.load_modelopt_state(self._modelopt_state_path)
238236
modelopt_weights = modelopt_state.pop("modelopt_state_weights", None)
239-
restore_from_modelopt_state(self.model, modelopt_state)
237+
mto.restore_from_modelopt_state(self.model, modelopt_state)
240238
if modelopt_weights is not None:
241239
set_quantizer_state_dict(self.model, modelopt_weights)
242240
print_rank_0("Restored modelopt state with weights.")

0 commit comments

Comments
 (0)