Fix dynamic load bug

yuanlehome · yuanlehome · commit 71872d69d2e0 · 2025-12-11T23:57:38.000+08:00
diff --git a/fastdeploy/model_executor/layers/quantization/kv_cache.py b/fastdeploy/model_executor/layers/quantization/kv_cache.py
@@ -263,10 +263,11 @@ def process_weights_after_loading(self, layer: nn.Layer):
         """
         use for loader v1
         """
-        if layer.cache_k_scale._is_initialized():
-            layer.cache_k_out_scale.set_value(1 / layer.cache_k_scale)
-        if layer.cache_v_scale._is_initialized():
-            layer.cache_v_out_scale.set_value(1 / layer.cache_v_scale)
+        if "block_wise" not in layer.cache_quant_type_str:
+            if layer.cache_k_scale._is_initialized():
+                layer.cache_k_out_scale.set_value(1 / layer.cache_k_scale)
+            if layer.cache_v_scale._is_initialized():
+                layer.cache_v_out_scale.set_value(1 / layer.cache_v_scale)
 
     def apply(self, layer):
         """
diff --git a/fastdeploy/model_executor/model_loader/default_loader.py b/fastdeploy/model_executor/model_loader/default_loader.py
@@ -99,6 +99,7 @@ def load_model(self, fd_config: FDConfig) -> nn.Layer:
     def load_rl_mock_model(self, fd_config: FDConfig) -> nn.Layer:
         """use for rl model load"""
         # (TODO:gaoziyuan) optimze
+        assert fd_config.load_config.load_strategy == "normal", fd_config.load_config.load_strategy
         original_architectures = fd_config.model_config.architectures[0]
         logger.info(f"Starting to load model {original_architectures}.")
 
@@ -110,16 +111,15 @@ def load_rl_mock_model(self, fd_config: FDConfig) -> nn.Layer:
             model_architectures = original_architectures.replace("Ernie5ForCausalLM", "Ernie5MTPForCausalLM")
 
         model_architectures += "RL"
-        context = paddle.LazyGuard()
+        context = contextlib.nullcontext()
 
         with context:
             model_cls = ModelRegistry.get_class(model_architectures)
             model = model_cls(fd_config)
 
         model.eval()
 
-        if fd_config.load_config.load_strategy == "normal":
-            # normal strategy need load weight and architectures need without "RL"
-            self.load_weights(model, fd_config, original_architectures)
+        # The "normal" strategy must load weights, using the architecture name without the "RL" suffix
+        self.load_weights(model, fd_config, original_architectures)
         # RL model not need set_state_dict
         return model
diff --git a/fastdeploy/model_executor/model_loader/default_loader_v1.py b/fastdeploy/model_executor/model_loader/default_loader_v1.py
@@ -102,6 +102,7 @@ def load_model(self, fd_config: FDConfig) -> nn.Layer:
     def load_rl_mock_model(self, fd_config: FDConfig) -> nn.Layer:
         """use for rl model load"""
         # (TODO:gaoziyuan) optimze
+        assert fd_config.load_config.load_strategy == "normal", fd_config.load_config.load_strategy
         original_architectures = fd_config.model_config.architectures[0]
 
         import fastdeploy.rl  # noqa
@@ -120,8 +121,7 @@ def load_rl_mock_model(self, fd_config: FDConfig) -> nn.Layer:
 
         model.eval()
 
-        if fd_config.load_config.load_strategy == "normal":
-            # normal strategy need load weight and architectures need without "RL"
-            self.load_weights(model, fd_config, original_architectures)
+        # The "normal" strategy must load weights, using the architecture name without the "RL" suffix
+        self.load_weights(model, fd_config, original_architectures)
         # RL model not need set_state_dict
         return model
diff --git a/fastdeploy/rl/dynamic_weight_manager.py b/fastdeploy/rl/dynamic_weight_manager.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 """
 
+import gc
 import os
 import time
 from multiprocessing.shared_memory import SharedMemory
@@ -107,8 +108,12 @@ def _normal_load_weight(self):
         from fastdeploy.model_executor.model_loader import get_model_loader
 
         model_loader = get_model_loader(load_config=self.fd_config.load_config)
-        state_dict = model_loader.load_rl_mock_model(fd_config=self.fd_config).state_dict()
+        model = model_loader.load_rl_mock_model(fd_config=self.fd_config)
+        state_dict = model.state_dict()
         self._update_model_from_state(state_dict, "raw")
+        del model
+        del state_dict
+        gc.collect()
 
     def _update_ipc_snapshot(self):
         """Update using IPC snapshot strategy for elastic recovery."""