# Patch summary (text before the first "diff --git" line is ignored by patch tools).
#
# Per the hunk header, this change is inside process_weights_after_loading().
# Before: cache_k_out_scale / cache_v_out_scale were set to the reciprocal of
# cache_k_scale / cache_v_scale whenever the corresponding scale reported
# _is_initialized().
# After: the same two assignments run only when "block_wise" is not contained
# in layer.cache_quant_type_str; otherwise the out-scales are left untouched.
#
# NOTE(review): presumably block-wise KV-cache quantization does not use these
# per-layer reciprocal scales - confirm against the block-wise code path.
# NOTE(review): the patch assumes layer.cache_quant_type_str always exists and
# supports the "in" operator - verify against the layer's constructor.
# NOTE(review): line breaks and leading whitespace were lost in the collapsed
# original; the indentation below (4-space method, 8-space body) is a
# reconstruction - confirm against the target file before applying.
diff --git a/fastdeploy/model_executor/layers/quantization/kv_cache.py b/fastdeploy/model_executor/layers/quantization/kv_cache.py
index cd461fde799..2ccc06f0e45 100644
--- a/fastdeploy/model_executor/layers/quantization/kv_cache.py
+++ b/fastdeploy/model_executor/layers/quantization/kv_cache.py
@@ -263,10 +263,11 @@ def process_weights_after_loading(self, layer: nn.Layer):
         """
         use for loader v1
         """
-        if layer.cache_k_scale._is_initialized():
-            layer.cache_k_out_scale.set_value(1 / layer.cache_k_scale)
-        if layer.cache_v_scale._is_initialized():
-            layer.cache_v_out_scale.set_value(1 / layer.cache_v_scale)
+        if "block_wise" not in layer.cache_quant_type_str:
+            if layer.cache_k_scale._is_initialized():
+                layer.cache_k_out_scale.set_value(1 / layer.cache_k_scale)
+            if layer.cache_v_scale._is_initialized():
+                layer.cache_v_out_scale.set_value(1 / layer.cache_v_scale)
 
     def apply(self, layer):
         """