From fc69c25be57ca55bae586cf32d65f121ae1079ec Mon Sep 17 00:00:00 2001
From: rainyfly <1435317881@qq.com>
Date: Thu, 11 Dec 2025 14:21:52 +0800
Subject: [PATCH] Fix bug for caching output when preempted

---
 fastdeploy/output/token_processor.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/output/token_processor.py b/fastdeploy/output/token_processor.py
index 6286451d9eb..b1b2838172c 100644
--- a/fastdeploy/output/token_processor.py
+++ b/fastdeploy/output/token_processor.py
@@ -735,7 +735,9 @@ def _process_batch_output(self):
                     and self.cfg.cache_config.enable_prefix_caching
                     and self.cfg.cache_config.enable_output_caching
                 ):
-                    if (task.num_total_tokens - 1) % self.cfg.cache_config.block_size == 0:
+                    if (task.num_total_tokens - 1) % self.cfg.cache_config.block_size == 0 and (
+                        task_id not in self.resource_manager.to_be_rescheduled_request_id_set
+                    ):
                         self.resource_manager.cache_output_tokens(
                             task
                         )  # when enable prefix caching, cache kv cache for output tokens