Skip to content

Commit 00d303e

Browse files
committed
disable prefix cache
1 parent cf5ce6d commit 00d303e

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

fastdeploy/config.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1691,7 +1691,9 @@ def postprocess(self):
16911691
self.cache_config.postprocess(self.scheduler_config.max_num_batched_tokens, self.scheduler_config.max_num_seqs)
16921692
if self.model_config is not None and self.model_config.enable_mm and not envs.ENABLE_V1_KVCACHE_SCHEDULER:
16931693
self.cache_config.enable_prefix_caching = False
1694-
1694+
if self.routing_replay_config.enable_routing_replay:
1695+
# TODO(gongshaotian): R3 support prefix caching
1696+
self.cache_config.enable_prefix_caching = False
16951697
if (
16961698
self.structured_outputs_config is not None
16971699
and self.structured_outputs_config.guided_decoding_backend != "off"

0 commit comments

Comments
 (0)