We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent cf5ce6d commit 00d303e (Copy full SHA for 00d303e)
fastdeploy/config.py
@@ -1691,7 +1691,9 @@ def postprocess(self):
1691
self.cache_config.postprocess(self.scheduler_config.max_num_batched_tokens, self.scheduler_config.max_num_seqs)
1692
if self.model_config is not None and self.model_config.enable_mm and not envs.ENABLE_V1_KVCACHE_SCHEDULER:
1693
self.cache_config.enable_prefix_caching = False
1694
-
+ if self.routing_replay_config.enable_routing_replay:
1695
+ # TODO(gongshaotian): R3 support prefix caching
1696
+ self.cache_config.enable_prefix_caching = False
1697
if (
1698
self.structured_outputs_config is not None
1699
and self.structured_outputs_config.guided_decoding_backend != "off"
0 commit comments