Skip to content

Commit dbedb07

Browse files
ST-XX and Copilot authored
[BugFix] reschedule_preempt_task append waiting & PREEMPTED blocksize (#5506)
* bugfix reschedule_preempt_task append waiting & PREEMPTED blocksize
* bugfix reschedule_preempt_task append waiting & PREEMPTED blocksize
* 注释 (comments)
* [bugfix] PREEMPTED task blocksize
* Apply suggestion from @Copilot

Co-authored-by: Copilot <[email protected]>

---------

Co-authored-by: Copilot <[email protected]>
1 parent a389bb7 commit dbedb07

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

fastdeploy/engine/sched/resource_manager_v1.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,7 @@ def _trigger_preempt(self, request, num_new_blocks, preempted_reqs, scheduled_re
261261
self.running.insert(0, preempted_req)
262262
continue
263263
preempted_req.status = RequestStatus.PREEMPTED
264+
preempted_req.last_preempted_blocksize = len(preempted_req.block_tables)
264265
preempted_req.num_computed_tokens = 0
265266
if self.config.scheduler_config.splitwise_role == "decode":
266267
self.tasks_list[preempted_req.idx] = None
@@ -735,6 +736,14 @@ def _allocate_decode_and_extend():
735736
break
736737
num_new_tokens = self._get_num_new_tokens(request, token_budget)
737738
num_new_block = self.get_new_block_nums(request, num_new_tokens)
739+
# If num_new_block is less than the last preempted block size, use the last preempted block size instead.
740+
# For normal requests, when allocating blocks, we reserve two extra blocks for decoding.
741+
# In the request rescheduling scenario, we currently only consider the number of tokens already generated,
742+
# which might lead to allocating fewer blocks than the previous allocation, causing repeated rescheduling.
743+
# This adjustment ensures we at least allocate as many blocks as before to avoid this issue.
744+
last_preempted_blocksize = getattr(request, "last_preempted_blocksize", 0)
745+
if num_new_block < last_preempted_blocksize:
746+
num_new_block = last_preempted_blocksize
738747
# Allocate blocks to prefill
739748
if self.cache_manager.can_allocate_gpu_blocks(num_new_block):
740749
if not request.get("skip_allocate", False):

0 commit comments

Comments
 (0)