diff --git a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu index 709911d2ba0..8d963eb0c36 100644 --- a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu +++ b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu @@ -42,7 +42,7 @@ __global__ void speculate_limit_thinking_content_length_kernel_v2( if (max_think_len < 0) return; int current_limit_think_status = limit_think_status[bid]; // 如果在回复阶段, 且已经触发停止标志, 则直接返回, 无需多余执行. - if (current_limit_think_status == 3 && stop_flags[bid]) { + if (current_limit_think_status == 3 || stop_flags[bid]) { return; }