
Commit bb7e1dd

fix: improve robustness of request handling
- Relax the pre-tokenization prompt length check to use a char-based estimate
- Catch exceptions in the detokenization receive loop to avoid crashing it
1 parent: 4fc0835

2 files changed: 11 additions & 1 deletion


lightllm/server/detokenization/manager.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -76,7 +76,10 @@ def handle_loop(self):
                 for _ in range(recv_max_count):
                     recv_obj: GroupReqIndexes = self.zmq_recv_socket.recv_pyobj(zmq.NOBLOCK)
                     assert isinstance(recv_obj, GroupReqIndexes)
-                    self._add_new_group_req_index(recv_obj=recv_obj)
+                    try:
+                        self._add_new_group_req_index(recv_obj=recv_obj)
+                    except Exception as e:
+                        logger.error(f"add new group req index has exception: {str(e)}")

                 # When the queue holds many pending requests, raise the per-iteration receive count
                 recv_max_count = min(int(recv_max_count * 1.3), 256)
```
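
The value of this change is easiest to see in isolation: in a non-blocking drain loop, zmq.Again (raised when the queue is empty) has to end the drain, but an exception raised while *handling* one object should only skip that object. Below is a minimal self-contained sketch of the pattern, not lightllm's actual handle_loop; the `drain_nonblocking` helper and `handle` callback are hypothetical names:

```python
import logging

import zmq

logger = logging.getLogger(__name__)


def drain_nonblocking(recv_socket: zmq.Socket, handle, recv_max_count: int = 128) -> int:
    """Drain up to recv_max_count objects from a non-blocking ZMQ socket.

    zmq.Again (no message pending) ends the drain early; an exception raised
    while handling one object is logged and that object is skipped, so a
    single malformed request can no longer kill the whole receive loop.
    """
    for _ in range(recv_max_count):
        try:
            recv_obj = recv_socket.recv_pyobj(zmq.NOBLOCK)
        except zmq.Again:
            return 128  # queue drained: shrink the batch size back down
        try:
            handle(recv_obj)
        except Exception as e:
            logger.error(f"handling received object has exception: {str(e)}")
    # queue stayed busy: grow the per-iteration receive count, capped at 256
    return min(int(recv_max_count * 1.3), 256)
```

Before this commit, an exception escaping `_add_new_group_req_index` propagated out of the loop and took down the detokenization manager; now it is logged and the next object is processed.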

lightllm/server/httpserver/manager.py

Lines changed: 7 additions & 0 deletions
```diff
@@ -439,6 +439,13 @@ async def _encode(
         self, prompt: Union[str, List[int]], multimodal_params: MultimodalParams, sampling_params: SamplingParams
     ):
         if isinstance(prompt, str):
+            # Lenient pre-check: reject clearly oversized prompts before tokenization
+            max_prompt_chars = self.max_req_total_len * 8
+            if len(prompt) > max_prompt_chars:
+                raise ValueError(
+                    f"prompt text length {len(prompt)} exceeds the character limit {max_prompt_chars}, "
+                    f"the request is rejected before tokenization."
+                )
         if self.enable_multimodal:
             assert (
                 len(multimodal_params.images + multimodal_params.audios) <= self.args.cache_capacity
```
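
Why multiply by 8: common BPE tokenizers produce roughly one token per four characters of English text, and rarely fewer than one token per eight characters on average, so a prompt longer than max_req_total_len * 8 characters almost certainly cannot fit the token budget anyway. The check is deliberately lenient: it should only reject requests that tokenization would reject too, but it does so in O(1) before the tokenizer spends CPU on a pathologically long string. A minimal standalone sketch of the same check; the function name and demo numbers are illustrative, not lightllm API:

```python
CHARS_PER_TOKEN_UPPER_BOUND = 8  # tokenizers rarely average fewer than 1 token per 8 chars


def precheck_prompt_chars(prompt: str, max_req_total_len: int) -> None:
    """Reject obviously oversized prompts before paying the tokenization cost."""
    max_prompt_chars = max_req_total_len * CHARS_PER_TOKEN_UPPER_BOUND
    if len(prompt) > max_prompt_chars:
        raise ValueError(
            f"prompt text length {len(prompt)} exceeds the character limit "
            f"{max_prompt_chars}, the request is rejected before tokenization."
        )


precheck_prompt_chars("What is the capital of France?", max_req_total_len=4096)  # passes
try:
    precheck_prompt_chars("x" * (4096 * 8 + 1), max_req_total_len=4096)
except ValueError as e:
    print(e)  # rejected without ever running the tokenizer
```

Because the bound is so loose, genuine prompts near the token limit sit far below the character ceiling, while a multi-megabyte garbage prompt is bounced immediately instead of occupying the tokenizer.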
