
Commit bb7e1dd

fix: improve robustness of request handling
- Relax the pre-tokenization prompt length check to use a char-based estimate
- Catch exceptions in the detokenization receive loop to avoid crashing it
1 parent: 4fc0835

2 files changed: 11 additions & 1 deletion


lightllm/server/detokenization/manager.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -76,7 +76,10 @@ def handle_loop(self):
                 for _ in range(recv_max_count):
                     recv_obj: GroupReqIndexes = self.zmq_recv_socket.recv_pyobj(zmq.NOBLOCK)
                     assert isinstance(recv_obj, GroupReqIndexes)
-                    self._add_new_group_req_index(recv_obj=recv_obj)
+                    try:
+                        self._add_new_group_req_index(recv_obj=recv_obj)
+                    except Exception as e:
+                        logger.error(f"add new group req index has exception: {str(e)}")

                 # When the queue holds many pending requests, raise the per-iteration receive count
                 recv_max_count = min(int(recv_max_count * 1.3), 256)
```
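
The value of this change is easiest to see in isolation: in a non-blocking drain loop, zmq.Again (raised when the queue is empty) has to end the drain, but an exception raised while *handling* one object should only skip that object. Below is a minimal self-contained sketch of the pattern, not lightllm's actual handle_loop; the `drain_nonblocking` helper and `handle` callback are hypothetical names:

```python
import logging

import zmq

logger = logging.getLogger(__name__)


def drain_nonblocking(recv_socket: zmq.Socket, handle, recv_max_count: int = 128) -> int:
    """Drain up to recv_max_count objects from a non-blocking ZMQ socket.

    zmq.Again (no message pending) ends the drain early; an exception raised
    while handling one object is logged and that object is skipped, so a
    single malformed request can no longer kill the whole receive loop.
    """
    for _ in range(recv_max_count):
        try:
            recv_obj = recv_socket.recv_pyobj(zmq.NOBLOCK)
        except zmq.Again:
            return 128  # queue drained: shrink the batch size back down
        try:
            handle(recv_obj)
        except Exception as e:
            logger.error(f"handling received object has exception: {str(e)}")
    # queue stayed busy: grow the per-iteration receive count, capped at 256
    return min(int(recv_max_count * 1.3), 256)
```

Before this commit, an exception escaping `_add_new_group_req_index` propagated out of the loop and took down the detokenization manager; now it is logged and the next object is processed.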

lightllm/server/httpserver/manager.py

Lines changed: 7 additions & 0 deletions
```diff
@@ -439,6 +439,13 @@ async def _encode(
         self, prompt: Union[str, List[int]], multimodal_params: MultimodalParams, sampling_params: SamplingParams
     ):
         if isinstance(prompt, str):
+            # Lenient pre-check: reject clearly oversized prompts before tokenization
+            max_prompt_chars = self.max_req_total_len * 8
+            if len(prompt) > max_prompt_chars:
+                raise ValueError(
+                    f"prompt text length {len(prompt)} exceeds the character limit {max_prompt_chars}, "
+                    f"the request is rejected before tokenization."
+                )
         if self.enable_multimodal:
             assert (
                 len(multimodal_params.images + multimodal_params.audios) <= self.args.cache_capacity
```
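
Why multiply by 8: common BPE tokenizers produce roughly one token per four characters of English text, and rarely fewer than one token per eight characters on average, so a prompt longer than max_req_total_len * 8 characters almost certainly cannot fit the token budget anyway. The check is deliberately lenient: it should only reject requests that tokenization would reject too, but it does so in O(1) before the tokenizer spends CPU on a pathologically long string. A minimal standalone sketch of the same check; the function name and demo numbers are illustrative, not lightllm API:

```python
CHARS_PER_TOKEN_UPPER_BOUND = 8  # tokenizers rarely average fewer than 1 token per 8 chars


def precheck_prompt_chars(prompt: str, max_req_total_len: int) -> None:
    """Reject obviously oversized prompts before paying the tokenization cost."""
    max_prompt_chars = max_req_total_len * CHARS_PER_TOKEN_UPPER_BOUND
    if len(prompt) > max_prompt_chars:
        raise ValueError(
            f"prompt text length {len(prompt)} exceeds the character limit "
            f"{max_prompt_chars}, the request is rejected before tokenization."
        )


precheck_prompt_chars("What is the capital of France?", max_req_total_len=4096)  # passes
try:
    precheck_prompt_chars("x" * (4096 * 8 + 1), max_req_total_len=4096)
except ValueError as e:
    print(e)  # rejected without ever running the tokenizer
```

Because the bound is so loose, genuine prompts near the token limit sit far below the character ceiling, while a multi-megabyte garbage prompt is bounced immediately instead of occupying the tokenizer.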
