@@ -257,8 +257,8 @@ chats\_to\_tokens
257257
258258``` python
259259chats_to_tokens(
260- chat: Chat | None ,
261- tokenizer: AutoTokenizer ,
260+ chat: Chat,
261+ tokenizer: PreTrainedTokenizerBase ,
262262 * ,
263263 apply_chat_template_kwargs: dict[str , Any]
264264 | None = None ,
@@ -272,10 +272,10 @@ Transform a chat into a tokenized format with structured slices.
272272**Parameters:**
273273
274274* **`chat`**
275- (`Chat | None`)
275+ (`Chat`)
276276 – The chat object to tokenize.
277277* **`tokenizer`**
278- (`AutoTokenizer`)
278+ (`PreTrainedTokenizerBase`)
279279 – The tokenizer to use for encoding and decoding.
280280
281281**Returns:**
@@ -286,8 +286,8 @@ Transform a chat into a tokenized format with structured slices.
286286<Accordion title="Source code in rigging/data.py" icon="code">
287287``` python
288288async def chats_to_tokens (
289- chat : Chat | None ,
290- tokenizer : AutoTokenizer ,
289+ chat : Chat,
290+ tokenizer : " PreTrainedTokenizerBase " ,
291291 * ,
292292 apply_chat_template_kwargs : dict[str , t.Any] | None = None ,
293293 encode_kwargs : dict[str , t.Any] | None = None ,
@@ -323,8 +323,9 @@ async def chats_to_tokens(
323323 if chat.params and chat.params.tools
324324 else None
325325 )
326+ # the tools above return dict[str, Any], but Transformers expects list[dict[Any, Any]]
326327
327- chat_text = tokenizer.apply_chat_template(messages, tools = tools, ** apply_chat_template_kwargs)
328+ chat_text = tokenizer.apply_chat_template(messages, tools = tools, ** apply_chat_template_kwargs) # type: ignore [ arg - type ]
328329 chat_tokens = tokenizer.encode(chat_text, ** encode_kwargs)
329330
330331 slices: list[TokenSlice] = []
@@ -334,7 +335,13 @@ async def chats_to_tokens(
334335 for message in chat.all:
335336 # Find this message
336337 if not (
337- match := find_in_tokens(message.content, chat_tokens, tokenizer.decode, 0 , search_start)
338+ match := find_in_tokens(
339+ message.content,
340+ chat_tokens,
341+ lambda tokens : tokenizer.decode(tokens),
342+ 0 ,
343+ search_start,
344+ )
338345 ):
339346 warnings.warn(
340347 f " Warning: Could not find message ' { message.content[:50 ]} ...' in chat tokens " ,
@@ -370,7 +377,7 @@ async def chats_to_tokens(
370377 part_match = find_in_tokens(
371378 part_text,
372379 message_tokens,
373- tokenizer.decode,
380+ lambda tokens : tokenizer.decode(tokens) ,
374381 msg_start,
375382 part_search_start,
376383 )
@@ -399,8 +406,9 @@ async def chats_to_tokens(
399406 # Continue searching after this message
400407 search_start = msg_end
401408
409+ # apply_chat_template_kwargs requests tokenize=False by default, so chat_text is a string here
402410 return TokenizedChat(
403- text = chat_text,
411+ text = chat_text, # type: ignore [ arg - type ]
404412 tokens = chat_tokens,
405413 slices = slices,
406414 obj = chat,
0 commit comments