Commit f95e4fa (parent: fe01d73)

[fix] Bug where the cached token content was incorrectly truncated

1 file changed: 1 addition, 1 deletion

projects/llm_framework/main_llm/src/runner/LLM.hpp

```diff
@@ -580,7 +580,7 @@ class LLM
         if (_attr.runing_callback)
         {
             cached_token.push_back(max_index);
-            if (cached_token.size() >= 3)
+            if (cached_token.size() >= 5)
             {
                 float t_cost_ms = t_cost.cost();
                 float token_per_sec = token_ids.size() / (t_cost_ms / 1000);
```
