1616 calculate_upload_timeout ,
1717)
1818
# Maps the caller-facing ``processing_mode`` to the LlamaParse ``parse_mode``
# preset handed to the parser. Values must match the preset identifiers
# exactly (no surrounding whitespace), since they are passed through verbatim.
LLAMA_PARSE_MODE_MAP = {
    "basic": "parse_page_with_llm",
    "premium": "parse_page_with_agent",
}
2323
2424
def _document_text(doc) -> str:
    """Return the text of one document-like object from a list result."""
    text = getattr(doc, "text", None)
    if text is not None:
        return text
    page_content = getattr(doc, "page_content", None)
    if page_content is not None:
        return page_content
    return str(doc)


def _extract_content(result) -> str:
    """Pull markdown text out of whatever object LlamaParse.aparse returns.

    Handles three shapes:

    * an object exposing ``get_markdown_documents`` (and optionally
      ``pages``): the first combined markdown document is used, falling back
      to the non-empty per-page ``md`` strings joined by blank lines;
    * a ``list`` of document-like objects: the ``text`` (or ``page_content``,
      or ``str()``) of *every* element, joined by blank lines;
    * anything else: ``str(result)``.

    Returns:
        The extracted markdown. An empty string means "no content was found";
        callers can therefore detect an empty parse with a plain
        truthiness / ``strip()`` check.
    """
    # str(None) would be the non-empty string "None" and slip past any
    # empty-content check downstream.
    if result is None:
        return ""

    if hasattr(result, "get_markdown_documents"):
        markdown_docs = result.get_markdown_documents(split_by_page=False)
        if markdown_docs:
            text = getattr(markdown_docs[0], "text", None)
            if text:
                return text

        pages = getattr(result, "pages", None)
        if pages:
            return "\n\n".join(p.md for p in pages if getattr(p, "md", None))

        # A recognised result object with neither documents nor pages has no
        # content. Returning its repr (via str()) would look like real,
        # non-empty text to the caller, so report emptiness explicitly.
        return ""

    if isinstance(result, list):
        # Join every document, not just the first, so multi-document results
        # are not silently truncated.
        return "\n\n".join(_document_text(doc) for doc in result)

    return str(result)
43+
44+
2545async def parse_with_llamacloud (
2646 file_path : str , estimated_pages : int , processing_mode : str = "basic"
2747) -> str :
2848 from llama_cloud_services import LlamaParse
49+ from llama_cloud_services .parse .base import JobFailedException
2950 from llama_cloud_services .parse .utils import ResultType
3051
3152 file_size_bytes = os .path .getsize (file_path )
@@ -41,12 +62,13 @@ async def parse_with_llamacloud(
4162 pool = 120.0 ,
4263 )
4364
44- tier = LLAMA_TIER_BY_MODE .get (processing_mode , "cost_effective " )
65+ parse_mode = LLAMA_PARSE_MODE_MAP .get (processing_mode , "parse_page_with_llm " )
4566
4667 logging .info (
4768 f"LlamaCloud upload configured: file_size={ file_size_mb :.1f} MB, "
4869 f"pages={ estimated_pages } , upload_timeout={ upload_timeout :.0f} s, "
49- f"job_timeout={ job_timeout :.0f} s, tier={ tier } (mode={ processing_mode } )"
70+ f"job_timeout={ job_timeout :.0f} s, parse_mode={ parse_mode } "
71+ f"(mode={ processing_mode } )"
5072 )
5173
5274 last_exception = None
@@ -61,11 +83,12 @@ async def parse_with_llamacloud(
6183 verbose = True ,
6284 language = "en" ,
6385 result_type = ResultType .MD ,
86+ parse_mode = parse_mode ,
87+ ignore_errors = False ,
6488 max_timeout = int (max (2000 , job_timeout + upload_timeout )),
6589 job_timeout_in_seconds = job_timeout ,
6690 job_timeout_extra_time_per_page_in_seconds = PER_PAGE_JOB_TIMEOUT ,
6791 custom_client = custom_client ,
68- tier = tier ,
6992 )
7093 result = await parser .aparse (file_path )
7194
@@ -75,27 +98,18 @@ async def parse_with_llamacloud(
7598 f"{ len (attempt_errors )} failures"
7699 )
77100
78- if hasattr (result , "get_markdown_documents" ):
79- markdown_docs = result .get_markdown_documents (split_by_page = False )
80- if markdown_docs and hasattr (markdown_docs [0 ], "text" ):
81- return markdown_docs [0 ].text
82- if hasattr (result , "pages" ) and result .pages :
83- return "\n \n " .join (
84- p .md for p in result .pages if hasattr (p , "md" ) and p .md
85- )
86- return str (result )
87-
88- if isinstance (result , list ):
89- if result and hasattr (result [0 ], "text" ):
90- return result [0 ].text
91- return "\n \n " .join (
92- doc .page_content if hasattr (doc , "page_content" ) else str (doc )
93- for doc in result
101+ content = _extract_content (result )
102+ if not content or not content .strip ():
103+ raise RuntimeError (
104+ "LlamaCloud returned empty/whitespace-only content"
94105 )
106+ return content
95107
96- return str (result )
97-
98- except LLAMACLOUD_RETRYABLE_EXCEPTIONS as e :
108+ except (
109+ * LLAMACLOUD_RETRYABLE_EXCEPTIONS ,
110+ RuntimeError ,
111+ JobFailedException ,
112+ ) as e :
99113 last_exception = e
100114 error_type = type (e ).__name__
101115 error_msg = str (e )[:200 ]
0 commit comments