using System;
using System.Globalization;
using System.IO;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Microsoft.KernelMemory.Diagnostics;
10+
/// <summary>
/// Minimal client for the LlamaCloud parsing upload endpoint
/// (<c>POST {BaseUrl}/api/v1/parsing/upload</c>).
/// See https://docs.cloud.llamaindex.ai/API/upload-file-api-v-1-parsing-upload-post
/// </summary>
public class LLamaCloudParserClient
{
    private const string UploadPath = "/api/v1/parsing/upload";

    private readonly HttpClient _httpClient;
    private readonly ILogger<LLamaCloudParserClient> _log;
    private readonly string _apiKey;
    private readonly string _baseUrl;

    /// <summary>
    /// Creates a client bound to the given configuration and HTTP client.
    /// </summary>
    /// <param name="config">Supplies the API key and the service base URL.</param>
    /// <param name="httpClient">HTTP client used to send requests; it is not disposed by this class.</param>
    /// <param name="log">Optional logger; a default logger is used when omitted.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="config"/> or <paramref name="httpClient"/> is null.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The API key or the base URL in <paramref name="config"/> is null or empty.
    /// </exception>
    public LLamaCloudParserClient(
        CloudParserConfiguration config,
        HttpClient httpClient,
        ILogger<LLamaCloudParserClient>? log = null)
    {
        if (config == null)
        {
            throw new ArgumentNullException(nameof(config));
        }

        if (string.IsNullOrEmpty(config.ApiKey))
        {
            throw new ArgumentException("ApiKey is required", nameof(config));
        }

        // Fail here rather than with a NullReferenceException on the first upload.
        if (string.IsNullOrEmpty(config.BaseUrl))
        {
            throw new ArgumentException("BaseUrl is required", nameof(config));
        }

        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _log = log ?? DefaultLogger<LLamaCloudParserClient>.Instance;
        _apiKey = config.ApiKey;
        // Normalize once so the request URI never contains a double slash.
        _baseUrl = config.BaseUrl.TrimEnd('/');
    }

    /// <summary>
    /// Uploads a file as <c>multipart/form-data</c> and returns the deserialized service response.
    /// </summary>
    /// <param name="fileContent">Content of the file to upload, sent as the <c>file</c> form part.</param>
    /// <param name="fileName">File name reported in the <c>file</c> form part.</param>
    /// <param name="parameters">
    /// Optional settings; every non-null property is sent as an additional form field.
    /// </param>
    /// <param name="cancellationToken">Token used to cancel the HTTP request.</param>
    /// <returns>The response body deserialized as <see cref="UploadResponse"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="fileContent"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="fileName"/> is null or blank.</exception>
    /// <exception cref="HttpRequestException">The service returned a non-success status code.</exception>
    /// <exception cref="InvalidOperationException">The response body deserialized to null.</exception>
    /// <remarks>
    /// The request owns its content, so <paramref name="fileContent"/> is disposed together
    /// with the request when this method completes.
    /// </remarks>
    public async Task<UploadResponse> UploadAsync(
        Stream fileContent,
        string fileName,
        UploadParameters? parameters = null,
        CancellationToken cancellationToken = default)
    {
        if (fileContent == null)
        {
            throw new ArgumentNullException(nameof(fileContent));
        }

        if (string.IsNullOrWhiteSpace(fileName))
        {
            throw new ArgumentException("File name is required", nameof(fileName));
        }

        var requestUri = _baseUrl + UploadPath;
        using var request = new HttpRequestMessage(HttpMethod.Post, requestUri);

        request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
        request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _apiKey);

        // Attach the content immediately: disposing the request then also disposes the
        // form parts, even if building the form below throws.
        var multipartContent = new MultipartFormDataContent();
        request.Content = multipartContent;
        multipartContent.Add(new StreamContent(fileContent), "file", fileName);

        if (parameters != null)
        {
            AddParameters(multipartContent, parameters);
        }

        _log.LogDebug("Uploading file '{FileName}' to {RequestUri}", fileName, requestUri);

        using var response = await _httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false);
        response.EnsureSuccessStatusCode();

        var jsonResponse = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        return JsonSerializer.Deserialize<UploadResponse>(jsonResponse)
               ?? throw new InvalidOperationException("Failed to parse response");
    }

    /// <summary>
    /// Adds one form field per non-null public property of <paramref name="parameters"/>.
    /// </summary>
    private static void AddParameters(MultipartFormDataContent form, UploadParameters parameters)
    {
        // NOTE(review): fields are named after the C# property (PascalCase). Confirm the
        // service accepts these names; the linked API page is the authority on field names.
        foreach (var property in typeof(UploadParameters).GetProperties())
        {
            var text = FormatValue(property.GetValue(parameters));
            if (text != null)
            {
                form.Add(new StringContent(text), property.Name);
            }
        }
    }

    /// <summary>
    /// Converts a property value to its form-field text, or null when there is nothing to send.
    /// </summary>
    private static string? FormatValue(object? value) => value switch
    {
        null => null,
        // Literal lowercase text, independent of the current culture.
        bool flag => flag ? "true" : "false",
        string text => text,
        string[] items => string.Join(",", items),
        // Numbers must not pick up a culture-specific decimal separator (e.g. "0,5").
        IFormattable formattable => formattable.ToString(null, CultureInfo.InvariantCulture),
        _ => value.ToString(),
    };
}
85+
/// <summary>
/// Connection settings read by <see cref="LLamaCloudParserClient"/>.
/// Both values can only be assigned from inside this assembly.
/// </summary>
public class CloudParserConfiguration
{
    /// <summary>
    /// API key; the client sends it as the <c>Bearer</c> authorization value
    /// and rejects a null or empty key.
    /// </summary>
    public string? ApiKey { get; internal set; }

    /// <summary>
    /// Root URL of the service; the client appends <c>/api/v1/parsing/upload</c> to it.
    /// </summary>
    public string? BaseUrl { get; internal set; }
}
91+
/// <summary>
/// Optional settings for an upload. <see cref="LLamaCloudParserClient.UploadAsync"/>
/// enumerates the public properties of this class by reflection and sends every
/// non-null value as a form field named after the property.
/// </summary>
/// <remarks>
/// Non-nullable <c>bool</c> properties are never null, so they are always sent,
/// including when they are false.
/// </remarks>
public class UploadParameters
{
    public string? ProjectId { get; set; }
    public string? OrganizationId { get; set; }
    public bool AnnotateLinks { get; set; }
    public bool AutoMode { get; set; }
    public bool AutoModeTriggerOnImageInPage { get; set; }
    public bool AutoModeTriggerOnTableInPage { get; set; }
    public string? AutoModeTriggerOnTextInPage { get; set; }
    public string? AutoModeTriggerOnRegexpInPage { get; set; }
    public string? AzureOpenAiApiVersion { get; set; }
    public string? AzureOpenAiDeploymentName { get; set; }
    public string? AzureOpenAiEndpoint { get; set; }
    public string? AzureOpenAiKey { get; set; }
    public float? BboxBottom { get; set; }
    public float? BboxLeft { get; set; }
    public float? BboxRight { get; set; }
    public float? BboxTop { get; set; }
    public bool ContinuousMode { get; set; }
    public bool DisableOcr { get; set; }
    public bool DisableReconstruction { get; set; }
    public bool DisableImageExtraction { get; set; }
    public bool DoNotCache { get; set; }
    public bool DoNotUnrollColumns { get; set; }
    public bool ExtractCharts { get; set; }
    public bool FastMode { get; set; }
    public bool GuessXlsxSheetName { get; set; }
    public bool HtmlMakeAllElementsVisible { get; set; }
    public bool HtmlRemoveFixedElements { get; set; }
    public bool HtmlRemoveNavigationElements { get; set; }
    public string? HttpProxy { get; set; }
    public string? InputS3Path { get; set; }
    public string? InputUrl { get; set; }
    public bool InvalidateCache { get; set; }

    /// <summary>Defaults to <c>true</c>.</summary>
    public bool IsFormattingInstruction { get; set; } = true;

    /// <summary>
    /// Defaults to a single entry, <c>"en"</c>. The client joins the entries with commas.
    /// </summary>
    public string[]? Language { get; set; } = new string[] { "en" };

    public bool ExtractLayout { get; set; }

    /// <summary>Untyped; the client sends the value's string representation.</summary>
    public object? MaxPages { get; set; }

    public bool OutputPdfOfDocument { get; set; }
    public string? OutputS3PathPrefix { get; set; }
    public string? PagePrefix { get; set; }
    public string? PageSeparator { get; set; }
    public string? PageSuffix { get; set; }
    public string? ParsingInstruction { get; set; }
    public bool PremiumMode { get; set; }
    public bool SkipDiagonalText { get; set; }
    public bool StructuredOutput { get; set; }
    public string? StructuredOutputJsonSchema { get; set; }
    public string? StructuredOutputJsonSchemaName { get; set; }
    public bool TakeScreenshot { get; set; }
    public string? TargetPages { get; set; }
    public bool UseVendorMultimodalModel { get; set; }
    public string? VendorMultimodalApiKey { get; set; }
    public string? VendorMultimodalModelName { get; set; }
    public string? WebhookUrl { get; set; }
    public string? BoundingBox { get; set; }
    public bool Gpt4OMode { get; set; }
    public string? Gpt4OApiKey { get; set; }
}
151+
/// <summary>
/// JSON payload returned by the upload call, as deserialized by
/// <see cref="LLamaCloudParserClient.UploadAsync"/>.
/// </summary>
public class UploadResponse
{
    /// <summary>Value of the <c>id</c> JSON property.</summary>
    [JsonPropertyName("id")]
    public Guid Id { get; set; }

    /// <summary>
    /// Value of the <c>status</c> JSON property, read from its string form.
    /// </summary>
    [JsonPropertyName("status"), JsonConverter(typeof(JsonStringEnumConverter))]
    public UploadStatus Status { get; set; }

    /// <summary>Value of the <c>error_code</c> JSON property; left untyped.</summary>
    [JsonPropertyName("error_code")]
    public object? ErrorCode { get; set; }

    /// <summary>Value of the <c>error_message</c> JSON property; left untyped.</summary>
    [JsonPropertyName("error_message")]
    public object? ErrorMessage { get; set; }
}
167+
/// <summary>
/// Status values carried by <see cref="UploadResponse.Status"/>.
/// The member names are what the string enum converter on that property matches
/// against the JSON text, so they must not be renamed.
/// </summary>
public enum UploadStatus
{
    PENDING = 0,
    SUCCESS = 1,
    ERROR = 2,
    PARTIAL_SUCCESS = 3,
}
0 commit comments