Skip to content

Commit 4b3d09c

Browse files
committed
pdf: tolerate missing renderer when using pdfpig
1 parent edfd1cb commit 4b3d09c

1 file changed

Lines changed: 33 additions & 2 deletions

File tree

src/MarkItDown/Converters/PdfConverter.cs

Lines changed: 33 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -405,7 +405,22 @@ private PdfExtractionResult BuildExtractionFromExtractedText(IReadOnlyList<PdfPa
405405
private async Task<PdfExtractionResult> BuildExtractionFromPdfPigAsync(byte[] pdfBytes, StreamInfo streamInfo, CancellationToken cancellationToken)
406406
{
407407
var pages = await textExtractor.ExtractTextAsync(pdfBytes, cancellationToken).ConfigureAwait(false);
408-
var pageImages = await imageRenderer.RenderImagesAsync(pdfBytes, cancellationToken).ConfigureAwait(false);
408+
409+
IReadOnlyList<string> pageImages;
410+
411+
try
412+
{
413+
pageImages = await imageRenderer.RenderImagesAsync(pdfBytes, cancellationToken).ConfigureAwait(false);
414+
}
415+
catch (OperationCanceledException)
416+
{
417+
throw;
418+
}
419+
catch
420+
{
421+
pageImages = Array.Empty<string>();
422+
}
423+
409424
return BuildExtractionFromExtractedText(pages, pageImages, streamInfo);
410425
}
411426

@@ -434,7 +449,23 @@ private async Task AppendMissingPageSnapshotsAsync(
434449
return;
435450
}
436451

437-
var renderedPages = await imageRenderer.RenderImagesAsync(pdfBytes, cancellationToken).ConfigureAwait(false);
452+
IReadOnlyList<string> renderedPages;
453+
454+
try
455+
{
456+
renderedPages = await imageRenderer.RenderImagesAsync(pdfBytes, cancellationToken).ConfigureAwait(false);
457+
}
458+
catch (OperationCanceledException)
459+
{
460+
throw;
461+
}
462+
catch
463+
{
464+
// Rendering support is optional for document intelligence; ignore failures
465+
// so that conversions can still succeed when the renderer is unavailable.
466+
return;
467+
}
468+
438469
if (renderedPages.Count == 0)
439470
{
440471
return;

0 commit comments

Comments
 (0)