Skip to content

Commit 4bd64a7

Browse files
committed
packages
1 parent e9d3e8e commit 4bd64a7

7 files changed

Lines changed: 221 additions & 38 deletions

File tree

Directory.Build.props

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
<Nullable>enable</Nullable>
77
<EnableNETAnalyzers>true</EnableNETAnalyzers>
88
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
9+
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
910
</PropertyGroup>
1011

1112
<PropertyGroup>
@@ -30,7 +31,7 @@
3031
</PropertyGroup>
3132

3233
<ItemGroup>
33-
<PackageReference Update="DotNet.ReproducibleBuilds" Version="1.2.25">
34+
<PackageReference Update="DotNet.ReproducibleBuilds">
3435
<PrivateAssets>all</PrivateAssets>
3536
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
3637
</PackageReference>

Directory.Packages.props

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<Project>
2+
<ItemGroup>
3+
<PackageVersion Include="AngleSharp" Version="1.3.0" />
4+
<PackageVersion Include="AWSSDK.Rekognition" Version="4.0.2.6" />
5+
<PackageVersion Include="AWSSDK.S3" Version="4.0.7.7" />
6+
<PackageVersion Include="AWSSDK.Textract" Version="4.0.2.6" />
7+
<PackageVersion Include="AWSSDK.TranscribeService" Version="4.0.3.9" />
8+
<PackageVersion Include="Azure.AI.FormRecognizer" Version="4.1.0" />
9+
<PackageVersion Include="Azure.AI.OpenAI" Version="2.1.0" />
10+
<PackageVersion Include="Azure.AI.Vision.ImageAnalysis" Version="1.0.0" />
11+
<PackageVersion Include="Azure.Identity" Version="1.12.0" />
12+
<PackageVersion Include="coverlet.collector" Version="6.0.4" />
13+
<PackageVersion Include="DocumentFormat.OpenXml" Version="3.3.0" />
14+
<PackageVersion Include="DotNet.ReproducibleBuilds" Version="1.2.25" />
15+
<PackageVersion Include="Google.Cloud.DocumentAI.V1" Version="3.21.0" />
16+
<PackageVersion Include="Google.Cloud.Speech.V1" Version="3.8.0" />
17+
<PackageVersion Include="Google.Cloud.Vision.V1" Version="3.7.0" />
18+
<PackageVersion Include="ManagedCode.MimeTypes" Version="1.0.4" />
19+
<PackageVersion Include="Microsoft.Extensions.AI" Version="9.9.1" />
20+
<PackageVersion Include="Microsoft.Extensions.AI.OpenAI" Version="9.9.1-preview.1.25474.6" />
21+
<PackageVersion Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="9.0.9" />
22+
<PackageVersion Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.9" />
23+
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.14.1" />
24+
<PackageVersion Include="MimeKit" Version="4.14.0" />
25+
<PackageVersion Include="Moq" Version="4.20.72" />
26+
<PackageVersion Include="PdfPig" Version="0.1.11" />
27+
<PackageVersion Include="PDFtoImage" Version="5.1.1" />
28+
<PackageVersion Include="Sep" Version="0.11.1" />
29+
<PackageVersion Include="Shouldly" Version="4.3.0" />
30+
<PackageVersion Include="SkiaSharp" Version="3.119.1" />
31+
<PackageVersion Include="Spectre.Console" Version="0.51.1" />
32+
<PackageVersion Include="System.Text.Encoding.CodePages" Version="9.0.9" />
33+
<PackageVersion Include="System.Text.Json" Version="9.0.9" />
34+
<PackageVersion Include="YoutubeExplode" Version="6.5.5" />
35+
<PackageVersion Include="xunit" Version="2.9.3" />
36+
<PackageVersion Include="xunit.runner.visualstudio" Version="3.1.4" />
37+
</ItemGroup>
38+
</Project>

src/MarkItDown.Cli/MarkItDown.Cli.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
</PropertyGroup>
1616

1717
<ItemGroup>
18-
<PackageReference Include="Spectre.Console" Version="0.51.1" />
18+
<PackageReference Include="Spectre.Console" />
1919
</ItemGroup>
2020

2121
<ItemGroup>

src/MarkItDown/MarkItDown.csproj

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,39 +8,38 @@
88
</PropertyGroup>
99

1010
<ItemGroup>
11-
<PackageReference Include="AngleSharp" Version="1.3.0" />
12-
<PackageReference Include="AWSSDK.Rekognition" Version="4.0.2.6" />
13-
<PackageReference Include="AWSSDK.S3" Version="4.0.7.7" />
14-
<PackageReference Include="AWSSDK.Textract" Version="4.0.2.6" />
15-
<PackageReference Include="AWSSDK.TranscribeService" Version="4.0.3.9" />
16-
<PackageReference Include="Google.Cloud.DocumentAI.V1" Version="3.21.0" />
17-
<PackageReference Include="Google.Cloud.Speech.V1" Version="3.8.0" />
18-
<PackageReference Include="Google.Cloud.Vision.V1" Version="3.7.0" />
19-
<PackageReference Include="ManagedCode.MimeTypes" Version="1.0.4" />
20-
<PackageReference Include="Sep" Version="0.11.1" />
21-
<PackageReference Include="Microsoft.Extensions.AI" Version="9.9.1" />
22-
<PackageReference Include="Azure.AI.FormRecognizer" Version="4.1.0" />
23-
<PackageReference Include="Azure.AI.Vision.ImageAnalysis" Version="1.0.0" />
24-
<PackageReference Include="Azure.Identity" Version="1.12.0" />
25-
26-
<PackageReference Include="System.Text.Json" Version="9.0.9" />
27-
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.9" />
28-
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="9.0.9" />
29-
<PackageReference Include="System.Text.Encoding.CodePages" Version="9.0.9" />
11+
<PackageReference Include="AngleSharp" />
12+
<PackageReference Include="AWSSDK.Rekognition" />
13+
<PackageReference Include="AWSSDK.S3" />
14+
<PackageReference Include="AWSSDK.Textract" />
15+
<PackageReference Include="AWSSDK.TranscribeService" />
16+
<PackageReference Include="Google.Cloud.DocumentAI.V1" />
17+
<PackageReference Include="Google.Cloud.Speech.V1" />
18+
<PackageReference Include="Google.Cloud.Vision.V1" />
19+
<PackageReference Include="ManagedCode.MimeTypes" />
20+
<PackageReference Include="Sep" />
21+
<PackageReference Include="Microsoft.Extensions.AI" />
22+
<PackageReference Include="Azure.AI.FormRecognizer" />
23+
<PackageReference Include="Azure.AI.Vision.ImageAnalysis" />
24+
<PackageReference Include="Azure.Identity" />
25+
<PackageReference Include="System.Text.Json" />
26+
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
27+
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" />
28+
<PackageReference Include="System.Text.Encoding.CodePages" />
3029

3130
<!-- PDF Support -->
32-
<PackageReference Include="PdfPig" Version="0.1.11" />
33-
<PackageReference Include="PDFtoImage" Version="5.1.1" />
31+
<PackageReference Include="PdfPig" />
32+
<PackageReference Include="PDFtoImage" />
3433

3534
<!-- Office Documents Support -->
36-
<PackageReference Include="DocumentFormat.OpenXml" Version="3.3.0" />
35+
<PackageReference Include="DocumentFormat.OpenXml" />
3736

3837
<!-- Image processing support -->
39-
<PackageReference Include="SkiaSharp" Version="3.119.1" />
38+
<PackageReference Include="SkiaSharp" />
4039

4140
<!-- Email support -->
42-
<PackageReference Include="MimeKit" Version="4.14.0" />
43-
<PackageReference Include="YoutubeExplode" Version="6.5.5" />
41+
<PackageReference Include="MimeKit" />
42+
<PackageReference Include="YoutubeExplode" />
4443
</ItemGroup>
4544

4645
<ItemGroup>

tests/MarkItDown.Cli.Tests/MarkItDown.Cli.Tests.csproj

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88
</PropertyGroup>
99

1010
<ItemGroup>
11-
<PackageReference Include="coverlet.collector" Version="6.0.2" />
12-
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.12.0" />
13-
<PackageReference Include="Shouldly" Version="4.3.0" />
14-
<PackageReference Include="xunit" Version="2.9.2" />
15-
<PackageReference Include="xunit.runner.visualstudio" Version="2.8.2" />
11+
<PackageReference Include="coverlet.collector" />
12+
<PackageReference Include="Microsoft.NET.Test.Sdk" />
13+
<PackageReference Include="Shouldly" />
14+
<PackageReference Include="xunit" />
15+
<PackageReference Include="xunit.runner.visualstudio" />
1616
</ItemGroup>
1717

1818
<ItemGroup>
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
#pragma warning disable MEAI001
2+
using System.Linq;
3+
using System;
4+
using System.Collections.Generic;
5+
using System.Threading;
6+
using System.Threading.Tasks;
7+
using MarkItDown;
8+
using MarkItDown.Intelligence;
9+
using MarkItDown.Conversion.Middleware;
10+
11+
using MarkItDown.Tests.Fixtures;
12+
using Microsoft.Extensions.AI;
13+
using Shouldly;
14+
15+
namespace MarkItDown.Tests.Conversion;
16+
17+
/// <summary>
/// Tests that AI model clients supplied through <c>StaticAiModelProvider</c> are the ones
/// actually invoked during conversion. All AI clients here are in-memory recording fakes,
/// so no network calls are made.
/// </summary>
public class AiModelPipelineTests
{
    /// <summary>
    /// Converts a DOCX containing an inline image with AI image enrichment enabled and checks
    /// that the injected chat client was called and its reply became the image description.
    /// </summary>
    [Fact]
    public async Task ImageEnrichment_UsesInjectedChatClient()
    {
        // Single source of truth for the canned reply, shared by arrange and assert.
        const string expectedDescription = "SONAR diagram with layered services";

        // Arrange
        var chatClient = new RecordingChatClient(expectedDescription);
        var options = new MarkItDownOptions
        {
            AiModels = new StaticAiModelProvider(chatClient, null),
            EnableAiImageEnrichment = true
        };

        var client = new MarkItDownClient(options);

        await using var stream = DocxInlineImageFactory.Create();
        var streamInfo = new StreamInfo(
            mimeType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            extension: ".docx",
            fileName: "inline-images.docx");

        // Act
        var result = await client.ConvertAsync(stream, streamInfo);

        // Assert
        chatClient.Requests.Count.ShouldBeGreaterThan(0);
        result.Artifacts.Images.ShouldNotBeEmpty();
        result.Artifacts.Images[0].DetailedDescription.ShouldBe(expectedDescription);
    }

    /// <summary>
    /// Runs a pipeline containing a single custom middleware and checks that the middleware
    /// was invoked, called the injected speech-to-text client exactly once, and appended an
    /// audio segment carrying the transcript.
    /// </summary>
    [Fact]
    public async Task CustomMiddleware_UsesSpeechToTextClient()
    {
        // Single source of truth for the canned transcript, shared by arrange and assert.
        const string expectedTranscript = "Mission control acknowledges receipt of telemetry.";

        // Arrange
        var speechClient = new RecordingSpeechClient(expectedTranscript);
        var invoked = false;
        var pipeline = new ConversionPipeline(
            new IConversionMiddleware[] { new SpeechAnnotationMiddleware(new byte[] { 1, 2, 3 }, () => invoked = true) },
            new StaticAiModelProvider(null, speechClient),
            logger: null);

        var artifacts = new ConversionArtifacts();
        var segments = new List<DocumentSegment>();
        var streamInfo = new StreamInfo(mimeType: "audio/wav", extension: ".wav", fileName: "sample.wav");

        // Act
        await pipeline.ExecuteAsync(streamInfo, artifacts, segments, CancellationToken.None);

        // Assert
        invoked.ShouldBeTrue();
        speechClient.InvocationCount.ShouldBe(1);
        segments.ShouldContain(segment =>
            segment.Type == SegmentType.Audio &&
            segment.Markdown.Contains(expectedTranscript));
    }

    /// <summary>
    /// Chat client fake that records every non-streaming request and always answers with a
    /// single assistant message containing <paramref name="responseText"/>.
    /// </summary>
    private sealed class RecordingChatClient(string responseText) : IChatClient
    {
        /// <summary>Snapshots of the message lists passed to <see cref="GetResponseAsync"/>, in call order.</summary>
        public List<IReadOnlyList<ChatMessage>> Requests { get; } = new();

        /// <summary>Records a copy of <paramref name="messages"/> and returns the canned reply.</summary>
        public Task<ChatResponse> GetResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options, CancellationToken cancellationToken = default)
        {
            // Copy the incoming sequence so later mutation by the caller cannot change what was recorded.
            var recorded = new List<ChatMessage>(messages);
            Requests.Add(recorded);

            var reply = new ChatMessage(ChatRole.Assistant, responseText);
            return Task.FromResult(new ChatResponse(new List<ChatMessage> { reply }));
        }

        /// <summary>Streaming is not supported by this fake; always throws.</summary>
        /// <exception cref="NotSupportedException">Always.</exception>
        IAsyncEnumerable<ChatResponseUpdate> IChatClient.GetStreamingResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options, CancellationToken cancellationToken)
        {
            throw new NotSupportedException();
        }

        /// <summary>Exposes no additional services; always returns <see langword="null"/>.</summary>
        public object? GetService(Type serviceType, object? serviceKey = null) => null;

        /// <summary>Nothing to release.</summary>
        public void Dispose()
        {
        }
    }

    /// <summary>
    /// Speech-to-text client fake that counts non-streaming calls and always returns
    /// <paramref name="transcript"/>.
    /// </summary>
    private sealed class RecordingSpeechClient(string transcript) : ISpeechToTextClient
    {
        /// <summary>Number of times <see cref="GetTextAsync"/> has been called.</summary>
        public int InvocationCount { get; private set; }

        /// <summary>Increments <see cref="InvocationCount"/> and returns the canned transcript; the audio stream is not read.</summary>
        public Task<SpeechToTextResponse> GetTextAsync(System.IO.Stream audio, SpeechToTextOptions? options, CancellationToken cancellationToken = default)
        {
            InvocationCount++;
            return Task.FromResult(new SpeechToTextResponse(transcript));
        }

        /// <summary>Streaming is not supported by this fake; always throws.</summary>
        /// <exception cref="NotSupportedException">Always.</exception>
        IAsyncEnumerable<SpeechToTextResponseUpdate> ISpeechToTextClient.GetStreamingTextAsync(System.IO.Stream audio, SpeechToTextOptions? options, CancellationToken cancellationToken)
        {
            throw new NotSupportedException();
        }

        /// <summary>Exposes no additional services; always returns <see langword="null"/>.</summary>
        public object? GetService(Type serviceType, object? serviceKey = null) => null;

        /// <summary>Nothing to release.</summary>
        public void Dispose()
        {
        }
    }

    /// <summary>
    /// Middleware under test: signals <paramref name="onInvoke"/>, sends
    /// <paramref name="audioBytes"/> to the context's speech-to-text client, and appends the
    /// resulting transcript as an audio segment.
    /// </summary>
    private sealed class SpeechAnnotationMiddleware(byte[] audioBytes, Action onInvoke) : IConversionMiddleware
    {
        /// <summary>
        /// Invokes the callback unconditionally, then transcribes the audio if a speech client
        /// is available and the buffer is non-empty. Adds no segment when the transcript is
        /// null, empty, or whitespace.
        /// </summary>
        public async Task InvokeAsync(ConversionPipelineContext context, CancellationToken cancellationToken)
        {
            // Signal first so the test can observe invocation even when the guards below bail out.
            onInvoke();

            var speechClient = context.AiModels.SpeechToTextClient;
            if (speechClient is null || audioBytes.Length == 0)
            {
                return;
            }

            await using var buffer = new System.IO.MemoryStream(audioBytes, writable: false);
            var response = await speechClient.GetTextAsync(buffer, new SpeechToTextOptions
            {
                ModelId = "gpt-4o-transcribe",
                SpeechLanguage = "en-US"
            }, cancellationToken);

            // Capture in a local so the null/whitespace guard narrows the value for the
            // compiler and no null-forgiving operator is needed below.
            var transcriptText = response.Text;
            if (string.IsNullOrWhiteSpace(transcriptText))
            {
                return;
            }

            context.Segments.Add(new DocumentSegment(
                markdown: transcriptText,
                type: SegmentType.Audio,
                label: "AI Speech Transcript"));
        }
    }
}
143+
#pragma warning restore MEAI001

tests/MarkItDown.Tests/MarkItDown.Tests.csproj

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,20 @@
55
</PropertyGroup>
66

77
<ItemGroup>
8-
<PackageReference Include="coverlet.collector" Version="6.0.4">
8+
<PackageReference Include="coverlet.collector">
99
<PrivateAssets>all</PrivateAssets>
1010
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
1111
</PackageReference>
12-
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.14.1" />
13-
<PackageReference Include="Moq" Version="4.20.72" />
14-
<PackageReference Include="xunit" Version="2.9.3" />
15-
<PackageReference Include="xunit.runner.visualstudio" Version="3.1.4">
12+
<PackageReference Include="Azure.AI.OpenAI" />
13+
<PackageReference Include="Microsoft.NET.Test.Sdk" />
14+
<PackageReference Include="Microsoft.Extensions.AI.OpenAI" />
15+
<PackageReference Include="Moq" />
16+
<PackageReference Include="xunit" />
17+
<PackageReference Include="xunit.runner.visualstudio">
1618
<PrivateAssets>all</PrivateAssets>
1719
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
1820
</PackageReference>
19-
<PackageReference Include="Shouldly" Version="4.3.0" />
21+
<PackageReference Include="Shouldly" />
2022
</ItemGroup>
2123

2224
<ItemGroup>

0 commit comments

Comments
 (0)