# GraphRAG .NET Porting Plan

This working note documents the mapping between the Python implementation that lives in `submodules/graphrag-python` and the forthcoming .NET port. It exists purely as a checklist for the migration effort and will be removed once parity has been achieved.

## High-Level Architecture

- **Configuration** – `GraphRagConfig` and companion models will be introduced under `GraphRag.Config`. They mirror the Pydantic models (`graphrag.config.models`) and keep JSON/YAML compatibility with the original schema.
- **Indexing Pipeline** – `GraphRag.Indexing` provides:
  - `PipelineBuilder`, `PipelineRunContext`, `PipelineRunResult`, and `WorkflowDelegate`.
  - Workflow implementations translated from `graphrag.index.workflows.*`.
  - Operation helpers from `graphrag.index.operations.*`, rewritten against .NET primitives (`List<T>`, `ImmutableArray<T>`, and `DataFrame` where necessary).
- **Query Pipeline** – `GraphRag.Query` mirrors `graphrag.query.*` with orchestrators for question generation, context assembly, and answer synthesis.
- **Storage** – `GraphRag.Storage` offers a provider model equivalent to `PipelineStorage` (file, memory, Blob, Cosmos). A JSON-backed table serializer is in place while the Parquet implementation is ported.
- **Language Models & Tokenizers** – `GraphRag.LanguageModel` wraps Azure OpenAI/LiteLLM equivalents. Configuration, retry, and rate-limiting concepts are ported.
- **Vector Stores** – `GraphRag.VectorStores` provides adapters for local FAISS-like embeddings, Azure Cognitive Search, and Postgres pgvector, matching the Python `vector_stores` package.
- **Callbacks & Telemetry** – `GraphRag.Callbacks` contains workflow lifecycle hooks, tracing, and instrumentation mirroring `WorkflowCallbacks`.

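The indexing runtime pieces listed above can be sketched as follows. The type names (`PipelineBuilder`, `PipelineRunContext`, `PipelineRunResult`, `WorkflowDelegate`) come from this plan, but every signature and member below is an assumption about how the port might shape them, not a committed API.

```csharp
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;

// Placeholder: in the real port this would carry storage, callbacks, and
// shared state between workflows.
public sealed class PipelineRunContext { }

// A workflow is an async step that may emit an output table (or nothing).
public delegate Task<object?> WorkflowDelegate(
    PipelineRunContext context, CancellationToken cancellationToken);

// One entry per executed workflow, mirroring the Python PipelineRunResult.
public sealed record PipelineRunResult(
    string Workflow, object? Result, IReadOnlyList<Exception> Errors);

public sealed class PipelineBuilder
{
    private readonly List<(string Name, WorkflowDelegate Run)> _workflows = new();

    // Workflows execute in registration order, like the Python workflow list.
    public PipelineBuilder AddWorkflow(string name, WorkflowDelegate run)
    {
        _workflows.Add((name, run));
        return this;
    }

    // Stream one result per workflow; a failure is captured rather than
    // aborting the run, so callbacks can observe every step.
    public async IAsyncEnumerable<PipelineRunResult> RunAsync(
        PipelineRunContext context,
        [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken ct = default)
    {
        foreach (var (name, run) in _workflows)
        {
            object? result = null;
            var errors = new List<Exception>();
            try { result = await run(context, ct); }
            catch (Exception ex) { errors.Add(ex); }
            yield return new PipelineRunResult(name, result, errors);
        }
    }
}
```

Whether errors should be captured per workflow or should fail the whole run is an open design question for the port; the sketch shows the capture-and-continue variant.
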
## Data Model Mapping

| Python Table | Python Module | .NET Type | Notes |
|--------------|---------------|-----------|-------|
| `documents` | `index/workflows/create_final_documents.py` | `DocumentRecord` | Stored as Parquet; includes metadata dictionary. |
| `text_units` | `index/workflows/create_base_text_units.py` | `TextUnitRecord` | Chunk metadata + document ids. |
| `entities` | `index/workflows/extract_graph.py` | `EntityRecord` | Already partially ported; will be extended with raw view support. |
| `relationships` | `index/workflows/extract_graph.py` | `RelationshipRecord` | Already present; to be aligned with the Python schema. |
| `communities` | `index/workflows/create_communities.py` | `CommunityRecord` | Requires a Louvain modularity implementation. |
| `community_reports` | `index/workflows/create_community_reports.py` | `CommunityReportRecord` | Needs summarization prompts and structured output. |
| `covariates` | `index/workflows/extract_covariates.py` | `CovariateRecord` | Includes temporal fields and subject/object ids. |

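To make the table-to-record mapping concrete, a `TextUnitRecord` might look like the sketch below. The property set is inferred from the Python `create_base_text_units` output (chunk text, token count, parent document ids); treat the exact names and types as assumptions until the schema work lands.

```csharp
using System.Collections.Generic;

// Assumed shape only: columns inferred from the Python text_units table;
// the final port may add fields (e.g. entity/relationship id lists).
public sealed record TextUnitRecord(
    string Id,                           // stable id of the chunk
    string Text,                         // chunk text
    int NTokens,                         // token count of the chunk
    IReadOnlyList<string> DocumentIds);  // documents this chunk was cut from
```

Positional records give value equality for free, which is convenient when diffing .NET output tables against the Python golden datasets.
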
## Testing Strategy

- Translate the Python unit/integration suites under `submodules/graphrag-python/tests`.
- Use xUnit with Aspire-powered fixtures (Neo4j, Postgres, Cosmos emulator) to run end-to-end indexing + query scenarios.
- For LLM-dependent steps, rely on configurable providers with live credentials; tests are skipped only when mandatory environment variables are absent.
- Golden datasets from `tests/fixtures` are copied into the .NET test resources to validate data transformations.

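The "skip only when mandatory environment variables are absent" rule can be implemented with a small custom fact attribute. The attribute name, variable name, and test below are placeholders for illustration, not committed API.

```csharp
using System;
using Xunit;

// Marks a test that requires a live credential; when the environment
// variable is missing, the test is reported as skipped rather than failed.
public sealed class RequiresEnvFactAttribute : FactAttribute
{
    public RequiresEnvFactAttribute(string variable)
    {
        if (string.IsNullOrEmpty(Environment.GetEnvironmentVariable(variable)))
            Skip = $"Environment variable '{variable}' is not set.";
    }
}

public class IndexingSmokeTests
{
    [RequiresEnvFact("AZURE_OPENAI_API_KEY")]   // placeholder variable name
    public void Pipeline_runs_end_to_end_with_live_model()
    {
        // end-to-end indexing + query scenario would go here
    }
}
```
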
## Immediate TODOs

1. Implement the configuration model layer (`GraphRag.Config`).
2. Port the pipeline runtime (`GraphRag.Indexing.Runtime`), including the callback chain, run loop, and benchmarking.
3. Recreate the storage adapters (File, Memory) and the Parquet serializer.
4. Start translating workflows, beginning with ingestion (`load_input_documents`, `create_base_text_units`, `create_final_documents`).
5. Migrate the vector store and embedding interfaces and integrate them into the indexing pipeline.
6. Recreate the query orchestrator and evaluation pipelines.
7. Port tests iteratively, ensuring coverage parity with Python.

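For the configuration work in TODO 1, JSON/YAML compatibility means a settings file written for the Python implementation should load unchanged in .NET. The fragment below is illustrative only; the key names loosely follow the Python graphrag settings schema and should be verified against `graphrag.config.models` during the port.

```yaml
# Illustrative only: key names to be verified against graphrag.config.models.
llm:
  type: azure_openai_chat
  api_key: ${GRAPHRAG_API_KEY}
  model: gpt-4o
chunks:
  size: 1200
  overlap: 100
storage:
  type: file
  base_dir: output
```
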
> This file is intentionally temporary; it guides the phased port while the codebase is in flux.