Skip to content

Commit 68a20b5

Browse files
Naineten
authored and committed
feat(perf): optimize OFPA decoding with batch git processing and caching
Major performance improvement for Unreal Engine projects with many OFPA files.

Changes:
- Implement QueryFileContent.RunBatchAsync using `git cat-file --batch` to reduce process creation overhead for Index/HEAD file reads.
- Refactor WorkingCopy.DecodeOFPAPathsAsync:
  - Use batch processing for missing working tree files.
  - Implement sequential task execution to prevent race conditions.
  - Add stat-based caching (size/mtime) to skip re-decoding unchanged files.
- Cache the IsUnrealEngineProject check in the Repository ViewModel to avoid repeated I/O.
- Add unit tests for OFPAParser case sensitivity and extension checks.
- Add PerformanceLogger for internal profiling of decode operations.
1 parent 2095f17 commit 68a20b5

8 files changed

Lines changed: 463 additions & 100 deletions

File tree

.gitignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,11 @@ build/*.AppImage
3939
SourceGit.app/
4040
build.command
4141
src/Properties/launchSettings.json
42+
43+
# Temporary and tool-specific files
44+
.claude/
45+
error_showcase.png
46+
publish/
47+
nul
48+
progress.md
49+
build_release.bat

src/Commands/QueryFileContent.cs

Lines changed: 236 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,44 @@
1-
using System;
1+
using System;
2+
using System.Collections.Generic;
23
using System.Diagnostics;
34
using System.IO;
5+
using System.Text;
46
using System.Threading.Tasks;
57

8+
#nullable enable
9+
610
namespace SourceGit.Commands
711
{
812
public static class QueryFileContent
913
{
10-
public static async Task<Stream> RunAsync(string repo, string revision, string file)
14+
public static Task<Stream> RunIndexAsync(string repo, string file)
1115
{
12-
var starter = new ProcessStartInfo();
13-
starter.WorkingDirectory = repo;
14-
starter.FileName = Native.OS.GitExecutable;
15-
starter.Arguments = $"show {revision}:{file.Quoted()}";
16-
starter.UseShellExecute = false;
17-
starter.CreateNoWindow = true;
18-
starter.WindowStyle = ProcessWindowStyle.Hidden;
19-
starter.RedirectStandardOutput = true;
16+
// Read from index (staged content).
17+
return RunObjectSpecAsync(repo, $":{file.Quoted()}");
18+
}
19+
20+
/// <summary>
/// Reads the content of <paramref name="file"/> as stored in <paramref name="revision"/>.
/// </summary>
/// <param name="repo">Repository path passed through to the shared runner.</param>
/// <param name="revision">Revision part of the object spec.</param>
/// <param name="file">Path part of the object spec; it is quoted before use.</param>
/// <returns>The task produced by the shared object-spec runner.</returns>
public static Task<Stream> RunAsync(string repo, string revision, string file)
{
    // Object spec has the form "<revision>:<quoted path>".
    var objectSpec = $"{revision}:{file.Quoted()}";
    return RunObjectSpecAsync(repo, objectSpec);
}
25+
26+
private static async Task<Stream> RunObjectSpecAsync(string repo, string objectSpec)
27+
{
28+
// Shared git show runner for both index and revision reads.
29+
var starter = new ProcessStartInfo
30+
{
31+
WorkingDirectory = repo,
32+
FileName = Native.OS.GitExecutable,
33+
Arguments = $"show {objectSpec}",
34+
UseShellExecute = false,
35+
CreateNoWindow = true,
36+
WindowStyle = ProcessWindowStyle.Hidden,
37+
RedirectStandardOutput = true,
38+
};
2039

2140
var stream = new MemoryStream();
41+
var sw = Stopwatch.StartNew();
2242
try
2343
{
2444
using var proc = Process.Start(starter)!;
@@ -29,11 +49,217 @@ public static async Task<Stream> RunAsync(string repo, string revision, string f
2949
{
3050
App.RaiseException(repo, $"Failed to query file content: {e}");
3151
}
52+
sw.Stop();
53+
Utilities.PerformanceLogger.Log($"[GitShow] {objectSpec} : {sw.ElapsedMilliseconds}ms");
3254

3355
stream.Position = 0;
3456
return stream;
3557
}
3658

59+
// Batch read file contents using git cat-file --batch.
60+
// maxBytesPerObject: if > 0, read only first N bytes of each object (for performance).
61+
/// <summary>
/// Reads many objects through a single <c>git cat-file --batch</c> process instead of
/// starting one git process per object.
/// </summary>
/// <param name="repo">Working directory for the git process.</param>
/// <param name="objectSpecs">Object names written to git's stdin, one per line, in order.</param>
/// <param name="maxBytesPerObject">
/// When greater than zero, only the first N bytes of each object are kept; the remainder
/// is read from the pipe and discarded.
/// </param>
/// <returns>
/// Map from spec to the bytes read. Specs git could not resolve, zero-length objects and
/// anything after a truncated stream are absent from the map.
/// </returns>
public static async Task<Dictionary<string, byte[]>> RunBatchAsync(string repo, IReadOnlyList<string> objectSpecs, int maxBytesPerObject = 0)
{
    var results = new Dictionary<string, byte[]>(StringComparer.Ordinal);
    if (objectSpecs == null || objectSpecs.Count == 0)
        return results;

    var starter = new ProcessStartInfo
    {
        WorkingDirectory = repo,
        FileName = Native.OS.GitExecutable,
        Arguments = "cat-file --batch",
        UseShellExecute = false,
        CreateNoWindow = true,
        WindowStyle = ProcessWindowStyle.Hidden,
        RedirectStandardInput = true,
        RedirectStandardOutput = true,
        // Send specs as UTF-8 without a BOM. The platform default (the console code page
        // on Windows) would mangle non-ASCII paths, and a BOM would corrupt the first spec.
        StandardInputEncoding = new UTF8Encoding(false),
    };

    var swTotal = Stopwatch.StartNew();
    var swStart = Stopwatch.StartNew();
    long startMs = 0, firstReadMs = 0, dataReadMs = 0, waitExitMs = 0;
    int missingCount = 0;
    long totalBytesRead = 0;
    long totalBytesInObjects = 0;
    int minSize = int.MaxValue, maxSize = 0;

    try
    {
        using var proc = Process.Start(starter)!;
        swStart.Stop();
        startMs = swStart.ElapsedMilliseconds;

        // Write requests in background to avoid deadlock (pipe buffer full).
        var writeTask = Task.Run(async () =>
        {
            // Disposing the writer closes stdin, which lets git exit.
            await using var input = proc.StandardInput;
            foreach (var spec in objectSpecs)
            {
                await input.WriteLineAsync(spec).ConfigureAwait(false);
            }
        });

        await using var output = proc.StandardOutput.BaseStream;

        var swFirstRead = Stopwatch.StartNew();
        var swDataRead = new Stopwatch();
        bool firstReadDone = false;

        // Replies arrive in request order, so reply i belongs to objectSpecs[i].
        for (int i = 0; i < objectSpecs.Count; i++)
        {
            var header = await ReadBatchHeaderLineAsync(output).ConfigureAwait(false);

            if (!firstReadDone)
            {
                swFirstRead.Stop();
                firstReadMs = swFirstRead.ElapsedMilliseconds;
                firstReadDone = true;
            }

            if (header == null)
                break;

            // Status lines ("<spec> missing", "<spec> ambiguous", ...) carry no payload and
            // no trailing newline. Consuming a byte here would eat the next header's first byte.
            if (!HasNumericSizeField(header))
            {
                missingCount++;
                continue;
            }

            var size = ParseBatchObjectSize(header);

            // A numeric size that parsed to 0 without being "0" does not fit in an int.
            // Its payload cannot be skipped reliably, so stop instead of reading it as headers.
            if (size <= 0 && !header.EndsWith(" 0", StringComparison.Ordinal))
                throw new InvalidDataException($"Unsupported object size in cat-file header: {header}");

            if (size > 0)
            {
                totalBytesInObjects += size;
                if (size < minSize) minSize = size;
                if (size > maxSize) maxSize = size;

                // If maxBytesPerObject is set, read only that many bytes and skip the rest.
                var bytesToRead = (maxBytesPerObject > 0 && size > maxBytesPerObject)
                    ? maxBytesPerObject
                    : size;
                var bytesToSkip = size - bytesToRead;

                swDataRead.Start();
                var data = await ReadExactBytesAsync(output, bytesToRead).ConfigureAwait(false);
                swDataRead.Stop();

                // A null result means the stream ended mid-object; nothing more can follow.
                if (data == null)
                    break;

                results[objectSpecs[i]] = data;
                totalBytesRead += data.Length;

                // Skip remaining bytes if we limited the read.
                if (bytesToSkip > 0)
                {
                    swDataRead.Start();
                    await SkipBytesAsync(output, bytesToSkip).ConfigureAwait(false);
                    swDataRead.Stop();
                }
            }

            // Consume trailing newline after object content (even for size 0).
            _ = await ReadSingleByteAsync(output).ConfigureAwait(false);
        }

        dataReadMs = swDataRead.ElapsedMilliseconds;

        // Ensure writing is finished (should be, or implies error).
        await writeTask.ConfigureAwait(false);

        var swWait = Stopwatch.StartNew();
        await proc.WaitForExitAsync().ConfigureAwait(false);
        swWait.Stop();
        waitExitMs = swWait.ElapsedMilliseconds;
    }
    catch (Exception e)
    {
        App.RaiseException(repo, $"Failed to query batch file content: {e}");
    }

    swTotal.Stop();

    // No object was read: report 0 rather than the int.MaxValue sentinel.
    if (minSize == int.MaxValue)
        minSize = 0;

    var avgSize = results.Count > 0 ? totalBytesInObjects / results.Count : 0;
    Utilities.PerformanceLogger.Log(
        $"[GitBatch] {objectSpecs.Count} specs, {results.Count} found, {missingCount} missing | " +
        $"Data:{totalBytesRead / 1024}KB (min:{minSize / 1024}KB avg:{avgSize / 1024}KB max:{maxSize / 1024}KB) | " +
        $"Start:{startMs}ms FirstRead:{firstReadMs}ms DataRead:{dataReadMs}ms Exit:{waitExitMs}ms Total:{swTotal.ElapsedMilliseconds}ms");

    return results;
}

// True when the last space-separated field of the header is a non-empty run of ASCII digits,
// i.e. the line has the "<oid> <type> <size>" shape rather than being a status line.
private static bool HasNumericSizeField(string header)
{
    var lastSpace = header.LastIndexOf(' ');
    if (lastSpace <= 0 || lastSpace == header.Length - 1)
        return false;

    for (var k = lastSpace + 1; k < header.Length; k++)
    {
        if (header[k] < '0' || header[k] > '9')
            return false;
    }

    return true;
}
189+
190+
/// <summary>
/// Extracts the object size from a <c>cat-file --batch</c> header line.
/// </summary>
/// <param name="header">Header of the form "&lt;sha1&gt; &lt;type&gt; &lt;size&gt;" or "&lt;spec&gt; missing".</param>
/// <returns>
/// The size when the last space-separated field is a plain decimal number that fits in an
/// <see cref="int"/>; otherwise 0.
/// </returns>
private static int ParseBatchObjectSize(string header)
{
    var lastSpace = header.LastIndexOf(' ');
    if (lastSpace <= 0 || lastSpace == header.Length - 1)
        return 0;

    // Digits only, culture-independent: a sign or whitespace means a malformed header,
    // and the result can never be negative.
    var parsed = int.TryParse(
        header.AsSpan(lastSpace + 1),
        System.Globalization.NumberStyles.None,
        System.Globalization.CultureInfo.InvariantCulture,
        out var size);

    return parsed ? size : 0;
}
202+
203+
/// <summary>
/// Reads one line from <paramref name="stream"/> a byte at a time, so nothing beyond
/// the line terminator is consumed.
/// </summary>
/// <param name="stream">Stream to read from.</param>
/// <returns>
/// The line decoded as ASCII without its "\n" (and without a trailing "\r"), or
/// <c>null</c> when no byte preceded the terminator or the end of the stream.
/// </returns>
private static async Task<string?> ReadBatchHeaderLineAsync(Stream stream)
{
    var collected = new MemoryStream();

    // Stop at end of stream (-1) or at the line feed; the line feed itself is dropped.
    for (var next = await ReadSingleByteAsync(stream).ConfigureAwait(false);
         next != -1 && next != '\n';
         next = await ReadSingleByteAsync(stream).ConfigureAwait(false))
    {
        collected.WriteByte((byte)next);
    }

    if (collected.Length == 0)
        return null;

    var text = Encoding.ASCII.GetString(collected.GetBuffer(), 0, (int)collected.Length);
    if (text.EndsWith('\r'))
        return text[..^1];

    return text;
}
224+
225+
/// <summary>
/// Reads exactly <paramref name="length"/> bytes from <paramref name="stream"/>.
/// </summary>
/// <param name="stream">Stream to read from.</param>
/// <param name="length">Number of bytes required.</param>
/// <returns>
/// A new array of <paramref name="length"/> bytes, or <c>null</c> when the stream ends
/// before that many bytes were read.
/// </returns>
private static async Task<byte[]?> ReadExactBytesAsync(Stream stream, int length)
{
    var payload = new byte[length];

    // A single read may return fewer bytes than requested, so keep filling the tail.
    for (var filled = 0; filled < length;)
    {
        var chunk = await stream.ReadAsync(payload.AsMemory(filled)).ConfigureAwait(false);
        if (chunk <= 0)
            return null;

        filled += chunk;
    }

    return payload;
}
240+
241+
/// <summary>
/// Reads and discards up to <paramref name="length"/> bytes from <paramref name="stream"/>.
/// Stops early if the stream ends first.
/// </summary>
/// <param name="stream">Stream to advance.</param>
/// <param name="length">Number of bytes to discard.</param>
private static async Task SkipBytesAsync(Stream stream, int length)
{
    // Scratch buffer is capped at 8 KiB, and smaller when less than that is to be skipped.
    var scratch = new byte[Math.Min(length, 8192)];

    for (var left = length; left > 0;)
    {
        var want = left < scratch.Length ? left : scratch.Length;
        var got = await stream.ReadAsync(scratch.AsMemory(0, want)).ConfigureAwait(false);
        if (got <= 0)
            return;

        left -= got;
    }
}
255+
256+
/// <summary>
/// Reads one byte from <paramref name="stream"/>.
/// </summary>
/// <param name="stream">Stream to read from.</param>
/// <returns>The byte value (0-255), or -1 when the read returned no data.</returns>
private static async Task<int> ReadSingleByteAsync(Stream stream)
{
    var cell = new byte[1];
    var count = await stream.ReadAsync(cell.AsMemory()).ConfigureAwait(false);
    if (count == 0)
        return -1;

    return cell[0];
}
262+
37263
public static async Task<Stream> FromLFSAsync(string repo, string oid, long size)
38264
{
39265
var starter = new ProcessStartInfo();

src/Utilities/OFPAParser.cs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,6 @@ namespace SourceGit.Utilities
2020
/// Compatibility: UE 4.26 - 5.7+
2121
/// Performance: ~0.1 ms/file
2222
/// </summary>
23-
/// <summary>
24-
/// Decodes human-readable names from Unreal Engine OFPA (One File Per Actor) .uasset files.
25-
/// These files have hashed names like "KCBX0GWLTFQT9RJ8M1LY8.uasset" in __ExternalActors__ folders.
26-
/// </summary>
2723
public static class OFPAParser
2824
{
2925
// Unreal Engine asset magic number (little-endian: 0x9E2A83C1)

src/Utilities/PerformanceLogger.cs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
using System;
2+
using System.IO;
3+
4+
namespace SourceGit.Utilities
5+
{
6+
/// <summary>
/// Appends timing messages to "ofpa_perf.log" in the application's base directory.
/// Logging is best-effort: a failed write is ignored.
/// </summary>
public static class PerformanceLogger
{
    // Serializes appends so concurrent callers write one line at a time.
    private static readonly object s_gate = new object();
    private static readonly string s_logPath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ofpa_perf.log");

    /// <summary>
    /// Appends <paramref name="message"/> followed by a line break to the log file.
    /// </summary>
    /// <param name="message">Text of the log line.</param>
    public static void Log(string message)
    {
        var line = message + Environment.NewLine;

        lock (s_gate)
        {
            try
            {
                File.AppendAllText(s_logPath, line);
            }
            catch
            {
                // Deliberately ignored: a logging failure must not reach the caller.
            }
        }
    }
}
23+
}

src/ViewModels/Repository.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
using System;
22
using System.Collections.Generic;
33
using System.IO;
4+
using System.Linq;
45
using System.Text;
56
using System.Text.Json;
67
using System.Threading;
@@ -57,6 +58,12 @@ public bool HasAllowedSignersFile
5758
get => _hasAllowedSignersFile;
5859
}
5960

61+
/// <summary>
/// Whether a "*.uproject" or "*.uplugin" file was found directly under the repository
/// path. Only this class can set it.
/// </summary>
public bool IsUnrealEngineProject { get; private set; }
66+
6067
public int SelectedViewIndex
6168
{
6269
get => _selectedViewIndex;
@@ -508,6 +515,9 @@ public void Open()
508515
_selectedViewIndex = 0;
509516
}
510517

518+
IsUnrealEngineProject = Directory.EnumerateFiles(FullPath, "*.uproject").Any() ||
519+
Directory.EnumerateFiles(FullPath, "*.uplugin").Any();
520+
511521
_lastFetchTime = DateTime.Now;
512522
_autoFetchTimer = new Timer(AutoFetchByTimer, null, 5000, 5000);
513523
RefreshAll();

0 commit comments

Comments
 (0)