Skip to content

Commit 02aa3b8

Browse files
committed
Add pluggable comparison engine system with Docxodus support
This commit introduces a pluggable architecture for document comparison engines, allowing users to choose between the Open-Xml-PowerTools and Docxodus backends.

Key changes:
- Add abstract ComparisonEngine base class defining the engine interface
- Refactor XmlPowerToolsEngine to inherit from ComparisonEngine
- Add DocxodusEngine for the modern Docxodus backend (.NET 8.0)
- Create EngineRegistry for dynamic engine discovery and selection
- Add get_engine(), list_engines(), list_available_engines() functions
- Create Docxodus C# CLI project using Docxodus NuGet package
- Update build system to support building both engines
- Add comprehensive test suite (79 tests) covering:
  - Base class interface contracts
  - Engine implementations with mocking
  - Registry functionality
  - Package imports and API
  - Backward compatibility with run_redline() method

Breaking changes: None - maintains full backward compatibility

Usage:
    from python_redlines import get_engine
    engine = get_engine()  # default: openxml-powertools
    engine = get_engine('docxodus')  # use Docxodus
    redline, _, _ = engine.compare(author, original, modified)
1 parent 254e742 commit 02aa3b8

17 files changed

Lines changed: 2232 additions & 126 deletions

build_differ.py

Lines changed: 145 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,41 @@
1+
"""
2+
Build script for compiling comparison engine binaries.
3+
4+
This script builds self-contained .NET executables for multiple platforms
5+
for both the Open-Xml-PowerTools and Docxodus comparison engines.
6+
"""
7+
18
import subprocess
29
import os
10+
import sys
311
import tarfile
412
import zipfile
13+
import argparse
14+
15+
16+
# Per-engine build settings: the C# project directory handed to
# `dotnet publish`, the binary name associated with the engine, and the
# sub-directory of the package's dist/ folder that receives its archives.
ENGINES = {
    'openxml-powertools': dict(
        csproj_path='./csproj',
        binary_name='redlines',
        dist_subdir='openxml-powertools',
    ),
    'docxodus': dict(
        csproj_path='./csproj-docxodus',
        binary_name='redline',
        dist_subdir='docxodus',
    ),
}

# Target .NET runtime identifiers paired with the archive format used for
# each one: zip for the Windows targets, gzipped tarballs for the others.
PLATFORMS = [
    {'rid': rid, 'archive_ext': archive_ext}
    for rid, archive_ext in (
        ('linux-x64', '.tar.gz'),
        ('linux-arm64', '.tar.gz'),
        ('win-x64', '.zip'),
        ('win-arm64', '.zip'),
        ('osx-x64', '.tar.gz'),
        ('osx-arm64', '.tar.gz'),
    )
]
539

640

741
def get_version():
@@ -14,95 +48,160 @@ def get_version():
1448
return about['__version__']
1549

1650

17-
def run_command(command):
51+
def run_command(command, check=True):
    """
    Run a shell command, echoing its combined stdout/stderr line by line.

    Args:
        command: Command line to execute through the shell.
        check: When True (the default), a non-zero exit status is reported
            as a failure. When False, the exit status is ignored.

    Returns:
        False if ``check`` is True and the command exited with a non-zero
        status; True in every other case.
    """
    print(f"Running: {command}")
    # The context manager closes the stdout pipe and reaps the child process
    # even if echoing the output raises part-way through.
    with subprocess.Popen(
        command,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    ) as process:
        for raw_line in process.stdout:
            # Tool output is not guaranteed to be valid UTF-8, so undecodable
            # bytes are replaced instead of aborting the whole build.
            print(raw_line.decode(errors='replace').strip())
        return_code = process.wait()

    if check and return_code != 0:
        print(f"Command failed with return code {return_code}")
        return False
    return True
2474

2575

2676
def compress_files(source_dir, target_file):
    """
    Compress a directory into a ``.tar.gz`` or ``.zip`` archive.

    Every entry in the archive is stored beneath a single top-level folder
    named after the last component of ``source_dir``.

    Args:
        source_dir: Directory to archive. A trailing path separator is
            tolerated.
        target_file: Path of the archive to create; its extension selects
            the archive format.

    Raises:
        ValueError: If ``target_file`` ends in neither ``.tar.gz`` nor
            ``.zip``.
    """
    print(f"Compressing {source_dir} to {target_file}")

    # Strip any trailing separator: with one, basename() would be '' and
    # dirname() would be the directory itself, which silently drops the
    # top-level folder from the archive.
    source_dir = os.path.normpath(source_dir)
    parent_dir = os.path.dirname(source_dir)

    if target_file.endswith('.tar.gz'):
        with tarfile.open(target_file, "w:gz") as tar:
            tar.add(source_dir, arcname=os.path.basename(source_dir))
    elif target_file.endswith('.zip'):
        with zipfile.ZipFile(target_file, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, _dirs, files in os.walk(source_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    zipf.write(file_path, os.path.relpath(file_path, parent_dir))
    else:
        # Previously this case fell through and produced no archive at all.
        raise ValueError(f"Unsupported archive extension: {target_file}")
4091

4192

4293
def cleanup_old_builds(dist_dir, current_version):
    """
    Delete ``.zip`` / ``.tar.gz`` archives in ``dist_dir`` built for another version.

    An archive is kept only when its name ends with
    ``-<current_version>.zip`` or ``-<current_version>.tar.gz``. Matching the
    full suffix (rather than searching for the version anywhere in the name)
    prevents e.g. a ``0.0.10`` archive from being mistaken for version
    ``0.0.1``. Files with any other extension are never touched.

    Args:
        dist_dir: Directory holding the build archives. Nothing happens if
            it is not an existing directory.
        current_version: Version string whose archives must be preserved.
    """
    if not os.path.isdir(dist_dir):
        return

    keep_suffixes = (f'-{current_version}.zip', f'-{current_version}.tar.gz')
    for file in os.listdir(dist_dir):
        if not file.endswith(('.zip', '.tar.gz')):
            continue
        if file.endswith(keep_suffixes):
            continue
        os.remove(os.path.join(dist_dir, file))
        print(f"Deleted old build file: {file}")
51105

52106

53-
def main():
54-
version = get_version()
55-
print(f"Version: {version}")
56-
57-
dist_dir = "./src/python_redlines/dist/"
107+
def build_engine(engine_name, engine_config, version, platforms=None):
    """
    Build and archive the binaries of one comparison engine.

    For every requested platform this runs ``dotnet publish`` on the engine's
    project, archives the resulting output directory into the engine's dist
    sub-directory, and finally removes archives left over from other versions.
    A platform whose build fails, or whose output directory is missing, is
    skipped with a warning; the remaining platforms are still built.

    Args:
        engine_name: Name of the engine (used for log output only).
        engine_config: Configuration dict for the engine; the keys
            ``csproj_path`` and ``dist_subdir`` are read here.
        version: Version string embedded in the archive file names.
        platforms: Optional list of platform dicts (``rid``, ``archive_ext``)
            to build. Defaults to every entry of ``PLATFORMS``.
    """
    csproj_path = engine_config['csproj_path']
    dist_subdir = engine_config['dist_subdir']

    dist_dir = f"./src/python_redlines/dist/{dist_subdir}/"

    # Ensure dist directory exists
    os.makedirs(dist_dir, exist_ok=True)

    platforms_to_build = platforms or PLATFORMS

    print(f"\n{'='*60}")
    print(f"Building {engine_name} engine")
    print(f"{'='*60}\n")

    for platform_config in platforms_to_build:
        rid = platform_config['rid']
        archive_ext = platform_config['archive_ext']

        print(f"\nBuilding for {rid}...")

        # run_command() only reports a failure when its exit-status check is
        # enabled. With check=False it always returns True, so a failed
        # publish would go unnoticed and a stale output directory from an
        # earlier build could be archived instead.
        cmd = f'dotnet publish {csproj_path} -c Release -r {rid} --self-contained'
        if not run_command(cmd):
            print(f"Warning: Build failed for {rid}")
            continue

        # Output location of `dotnet publish`; the target framework folder
        # (net8.0) is hard-coded here.
        build_dir = f'{csproj_path}/bin/Release/net8.0/{rid}'

        if not os.path.exists(build_dir):
            print(f"Warning: Build directory not found: {build_dir}")
            continue

        # Compress to archive
        archive_name = f"{rid}-{version}{archive_ext}"
        archive_path = os.path.join(dist_dir, archive_name)
        compress_files(build_dir, archive_path)
        print(f"Created: {archive_path}")

    # Cleanup old builds
    cleanup_old_builds(dist_dir, version)

    print(f"\n{engine_name} build complete.")
162+
163+
164+
def main():
    """
    Command-line entry point of the build script.

    Reads the ``--engine`` and ``--platform`` options (both default to
    ``all``), prints the package version, and builds every selected engine
    for every selected platform.
    """
    cli = argparse.ArgumentParser(
        description='Build comparison engine binaries for multiple platforms.'
    )
    cli.add_argument(
        '--engine',
        choices=['all', *ENGINES],
        default='all',
        help='Which engine to build (default: all)',
    )
    cli.add_argument(
        '--platform',
        choices=['all', *(entry['rid'] for entry in PLATFORMS)],
        default='all',
        help='Which platform to build for (default: all)',
    )
    options = cli.parse_args()

    version = get_version()
    print(f"Version: {version}")

    # Narrow the platform list to the single requested RID, if any.
    selected_platforms = PLATFORMS
    if options.platform != 'all':
        selected_platforms = [
            entry for entry in PLATFORMS if entry['rid'] == options.platform
        ]

    # Likewise for the engines: either all of them or just the named one.
    selected_engines = ENGINES.items()
    if options.engine != 'all':
        selected_engines = [(options.engine, ENGINES[options.engine])]

    for name, config in selected_engines:
        build_engine(name, config, version, selected_platforms)

    banner = "=" * 60
    print("\n" + banner)
    print("All builds complete.")
    print(banner)
106205

107206

108207
if __name__ == "__main__":

csproj-docxodus/Program.cs

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
using System;
2+
using System.IO;
3+
using Docxodus;
4+
using DocumentFormat.OpenXml.Packaging;
5+
6+
/// <summary>
/// Command-line entry point that compares two .docx files and writes a
/// redlined document containing the tracked revisions.
/// </summary>
class Program
{
    /// <summary>
    /// Usage: redline &lt;original.docx&gt; &lt;modified.docx&gt; &lt;output.docx&gt; [--author=&lt;name&gt;]
    /// </summary>
    /// <param name="args">Command-line arguments as described above.</param>
    /// <returns>0 on success; 1 on a usage error, a missing input file, or a comparison failure.</returns>
    static int Main(string[] args)
    {
        // Parse arguments: <original> <modified> <output> [--author=<name>]
        if (args.Length < 3)
        {
            Console.WriteLine("Usage: redline <original.docx> <modified.docx> <output.docx> [--author=<name>]");
            return 1;
        }

        string originalFilePath = args[0];
        string modifiedFilePath = args[1];
        string outputFilePath = args[2];
        string authorTag = "Redline";

        // Parse optional --author flag; if it is given more than once the last one wins.
        const string authorPrefix = "--author=";
        for (int i = 3; i < args.Length; i++)
        {
            // Ordinal comparison: a culture-sensitive StartsWith can match
            // arguments that do not literally begin with the prefix, which
            // would make the Substring below cut at the wrong position.
            if (args[i].StartsWith(authorPrefix, StringComparison.Ordinal))
            {
                authorTag = args[i].Substring(authorPrefix.Length);
            }
        }

        if (!File.Exists(originalFilePath))
        {
            Console.Error.WriteLine($"Error: Original file does not exist: {originalFilePath}");
            return 1;
        }

        if (!File.Exists(modifiedFilePath))
        {
            Console.Error.WriteLine($"Error: Modified file does not exist: {modifiedFilePath}");
            return 1;
        }

        try
        {
            var originalBytes = File.ReadAllBytes(originalFilePath);
            var modifiedBytes = File.ReadAllBytes(modifiedFilePath);
            var originalDocument = new WmlDocument(originalFilePath, originalBytes);
            var modifiedDocument = new WmlDocument(modifiedFilePath, modifiedBytes);

            var comparisonSettings = new WmlComparerSettings
            {
                AuthorForRevisions = authorTag,
                DetailThreshold = 0
            };

            var comparisonResults = WmlComparer.Compare(originalDocument, modifiedDocument, comparisonSettings);
            var revisions = WmlComparer.GetRevisions(comparisonResults, comparisonSettings);

            // Output results
            Console.WriteLine($"Revisions found: {revisions.Count}");

            File.WriteAllBytes(outputFilePath, comparisonResults.DocumentByteArray);
            return 0;
        }
        catch (Exception ex)
        {
            // Report the failure on stderr and signal it through the exit code.
            Console.Error.WriteLine($"Error: {ex.Message}");
            Console.Error.WriteLine("Detailed Stack Trace:");
            Console.Error.WriteLine(ex.StackTrace);
            return 1;
        }
    }
}

csproj-docxodus/redline.csproj

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net8.0</TargetFramework>
6+
<ImplicitUsings>enable</ImplicitUsings>
7+
<Nullable>enable</Nullable>
8+
<AssemblyName>redline</AssemblyName>
9+
</PropertyGroup>
10+
11+
<ItemGroup>
12+
<PackageReference Include="Docxodus" Version="5.4.1" />
13+
</ItemGroup>
14+
15+
</Project>

hatch_run_build_hook.py

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,46 @@
1+
"""
2+
Hatch build hook for building comparison engine binaries.
3+
4+
This hook runs during the package build process to compile the
5+
.NET binaries for all supported comparison engines and platforms.
6+
"""
7+
8+
import os
19
import subprocess
210
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
311

12+
413
class HatchRunBuildHook(BuildHookInterface):
    """Hatch build hook that compiles the comparison engine binaries."""

    PLUGIN_NAME = 'hatch-run-build'

    def initialize(self, version, build_data):
        """
        Compile the engine binaries by running the ``build_differ`` module.

        The step is skipped entirely when the ``SKIP_BINARY_BUILD``
        environment variable is set to ``1``, ``true`` or ``yes``
        (case-insensitive). A failing or unstartable build only produces a
        warning: the binaries might already exist, or the user might be
        installing on a platform that is not built for.

        Args:
            version: Passed in by the hook interface; not used here.
            build_data: Passed in by the hook interface; not used here.
        """
        # Imported locally so this block does not depend on the module's
        # top-level import list.
        import sys

        # Check if we should skip the build (useful for development)
        if os.environ.get('SKIP_BINARY_BUILD', '').lower() in ('1', 'true', 'yes'):
            print("Skipping binary build (SKIP_BINARY_BUILD is set)")
            return

        # Use the interpreter running this hook: a bare "python" may resolve
        # to a different installation or not exist on PATH at all.
        interpreter = sys.executable or "python"

        print("Building comparison engine binaries...")
        try:
            result = subprocess.run(
                [interpreter, "-m", "build_differ"],
                check=True,
                capture_output=True,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            print(f"Warning: Binary build failed: {e}")
            if e.stdout:
                print(f"stdout: {e.stdout}")
            if e.stderr:
                print(f"stderr: {e.stderr}")
            # Don't fail the build - binaries might already exist
            # or the user might be installing on a platform we don't build for
        except OSError as e:
            # The interpreter could not be launched at all; handled like a
            # failed build so packaging can still proceed.
            print(f"Warning: Binary build could not be started: {e}")
        else:
            if result.stdout:
                print(result.stdout)
            # stderr is captured too; surface it so it is not silently lost.
            if result.stderr:
                print(f"stderr: {result.stderr}")

0 commit comments

Comments
 (0)