Skip to content

Commit 3e7c12e

Browse files
Merge pull request #3044 from SixLabors/js/resize-perf
Minor optimizations to Resize.
2 parents 498a0a0 + d041f47 commit 3e7c12e

5 files changed

Lines changed: 94 additions & 24 deletions

File tree

src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeKernelMap.cs

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,12 @@ protected virtual void Dispose(bool disposing)
102102
[MethodImpl(InliningOptions.ShortMethod)]
103103
internal ref ResizeKernel GetKernel(nuint destIdx) => ref this.kernels[(int)destIdx];
104104

105+
/// <summary>
106+
/// Returns a read-only span of <see cref="ResizeKernel"/> over the underlying kernel data.
107+
/// </summary>
108+
[MethodImpl(InliningOptions.ShortMethod)]
109+
internal ReadOnlySpan<ResizeKernel> GetKernelSpan() => this.kernels;
110+
105111
/// <summary>
106112
/// Computes the weights to apply at each pixel when resizing.
107113
/// </summary>

src/ImageSharp/Processing/Processors/Transforms/Resize/ResizeWorker.cs

Lines changed: 74 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -110,34 +110,62 @@ public void FillDestinationPixels(RowInterval rowInterval, Buffer2D<TPixel> dest
110110
{
111111
Span<Vector4> tempColSpan = this.tempColumnBuffer.GetSpan();
112112

113-
// When creating transposedFirstPassBuffer, we made sure it's contiguous:
113+
// When creating transposedFirstPassBuffer, we made sure it's contiguous.
114114
Span<Vector4> transposedFirstPassBufferSpan = this.transposedFirstPassBuffer.DangerousGetSingleSpan();
115115

116116
int left = this.targetWorkingRect.Left;
117-
int right = this.targetWorkingRect.Right;
118117
int width = this.targetWorkingRect.Width;
118+
nuint widthCount = (uint)width;
119+
120+
// Normalize destination-space Y to kernel indices using uint arithmetic.
121+
// This relies on the contract that processing addresses are normalized (cropping/padding handled by targetOrigin).
122+
int targetOriginY = this.targetOrigin.Y;
123+
124+
// Hoist invariant calculations outside the loop.
125+
int currentWindowMax = this.currentWindow.Max;
126+
int currentWindowMin = this.currentWindow.Min;
127+
nuint workerHeight = (uint)this.workerHeight;
128+
nuint workerHeight2 = workerHeight * 2;
129+
130+
// Ref-walk the kernel table to avoid bounds checks in the tight loop.
131+
ReadOnlySpan<ResizeKernel> vKernels = this.verticalKernelMap.GetKernelSpan();
132+
ref ResizeKernel vKernelBase = ref MemoryMarshal.GetReference(vKernels);
133+
134+
ref Vector4 tempRowBase = ref MemoryMarshal.GetReference(tempColSpan);
135+
119136
for (int y = rowInterval.Min; y < rowInterval.Max; y++)
120137
{
121-
// Ensure offsets are normalized for cropping and padding.
122-
ResizeKernel kernel = this.verticalKernelMap.GetKernel((uint)(y - this.targetOrigin.Y));
138+
// Normalize destination-space Y to an unsigned kernel index.
139+
uint vIdx = (uint)(y - targetOriginY);
140+
ref ResizeKernel kernel = ref Unsafe.Add(ref vKernelBase, (nint)vIdx);
123141

124-
while (kernel.StartIndex + kernel.Length > this.currentWindow.Max)
142+
// Slide the working window when the kernel would read beyond the current cached region.
143+
int kernelEnd = kernel.StartIndex + kernel.Length;
144+
while (kernelEnd > currentWindowMax)
125145
{
126146
this.Slide();
147+
currentWindowMax = this.currentWindow.Max;
148+
currentWindowMin = this.currentWindow.Min;
127149
}
128150

129-
ref Vector4 tempRowBase = ref MemoryMarshal.GetReference(tempColSpan);
151+
int top = kernel.StartIndex - currentWindowMin;
152+
ref Vector4 colRef0 = ref transposedFirstPassBufferSpan[top];
130153

131-
int top = kernel.StartIndex - this.currentWindow.Min;
154+
// Unroll by 2 and advance column refs via arithmetic to reduce inner-loop overhead.
155+
nuint i = 0;
156+
for (; i + 1 < widthCount; i += 2)
157+
{
158+
ref Vector4 colRef1 = ref Unsafe.Add(ref colRef0, workerHeight);
132159

133-
ref Vector4 fpBase = ref transposedFirstPassBufferSpan[top];
160+
Unsafe.Add(ref tempRowBase, i) = kernel.ConvolveCore(ref colRef0);
161+
Unsafe.Add(ref tempRowBase, i + 1) = kernel.ConvolveCore(ref colRef1);
134162

135-
for (nuint x = 0; x < (uint)(right - left); x++)
136-
{
137-
ref Vector4 firstPassColumnBase = ref Unsafe.Add(ref fpBase, x * (uint)this.workerHeight);
163+
colRef0 = ref Unsafe.Add(ref colRef0, workerHeight2);
164+
}
138165

139-
// Destination color components
140-
Unsafe.Add(ref tempRowBase, x) = kernel.ConvolveCore(ref firstPassColumnBase);
166+
if (i < widthCount)
167+
{
168+
Unsafe.Add(ref tempRowBase, i) = kernel.ConvolveCore(ref colRef0);
141169
}
142170

143171
Span<TPixel> targetRowSpan = destination.DangerousGetRowSpan(y).Slice(left, width);
@@ -171,7 +199,19 @@ private void CalculateFirstPassValues(RowInterval calculationInterval)
171199

172200
nuint left = (uint)this.targetWorkingRect.Left;
173201
nuint right = (uint)this.targetWorkingRect.Right;
202+
nuint widthCount = right - left;
203+
204+
// Normalize destination-space X to kernel indices using uint arithmetic.
205+
// This relies on the contract that processing addresses are normalized (cropping/padding handled by targetOrigin).
174206
nuint targetOriginX = (uint)this.targetOrigin.X;
207+
208+
nuint workerHeight = (uint)this.workerHeight;
209+
int currentWindowMin = this.currentWindow.Min;
210+
211+
// Ref-walk the kernel table to avoid bounds checks in the tight loop.
212+
ReadOnlySpan<ResizeKernel> hKernels = this.horizontalKernelMap.GetKernelSpan();
213+
ref ResizeKernel hKernelBase = ref MemoryMarshal.GetReference(hKernels);
214+
175215
for (int y = calculationInterval.Min; y < calculationInterval.Max; y++)
176216
{
177217
Span<TPixel> sourceRow = this.source.DangerousGetRowSpan(y);
@@ -182,17 +222,30 @@ private void CalculateFirstPassValues(RowInterval calculationInterval)
182222
tempRowSpan,
183223
this.conversionModifiers);
184224

185-
// optimization for:
186-
// Span<Vector4> firstPassSpan = transposedFirstPassBufferSpan.Slice(y - this.currentWindow.Min);
187-
ref Vector4 firstPassBaseRef = ref transposedFirstPassBufferSpan[y - this.currentWindow.Min];
225+
ref Vector4 firstPassBaseRef = ref transposedFirstPassBufferSpan[y - currentWindowMin];
226+
227+
// Unroll by 2 to reduce loop and kernel lookup overhead.
228+
nuint x = left;
229+
nuint z = 0;
230+
231+
for (; z + 1 < widthCount; x += 2, z += 2)
232+
{
233+
nuint hIdx0 = (uint)(x - targetOriginX);
234+
nuint hIdx1 = (uint)((x + 1) - targetOriginX);
235+
236+
ref ResizeKernel kernel0 = ref Unsafe.Add(ref hKernelBase, (nint)hIdx0);
237+
ref ResizeKernel kernel1 = ref Unsafe.Add(ref hKernelBase, (nint)hIdx1);
238+
239+
Unsafe.Add(ref firstPassBaseRef, z * workerHeight) = kernel0.Convolve(tempRowSpan);
240+
Unsafe.Add(ref firstPassBaseRef, (z + 1) * workerHeight) = kernel1.Convolve(tempRowSpan);
241+
}
188242

189-
for (nuint x = left, z = 0; x < right; x++, z++)
243+
if (z < widthCount)
190244
{
191-
ResizeKernel kernel = this.horizontalKernelMap.GetKernel(x - targetOriginX);
245+
nuint hIdx = (uint)(x - targetOriginX);
246+
ref ResizeKernel kernel = ref Unsafe.Add(ref hKernelBase, (nint)hIdx);
192247

193-
// optimization for:
194-
// firstPassSpan[x * this.workerHeight] = kernel.Convolve(tempRowSpan);
195-
Unsafe.Add(ref firstPassBaseRef, z * (uint)this.workerHeight) = kernel.Convolve(tempRowSpan);
248+
Unsafe.Add(ref firstPassBaseRef, z * workerHeight) = kernel.Convolve(tempRowSpan);
196249
}
197250
}
198251
}

tests/ImageSharp.Benchmarks/Config.cs

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
using BenchmarkDotNet.Environments;
1111
using BenchmarkDotNet.Jobs;
1212
using BenchmarkDotNet.Reports;
13+
using BenchmarkDotNet.Toolchains.InProcess.Emit;
1314

1415
namespace SixLabors.ImageSharp.Benchmarks;
1516

@@ -45,6 +46,15 @@ public Short() => this.AddJob(
4546
.WithArguments([new MsBuildArgument("/p:DebugType=portable")]));
4647
}
4748

49+
public class StandardInProcess : Config
50+
{
51+
public StandardInProcess() => this.AddJob(
52+
Job.Default
53+
.WithRuntime(CoreRuntime.Core80)
54+
.WithToolchain(InProcessEmitToolchain.Instance)
55+
.WithArguments([new MsBuildArgument("/p:DebugType=portable")]));
56+
}
57+
4858
#if OS_WINDOWS
4959
private bool IsElevated => new WindowsPrincipal(WindowsIdentity.GetCurrent()).IsInRole(WindowsBuiltInRole.Administrator);
5060
#endif

tests/ImageSharp.Benchmarks/ImageSharp.Benchmarks.csproj

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,9 @@
5757

5858
<ItemGroup>
5959
<PackageReference Include="Magick.NET-Q16-AnyCPU" />
60-
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
61-
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.14.0" Condition="'$(IsWindows)'=='true'" />
60+
<PackageReference Include="Microsoft.VisualStudio.DiagnosticsHub.BenchmarkDotNetDiagnosers" Version="18.3.36812.1" />
61+
<PackageReference Include="BenchmarkDotNet" Version="0.15.8" />
62+
<PackageReference Include="BenchmarkDotNet.Diagnostics.Windows" Version="0.15.8" Condition="'$(IsWindows)'=='true'" />
6263
<PackageReference Include="Colourful" />
6364
<PackageReference Include="NetVips" />
6465
<PackageReference Include="NetVips.Native" />

tests/ImageSharp.Benchmarks/Processing/Resize.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212

1313
namespace SixLabors.ImageSharp.Benchmarks;
1414

15-
[Config(typeof(Config.Standard))]
15+
[Config(typeof(Config.StandardInProcess))]
1616
public abstract class Resize<TPixel>
1717
where TPixel : unmanaged, IPixel<TPixel>
1818
{

0 commit comments

Comments
 (0)