Skip to content

Commit ba0c815

Browse files
Add SequenceSplitJoinEnumerator for zero-allocation Replace
- SequenceSplitJoinEnumerator: Wraps StringSegmentSequenceSplitEnumerator directly - Updated Replace() and ReplaceAsSegments() to use zero-allocation path - Updated ReplaceToString() to iterate ValueEnumerable directly - Added comprehensive ZLinqImprovementsBenchmark for measuring improvements - All 955 tests pass
1 parent a448e02 commit ba0c815

4 files changed

Lines changed: 362 additions & 7 deletions

File tree

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
using BenchmarkDotNet.Attributes;
2+
using BenchmarkDotNet.Columns;
3+
using BenchmarkDotNet.Configs;
4+
using BenchmarkDotNet.Diagnosers;
5+
using BenchmarkDotNet.Jobs;
6+
using BenchmarkDotNet.Order;
7+
using Microsoft.Extensions.Primitives;
8+
using Open.Text;
9+
using System.Text.RegularExpressions;
10+
using ZLinq;
11+
12+
namespace Open.Text.Benchmarks;
13+
14+
/// <summary>
/// Benchmark suite that pairs each BCL string/regex operation with its
/// Open.Text segment-based counterpart, grouped by category so that every
/// category has one BCL baseline and one ZLinq-backed contender.
/// Memory allocations are reported alongside timing.
/// </summary>
[Config(typeof(Config))]
[MemoryDiagnoser]
[Orderer(SummaryOrderPolicy.FastestToSlowest)]
[GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory)]
[CategoriesColumn]
public class ZLinqImprovementsBenchmark
{
    /// <summary>
    /// Run configuration: memory diagnoser, mean/median/rank columns,
    /// and a single short-run job identified as "ZLinq".
    /// </summary>
    private class Config : ManualConfig
    {
        public Config()
        {
            AddDiagnoser(MemoryDiagnoser.Default);

            // Column order is significant: Mean, then Median, then Rank.
            AddColumn(StatisticColumn.Mean, StatisticColumn.Median, RankColumn.Arabic);

            AddJob(Job.ShortRun.WithId("ZLinq"));
        }
    }

    // Inputs: 5 items, 15 items, and 1000 generated items ("item1".."item1000").
    private const string SmallCsv = "apple,banana,cherry,date,elderberry";
    private const string MediumCsv = "apple,banana,cherry,date,elderberry,fig,grape,honeydew,kiwi,lemon,mango,nectarine,orange,papaya,quince";
    private static readonly string LargeCsv = string.Join(",", Enumerable.Range(1, 1000).Select(i => $"item{i}"));

    // Pre-built compiled patterns shared by the regex categories.
    private static readonly Regex CommaRegex = new(",", RegexOptions.Compiled);
    private static readonly Regex WordRegex = new(@"\w+", RegexOptions.Compiled);

    // ---------------------------------------------------------------------
    // Char split, foreach only (enumeration without materializing a result)
    // ---------------------------------------------------------------------

    /// <summary>Sums the lengths of the parts produced by <c>string.Split(char)</c>.</summary>
    [BenchmarkCategory("CharSplit-Foreach")]
    [Benchmark(Baseline = true, Description = "BCL String.Split")]
    public int CharSplit_Foreach_BCL()
    {
        int total = 0;
        foreach (var part in SmallCsv.Split(','))
        {
            total += part.Length;
        }

        return total;
    }

    /// <summary>Sums the lengths of the segments produced by <c>SplitAsSegments(char)</c>.</summary>
    [BenchmarkCategory("CharSplit-Foreach")]
    [Benchmark(Description = "SplitAsSegments (ZLinq)")]
    public int CharSplit_Foreach_ZLinq()
    {
        int total = 0;
        foreach (var part in SmallCsv.SplitAsSegments(','))
        {
            total += part.Length;
        }

        return total;
    }

    // ---------------------------------------------------------------------
    // Char split followed by Count()
    // ---------------------------------------------------------------------

    /// <summary>Counts the parts of a BCL split via <c>Count()</c>.</summary>
    [BenchmarkCategory("CharSplit-Count")]
    [Benchmark(Baseline = true, Description = "BCL Split + LINQ Count")]
    public int CharSplit_Count_BCL()
        => SmallCsv.Split(',').Count();

    /// <summary>Counts the segments of a segment split via <c>Count()</c>.</summary>
    [BenchmarkCategory("CharSplit-Count")]
    [Benchmark(Description = "SplitAsSegments + ZLinq Count")]
    public int CharSplit_Count_ZLinq()
        => SmallCsv.SplitAsSegments(',').Count();

    // ---------------------------------------------------------------------
    // Char split over the 1000-item input, foreach only
    // ---------------------------------------------------------------------

    /// <summary>Counts the parts of the large input by iterating a BCL split.</summary>
    [BenchmarkCategory("CharSplit-Large")]
    [Benchmark(Baseline = true, Description = "BCL Split (1000 items)")]
    public int CharSplit_Large_BCL()
    {
        int items = 0;
        foreach (var part in LargeCsv.Split(','))
        {
            items++;
        }

        return items;
    }

    /// <summary>Counts the segments of the large input by iterating a segment split.</summary>
    [BenchmarkCategory("CharSplit-Large")]
    [Benchmark(Description = "SplitAsSegments (1000 items)")]
    public int CharSplit_Large_ZLinq()
    {
        int items = 0;
        foreach (var part in LargeCsv.SplitAsSegments(','))
        {
            items++;
        }

        return items;
    }

    // ---------------------------------------------------------------------
    // Split on a string separator
    // ---------------------------------------------------------------------

    /// <summary>Sums the lengths of the parts produced by <c>string.Split(string)</c>.</summary>
    [BenchmarkCategory("SeqSplit")]
    [Benchmark(Baseline = true, Description = "BCL Split(string)")]
    public int SeqSplit_BCL()
    {
        int total = 0;
        foreach (var part in MediumCsv.Split(","))
        {
            total += part.Length;
        }

        return total;
    }

    /// <summary>Sums the lengths of the segments produced by <c>SplitAsSegments(string)</c>.</summary>
    [BenchmarkCategory("SeqSplit")]
    [Benchmark(Description = "SplitAsSegments(string)")]
    public int SeqSplit_ZLinq()
    {
        int total = 0;
        foreach (var part in MediumCsv.SplitAsSegments(","))
        {
            total += part.Length;
        }

        return total;
    }

    // ---------------------------------------------------------------------
    // Split on a regex
    // ---------------------------------------------------------------------

    /// <summary>Sums the lengths of the parts produced by <c>Regex.Split</c>.</summary>
    [BenchmarkCategory("RegexSplit")]
    [Benchmark(Baseline = true, Description = "Regex.Split")]
    public int RegexSplit_BCL()
    {
        int total = 0;
        foreach (var part in CommaRegex.Split(MediumCsv))
        {
            total += part.Length;
        }

        return total;
    }

    /// <summary>Sums the lengths of the segments produced by <c>SplitAsSegments(Regex)</c>.</summary>
    [BenchmarkCategory("RegexSplit")]
    [Benchmark(Description = "SplitAsSegments(Regex)")]
    public int RegexSplit_ZLinq()
    {
        int total = 0;
        foreach (var part in MediumCsv.SplitAsSegments(CommaRegex))
        {
            total += part.Length;
        }

        return total;
    }

    // ---------------------------------------------------------------------
    // Regex match enumeration
    // ---------------------------------------------------------------------

    /// <summary>Sums the lengths of all <c>Regex.Matches</c> results.</summary>
    [BenchmarkCategory("RegexMatch")]
    [Benchmark(Baseline = true, Description = "Regex.Matches")]
    public int RegexMatch_BCL()
    {
        int total = 0;
        foreach (Match match in WordRegex.Matches(MediumCsv))
        {
            total += match.Length;
        }

        return total;
    }

    /// <summary>Sums the lengths of all segments yielded by <c>Regex.AsSegments</c>.</summary>
    [BenchmarkCategory("RegexMatch")]
    [Benchmark(Description = "AsSegments(Regex)")]
    public int RegexMatch_ZLinq()
    {
        int total = 0;
        foreach (var part in WordRegex.AsSegments(MediumCsv))
        {
            total += part.Length;
        }

        return total;
    }

    // ---------------------------------------------------------------------
    // Replace
    // ---------------------------------------------------------------------

    /// <summary>Replaces every comma with " | " using <c>string.Replace</c>.</summary>
    [BenchmarkCategory("Replace")]
    [Benchmark(Baseline = true, Description = "BCL String.Replace")]
    public string Replace_BCL()
        => MediumCsv.Replace(",", " | ");

    /// <summary>Replaces every comma with " | " using <c>ReplaceToString</c> on a segment.</summary>
    [BenchmarkCategory("Replace")]
    [Benchmark(Description = "ReplaceToString (ZLinq)")]
    public string Replace_ZLinq()
        => MediumCsv.AsSegment().ReplaceToString(",", " | ");

    // ---------------------------------------------------------------------
    // Query chain: Where + Select + Sum
    // ---------------------------------------------------------------------

    /// <summary>Sums the lengths of parts longer than four characters (BCL split).</summary>
    [BenchmarkCategory("LinqChain")]
    [Benchmark(Baseline = true, Description = "BCL + LINQ Chain")]
    public int LinqChain_BCL()
        => SmallCsv.Split(',')
            .Where(part => part.Length > 4)
            .Select(part => part.Length)
            .Sum();

    /// <summary>Sums the lengths of segments longer than four characters (segment split).</summary>
    [BenchmarkCategory("LinqChain")]
    [Benchmark(Description = "ZLinq Chain")]
    public int LinqChain_ZLinq()
        => SmallCsv.SplitAsSegments(',')
            .Where(part => part.Length > 4)
            .Select(part => part.Length)
            .Sum();

    // ---------------------------------------------------------------------
    // Materialization into an array
    // ---------------------------------------------------------------------

    /// <summary>Returns the length of the array that <c>string.Split</c> already produces.</summary>
    [BenchmarkCategory("ToArray")]
    [Benchmark(Baseline = true, Description = "BCL Split (already array)")]
    public int ToArray_BCL()
        => SmallCsv.Split(',').Length;

    /// <summary>Materializes the segment split with <c>ToArray()</c> and returns its length.</summary>
    [BenchmarkCategory("ToArray")]
    [Benchmark(Description = "SplitAsSegments.ToArray()")]
    public int ToArray_ZLinq()
        => SmallCsv.SplitAsSegments(',').ToArray().Length;
}

Source/Extensions.StringSegment.cs

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,21 @@ public static ValueEnumerable<RegexSplitJoinEnumerator, StringSegment> Join(
170170
new RegexSplitJoinEnumerator(source.Enumerator, between));
171171
}
172172

173+
/// <summary>
/// Wraps the enumerator of a sequence-split result in a
/// <see cref="SequenceSplitJoinEnumerator"/> so that <paramref name="between"/>
/// is emitted between the split segments as they are enumerated.
/// </summary>
/// <param name="source">The sequence-split segments to join.</param>
/// <param name="between">The segment to place between each segment.</param>
/// <returns>A ValueEnumerable over the joined segments.</returns>
[CLSCompliant(false)]
public static ValueEnumerable<SequenceSplitJoinEnumerator, StringSegment> Join(
    this ValueEnumerable<StringSegmentSequenceSplitEnumerator, StringSegment> source,
    StringSegment between)
{
    // The split enumerator is handed over by value; no intermediate collection is built here.
    var joiner = new SequenceSplitJoinEnumerator(source.Enumerator, between);
    return new ValueEnumerable<SequenceSplitJoinEnumerator, StringSegment>(joiner);
}
187+
173188
/// <summary>
174189
/// Joins a sequence of segments with an optional separator sequence.
175190
/// </summary>
@@ -194,20 +209,25 @@ public static string JoinToString(this IEnumerable<StringSegment> source, String
194209
/// <returns>A ValueEnumerable of the segments (zero-allocation when used with foreach or ZLinq).</returns>
195210
/// <inheritdoc cref="SplitAsSegments(string, string, StringSplitOptions, StringComparison)"/>
196211
[CLSCompliant(false)]
public static ValueEnumerable<SequenceSplitJoinEnumerator, StringSegment> Replace(
    this StringSegment source,
    StringSegment splitSequence,
    StringSegment replacement,
    StringComparison comparisonType = StringComparison.Ordinal)
{
    // Split on the sequence, then pass the split result straight to Join
    // (no ToArray() in between) so the replacement is inserted between the pieces.
    var pieces = SplitAsSegments(source, splitSequence, comparisonType: comparisonType);
    return Join(pieces, replacement);
}
203218

204219
/// <returns>The string formed by concatenating every segment of the replacement result.</returns>
/// <inheritdoc cref="Replace(StringSegment, StringSegment, StringSegment, StringComparison)"/>
public static string ReplaceToString(this StringSegment source,
    StringSegment splitSequence,
    StringSegment replacement,
    StringComparison comparisonType = StringComparison.Ordinal)
{
    var builder = new StringBuilder();

    // Enumerate the Replace result directly and append each segment's characters.
    foreach (var piece in Replace(source, splitSequence, replacement, comparisonType))
    {
        builder.Append(piece.AsSpan());
    }

    return builder.ToString();
}
211231

212232
/// <inheritdoc cref="Replace(StringSegment, StringSegment, StringSegment, StringComparison)"/>
213233
[CLSCompliant(false)]
@@ -219,7 +239,7 @@ public static ValueEnumerable<RegexSplitJoinEnumerator, StringSegment> ReplaceAs
219239

220240
/// <inheritdoc cref="Replace(StringSegment, StringSegment, StringSegment, StringComparison)"/>
221241
[CLSCompliant(false)]
222-
public static ValueEnumerable<StringSegmentJoinEnumerator, StringSegment> ReplaceAsSegments(
242+
public static ValueEnumerable<SequenceSplitJoinEnumerator, StringSegment> ReplaceAsSegments(
223243
this string source,
224244
StringSegment splitSequence,
225245
StringSegment replacement,

Source/StringSegmentSplitEnumerable.cs

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -817,3 +817,114 @@ public void Dispose()
817817
_source.Dispose();
818818
}
819819
}
820+
821+
/// <summary>
/// Value-type enumerator that interleaves a separator with the segments produced by a
/// <see cref="StringSegmentSequenceSplitEnumerator"/>, which it holds directly as a field.
/// </summary>
[SuppressMessage("Design", "CA1815:Override equals and operator equals on value types", Justification = "Enumerators with mutable state should not be compared for equality")]
public struct SequenceSplitJoinEnumerator : IValueEnumerator<StringSegment>
{
    // Emitted between elements; when it has no value, nothing is inserted.
    private readonly StringSegment _separator;

    // Not readonly: advancing it mutates its state.
    private StringSegmentSequenceSplitEnumerator _source;

    // True until the first element has been read from the source.
    private bool _isFirst;

    // Non-empty element held back while its preceding separator is yielded.
    private StringSegment _pendingElement;

    // Set once the source has been exhausted.
    private bool _completed;

    internal SequenceSplitJoinEnumerator(StringSegmentSequenceSplitEnumerator source, StringSegment separator)
    {
        _source = source;
        _separator = separator;
        _pendingElement = default;
        _isFirst = true;
        _completed = false;
    }

    /// <summary>
    /// Produces the next segment of the joined sequence.
    /// </summary>
    /// <remarks>
    /// An empty first element is skipped. Every later element is preceded by the separator
    /// when the separator has a value; an empty later element contributes only the separator.
    /// When the separator has no value, empty elements are skipped entirely.
    /// </remarks>
    /// <param name="current">The next segment, or <c>default</c> when the sequence has ended.</param>
    /// <returns><c>true</c> if a segment was produced; otherwise <c>false</c>.</returns>
    public bool TryGetNext(out StringSegment current)
    {
        current = default;

        if (_completed)
        {
            return false;
        }

        // A separator was yielded on the previous call; now release the element behind it.
        if (_pendingElement.HasValue)
        {
            current = _pendingElement;
            _pendingElement = default;
            return true;
        }

        while (_source.TryGetNext(out StringSegment next))
        {
            bool wasFirst = _isFirst;
            _isFirst = false;
            bool isEmpty = next.Length == 0;

            if (!wasFirst && _separator.HasValue)
            {
                // Yield the separator now; a non-empty element follows on the next call.
                if (!isEmpty)
                {
                    _pendingElement = next;
                }

                current = _separator;
                return true;
            }

            // First element, or no separator configured: only non-empty elements are yielded.
            if (isEmpty)
            {
                continue;
            }

            current = next;
            return true;
        }

        _completed = true;
        return false;
    }

    /// <summary>
    /// Always reports that the count is unavailable without enumerating.
    /// </summary>
    /// <param name="count">Always set to 0.</param>
    /// <returns>Always <c>false</c>.</returns>
    public bool TryGetNonEnumeratedCount(out int count)
    {
        count = 0;
        return false;
    }

    /// <summary>
    /// Always reports that no span over the joined segments is available.
    /// </summary>
    /// <param name="span">Always set to <c>default</c>.</param>
    /// <returns>Always <c>false</c>.</returns>
    public bool TryGetSpan(out ReadOnlySpan<StringSegment> span)
    {
        span = default;
        return false;
    }

    /// <summary>
    /// Always declines to copy; <paramref name="destination"/> is left untouched.
    /// </summary>
    /// <returns>Always <c>false</c>.</returns>
    public bool TryCopyTo(scoped Span<StringSegment> destination, Index offset) => false;

    /// <summary>
    /// Disposes the wrapped split enumerator.
    /// </summary>
    public void Dispose() => _source.Dispose();
}

0 commit comments

Comments
 (0)