Skip to content

Commit a448e02

Browse files
Add zero-allocation Regex split and match enumerators
- RegexMatchSegmentEnumerator: Zero-allocation enumerator for Regex.AsSegments()
- RegexSplitSegmentEnumerator: Zero-allocation enumerator for SplitAsSegments(Regex)
- RegexSplitJoinEnumerator: Specialized join enumerator for regex split results
- Updated AsSegments(Regex) to return ValueEnumerable
- Updated SplitAsSegments(Regex) to return ValueEnumerable
- Updated ReplaceAsSegments(Regex) to return ValueEnumerable
- All 955 tests pass
1 parent 4a7e065 commit a448e02

3 files changed

Lines changed: 370 additions & 88 deletions

File tree

Source/Extensions.StringSegment.cs

Lines changed: 23 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -97,79 +97,18 @@ public static bool IsNullOrWhiteSpace(this StringSegment segment)
9797
/// <param name="source">The source characters to look through.</param>
9898
/// <param name="pattern">The pattern to split by.</param>
9999
/// <param name="options">Can specify to omit empty entries.</param>
100-
/// <returns>An enumerable of the segments.</returns>
101-
public static IEnumerable<StringSegment> SplitAsSegments(
100+
/// <returns>A ValueEnumerable of the segments (zero-allocation when used with foreach or ZLinq).</returns>
101+
[CLSCompliant(false)]
102+
public static ValueEnumerable<RegexSplitSegmentEnumerator, StringSegment> SplitAsSegments(
102103
this string source,
103104
Regex pattern,
104105
StringSplitOptions options = StringSplitOptions.None)
105106
{
106-
return source is null
107-
? throw new ArgumentNullException(nameof(source))
108-
: pattern is null
109-
? throw new ArgumentNullException(nameof(pattern))
110-
: source.Length == 0
111-
? options.HasFlag(StringSplitOptions.RemoveEmptyEntries)
112-
? Enumerable.Empty<StringSegment>()
113-
: Enumerable.Repeat(StringSegment.Empty, 1)
114-
: SplitCore(source, pattern, options);
115-
116-
static IEnumerable<StringSegment> SplitCore(string source, Regex pattern, StringSplitOptions options)
117-
{
118-
int nextStart = 0;
119-
Match match = pattern.Match(source);
120-
bool removeEmpty = options.HasFlag(StringSplitOptions.RemoveEmptyEntries);
121-
#if NET5_0_OR_GREATER
122-
bool trimEach = options.HasFlag(StringSplitOptions.TrimEntries);
123-
#endif
124-
while (match.Success)
125-
{
126-
if (!removeEmpty || match.Index - nextStart != 0)
127-
{
128-
StringSegment next = new(source, nextStart, match.Index - nextStart);
129-
#if NET5_0_OR_GREATER
130-
if (trimEach)
131-
{
132-
next = next.Trim();
133-
if (!removeEmpty || next.Length != 0) yield return next;
134-
}
135-
else
136-
{
137-
yield return next;
138-
}
139-
#else
140-
yield return next;
141-
#endif
142-
}
143-
144-
nextStart = match.Index + match.Length;
145-
match = match.NextMatch();
146-
}
147-
148-
int len;
149-
if (removeEmpty)
150-
{
151-
len = source.Length - nextStart;
152-
if (len == 0) yield break;
153-
}
154-
else
155-
{
156-
len = source.Length - nextStart;
157-
}
107+
if (source is null) throw new ArgumentNullException(nameof(source));
108+
if (pattern is null) throw new ArgumentNullException(nameof(pattern));
158109

159-
{
160-
StringSegment next = source.AsSegment(nextStart, len);
161-
#if NET5_0_OR_GREATER
162-
if (trimEach)
163-
{
164-
next = next.Trim();
165-
if (!removeEmpty || next.Length != 0) yield return next;
166-
yield break;
167-
}
168-
#endif
169-
170-
yield return next;
171-
}
172-
}
110+
return new ValueEnumerable<RegexSplitSegmentEnumerator, StringSegment>(
111+
new RegexSplitSegmentEnumerator(source, pattern, options));
173112
}
174113

175114
/// <returns>A ValueEnumerable of the segments (zero-allocation when used with foreach or ZLinq).</returns>
@@ -216,6 +155,21 @@ public static ValueEnumerable<StringSegmentJoinEnumerator, StringSegment> Join(t
216155
new StringSegmentJoinEnumerator(source, between));
217156
}
218157

158+
/// <summary>
159+
/// Joins a regex split sequence of segments with a separator sequence (zero-allocation).
160+
/// </summary>
161+
/// <param name="source">The regex split segments to join.</param>
162+
/// <param name="between">The segment to place between each segment.</param>
163+
/// <returns>A ValueEnumerable of the joined segments (zero-allocation when used with foreach or ZLinq).</returns>
164+
[CLSCompliant(false)]
165+
public static ValueEnumerable<RegexSplitJoinEnumerator, StringSegment> Join(
166+
this ValueEnumerable<RegexSplitSegmentEnumerator, StringSegment> source,
167+
StringSegment between)
168+
{
169+
return new ValueEnumerable<RegexSplitJoinEnumerator, StringSegment>(
170+
new RegexSplitJoinEnumerator(source.Enumerator, between));
171+
}
172+
219173
/// <summary>
220174
/// Joins a sequence of segments with an optional separator sequence.
221175
/// </summary>
@@ -257,7 +211,7 @@ public static string ReplaceToString(this StringSegment source,
257211

258212
/// <inheritdoc cref="Replace(StringSegment, StringSegment, StringSegment, StringComparison)"/>
259213
[CLSCompliant(false)]
260-
public static ValueEnumerable<StringSegmentJoinEnumerator, StringSegment> ReplaceAsSegments(
214+
public static ValueEnumerable<RegexSplitJoinEnumerator, StringSegment> ReplaceAsSegments(
261215
this string source,
262216
Regex splitSequence,
263217
StringSegment replacement)

Source/RegexExtensions.cs

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
namespace Open.Text;
1+
using ZLinq;
2+
3+
namespace Open.Text;
24

35
/// <summary>
46
/// A set of regular expression extensions.
@@ -93,26 +95,15 @@ public static ReadOnlySpan<char> GetValueSpan(this GroupCollection groups, strin
9395
/// </summary>
9496
/// <param name="pattern">The pattern to search with.</param>
9597
/// <param name="input">The string to search.</param>
96-
/// <returns>An enumerable containing the found segments.</returns>
98+
/// <returns>A ValueEnumerable of the found segments (zero-allocation when used with foreach or ZLinq).</returns>
9799
/// <exception cref="ArgumentNullException">If the pattern or input is null.</exception>
98-
public static IEnumerable<StringSegment> AsSegments(this Regex pattern, string input)
100+
[CLSCompliant(false)]
101+
public static ValueEnumerable<RegexMatchSegmentEnumerator, StringSegment> AsSegments(this Regex pattern, string input)
99102
{
100-
return pattern is null
101-
? throw new ArgumentNullException(nameof(pattern))
102-
: input is null
103-
? throw new ArgumentNullException(nameof(input))
104-
: input.Length == 0
105-
? Enumerable.Empty<StringSegment>()
106-
: AsSegmentsCore(pattern, input);
103+
if (pattern is null) throw new ArgumentNullException(nameof(pattern));
104+
if (input is null) throw new ArgumentNullException(nameof(input));
107105

108-
static IEnumerable<StringSegment> AsSegmentsCore(Regex pattern, string input)
109-
{
110-
Match match = pattern.Match(input);
111-
while (match.Success)
112-
{
113-
yield return new(input, match.Index, match.Length);
114-
match = match.NextMatch();
115-
}
116-
}
106+
return new ValueEnumerable<RegexMatchSegmentEnumerator, StringSegment>(
107+
new RegexMatchSegmentEnumerator(pattern, input));
117108
}
118109
}

0 commit comments

Comments
 (0)