Skip to content

Commit c7f9f88

Browse files
committed
Fixed: Regression in Hash Quality, Workaround .NET 8 Strangeness & Revert to using 64-bit FNV1
1 parent 91f0dd9 commit c7f9f88

2 files changed

Lines changed: 102 additions & 67 deletions

File tree

src/Reloaded.Memory/Internals/Algorithms/UnstableStringHash.cs

Lines changed: 55 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,17 @@ internal static unsafe nuint GetHashCodeUnstable(this ReadOnlySpan<char> text)
4545
return text.UnstableHashVec256();
4646

4747
// Over 4 Vec128 regs (16 * 4 = 64 bytes)
48-
if (Vector256.IsHardwareAccelerated && length >= (sizeof(Vector128<ulong>) / sizeof(char)) * 4)
48+
if (Vector128.IsHardwareAccelerated && length >= (sizeof(Vector128<ulong>) / sizeof(char)) * 4)
4949
return text.UnstableHashVec128();
5050
#endif
5151

5252
return text.UnstableHashNonVector();
5353
}
5454

5555
#if NET7_0_OR_GREATER
56+
#if NET8_0 // Bug in .NET 8 seems to cause this to not re-jit to tier1 till like 200k calls on Linux x64
57+
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
58+
#endif
5659
internal static unsafe UIntPtr UnstableHashVec128(this ReadOnlySpan<char> text)
5760
{
5861
fixed (char* src = &text.GetPinnableReference())
@@ -62,32 +65,32 @@ internal static unsafe UIntPtr UnstableHashVec128(this ReadOnlySpan<char> text)
6265
var hash2 = hash1;
6366
var ptr = (nuint*)(src);
6467

65-
var prime = Vector128.Create((uint)0x01000193);
66-
var hash1_128 = Vector128.Create(0x811c9dc5);
67-
var hash2_128 = Vector128.Create(0x811c9dc5);
68+
var prime = Vector128.Create((ulong)0x100000001b3);
69+
var hash1_128 = Vector128.Create(0xcbf29ce484222325);
70+
var hash2_128 = Vector128.Create(0xcbf29ce484222325);
6871

6972
while (length >= sizeof(Vector128<ulong>) / sizeof(char) * 4) // 64 byte chunks.
7073
{
7174
length -= (sizeof(Vector128<ulong>) / sizeof(char)) * 4;
72-
hash1_128 = Vector128.Xor(hash1_128, Vector128.Load((ulong*)ptr).AsUInt32());
73-
hash1_128 = Vector128.Multiply(hash1_128, prime);
75+
hash1_128 = Vector128.Xor(hash1_128, Vector128.Load((ulong*)ptr));
76+
hash1_128 = HashMultiply128(hash1_128, prime);
7477

75-
hash2_128 = Vector128.Xor(hash2_128, Vector128.Load((ulong*)ptr + 2).AsUInt32());
76-
hash2_128 = Vector128.Multiply(hash2_128, prime);
78+
hash2_128 = Vector128.Xor(hash2_128, Vector128.Load((ulong*)ptr + 2));
79+
hash2_128 = HashMultiply128(hash2_128, prime);
7780

78-
hash1_128 = Vector128.Xor(hash1_128, Vector128.Load((ulong*)ptr + 4).AsUInt32());
79-
hash1_128 = Vector128.Multiply(hash1_128, prime);
81+
hash1_128 = Vector128.Xor(hash1_128, Vector128.Load((ulong*)ptr + 4));
82+
hash1_128 = HashMultiply128(hash1_128, prime);
8083

81-
hash2_128 = Vector128.Xor(hash2_128, Vector128.Load((ulong*)ptr + 6).AsUInt32());
82-
hash2_128 = Vector128.Multiply(hash2_128, prime);
84+
hash2_128 = Vector128.Xor(hash2_128, Vector128.Load((ulong*)ptr + 6));
85+
hash2_128 = HashMultiply128(hash2_128, prime);
8386
ptr += (sizeof(Vector128<ulong>) / sizeof(nuint)) * 4;
8487
}
8588

8689
while (length >= sizeof(Vector128<ulong>) / sizeof(char)) // 16 byte chunks.
8790
{
8891
length -= sizeof(Vector128<ulong>) / sizeof(char);
89-
hash1_128 = Vector128.Xor(hash1_128, Vector128.Load((ulong*)ptr).AsUInt32());
90-
hash1_128 = Vector128.Multiply(hash1_128, prime);
92+
hash1_128 = Vector128.Xor(hash1_128, Vector128.Load((ulong*)ptr));
93+
hash1_128 = HashMultiply128(hash1_128, prime);
9194
ptr += (sizeof(Vector128<ulong>) / sizeof(nuint));
9295
}
9396

@@ -117,6 +120,9 @@ internal static unsafe UIntPtr UnstableHashVec128(this ReadOnlySpan<char> text)
117120
}
118121
}
119122

123+
#if NET8_0 // Bug in .NET 8 seems to cause this to not re-jit to tier1 till like 200k calls on Linux x64
124+
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
125+
#endif
120126
internal static unsafe UIntPtr UnstableHashVec256(this ReadOnlySpan<char> text)
121127
{
122128
fixed (char* src = &text.GetPinnableReference())
@@ -126,32 +132,32 @@ internal static unsafe UIntPtr UnstableHashVec256(this ReadOnlySpan<char> text)
126132
var hash2 = hash1;
127133
var ptr = (nuint*)(src);
128134

129-
var prime = Vector256.Create((uint)0x01000193);
130-
var hash1_256 = Vector256.Create(0x811c9dc5);
131-
var hash2_256 = Vector256.Create(0x811c9dc5);
135+
var prime = Vector256.Create((ulong)0x100000001b3);
136+
var hash1_256 = Vector256.Create(0xcbf29ce484222325);
137+
var hash2_256 = Vector256.Create(0xcbf29ce484222325);
132138

133139
while (length >= sizeof(Vector256<ulong>) / sizeof(char) * 4) // 128 byte chunks.
134140
{
135141
length -= (sizeof(Vector256<ulong>) / sizeof(char)) * 4;
136-
hash1_256 = Vector256.Xor(hash1_256, Vector256.Load((ulong*)ptr).AsUInt32());
137-
hash1_256 = Vector256.Multiply(hash1_256, prime).AsUInt32();
142+
hash1_256 = Vector256.Xor(hash1_256, Vector256.Load((ulong*)ptr));
143+
hash1_256 = HashMultiply256(hash1_256, prime);
138144

139-
hash2_256 = Vector256.Xor(hash2_256, Vector256.Load((ulong*)ptr + 4).AsUInt32());
140-
hash2_256 = Vector256.Multiply(hash2_256, prime).AsUInt32();
145+
hash2_256 = Vector256.Xor(hash2_256, Vector256.Load((ulong*)ptr + 4));
146+
hash2_256 = HashMultiply256(hash2_256, prime);
141147

142-
hash1_256 = Vector256.Xor(hash1_256, Vector256.Load((ulong*)ptr + 8).AsUInt32());
143-
hash1_256 = Vector256.Multiply(hash1_256, prime).AsUInt32();
148+
hash1_256 = Vector256.Xor(hash1_256, Vector256.Load((ulong*)ptr + 8));
149+
hash1_256 = HashMultiply256(hash1_256, prime);
144150

145-
hash2_256 = Vector256.Xor(hash2_256, Vector256.Load((ulong*)ptr + 12).AsUInt32());
146-
hash2_256 = Vector256.Multiply(hash2_256, prime).AsUInt32();
151+
hash2_256 = Vector256.Xor(hash2_256, Vector256.Load((ulong*)ptr + 12));
152+
hash2_256 = HashMultiply256(hash2_256, prime);
147153
ptr += (sizeof(Vector256<ulong>) / sizeof(nuint)) * 4;
148154
}
149155

150156
while (length >= sizeof(Vector256<ulong>) / sizeof(char)) // 32 byte chunks.
151157
{
152158
length -= sizeof(Vector256<ulong>) / sizeof(char);
153-
hash1_256 = Vector256.Xor(hash1_256, Vector256.Load((ulong*)ptr).AsUInt32());
154-
hash1_256 = Vector256.Multiply(hash1_256, prime).AsUInt32();
159+
hash1_256 = Vector256.Xor(hash1_256, Vector256.Load((ulong*)ptr));
160+
hash1_256 = HashMultiply256(hash1_256, prime);
155161
ptr += (sizeof(Vector256<ulong>) / sizeof(nuint));
156162
}
157163

@@ -184,6 +190,28 @@ internal static unsafe UIntPtr UnstableHashVec256(this ReadOnlySpan<char> text)
184190
return hash1 + (hash2 * 1566083941);
185191
}
186192
}
193+
194+
/// <summary>
/// Multiplication step shared by the 128-bit vectorized hash loops.
/// Both operands are reinterpreted as 32-bit lanes, multiplied, and the result is
/// reinterpreted back to 64-bit lanes.
/// </summary>
/// <param name="a">Left operand; the hash loops that call this pass the running hash state.</param>
/// <param name="b">Right operand; the hash loops that call this pass the broadcast prime.</param>
/// <returns>The multiplied lanes viewed as a vector of 64-bit unsigned integers.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
195+
internal static Vector128<ulong> HashMultiply128(Vector128<ulong> a, Vector128<ulong> b)
196+
{
197+
// See comment in HashMultiply256
// SSE2 path: Sse2.Multiply on the 32-bit views of both operands. Per the .NET
// intrinsics documentation this is PMULUDQ, the 128-bit counterpart of VPMULUDQ.
198+
if (Sse2.IsSupported)
199+
return Sse2.Multiply(a.AsUInt32(), b.AsUInt32()).AsUInt64();
200+
201+
// Portable path: lane-wise 32-bit multiply through the cross-platform Vector128 API.
// NOTE(review): this does not look numerically equivalent to the SSE2 path, so the
// result presumably differs between CPUs; confirm that is acceptable for callers.
return Vector128.Multiply(a.AsUInt32(), b.AsUInt32()).AsUInt64();
202+
}
203+
204+
/// <summary>
/// Multiplication step shared by the 256-bit vectorized hash loops.
/// Both operands are reinterpreted as 32-bit lanes, multiplied, and the result is
/// reinterpreted back to 64-bit lanes.
/// </summary>
/// <param name="a">Left operand; the hash loops that call this pass the running hash state.</param>
/// <param name="b">Right operand; the hash loops that call this pass the broadcast prime.</param>
/// <returns>The multiplied lanes viewed as a vector of 64-bit unsigned integers.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
205+
internal static Vector256<ulong> HashMultiply256(Vector256<ulong> a, Vector256<ulong> b)
206+
{
207+
// On AVX2, we want VPMULUDQ (emitted via Avx2.Multiply on the 32-bit views).
208+
// Unfortunately the portable Vector256.Multiply can't produce this instruction,
209+
// so without AVX2 we fall back to multiplying 32-bit ints, which isn't as good, but still not terrible.
210+
if (Avx2.IsSupported)
211+
return Avx2.Multiply(a.AsUInt32(), b.AsUInt32()).AsUInt64();
212+
213+
// NOTE(review): this branch does not look numerically equivalent to the AVX2 branch,
// so the result presumably differs between CPUs; confirm that is acceptable for callers.
return Vector256.Multiply(a.AsUInt32(), b.AsUInt32()).AsUInt64();
214+
}
187215
#endif
188216

189217
internal static unsafe UIntPtr UnstableHashNonVector(this ReadOnlySpan<char> text)

src/Reloaded.Memory/Internals/Algorithms/UnstableStringHashLower.cs

Lines changed: 47 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
using Reloaded.Memory.Utilities;
44
using static Reloaded.Memory.Internals.Backports.System.Text.Unicode.Utf16Utility;
55
#if NET7_0_OR_GREATER
6+
using static Reloaded.Memory.Internals.Algorithms.UnstableStringHash;
67
using Reloaded.Memory.Extensions;
78
using Reloaded.Memory.Internals.Backports.System.Globalization;
89
using System.Numerics;
@@ -48,7 +49,7 @@ internal static unsafe nuint GetHashCodeUnstableLower(this ReadOnlySpan<char> te
4849
return text.UnstableHashVec256Lower();
4950

5051
// Over 4 Vec128 regs (16 * 4 = 64 bytes)
51-
if (Vector256.IsHardwareAccelerated && length >= (sizeof(Vector128<ulong>) / sizeof(char)) * 4)
52+
if (Vector128.IsHardwareAccelerated && length >= (sizeof(Vector128<ulong>) / sizeof(char)) * 4)
5253
return text.UnstableHashVec128Lower();
5354
#endif
5455

@@ -104,6 +105,9 @@ internal static unsafe nuint GetHashCodeUnstableLowerSlow(this ReadOnlySpan<char
104105
}
105106

106107
#if NET7_0_OR_GREATER
108+
#if NET8_0 // Bug in .NET 8 seems to cause this to not re-jit to tier1 till like 200k calls on Linux x64
109+
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
110+
#endif
107111
internal static unsafe UIntPtr UnstableHashVec128Lower(this ReadOnlySpan<char> text)
108112
{
109113
fixed (char* src = &text.GetPinnableReference())
@@ -113,59 +117,59 @@ internal static unsafe UIntPtr UnstableHashVec128Lower(this ReadOnlySpan<char> t
113117
var hash2 = hash1;
114118
var ptr = (nuint*)(src);
115119

116-
var prime = Vector128.Create((uint)0x01000193);
117-
var hash1_128 = Vector128.Create(0x811c9dc5);
118-
var hash2_128 = Vector128.Create(0x811c9dc5);
120+
var prime = Vector128.Create((ulong)0x100000001b3);
121+
var hash1_128 = Vector128.Create(0xcbf29ce484222325);
122+
var hash2_128 = Vector128.Create(0xcbf29ce484222325);
119123

120124
// We "normalize to lowercase" every char by ORing with 0x0020. This casts
121125
// a very wide net because it will change, e.g., '^' to '~'. But that should
122126
// be ok because we expect this to be very rare in practice.
123-
var toLower = Vector128.Create<uint>(0x0020_0020);
127+
var toLower = Vector128.Create<short>(0x0020).AsUInt64();
124128

125129
while (length >= sizeof(Vector128<ulong>) / sizeof(char) * 4) // 64 byte chunks.
126130
{
127131
length -= (sizeof(Vector128<ulong>) / sizeof(char)) * 4;
128132

129-
var v0 = Vector128.Load((ulong*)ptr).AsUInt32();
130-
if (!AllCharsInVector128AreAscii(v0))
133+
var v0 = Vector128.Load((ulong*)ptr);
134+
if (!AllCharsInVector128AreAscii(v0.AsUInt16()))
131135
goto NotAscii;
132136

133137
hash1_128 = Vector128.Xor(hash1_128, Vector128.BitwiseOr(v0, toLower));
134-
hash1_128 = Vector128.Multiply(hash1_128, prime);
138+
hash1_128 = HashMultiply128(hash1_128, prime);
135139

136-
v0 = Vector128.Load((ulong*)ptr + 2).AsUInt32();
137-
if (!AllCharsInVector128AreAscii(v0))
140+
v0 = Vector128.Load((ulong*)ptr + 2);
141+
if (!AllCharsInVector128AreAscii(v0.AsUInt16()))
138142
goto NotAscii;
139143

140144
hash2_128 = Vector128.Xor(hash2_128, Vector128.BitwiseOr(v0, toLower));
141-
hash2_128 = Vector128.Multiply(hash2_128, prime);
145+
hash2_128 = HashMultiply128(hash2_128, prime);
142146

143-
v0 = Vector128.Load((ulong*)ptr + 4).AsUInt32();
144-
if (!AllCharsInVector128AreAscii(v0))
147+
v0 = Vector128.Load((ulong*)ptr + 4);
148+
if (!AllCharsInVector128AreAscii(v0.AsUInt16()))
145149
goto NotAscii;
146150

147151
hash1_128 = Vector128.Xor(hash1_128, Vector128.BitwiseOr(v0, toLower));
148-
hash1_128 = Vector128.Multiply(hash1_128, prime);
152+
hash1_128 = HashMultiply128(hash1_128, prime);
149153

150-
v0 = Vector128.Load((ulong*)ptr + 6).AsUInt32();
151-
if (!AllCharsInVector128AreAscii(v0))
154+
v0 = Vector128.Load((ulong*)ptr + 6);
155+
if (!AllCharsInVector128AreAscii(v0.AsUInt16()))
152156
goto NotAscii;
153157

154158
hash2_128 = Vector128.Xor(hash2_128, Vector128.BitwiseOr(v0, toLower));
155-
hash2_128 = Vector128.Multiply(hash2_128, prime);
159+
hash2_128 = HashMultiply128(hash2_128, prime);
156160
ptr += (sizeof(Vector128<ulong>) / sizeof(nuint)) * 4;
157161
}
158162

159163
while (length >= sizeof(Vector128<ulong>) / sizeof(char)) // 16 byte chunks.
160164
{
161165
length -= sizeof(Vector128<ulong>) / sizeof(char);
162166

163-
var v0 = Vector128.Load((ulong*)ptr).AsUInt32();
164-
if (!AllCharsInVector128AreAscii(v0))
167+
var v0 = Vector128.Load((ulong*)ptr);
168+
if (!AllCharsInVector128AreAscii(v0.AsUInt16()))
165169
goto NotAscii;
166170

167171
hash1_128 = Vector128.Xor(hash1_128, Vector128.BitwiseOr(v0, toLower));
168-
hash1_128 = Vector128.Multiply(hash1_128, prime);
172+
hash1_128 = HashMultiply128(hash1_128, prime);
169173
ptr += (sizeof(Vector128<ulong>) / sizeof(nuint));
170174
}
171175

@@ -224,6 +228,9 @@ internal static unsafe UIntPtr UnstableHashVec128Lower(this ReadOnlySpan<char> t
224228
return GetHashCodeUnstableLowerSlow(text);
225229
}
226230

231+
#if NET8_0 // Bug in .NET 8 seems to cause this to not re-jit to tier1 till like 200k calls on Linux x64
232+
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
233+
#endif
227234
internal static unsafe UIntPtr UnstableHashVec256Lower(this ReadOnlySpan<char> text)
228235
{
229236
fixed (char* src = &text.GetPinnableReference())
@@ -233,59 +240,59 @@ internal static unsafe UIntPtr UnstableHashVec256Lower(this ReadOnlySpan<char> t
233240
var hash2 = hash1;
234241
var ptr = (nuint*)(src);
235242

236-
var prime = Vector256.Create((uint)0x01000193);
237-
var hash1_256 = Vector256.Create(0x811c9dc5);
238-
var hash2_256 = Vector256.Create(0x811c9dc5);
243+
var prime = Vector256.Create((ulong)0x100000001b3);
244+
var hash1_256 = Vector256.Create(0xcbf29ce484222325);
245+
var hash2_256 = Vector256.Create(0xcbf29ce484222325);
239246

240247
// We "normalize to lowercase" every char by ORing with 0x0020. This casts
241248
// a very wide net because it will change, e.g., '^' to '~'. But that should
242249
// be ok because we expect this to be very rare in practice.
243-
var toLower = Vector256.Create<uint>(0x0020_0020);
250+
var toLower = Vector256.Create<short>(0x0020).AsUInt64();
244251

245252
while (length >= sizeof(Vector256<ulong>) / sizeof(char) * 4) // 128 byte chunks.
246253
{
247254
length -= (sizeof(Vector256<ulong>) / sizeof(char)) * 4;
248255

249-
var v0 = Vector256.Load((ulong*)ptr).AsUInt32();
250-
if (!AllCharsInVector256AreAscii(v0))
256+
var v0 = Vector256.Load((ulong*)ptr);
257+
if (!AllCharsInVector256AreAscii(v0.AsUInt16()))
251258
goto NotAscii;
252259

253260
hash1_256 = Vector256.Xor(hash1_256, Vector256.BitwiseOr(v0, toLower));
254-
hash1_256 = Vector256.Multiply(hash1_256.AsUInt32(), prime.AsUInt32());
261+
hash1_256 = HashMultiply256(hash1_256, prime);
255262

256-
v0 = Vector256.Load((ulong*)ptr + 4).AsUInt32();
257-
if (!AllCharsInVector256AreAscii(v0))
263+
v0 = Vector256.Load((ulong*)ptr + 4);
264+
if (!AllCharsInVector256AreAscii(v0.AsUInt16()))
258265
goto NotAscii;
259266

260267
hash2_256 = Vector256.Xor(hash2_256, Vector256.BitwiseOr(v0, toLower));
261-
hash2_256 = Vector256.Multiply(hash2_256.AsUInt32(), prime.AsUInt32());
268+
hash2_256 = HashMultiply256(hash2_256, prime);
262269

263-
v0 = Vector256.Load((ulong*)ptr + 8).AsUInt32();
264-
if (!AllCharsInVector256AreAscii(v0))
270+
v0 = Vector256.Load((ulong*)ptr + 8);
271+
if (!AllCharsInVector256AreAscii(v0.AsUInt16()))
265272
goto NotAscii;
266273

267274
hash1_256 = Vector256.Xor(hash1_256, Vector256.BitwiseOr(v0, toLower));
268-
hash1_256 = Vector256.Multiply(hash1_256.AsUInt32(), prime.AsUInt32());
275+
hash1_256 = HashMultiply256(hash1_256, prime);
269276

270-
v0 = Vector256.Load((ulong*)ptr + 12).AsUInt32();
271-
if (!AllCharsInVector256AreAscii(v0))
277+
v0 = Vector256.Load((ulong*)ptr + 12);
278+
if (!AllCharsInVector256AreAscii(v0.AsUInt16()))
272279
goto NotAscii;
273280

274281
hash2_256 = Vector256.Xor(hash2_256, Vector256.BitwiseOr(v0, toLower));
275-
hash2_256 = Vector256.Multiply(hash2_256.AsUInt32(), prime.AsUInt32());
282+
hash2_256 = HashMultiply256(hash2_256, prime);
276283
ptr += (sizeof(Vector256<ulong>) / sizeof(nuint)) * 4;
277284
}
278285

279286
while (length >= sizeof(Vector256<ulong>) / sizeof(char)) // 32 byte chunks.
280287
{
281288
length -= sizeof(Vector256<ulong>) / sizeof(char);
282289

283-
var v0 = Vector256.Load((ulong*)ptr).AsUInt32();
284-
if (!AllCharsInVector256AreAscii(v0))
290+
var v0 = Vector256.Load((ulong*)ptr);
291+
if (!AllCharsInVector256AreAscii(v0.AsUInt16()))
285292
goto NotAscii;
286293

287294
hash1_256 = Vector256.Xor(hash1_256, Vector256.BitwiseOr(v0, toLower));
288-
hash1_256 = Vector256.Multiply(hash1_256.AsUInt32(), prime.AsUInt32());
295+
hash1_256 = HashMultiply256(hash1_256, prime);
289296
ptr += (sizeof(Vector256<ulong>) / sizeof(nuint));
290297
}
291298

@@ -347,7 +354,7 @@ internal static unsafe UIntPtr UnstableHashVec256Lower(this ReadOnlySpan<char> t
347354
NotAscii:
348355
return GetHashCodeUnstableLowerSlow(text);
349356
}
350-
#endif
357+
#endif
351358

352359
internal static unsafe nuint UnstableHashNonVectorLower(this ReadOnlySpan<char> text)
353360
{

0 commit comments

Comments
 (0)