Skip to content

Commit 5afd0a4

Browse files
committed
Add a LSX implementation
1 parent a02d0ca commit 5afd0a4

9 files changed

Lines changed: 342 additions & 1 deletion

File tree

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ aarch64_neon = []
3838
aarch64_neon_prefetch = []
3939

4040
armv7_neon = []
41+
loongarch64_lsx = []
4142

4243
# make the portable SIMD public implementation available (experimental, nightly only)
4344
portable_public_imp = ["public_imp"]

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ This library has been thoroughly tested with sample data as well as fuzzing and
1818
* ARM64 (aarch64) SIMD is supported since Rust 1.61
1919
* WASM (wasm32) SIMD is supported
2020
* 🆕 armv7 NEON support with the `armv7_neon` feature on nightly Rust
21+
* 🆕 loongarch64 LSX support with the `loongarch64_lsx` feature on nightly Rust
2122
* x86-64: Up to 23 times faster than the std library on valid non-ASCII, up to four times faster on ASCII
2223
* aarch64: Up to eleven times faster than the std library on valid non-ASCII, up to four times faster on ASCII (Apple Silicon)
2324
* Faster than the original simdjson implementation
@@ -92,6 +93,10 @@ runtime using the `std::arch::is_arm_feature_detected!` macro unless the CPU tar
9293
`RUSTFLAGS="-C target-feature=+neon"`. Some targets such as `thumbv7neon-linux-androideabi` and `thumbv7neon-unknown-linux-gnueabihf`
9394
have NEON enabled by default.
9495

96+
### LoongArch64
97+
Requires a recent nightly Rust compiler. The `loongarch64_lsx` feature needs to be enabled.
98+
The LSX implementation is selected at compile time if the `lsx` target feature is enabled.
99+
95100
### WASM32
96101
For wasm32 support, the implementation is selected at compile time based on the presence of the `simd128` target feature.
97102
Use `RUSTFLAGS="-C target-feature=+simd128"` to enable the WASM SIMD implementation. WASM, at

bench/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ simdutf8_wasmtime = ["wasmtime"]
2929
[dependencies]
3030
core_affinity = "0.8.1"
3131
criterion = "0.8.1"
32-
simdutf8 = { version = "*", path = "..", features = ["aarch64_neon"] }
32+
simdutf8 = { version = "*", path = "..", features = ["aarch64_neon", "loongarch64_lsx"] }
3333
simdjson-utf8 = { version = "*", path = "simdjson-utf8", optional = true }
3434
# default is cranelift which is not as performant as the llvm backend
3535
wasmer = { version = "2.1", optional = true, default-features = false }

src/basic.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,24 @@ pub mod imp {
260260
}
261261
}
262262

263+
/// Includes the loongarch64 SIMD implementations.
264+
#[cfg(all(
265+
feature = "loongarch64_lsx",
266+
target_arch = "loongarch64",
267+
target_feature = "lsx"
268+
))]
269+
pub mod loongarch64 {
270+
/// Includes the LSX-based validation implementation for loongarch64 CPUs.
271+
///
272+
/// Using the provided functionality on CPUs which do not support LSX is undefined
273+
/// behavior and will very likely cause a crash.
274+
pub mod lsx {
275+
pub use crate::implementation::loongarch64::lsx::validate_utf8_basic as validate_utf8;
276+
pub use crate::implementation::loongarch64::lsx::ChunkedUtf8ValidatorImp;
277+
pub use crate::implementation::loongarch64::lsx::Utf8ValidatorImp;
278+
}
279+
}
280+
263281
/// Includes the wasm32 SIMD implementations.
264282
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
265283
pub mod wasm32 {

src/compat.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,22 @@ pub mod imp {
146146
}
147147
}
148148

149+
/// Includes the loongarch64 LSX SIMD implementations.
150+
#[cfg(all(
151+
feature = "loongarch64_lsx",
152+
target_arch = "loongarch64",
153+
target_feature = "lsx"
154+
))]
155+
pub mod loongarch64 {
156+
/// Includes the LSX-based validation implementation for loongarch64 LSX-compatible CPUs.
157+
///
158+
/// Using the provided functionality on CPUs which do not support LSX is undefined
159+
/// behavior and will very likely cause a crash.
160+
pub mod lsx {
161+
pub use crate::implementations::loongarch64::lsx::validate_utf8_compat as validate_utf8;
162+
}
163+
}
164+
149165
/// Includes the wasm32 SIMD implementations.
150166
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
151167
pub mod wasm32 {
Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
//! Contains the loongarch64 LSX UTF-8 validation implementation.
2+
3+
#[cfg(target_arch = "loongarch64")]
4+
use core::arch::loongarch64::{
5+
lsx_vand_v, lsx_vld, lsx_vldi, lsx_vmskltz_b, lsx_vmsknz_b, lsx_vor_v, lsx_vpickve2gr_w,
6+
lsx_vreplgr2vr_b, lsx_vshuf_b, lsx_vsrli_b, lsx_vssub_bu, lsx_vxor_v, m128i,
7+
};
8+
9+
use crate::implementation::helpers::Utf8CheckAlgorithm;
10+
11+
// LSX SIMD primitives
12+
13+
type SimdU8Value = crate::implementation::helpers::SimdU8Value<m128i>;
14+
15+
impl SimdU8Value {
16+
#[flexpect::e(clippy::too_many_arguments)]
17+
#[flexpect::e(clippy::cast_possible_wrap)]
18+
#[target_feature(enable = "lsx")]
19+
#[inline]
20+
unsafe fn from_32_cut_off_leading(
21+
_v0: u8,
22+
_v1: u8,
23+
_v2: u8,
24+
_v3: u8,
25+
_v4: u8,
26+
_v5: u8,
27+
_v6: u8,
28+
_v7: u8,
29+
_v8: u8,
30+
_v9: u8,
31+
_v10: u8,
32+
_v11: u8,
33+
_v12: u8,
34+
_v13: u8,
35+
_v14: u8,
36+
_v15: u8,
37+
v16: u8,
38+
v17: u8,
39+
v18: u8,
40+
v19: u8,
41+
v20: u8,
42+
v21: u8,
43+
v22: u8,
44+
v23: u8,
45+
v24: u8,
46+
v25: u8,
47+
v26: u8,
48+
v27: u8,
49+
v28: u8,
50+
v29: u8,
51+
v30: u8,
52+
v31: u8,
53+
) -> Self {
54+
let arr: [u8; 16] = [
55+
v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31,
56+
];
57+
Self::from(lsx_vld::<0>(arr.as_ptr().cast()))
58+
}
59+
60+
#[flexpect::e(clippy::too_many_arguments)]
61+
#[flexpect::e(clippy::cast_possible_wrap)]
62+
#[target_feature(enable = "lsx")]
63+
#[inline]
64+
unsafe fn repeat_16(
65+
v0: u8,
66+
v1: u8,
67+
v2: u8,
68+
v3: u8,
69+
v4: u8,
70+
v5: u8,
71+
v6: u8,
72+
v7: u8,
73+
v8: u8,
74+
v9: u8,
75+
v10: u8,
76+
v11: u8,
77+
v12: u8,
78+
v13: u8,
79+
v14: u8,
80+
v15: u8,
81+
) -> Self {
82+
let arr: [u8; 16] = [
83+
v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15,
84+
];
85+
Self::from(lsx_vld::<0>(arr.as_ptr().cast()))
86+
}
87+
88+
#[flexpect::e(clippy::cast_ptr_alignment)]
89+
#[target_feature(enable = "lsx")]
90+
#[inline]
91+
unsafe fn load_from(ptr: *const u8) -> Self {
92+
Self::from(lsx_vld::<0>(ptr.cast()))
93+
}
94+
95+
#[flexpect::e(clippy::too_many_arguments)]
96+
#[target_feature(enable = "lsx")]
97+
#[inline]
98+
unsafe fn lookup_16(
99+
self,
100+
v0: u8,
101+
v1: u8,
102+
v2: u8,
103+
v3: u8,
104+
v4: u8,
105+
v5: u8,
106+
v6: u8,
107+
v7: u8,
108+
v8: u8,
109+
v9: u8,
110+
v10: u8,
111+
v11: u8,
112+
v12: u8,
113+
v13: u8,
114+
v14: u8,
115+
v15: u8,
116+
) -> Self {
117+
let src = Self::repeat_16(
118+
v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15,
119+
)
120+
.0;
121+
122+
Self::from(lsx_vshuf_b(src, src, self.0))
123+
}
124+
125+
#[flexpect::e(clippy::cast_possible_wrap)]
126+
#[target_feature(enable = "lsx")]
127+
#[inline]
128+
unsafe fn splat(val: u8) -> Self {
129+
Self::from(lsx_vreplgr2vr_b(val as i32))
130+
}
131+
132+
#[target_feature(enable = "lsx")]
133+
#[inline]
134+
unsafe fn splat0() -> Self {
135+
Self::from(lsx_vldi::<0>())
136+
}
137+
138+
#[target_feature(enable = "lsx")]
139+
#[inline]
140+
unsafe fn or(self, b: Self) -> Self {
141+
Self::from(lsx_vor_v(self.0, b.0))
142+
}
143+
144+
#[target_feature(enable = "lsx")]
145+
#[inline]
146+
unsafe fn and(self, b: Self) -> Self {
147+
Self::from(lsx_vand_v(self.0, b.0))
148+
}
149+
150+
#[target_feature(enable = "lsx")]
151+
#[inline]
152+
unsafe fn xor(self, b: Self) -> Self {
153+
Self::from(lsx_vxor_v(self.0, b.0))
154+
}
155+
156+
#[target_feature(enable = "lsx")]
157+
#[inline]
158+
unsafe fn saturating_sub(self, b: Self) -> Self {
159+
Self::from(lsx_vssub_bu(self.0, b.0))
160+
}
161+
162+
// ugly but shr<N> requires const generics
163+
#[target_feature(enable = "lsx")]
164+
#[inline]
165+
unsafe fn shr4(self) -> Self {
166+
Self::from(lsx_vsrli_b::<4>(self.0))
167+
}
168+
169+
// ugly but prev<N> requires const generics
170+
#[target_feature(enable = "lsx")]
171+
#[inline]
172+
unsafe fn prev1(self, prev: Self) -> Self {
173+
let ctrl_arr: [u8; 16] = [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14];
174+
175+
Self::from(lsx_vshuf_b(
176+
prev.0,
177+
self.0,
178+
lsx_vld::<0>(ctrl_arr.as_ptr().cast()),
179+
))
180+
}
181+
182+
// ugly but prev<N> requires const generics
183+
#[target_feature(enable = "lsx")]
184+
#[inline]
185+
unsafe fn prev2(self, prev: Self) -> Self {
186+
let ctrl_arr: [u8; 16] = [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13];
187+
188+
Self::from(lsx_vshuf_b(
189+
prev.0,
190+
self.0,
191+
lsx_vld::<0>(ctrl_arr.as_ptr().cast()),
192+
))
193+
}
194+
195+
// ugly but prev<N> requires const generics
196+
#[target_feature(enable = "lsx")]
197+
#[inline]
198+
unsafe fn prev3(self, prev: Self) -> Self {
199+
let ctrl_arr: [u8; 16] = [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
200+
201+
Self::from(lsx_vshuf_b(
202+
prev.0,
203+
self.0,
204+
lsx_vld::<0>(ctrl_arr.as_ptr().cast()),
205+
))
206+
}
207+
208+
#[target_feature(enable = "lsx")]
209+
#[inline]
210+
unsafe fn any_bit_set(self) -> bool {
211+
lsx_vpickve2gr_w::<0>(lsx_vmsknz_b(self.0)) != 0
212+
}
213+
214+
#[target_feature(enable = "lsx")]
215+
#[inline]
216+
unsafe fn is_ascii(self) -> bool {
217+
lsx_vpickve2gr_w::<0>(lsx_vmskltz_b(self.0)) == 0
218+
}
219+
}
220+
221+
impl From<m128i> for SimdU8Value {
222+
#[inline]
223+
fn from(val: m128i) -> Self {
224+
Self(val)
225+
}
226+
}
227+
228+
impl Utf8CheckAlgorithm<SimdU8Value> {
229+
#[target_feature(enable = "lsx")]
230+
#[inline]
231+
unsafe fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value {
232+
let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0xe0 - 0x80));
233+
let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0xf0 - 0x80));
234+
is_third_byte.or(is_fourth_byte)
235+
}
236+
}
237+
238+
#[inline]
239+
// Intentionally a no-op: the pointer argument is ignored and nothing is prefetched
// (the `PREFETCH` constant defined right after this function is `false`).
unsafe fn simd_prefetch(_ptr: *const u8) {}
240+
241+
const PREFETCH: bool = false;
242+
use crate::implementation::helpers::TempSimdChunkA16 as TempSimdChunk;
243+
simd_input_128_bit!(#[target_feature(enable = "lsx")]);
244+
algorithm_simd!(#[target_feature(enable = "lsx")]);
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#[cfg(all(feature = "loongarch64_lsx", target_feature = "lsx"))]
2+
pub(crate) mod lsx;
3+
4+
#[inline]
5+
#[cfg(all(feature = "loongarch64_lsx", target_feature = "lsx"))]
6+
pub(crate) unsafe fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> {
7+
if input.len() < super::helpers::SIMD_CHUNK_SIZE {
8+
return super::validate_utf8_basic_fallback(input);
9+
}
10+
11+
validate_utf8_basic_lsx(input)
12+
}
13+
14+
#[inline(never)]
15+
#[cfg(all(feature = "loongarch64_lsx", target_feature = "lsx"))]
16+
unsafe fn validate_utf8_basic_lsx(input: &[u8]) -> Result<(), crate::basic::Utf8Error> {
17+
lsx::validate_utf8_basic(input)
18+
}
19+
20+
#[cfg(not(all(feature = "loongarch64_lsx", target_feature = "lsx")))]
21+
pub(crate) use super::validate_utf8_basic_fallback as validate_utf8_basic;
22+
23+
#[inline]
24+
#[cfg(all(feature = "loongarch64_lsx", target_feature = "lsx"))]
25+
pub(crate) unsafe fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error> {
26+
if input.len() < super::helpers::SIMD_CHUNK_SIZE {
27+
return super::validate_utf8_compat_fallback(input);
28+
}
29+
30+
validate_utf8_compat_lsx(input)
31+
}
32+
33+
#[inline(never)]
34+
#[cfg(all(feature = "loongarch64_lsx", target_feature = "lsx"))]
35+
unsafe fn validate_utf8_compat_lsx(input: &[u8]) -> Result<(), crate::compat::Utf8Error> {
36+
lsx::validate_utf8_compat(input)
37+
}
38+
39+
#[cfg(not(all(feature = "loongarch64_lsx", target_feature = "lsx")))]
40+
pub(crate) use super::validate_utf8_compat_fallback as validate_utf8_compat;

0 commit comments

Comments
 (0)