|
1 | | -#![allow(clippy::needless_range_loop)] |
2 | | -use crate::table::TABLE; |
3 | | - |
4 | 1 | pub(crate) const COLS: usize = 8; |
5 | | -const ROUNDS: u64 = 10; |
6 | 2 |
|
7 | | -#[inline(always)] |
8 | | -fn column(x: &[u64; COLS], c: [usize; 8]) -> u64 { |
9 | | - let mut t = 0; |
10 | | - for i in 0..8 { |
11 | | - let sl = 8 * (7 - i); |
12 | | - let idx = ((x[c[i]] >> sl) & 0xFF) as usize; |
13 | | - t ^= TABLE[i][idx]; |
14 | | - } |
15 | | - t |
16 | | -} |
| 3 | +mod soft; |
17 | 4 |
|
18 | | -#[inline(always)] |
19 | | -fn rndq(mut x: [u64; COLS], r: u64) -> [u64; COLS] { |
20 | | - for i in 0..COLS { |
21 | | - x[i] ^= u64::MAX.wrapping_sub((i as u64) << 4) ^ r; |
22 | | - } |
23 | | - [ |
24 | | - column(&x, [1, 3, 5, 7, 0, 2, 4, 6]), |
25 | | - column(&x, [2, 4, 6, 0, 1, 3, 5, 7]), |
26 | | - column(&x, [3, 5, 7, 1, 2, 4, 6, 0]), |
27 | | - column(&x, [4, 6, 0, 2, 3, 5, 7, 1]), |
28 | | - column(&x, [5, 7, 1, 3, 4, 6, 0, 2]), |
29 | | - column(&x, [6, 0, 2, 4, 5, 7, 1, 3]), |
30 | | - column(&x, [7, 1, 3, 5, 6, 0, 2, 4]), |
31 | | - column(&x, [0, 2, 4, 6, 7, 1, 3, 5]), |
32 | | - ] |
33 | | -} |
| 5 | +cfg_if::cfg_if! { |
| 6 | + if #[cfg(any(not(any(target_arch = "x86_64", target_arch = "x86")), groestl_force_soft))] { |
| 7 | + pub(crate) use soft::*; |
| 8 | + } else { |
| 9 | + mod avx512_gfni; |
34 | 10 |
|
35 | | -#[inline(always)] |
36 | | -fn rndp(mut x: [u64; COLS], r: u64) -> [u64; COLS] { |
37 | | - for i in 0..COLS { |
38 | | - x[i] ^= ((i as u64) << 60) ^ r; |
39 | | - } |
40 | | - [ |
41 | | - column(&x, [0, 1, 2, 3, 4, 5, 6, 7]), |
42 | | - column(&x, [1, 2, 3, 4, 5, 6, 7, 0]), |
43 | | - column(&x, [2, 3, 4, 5, 6, 7, 0, 1]), |
44 | | - column(&x, [3, 4, 5, 6, 7, 0, 1, 2]), |
45 | | - column(&x, [4, 5, 6, 7, 0, 1, 2, 3]), |
46 | | - column(&x, [5, 6, 7, 0, 1, 2, 3, 4]), |
47 | | - column(&x, [6, 7, 0, 1, 2, 3, 4, 5]), |
48 | | - column(&x, [7, 0, 1, 2, 3, 4, 5, 6]), |
49 | | - ] |
50 | | -} |
| 11 | + cpufeatures::new!(cpuid_avx512_gfni, "avx", "avx512f", "avx512vbmi", "gfni"); |
51 | 12 |
|
52 | | -pub(crate) fn compress(h: &mut [u64; COLS], block: &[u8; 64]) { |
53 | | - let mut q = [0u64; COLS]; |
54 | | - for (chunk, v) in block.chunks_exact(8).zip(q.iter_mut()) { |
55 | | - *v = u64::from_be_bytes(chunk.try_into().unwrap()); |
56 | | - } |
57 | | - let mut p = [0u64; COLS]; |
58 | | - for i in 0..COLS { |
59 | | - p[i] = h[i] ^ q[i]; |
60 | | - } |
61 | | - for i in 0..ROUNDS { |
62 | | - q = rndq(q, i); |
63 | | - } |
64 | | - for i in 0..ROUNDS { |
65 | | - p = rndp(p, i << 56); |
66 | | - } |
67 | | - for i in 0..COLS { |
68 | | - h[i] ^= q[i] ^ p[i]; |
69 | | - } |
70 | | -} |
| 13 | + #[inline(always)] |
| 14 | + pub(crate) fn compress(h: &mut [u64; COLS], blocks: &[[u8; 64]]) { |
| 15 | + if cpuid_avx512_gfni::get() { |
| 16 | + #[allow(unsafe_code)] |
| 17 | + unsafe { avx512_gfni::compress(h, blocks); } |
| 18 | + } else { |
| 19 | + soft::compress(h, blocks); |
| 20 | + } |
| 21 | + } |
71 | 22 |
|
72 | | -pub(crate) fn p(h: &[u64; COLS]) -> [u64; COLS] { |
73 | | - let mut p = *h; |
74 | | - for i in 0..ROUNDS { |
75 | | - p = rndp(p, i << 56); |
76 | | - } |
77 | | - for i in 0..COLS { |
78 | | - p[i] ^= h[i]; |
| 23 | + #[inline(always)] |
| 24 | + pub(crate) fn p(h: &[u64; COLS]) -> [u64; COLS] { |
| 25 | + if cpuid_avx512_gfni::get() { |
| 26 | + #[allow(unsafe_code)] |
| 27 | + unsafe { avx512_gfni::p(h) } |
| 28 | + } else { |
| 29 | + soft::p(h) |
| 30 | + } |
| 31 | + } |
79 | 32 | } |
80 | | - p |
81 | 33 | } |
0 commit comments