11/* SPDX-License-Identifier: GPL-2.0 */
22
3+ #include <linux/stringify.h>
34#include <linux/linkage.h>
45#include <asm/alternative.h>
56#include <asm/fpu-insn.h>
1213#define COPY1 %v5
1314#define COPY2 %v6
1415#define COPY3 %v7
15- #define PERM4 %v16
16- #define PERM8 %v17
17- #define PERM12 %v18
1816#define BEPERM %v19
1917#define TMP0 %v20
2018#define TMP1 %v21
2321
/*
 * Read-only constant table used by the ChaCha20 block function below.
 *
 *   offset 0:  the four 32-bit words of the "expand 32-byte k" constant;
 *              read with a single 16-byte VL into COPY0.
 *   last row:  VPERM byte selector 03 02 01 00 | 07 06 05 04 | ..., i.e. it
 *              reverses the byte order inside each 32-bit word; read into
 *              BEPERM.
 *
 * The three "rotl N bytes" rows are VPERM selectors that rotate a vector
 * left by 4, 8 or 12 bytes (one, two or three 32-bit words). VSLDB takes
 * that byte count as an immediate operand, so the word rotations need no
 * selector rows once VSLDB is used. Without them the byte-swap row sits at
 * offset 16 (which is the displacement used by the VL that loads BEPERM)
 * and the table is 32 bytes long, matching the smaller .balign value.
 *
 * NOTE(review): the offsets above assume %r1 holds the address of
 * .Lconstants; the instruction that sets %r1 is outside this excerpt.
 */
2422 .section .rodata
2523
26- .balign 128
24+ .balign 32
2725.Lconstants:
2826 .long 0x61707865 ,0x3320646e ,0x79622d32 ,0x6b206574 # endian-neutral
29- .long 0x04050607 ,0x08090a0b ,0x0c0d0e0f ,0x00010203 # rotl 4 bytes
30- .long 0x08090a0b ,0x0c0d0e0f ,0x00010203 ,0x04050607 # rotl 8 bytes
31- .long 0x0c0d0e0f ,0x00010203 ,0x04050607 ,0x08090a0b # rotl 12 bytes
3227 .long 0x03020100 ,0x07060504 ,0x0b0a0908 ,0x0f0e0d0c # byte swap
3328
3429 .text
@@ -48,8 +43,8 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
4843 /* COPY0 = "expand 32-byte k" */
4944 VL COPY0,0 ,,%r1
5045
51- /* PERM4-PERM12, BEPERM = byte selectors for VPERM */
52- VLM PERM4, BEPERM,16 ,%r1
46+ /* BEPERM = byte selectors for VPERM */
47+ ALTERNATIVE __stringify(VL BEPERM,16 ,, %r1), "brcl 0,0" , ALT_FACILITY( 148 )
5348
5449 /* COPY1,COPY2 = key */
5550 VLM COPY1,COPY2,0 ,%r3
@@ -89,11 +84,11 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
8984 VERLLF STATE1,STATE1,7
9085
9186 /* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
92- VPERM STATE1,STATE1,STATE1,PERM4
87+ VSLDB STATE1,STATE1,STATE1,4
9388 /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
94- VPERM STATE2,STATE2,STATE2,PERM8
89+ VSLDB STATE2,STATE2,STATE2,8
9590 /* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
96- VPERM STATE3,STATE3,STATE3,PERM12
91+ VSLDB STATE3,STATE3,STATE3,12
9792
9893 /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
9994 VAF STATE0,STATE0,STATE1
@@ -116,11 +111,11 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
116111 VERLLF STATE1,STATE1,7
117112
118113 /* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
119- VPERM STATE1,STATE1,STATE1,PERM12
114+ VSLDB STATE1,STATE1,STATE1,12
120115 /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
121- VPERM STATE2,STATE2,STATE2,PERM8
116+ VSLDB STATE2,STATE2,STATE2,8
122117 /* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
123- VPERM STATE3,STATE3,STATE3,PERM4
118+ VSLDB STATE3,STATE3,STATE3,4
124119 brctg %r0,.Ldoubleround
125120
126121 /* OUTPUT0 = STATE0 + STATE0 */
0 commit comments