Skip to content

Commit 7b4a3e1

Browse files
s390x: Emit instructions for bitwise FP ops (#12232)
* s390x: Emit instructions for bitwise FP ops. Cranelift requires that bitwise operations work across all data types, including floating point. The prior implementation of bitwise operations (xor in this example) would cause a panic with the message "no rule matched for term aluop_xor". This patch adds lowerings for bitwise operations on floating-point registers, using the vector instructions and the vector register overlay property of the s390x register file. * Add test for bitops on fp. pulley and aarch64 are omitted as they currently fail this test.
1 parent 09b7422 commit 7b4a3e1

3 files changed

Lines changed: 159 additions & 0 deletions

File tree

cranelift/codegen/src/isa/s390x/lower.isle

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,10 @@
997997
(rule (lower (has_type (vr128_ty ty) (bnot x)))
998998
(vec_not ty x))
999999

1000+
;; NOT a scalar float register with vector NOR, using the vector register overlay.
1001+
(rule 5 (lower (has_type (ty_scalar_float _) (bnot x)))
1002+
(vec_not $F64X2 x))
1003+
10001004
;; With z15 (bnot (bxor ...)) can be a single instruction, similar to the
10011005
;; (bxor _ (bnot _)) lowering.
10021006
(rule 3 (lower (has_type (and (mie3_enabled) (fits_in_64 ty)) (bnot (bxor x y))))
@@ -1033,6 +1037,10 @@
10331037
(rule 0 (lower (has_type (vr128_ty ty) (band x y)))
10341038
(vec_and ty x y))
10351039

1040+
;; AND two scalar float registers, using the vector register overlay.
1041+
(rule 11 (lower (has_type (ty_scalar_float _) (band x y)))
1042+
(vec_and $F64X2 x y))
1043+
10361044
;; Specialized lowerings for `(band x (bnot y))` which is additionally produced
10371045
;; by Cranelift's `band_not` instruction that is legalized into the simpler
10381046
;; forms early on.
@@ -1075,6 +1083,10 @@
10751083
(rule 0 (lower (has_type (vr128_ty ty) (bor x y)))
10761084
(vec_or ty x y))
10771085

1086+
;; OR two scalar float registers, using the vector register overlay.
1087+
(rule 11 (lower (has_type (ty_scalar_float _) (bor x y)))
1088+
(vec_or $F64X2 x y))
1089+
10781090
;; Specialized lowerings for `(bor x (bnot y))` which is additionally produced
10791091
;; by Cranelift's `bor_not` instruction that is legalized into the simpler
10801092
;; forms early on.
@@ -1114,6 +1126,10 @@
11141126
(rule 0 (lower (has_type (vr128_ty ty) (bxor x y)))
11151127
(vec_xor ty x y))
11161128

1129+
;; XOR two scalar float registers, using the vector register overlay.
1130+
(rule 9 (lower (has_type (ty_scalar_float _) (bxor x y)))
1131+
(vec_xor $F64X2 x y))
1132+
11171133
;; Specialized lowerings for `(bxor x (bnot y))` which is additionally produced
11181134
;; by Cranelift's `bxor_not` instruction that is legalized into the simpler
11191135
;; forms early on.

cranelift/filetests/filetests/isa/s390x/bitwise.clif

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,3 +1076,67 @@ block0(v0: i32x4, v1: i32x4):
10761076
; vnx %v24, %v24, %v25
10771077
; br %r14
10781078

1079+
function %bnot_f64(f64) -> f64 {
1080+
block0(v0: f64):
1081+
v1 = bnot v0
1082+
return v1
1083+
}
1084+
1085+
; VCode:
1086+
; block0:
1087+
; vno %v0, %v0, %v0
1088+
; br %r14
1089+
;
1090+
; Disassembled:
1091+
; block0: ; offset 0x0
1092+
; vno %v0, %v0, %v0
1093+
; br %r14
1094+
1095+
function %band_f64(f64, f64) -> f64 {
1096+
block0(v0: f64, v1: f64):
1097+
v2 = band v0, v1
1098+
return v2
1099+
}
1100+
1101+
; VCode:
1102+
; block0:
1103+
; vn %v0, %v0, %v2
1104+
; br %r14
1105+
;
1106+
; Disassembled:
1107+
; block0: ; offset 0x0
1108+
; vn %v0, %v0, %v2
1109+
; br %r14
1110+
1111+
function %bor_f64(f64, f64) -> f64 {
1112+
block0(v0: f64, v1: f64):
1113+
v2 = bor v0, v1
1114+
return v2
1115+
}
1116+
1117+
; VCode:
1118+
; block0:
1119+
; vo %v0, %v0, %v2
1120+
; br %r14
1121+
;
1122+
; Disassembled:
1123+
; block0: ; offset 0x0
1124+
; vo %v0, %v0, %v2
1125+
; br %r14
1126+
1127+
function %bxor_f64(f64, f64) -> f64 {
1128+
block0(v0: f64, v1: f64):
1129+
v2 = bxor v0, v1
1130+
return v2
1131+
}
1132+
1133+
; VCode:
1134+
; block0:
1135+
; vx %v0, %v0, %v2
1136+
; br %r14
1137+
;
1138+
; Disassembled:
1139+
; block0: ; offset 0x0
1140+
; vx %v0, %v0, %v2
1141+
; br %r14
1142+
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
test interpret
2+
test run
3+
set opt_level=none
4+
target s390x
5+
target riscv64
6+
target riscv64 has_c has_zcb
7+
target s390x has_mie3
8+
target x86_64
9+
10+
set opt_level=speed
11+
target s390x
12+
target riscv64
13+
target riscv64 has_c has_zcb
14+
target s390x has_mie3
15+
target x86_64
16+
17+
function %test_bnot_f32(f32) -> f32 fast {
18+
block0(v0: f32):
19+
v2 = bnot v0
20+
return v2
21+
}
22+
23+
; run: %test_bnot_f32(0x1.0) == -0x1.fffffep1
24+
25+
function %test_bnot_f64(f64) -> f64 fast {
26+
block0(v0: f64):
27+
v2 = bnot v0
28+
return v2
29+
}
30+
31+
; run: %test_bnot_f64(0x1.0) == -0x1.fffffffffffffp1
32+
33+
function %test_band_f32(f32, f32) -> f32 fast {
34+
block0(v0: f32, v1: f32):
35+
v2 = band v0, v1
36+
return v2
37+
}
38+
39+
; run: %test_band_f32(0x1.ff, 0x1.0ff) == 0x1.0fp0
40+
41+
function %test_band_f64(f64, f64) -> f64 fast {
42+
block0(v0: f64, v1: f64):
43+
v2 = band v0, v1
44+
return v2
45+
}
46+
47+
; run: %test_band_f64(0x1.ff, 0x1.0ff) == 0x1.0fp0
48+
49+
function %test_bor_f32(f32, f32) -> f32 fast {
50+
block0(v0: f32, v1: f32):
51+
v2 = bor v0, v1
52+
return v2
53+
}
54+
55+
; run: %test_bor_f32(0x1.ff, 0x1.0ff) == 0x1.fffp0
56+
57+
function %test_bor_f64(f64, f64) -> f64 fast {
58+
block0(v0: f64, v1: f64):
59+
v2 = bor v0, v1
60+
return v2
61+
}
62+
63+
; run: %test_bor_f64(0x1.ff, 0x1.0ff) == 0x1.fffp0
64+
65+
function %test_bxor_f32(f32, f32) -> f32 fast {
66+
block0(v0: f32, v1: f32):
67+
v2 = bxor v0, v1
68+
return v2
69+
}
70+
71+
; run: %test_bxor_f32(0x1.ff, 0x1.0ff) == 0x0.f0fp-126
72+
73+
function %test_bxor_f64(f64, f64) -> f64 fast {
74+
block0(v0: f64, v1: f64):
75+
v2 = bxor v0, v1
76+
return v2
77+
}
78+
79+
; run: %test_bxor_f64(0x1.ff, 0x1.0ff) == 0x0.f0fp-1022

0 commit comments

Comments
 (0)