We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent ae9d5eb · commit 9cc5e90 · Copy full SHA for 9cc5e90
1 file changed
ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32_ns.cl
@@ -140,6 +140,12 @@ static inline half e8m0_to_fp16(uchar x) {
140
return as_half(bits);
141
}
142
143
+static inline float e8m0_to_fp32(uchar x) {
144
+ int bits;
145
+ bits = (x == 0) ? 0x00400000 : ((uint) x << 23);
146
+ return as_float(bits);
147
+}
148
+
149
150
__attribute__((qcom_wave_pair_mode(1))) // 1=force single 2=force pair
151
kernel void kernel_gemm_moe_mxfp4_f32_ns(
@@ -187,7 +193,7 @@ kernel void kernel_gemm_moe_mxfp4_f32_ns(
187
193
188
194
// Load scale for current mxfp4 block
189
195
uint s_offset = s_sub_offset + get_global_id(0);
190
- half s = e8m0_to_fp16(src0_d[s_offset]);
196
+ float s = e8m0_to_fp32(src0_d[s_offset]);
191
197
192
198
// Load 16 fp4 (64-bits) in transposed layout
199
uint2 mxfp4x16;
0 commit comments