Skip to content

Commit 9cc5e90

Browse files
shawngu-quiclhez
authored and committed
fix precision issue
1 parent ae9d5eb commit 9cc5e90

1 file changed

Lines changed: 7 additions & 1 deletion

File tree

ggml/src/ggml-opencl/kernels/gemm_moe_mxfp4_f32_ns.cl

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,12 @@ static inline half e8m0_to_fp16(uchar x) {
140140
return as_half(bits);
141141
}
142142

143+
// Build a float directly from the 8-bit exponent byte x (E8M0, going by the
// function name) by constructing its 32-bit pattern and reinterpreting it.
//   x != 0 : x is shifted into bits 23..30, the binary32 exponent field, with
//            sign and mantissa left at zero; for x in 1..254 this is 2^(x - 127).
//   x == 0 : a zero exponent field would encode a subnormal, so the pattern
//            0x00400000 (the subnormal equal to 2^-127) is used instead.
// NOTE(review): x == 255 produces 0x7F800000, i.e. +inf -- confirm that this is
// the intended handling of that code for the scale format in use.
static inline float e8m0_to_fp32(uchar x) {
144+
int bits; // raw binary32 bit pattern; reinterpreted (not converted) below
145+
bits = (x == 0) ? 0x00400000 : ((uint) x << 23); // widen to uint before shifting so the byte lands in the exponent field
146+
return as_float(bits); // bit-for-bit reinterpretation of the 32-bit pattern as float
147+
}
148+
143149

144150
__attribute__((qcom_wave_pair_mode(1))) // 1=force single 2=force pair
145151
kernel void kernel_gemm_moe_mxfp4_f32_ns(
@@ -187,7 +193,7 @@ kernel void kernel_gemm_moe_mxfp4_f32_ns(
187193

188194
// Load scale for current mxfp4 block
189195
uint s_offset = s_sub_offset + get_global_id(0);
190-
half s = e8m0_to_fp16(src0_d[s_offset]);
196+
float s = e8m0_to_fp32(src0_d[s_offset]);
191197

192198
// Load 16 fp4 (64-bits) in transposed layout
193199
uint2 mxfp4x16;

0 commit comments

Comments
 (0)