@@ -156,15 +156,15 @@ inline __m512 atan2Approximation_AVX512_32f_16Elements(__m512 y_ps, __m512 x_ps)
156156 const __m512 oneqtr_pi_ps = _mm512_set1_ps (ONEQTR_PI);
157157 const __m512 thrqtr_pi_ps = _mm512_set1_ps (THRQTR_PI);
158158
159- __m512 abs_y_ps = _mm512_add_ps ( _mm512_abs_ph (y_ps), _mm512_set1_ps (1e-10f ));
159+ __m512 abs_y_ps = _mm512_add_ps ( _mm512_abs_ps (y_ps), _mm512_set1_ps (1e-10f ));
160160
161161 __m512 r_cond1_ps = _mm512_div_ps ( _mm512_add_ps (x_ps, abs_y_ps), _mm512_sub_ps (abs_y_ps, x_ps));
162162 __m512 r_cond2_ps = _mm512_div_ps ( _mm512_sub_ps (x_ps, abs_y_ps), _mm512_add_ps (x_ps, abs_y_ps));
163163 __mmask16 r_cmp_m8 = _mm512_cmp_ps_mask (x_ps, zero_ps, _CMP_LT_OQ);
164164 __m512 r_ps = _mm512_mask_blend_ps ( r_cmp_m8, r_cond2_ps, r_cond1_ps);
165165 __m512 angle_ps = _mm512_mask_blend_ps ( r_cmp_m8, oneqtr_pi_ps, thrqtr_pi_ps);
166166
167- angle_ps = _mm512_fmadd_ps (_mm512_fmadd_ps (_mm512_mul_ph (_mm512_set1_ps (0 .1963f ), r_ps),
167+ angle_ps = _mm512_fmadd_ps (_mm512_fmadd_ps (_mm512_mul_ps (_mm512_set1_ps (0 .1963f ), r_ps),
168168 r_ps, _mm512_set1_ps (-0 .9817f )),
169169 r_ps, angle_ps);
170170
@@ -188,9 +188,9 @@ void GetHashValue_AVX512_32f_16Elements(float GTWG[3][16], int passIdx, int32_t
188188 const int cmp_le = _CMP_LE_OQ;
189189 const int cmp_gt = _CMP_GT_OQ;
190190
191- __m512 m_a_ps = _mm512_load_ph ( GTWG[0 ]);
192- __m512 m_b_ps = _mm512_load_ph ( GTWG[1 ]);
193- __m512 m_d_ps = _mm512_load_ph ( GTWG[2 ]);
191+ __m512 m_a_ps = _mm512_load_ps ( GTWG[0 ]);
192+ __m512 m_b_ps = _mm512_load_ps ( GTWG[1 ]);
193+ __m512 m_d_ps = _mm512_load_ps ( GTWG[2 ]);
194194
195195 __m512 T_ps = _mm512_add_ps (m_a_ps, m_d_ps);
196196 __m512 D_ps = _mm512_sub_ps ( _mm512_mul_ps ( m_a_ps, m_d_ps),
@@ -243,7 +243,7 @@ void GetHashValue_AVX512_32f_16Elements(float GTWG[3][16], int passIdx, int32_t
243243 __m512 gQCoh1_ps = _mm512_set1_ps (gQCoh_data [0 ]);
244244 __m512 gQCoh2_ps = _mm512_set1_ps (gQCoh_data [1 ]);
245245
246- __m512i strengthIdx_epi32 = _mm512_mask_blend_epi32 (_mm512_cmp_ph_mask (gQStr1_ps , strength_ps, _MM_CMPINT_LE),
246+ __m512i strengthIdx_epi32 = _mm512_mask_blend_epi32 (_mm512_cmp_ps_mask (gQStr1_ps , strength_ps, _MM_CMPINT_LE),
247247 zero_epi32,
248248 _mm512_mask_blend_epi32 (_mm512_cmp_ps_mask (gQStr2_ps , strength_ps, _MM_CMPINT_LE),
249249 two_epi32,
0 commit comments