Skip to content

Commit 67abbcc

Browse files
cabirdme authored and xiaoxial committed
bug fix
1 parent 6968098 commit 67abbcc

1 file changed

Lines changed: 6 additions & 6 deletions

File tree

Library/Raisr_AVX512.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -156,15 +156,15 @@ inline __m512 atan2Approximation_AVX512_32f_16Elements(__m512 y_ps, __m512 x_ps)
156156
const __m512 oneqtr_pi_ps = _mm512_set1_ps(ONEQTR_PI);
157157
const __m512 thrqtr_pi_ps = _mm512_set1_ps(THRQTR_PI);
158158

159-
__m512 abs_y_ps = _mm512_add_ps( _mm512_abs_ph(y_ps), _mm512_set1_ps(1e-10f));
159+
__m512 abs_y_ps = _mm512_add_ps( _mm512_abs_ps(y_ps), _mm512_set1_ps(1e-10f));
160160

161161
__m512 r_cond1_ps = _mm512_div_ps( _mm512_add_ps(x_ps, abs_y_ps), _mm512_sub_ps(abs_y_ps, x_ps));
162162
__m512 r_cond2_ps = _mm512_div_ps( _mm512_sub_ps(x_ps, abs_y_ps), _mm512_add_ps(x_ps, abs_y_ps));
163163
__mmask16 r_cmp_m8 = _mm512_cmp_ps_mask(x_ps, zero_ps, _CMP_LT_OQ);
164164
__m512 r_ps = _mm512_mask_blend_ps( r_cmp_m8, r_cond2_ps, r_cond1_ps);
165165
__m512 angle_ps = _mm512_mask_blend_ps( r_cmp_m8, oneqtr_pi_ps, thrqtr_pi_ps);
166166

167-
angle_ps = _mm512_fmadd_ps(_mm512_fmadd_ps(_mm512_mul_ph(_mm512_set1_ps(0.1963f), r_ps),
167+
angle_ps = _mm512_fmadd_ps(_mm512_fmadd_ps(_mm512_mul_ps(_mm512_set1_ps(0.1963f), r_ps),
168168
r_ps, _mm512_set1_ps(-0.9817f)),
169169
r_ps, angle_ps);
170170

@@ -188,9 +188,9 @@ void GetHashValue_AVX512_32f_16Elements(float GTWG[3][16], int passIdx, int32_t
188188
const int cmp_le = _CMP_LE_OQ;
189189
const int cmp_gt = _CMP_GT_OQ;
190190

191-
__m512 m_a_ps = _mm512_load_ph( GTWG[0]);
192-
__m512 m_b_ps = _mm512_load_ph( GTWG[1]);
193-
__m512 m_d_ps = _mm512_load_ph( GTWG[2]);
191+
__m512 m_a_ps = _mm512_load_ps( GTWG[0]);
192+
__m512 m_b_ps = _mm512_load_ps( GTWG[1]);
193+
__m512 m_d_ps = _mm512_load_ps( GTWG[2]);
194194

195195
__m512 T_ps = _mm512_add_ps(m_a_ps, m_d_ps);
196196
__m512 D_ps = _mm512_sub_ps( _mm512_mul_ps( m_a_ps, m_d_ps),
@@ -243,7 +243,7 @@ void GetHashValue_AVX512_32f_16Elements(float GTWG[3][16], int passIdx, int32_t
243243
__m512 gQCoh1_ps = _mm512_set1_ps(gQCoh_data[0]);
244244
__m512 gQCoh2_ps = _mm512_set1_ps(gQCoh_data[1]);
245245

246-
__m512i strengthIdx_epi32 = _mm512_mask_blend_epi32(_mm512_cmp_ph_mask(gQStr1_ps, strength_ps, _MM_CMPINT_LE),
246+
__m512i strengthIdx_epi32 = _mm512_mask_blend_epi32(_mm512_cmp_ps_mask(gQStr1_ps, strength_ps, _MM_CMPINT_LE),
247247
zero_epi32,
248248
_mm512_mask_blend_epi32(_mm512_cmp_ps_mask(gQStr2_ps, strength_ps, _MM_CMPINT_LE),
249249
two_epi32,

0 commit comments

Comments
 (0)