@@ -527,8 +527,8 @@ static void CTCountOfBitsChanged_AVX2(float *LRImage, float *HRImage, float *out
527527 int highbit = 0x80000000 ;
528528 const __m256i highbit_epi32 = _mm256_setr_epi32 (highbit, highbit, highbit, highbit, highbit, highbit, highbit, highbit);
529529
530- __m256i cmp_lr_epi32 = compare3x3_ps (row_lr_f, center_lr_f, highbit_epi32);
531- __m256i cmp_hr_epi32 = compare3x3_ps (row_hr_f, center_hr_f, highbit_epi32);
530+ __m256i cmp_lr_epi32 = compare3x3_AVX256_32f (row_lr_f, center_lr_f, highbit_epi32);
531+ __m256i cmp_hr_epi32 = compare3x3_AVX256_32f (row_hr_f, center_hr_f, highbit_epi32);
532532
533533 // hammingDistance = abs( lr_cmp - hr_cmp )
534534 __m256i cmp_epi32 = _mm256_abs_epi32 (_mm256_sub_epi32 (cmp_lr_epi32, cmp_hr_epi32));
@@ -563,8 +563,8 @@ static void CTCountOfBitsChangedSegment_AVX2(float *LRImage, float *HRImage, con
563563 int highbit = 0x80000000 ;
564564 const __m256i highbit_epi32 = _mm256_setr_epi32 (highbit, highbit, highbit, highbit, highbit, highbit, highbit, highbit);
565565
566- __m256i cmp_lr_epi32 = compare3x3_ps (row_lr_f, center_lr_f, highbit_epi32);
567- __m256i cmp_hr_epi32 = compare3x3_ps (row_hr_f, center_hr_f, highbit_epi32);
566+ __m256i cmp_lr_epi32 = compare3x3_AVX256_32f (row_lr_f, center_lr_f, highbit_epi32);
567+ __m256i cmp_hr_epi32 = compare3x3_AVX256_32f (row_hr_f, center_hr_f, highbit_epi32);
568568
569569 // hammingDistance = abs( lr_cmp - hr_cmp )
570570 __m256i cmp_epi32 = _mm256_abs_epi32 (_mm256_sub_epi32 (cmp_lr_epi32, cmp_hr_epi32));
@@ -1184,7 +1184,16 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
11841184 // CT-Blending, CTRandomness
11851185 if (blendingMode == Randomness)
11861186 {
1187- census = CTRandomness_AVX512_32f (pSeg32f, cols, rOffset, c, pix);
1187+ if (gAsmType == AVX2)
1188+ census = CTRandomness_AVX256_32f (pSeg32f, cols, rOffset, c, pix);
1189+ else if (gAsmType == AVX512)
1190+ census = CTRandomness_AVX512_32f (pSeg32f, cols, rOffset, c, pix);
1191+ else
1192+ {
1193+ std::cout << " expected avx512 or avx2, but got " << gAsmType << std::endl;
1194+ return RNLErrorBadParameter;
1195+ }
1196+
11881197 float weight = (float )census / (float )CTnumberofPixel;
11891198 // position in the whole image: r * cols + c + pix
11901199 float val = weight * curPix + (1 - weight) * pSeg32f[rOffset * cols + c + pix];
0 commit comments