Skip to content

Commit ea6e83b

Browse files
cabirdmexiaoxial
authored and committed
added CTRandomness AVX2
1 parent fa902ae commit ea6e83b

3 files changed

Lines changed: 17 additions & 8 deletions

File tree

Library/Raisr.cpp

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -527,8 +527,8 @@ static void CTCountOfBitsChanged_AVX2(float *LRImage, float *HRImage, float *out
527527
int highbit = 0x80000000;
528528
const __m256i highbit_epi32 = _mm256_setr_epi32(highbit, highbit, highbit, highbit, highbit, highbit, highbit, highbit);
529529

530-
__m256i cmp_lr_epi32 = compare3x3_ps(row_lr_f, center_lr_f, highbit_epi32);
531-
__m256i cmp_hr_epi32 = compare3x3_ps(row_hr_f, center_hr_f, highbit_epi32);
530+
__m256i cmp_lr_epi32 = compare3x3_AVX256_32f(row_lr_f, center_lr_f, highbit_epi32);
531+
__m256i cmp_hr_epi32 = compare3x3_AVX256_32f(row_hr_f, center_hr_f, highbit_epi32);
532532

533533
// hammingDistance = abs( lr_cmp - hr_cmp )
534534
__m256i cmp_epi32 = _mm256_abs_epi32(_mm256_sub_epi32(cmp_lr_epi32, cmp_hr_epi32));
@@ -563,8 +563,8 @@ static void CTCountOfBitsChangedSegment_AVX2(float *LRImage, float *HRImage, con
563563
int highbit = 0x80000000;
564564
const __m256i highbit_epi32 = _mm256_setr_epi32(highbit, highbit, highbit, highbit, highbit, highbit, highbit, highbit);
565565

566-
__m256i cmp_lr_epi32 = compare3x3_ps(row_lr_f, center_lr_f, highbit_epi32);
567-
__m256i cmp_hr_epi32 = compare3x3_ps(row_hr_f, center_hr_f, highbit_epi32);
566+
__m256i cmp_lr_epi32 = compare3x3_AVX256_32f(row_lr_f, center_lr_f, highbit_epi32);
567+
__m256i cmp_hr_epi32 = compare3x3_AVX256_32f(row_hr_f, center_hr_f, highbit_epi32);
568568

569569
// hammingDistance = abs( lr_cmp - hr_cmp )
570570
__m256i cmp_epi32 = _mm256_abs_epi32(_mm256_sub_epi32(cmp_lr_epi32, cmp_hr_epi32));
@@ -1184,7 +1184,16 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
11841184
// CT-Blending, CTRandomness
11851185
if (blendingMode == Randomness)
11861186
{
1187-
census = CTRandomness_AVX512_32f(pSeg32f, cols, rOffset, c, pix);
1187+
if (gAsmType == AVX2)
1188+
census = CTRandomness_AVX256_32f(pSeg32f, cols, rOffset, c, pix);
1189+
else if (gAsmType == AVX512)
1190+
census = CTRandomness_AVX512_32f(pSeg32f, cols, rOffset, c, pix);
1191+
else
1192+
{
1193+
std::cout << "expected avx512 or avx2, but got " << gAsmType << std::endl;
1194+
return RNLErrorBadParameter;
1195+
}
1196+
11881197
float weight = (float)census / (float)CTnumberofPixel;
11891198
// position in the whole image: r * cols + c + pix
11901199
float val = weight * curPix + (1 - weight) * pSeg32f[rOffset * cols + c + pix];

Library/Raisr_AVX256.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111

1212

13-
inline __m256i compare3x3_ps(__m256 a, __m256 b, __m256i highbit_epi32)
13+
inline __m256i compare3x3_AVX256_32f(__m256 a, __m256 b, __m256i highbit_epi32)
1414
{
1515
// compare if neighbors < centerpixel, toggle bit in mask if true
1616
// when cmp_ps is true, the lane is all ones, 0xffffffff (-nan). When we convert that to int, it is 0x8000 0000
@@ -42,7 +42,7 @@ int inline CTRandomness_AVX256_32f(float *inYUpscaled32f, int cols, int r, int c
4242
int highbit = 0x80000000;
4343
const __m256i highbit_epi32 = _mm256_setr_epi32(highbit, highbit, highbit, highbit, highbit, highbit, highbit, highbit);
4444

45-
__m256i cmp_epi32 = compare3x3_ps(row_f, center_f, highbit_epi32);
45+
__m256i cmp_epi32 = compare3x3_AVX256_32f(row_f, center_f, highbit_epi32);
4646

4747
// count # of bits in mask
4848
census_count += sumitup_256_epi32(cmp_epi32);

Library/Raisr_AVX256.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ inline void load3x3_ps(float *img, unsigned int width, unsigned int height, unsi
2525
*out_8neighbors_ps = _mm256_insertf128_ps(_mm256_castps128_ps256(rowlo_f), rowhi_f, 1);
2626
}
2727

28-
inline __m256i compare3x3_ps(__m256 a, __m256 b, __m256i highbit_epi32);
28+
inline __m256i compare3x3_AVX256_32f(__m256 a, __m256 b, __m256i highbit_epi32);
2929
inline int sumitup_256_epi32(__m256i acc);
3030
inline float sumitup_ps_256(__m256 acc);
3131

0 commit comments

Comments
 (0)