Skip to content

Commit 252d33d

Browse files
cabirdmexiaoxial
authored and committed
Added AVX2 functions CTCountOfBitsChangedSeg, GetHashValue, and atanApproximation. If using the Intel compiler, SVML's atan2 is used; with any other compiler, the atan approximation is used.
1 parent 99f5056 commit 252d33d

3 files changed

Lines changed: 262 additions & 9 deletions

File tree

CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,16 @@ set(flags_to_test
3030
-fPIC
3131
)
3232

33+
# Raisr can use SVML for atan2 if Intel Compiler can be found, otherwise uses an atanApproximation
# Detect an Intel compiler (icpc/icpx) by substring match on the compiler executable name.
get_filename_component(CC_FILENAME ${CMAKE_CXX_COMPILER} NAME)
string(FIND "${CC_FILENAME}" "icp" CC_SUBSTR_INTEL)
if (${CC_SUBSTR_INTEL} LESS 0)
    # Non-Intel compiler: SVML is unavailable, so compile in the polynomial atan2 fallback.
    message("Building with Atan Approximation")
    list(APPEND flags_to_test "-DUSE_ATAN2_APPROX")
else()
    message("Building with Intel Compiler, using SVML")
endif()
42+
3343
foreach(cflag ${flags_to_test})
3444
string(REGEX REPLACE "[^A-Za-z0-9]" "_" cflag_var "${cflag}")
3545
set(test_cxx_flag "CXX_FLAG${cflag_var}")

Library/Raisr.cpp

Lines changed: 238 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -896,6 +896,134 @@ static void CTCountOfBitsChangedSegment(DT *LRImage, DT *HRImage, const int rows
896896
}
897897
}
898898

899+
// LRImage: cheap up scaled. HRImage: RAISR refined. outImage: output buffer in 8u.
900+
// rows: rows of LRImage/HRImage. startRow: seg start row. blendingZone: zone to run blending.
901+
// cols: stride for buffers in DT type.
902+
// outImageCols: stride for outImage buffer
903+
static void CTCountOfBitsChangedSegment_AVX256_32f(float *LRImage, float *HRImage, const int rows, const int startRow, const std::pair<int, int> blendingZone, unsigned char *outImage, const int cols, const int outImageCols)
904+
{
905+
int rowStartOffset = blendingZone.first - startRow;
906+
int rowEndOffset = blendingZone.second - startRow;
907+
908+
const __m256 zero_ps = _mm256_setzero_ps();
909+
const __m256 one_ps = _mm256_set1_ps(1.0);
910+
const int cmp_le = _CMP_LT_OQ;
911+
const __m256i one_epi32 = _mm256_set1_epi32(1);
912+
913+
for (auto r = rowStartOffset; r < rowEndOffset; r++)
914+
{
915+
const int c_limit = (cols - CTmargin);
916+
int c_limit_avx = c_limit - (c_limit%8)+1;
917+
for (auto c = CTmargin; c < c_limit_avx; c+=8)
918+
{
919+
__m256i hammingDistance_epi32 = _mm256_setzero_si256();
920+
921+
__m256 center_LR_ps = _mm256_loadu_ps( &LRImage[(r) * cols + c]);
922+
__m256 n1_LR_ps = _mm256_loadu_ps( &LRImage[(r-1) * cols + (c-1)]);
923+
__m256 n2_LR_ps = _mm256_loadu_ps( &LRImage[(r-1) * cols + (c)]);
924+
__m256 n3_LR_ps = _mm256_loadu_ps( &LRImage[(r-1) * cols + (c+1)]);
925+
__m256 n4_LR_ps = _mm256_loadu_ps( &LRImage[(r) * cols + (c-1)]);
926+
__m256 n5_LR_ps = _mm256_loadu_ps( &LRImage[(r) * cols + (c+1)]);
927+
__m256 n6_LR_ps = _mm256_loadu_ps( &LRImage[(r+1) * cols + (c-1)]);
928+
__m256 n7_LR_ps = _mm256_loadu_ps( &LRImage[(r+1) * cols + (c)]);
929+
__m256 n8_LR_ps = _mm256_loadu_ps( &LRImage[(r+1) * cols + (c+1)]);
930+
931+
__m256 center_HR_ps = _mm256_loadu_ps( &HRImage[(r) * cols + c]);
932+
__m256 n1_HR_ps = _mm256_loadu_ps( &HRImage[(r-1) * cols + (c-1)]);
933+
__m256 n2_HR_ps = _mm256_loadu_ps( &HRImage[(r-1) * cols + (c)]);
934+
__m256 n3_HR_ps = _mm256_loadu_ps( &HRImage[(r-1) * cols + (c+1)]);
935+
__m256 n4_HR_ps = _mm256_loadu_ps( &HRImage[(r) * cols + (c-1)]);
936+
__m256 n5_HR_ps = _mm256_loadu_ps( &HRImage[(r) * cols + (c+1)]);
937+
__m256 n6_HR_ps = _mm256_loadu_ps( &HRImage[(r+1) * cols + (c-1)]);
938+
__m256 n7_HR_ps = _mm256_loadu_ps( &HRImage[(r+1) * cols + (c)]);
939+
__m256 n8_HR_ps = _mm256_loadu_ps( &HRImage[(r+1) * cols + (c+1)]);
940+
941+
hammingDistance_epi32 = _mm256_add_epi32( hammingDistance_epi32,
942+
_mm256_abs_epi32(_mm256_sub_epi32(
943+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n1_LR_ps, center_LR_ps, cmp_le))),
944+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n1_HR_ps, center_HR_ps, cmp_le))))));
945+
hammingDistance_epi32 = _mm256_add_epi32( hammingDistance_epi32,
946+
_mm256_abs_epi32(_mm256_sub_epi32(
947+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n2_LR_ps, center_LR_ps, cmp_le))),
948+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n2_HR_ps, center_HR_ps, cmp_le))))));
949+
hammingDistance_epi32 = _mm256_add_epi32( hammingDistance_epi32,
950+
_mm256_abs_epi32(_mm256_sub_epi32(
951+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n3_LR_ps, center_LR_ps, cmp_le))),
952+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n3_HR_ps, center_HR_ps, cmp_le))))));
953+
hammingDistance_epi32 = _mm256_add_epi32( hammingDistance_epi32,
954+
_mm256_abs_epi32(_mm256_sub_epi32(
955+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n4_LR_ps, center_LR_ps, cmp_le))),
956+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n4_HR_ps, center_HR_ps, cmp_le))))));
957+
hammingDistance_epi32 = _mm256_add_epi32( hammingDistance_epi32,
958+
_mm256_abs_epi32(_mm256_sub_epi32(
959+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n5_LR_ps, center_LR_ps, cmp_le))),
960+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n5_HR_ps, center_HR_ps, cmp_le))))));
961+
hammingDistance_epi32 = _mm256_add_epi32( hammingDistance_epi32,
962+
_mm256_abs_epi32(_mm256_sub_epi32(
963+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n6_LR_ps, center_LR_ps, cmp_le))),
964+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n6_HR_ps, center_HR_ps, cmp_le))))));
965+
hammingDistance_epi32 = _mm256_add_epi32( hammingDistance_epi32,
966+
_mm256_abs_epi32(_mm256_sub_epi32(
967+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n7_LR_ps, center_LR_ps, cmp_le))),
968+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n7_HR_ps, center_HR_ps, cmp_le))))));
969+
hammingDistance_epi32 = _mm256_add_epi32( hammingDistance_epi32,
970+
_mm256_abs_epi32(_mm256_sub_epi32(
971+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n8_LR_ps, center_LR_ps, cmp_le))),
972+
_mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(n8_HR_ps, center_HR_ps, cmp_le))))));
973+
974+
__m256 weight_ps = _mm256_div_ps( _mm256_cvtepi32_ps(hammingDistance_epi32), _mm256_set1_ps((float) CTnumberofPixel) );
975+
__m256 weight2_ps = _mm256_sub_ps(one_ps, weight_ps);
976+
__m256 val_ps = _mm256_add_ps( _mm256_mul_ps( weight_ps, center_LR_ps),
977+
_mm256_mul_ps(weight2_ps, center_HR_ps));
978+
val_ps = _mm256_add_ps( val_ps, _mm256_set1_ps(0.5));
979+
__m256i val_epi32 = _mm256_cvtps_epi32(_mm256_floor_ps(val_ps)), val_epi16, val_epu8, val_epu16, perm_epu;
980+
int64_t val_epu8_64_t;
981+
if (gBitDepth == 8) {
982+
val_epi32 = _mm256_max_epi32(_mm256_min_epi32( val_epi32, _mm256_set1_epi32(gMax8bit)), _mm256_set1_epi32(gMin8bit));
983+
val_epi16 = _mm256_packs_epi32(val_epi32,val_epi32);
984+
val_epu8 = _mm256_packus_epi16(val_epi16, val_epi16);
985+
perm_epu = _mm256_permutevar8x32_epi32(val_epu8, _mm256_setr_epi32(0,4,0,4,0,4,0,4));
986+
val_epu8_64_t = (_mm_cvtsi128_si64(_mm256_extractf128_si256(perm_epu, 0)));
987+
memcpy((void *) &outImage[(startRow + r) * outImageCols + c], (void *) &val_epu8_64_t, 8);
988+
}
989+
else {
990+
val_epi32 = _mm256_max_epi32(_mm256_min_epi32( val_epi32, _mm256_set1_epi32(gMax16bit)), _mm256_set1_epi32(gMin16bit));
991+
val_epu16 = _mm256_packus_epi32(val_epi32,val_epi32);
992+
perm_epu = _mm256_permute4x64_epi64(val_epu16, 0x88);
993+
unsigned short *out = (unsigned short *)outImage;
994+
_mm_storeu_si128((__m128i *) &out[(startRow + r) * outImageCols + c], _mm256_extractf128_si256(perm_epu, 0));
995+
}
996+
}
997+
998+
for (auto c = c_limit_avx; c < c_limit; c++) // handle edge, too small for SIMD
999+
{
1000+
int hammingDistance = 0;
1001+
1002+
// Census transform
1003+
for (int i = -CTmargin; i <= CTmargin; i++)
1004+
{
1005+
for (int j = -CTmargin; j <= CTmargin; j++)
1006+
{
1007+
if (unlikely(i == 0 && j == 0))
1008+
continue;
1009+
hammingDistance += std::abs((LRImage[(r + i) * cols + (c + j)] < LRImage[r * cols + c] ? 1 : 0) - (HRImage[(r + i) * cols + (c + j)] < HRImage[r * cols + c] ? 1 : 0));
1010+
}
1011+
}
1012+
float weight = (float)hammingDistance / (float)CTnumberofPixel;
1013+
float val = weight * LRImage[r * cols + c] + (1 - weight) * HRImage[r * cols + c];
1014+
val += 0.5; // to round the value
1015+
//convert 32f to 8bit/10bit
1016+
if (gBitDepth == 8) {
1017+
outImage[(startRow + r) * outImageCols + c] = (unsigned char)(val < gMin8bit ? gMin8bit : (val > gMax8bit ? gMax8bit : val));
1018+
}
1019+
else {
1020+
unsigned short *out = (unsigned short *)outImage;
1021+
out[(startRow + r) * outImageCols + c] = (unsigned short)(val < gMin16bit ? gMin16bit : (val > gMax16bit ? gMax16bit : val));
1022+
}
1023+
}
1024+
}
1025+
}
1026+
8991027
int inline CTRandomness_C(float *inYUpscaled32f, int cols, int r, int c, int pix)
9001028
{
9011029
// Census transform
@@ -980,6 +1108,113 @@ inline float atan2Approximation(float y, float x)
9801108
}
9811109
#endif
9821110

1111+
inline __m256 atan2Approximation_AVX256_32f(__m256 y_ps, __m256 x_ps)
1112+
{
1113+
const float ONEQTR_PI = M_PI / 4.0;
1114+
const float THRQTR_PI = 3.0 * M_PI / 4.0;
1115+
const __m256 zero_ps = _mm256_set1_ps(0.0);
1116+
const __m256 oneqtr_pi_ps = _mm256_set1_ps(ONEQTR_PI);
1117+
const __m256 thrqtr_pi_ps = _mm256_set1_ps(THRQTR_PI);
1118+
1119+
__m256 abs_y_ps = _mm256_add_ps( _mm256_andnot_ps( _mm256_set1_ps(-0.0f), y_ps),
1120+
_mm256_set1_ps(1e-10f));
1121+
1122+
__m256 r_cond1_ps = _mm256_div_ps( _mm256_add_ps(x_ps, abs_y_ps), _mm256_sub_ps(abs_y_ps, x_ps));
1123+
__m256 r_cond2_ps = _mm256_div_ps( _mm256_sub_ps(x_ps, abs_y_ps), _mm256_add_ps(x_ps, abs_y_ps));
1124+
__m256 r_cmp_ps = _mm256_cmp_ps(x_ps, zero_ps, _CMP_LT_OQ);
1125+
__m256 r_ps = _mm256_blendv_ps( r_cond2_ps, r_cond1_ps, r_cmp_ps);
1126+
__m256 angle_ps = _mm256_blendv_ps( oneqtr_pi_ps, thrqtr_pi_ps, r_cmp_ps );
1127+
1128+
angle_ps = _mm256_fmadd_ps(_mm256_fmadd_ps(_mm256_mul_ps(_mm256_set1_ps(0.1963f), r_ps),
1129+
r_ps, _mm256_set1_ps(-0.9817f)),
1130+
r_ps, angle_ps);
1131+
1132+
__m256 neg_angle_ps = _mm256_mul_ps(_mm256_set1_ps(-1), angle_ps);
1133+
return _mm256_blendv_ps( angle_ps, neg_angle_ps, _mm256_cmp_ps(y_ps, zero_ps, _CMP_LT_OQ));
1134+
}
1135+
1136+
// Computes, for 8 pixels at once, the hash index used to select a RAISR
// filter bucket. Each GTWG[k] holds the 2x2 gradient Gram matrix entries
// {a, b, _, d} of patch k; the index combines the quantized gradient angle,
// strength and coherence. The 8 resulting int32 indices are stored to idx.
// passIdx selects the first-pass (gQStr/gQCoh) or second-pass
// (gQStr2/gQCoh2) quantization thresholds.
void inline GetHashValue_AVX256_32f(float GTWG[8][4], int passIdx, int32_t *idx) {
    const float one = 1.0;
    const float two = 2.0;
    const float four = 4.0;
    const float pi = PI;
    // Added to the coherence denominator so it can never be exactly zero.
    const float near_zero = 0.00000000000000001;
    const __m256 zero_ps = _mm256_setzero_ps();
    const __m256i zero_epi32 = _mm256_setzero_si256();
    const __m256i one_epi32 = _mm256_set1_epi32(1);
    const __m256i two_epi32 = _mm256_set1_epi32(2);

    const int cmp_le = _CMP_LE_OQ;
    const int cmp_gt = _CMP_GT_OQ;  // NOTE(review): unused in this function

    // Transpose the a, b, d entries of the 8 Gram matrices into SoA vectors.
    // GTWG[k][2] is never read -- presumably it duplicates GTWG[k][1]
    // (symmetric matrix); TODO confirm against computeGTWG_Segment.
    __m256 m_a_ps = _mm256_setr_ps (GTWG[0][0], GTWG[1][0], GTWG[2][0], GTWG[3][0],
                                    GTWG[4][0], GTWG[5][0], GTWG[6][0], GTWG[7][0]);
    __m256 m_b_ps = _mm256_setr_ps (GTWG[0][1], GTWG[1][1], GTWG[2][1], GTWG[3][1],
                                    GTWG[4][1], GTWG[5][1], GTWG[6][1], GTWG[7][1]);
    __m256 m_d_ps = _mm256_setr_ps (GTWG[0][3], GTWG[1][3], GTWG[2][3], GTWG[3][3],
                                    GTWG[4][3], GTWG[5][3], GTWG[6][3], GTWG[7][3]);
    // Eigenvalues of [[a,b],[b,d]] from trace T and determinant D:
    // L1,2 = T/2 +- sqrt(T^2/4 - D)
    __m256 T_ps = _mm256_add_ps(m_a_ps, m_d_ps);
    __m256 D_ps = _mm256_sub_ps( _mm256_mul_ps( m_a_ps, m_d_ps),
                                 _mm256_mul_ps( m_b_ps, m_b_ps));

    __m256 sqr_ps = _mm256_sqrt_ps( _mm256_sub_ps( _mm256_div_ps ( _mm256_mul_ps(T_ps, T_ps),
                                    _mm256_broadcast_ss(&four)), D_ps));

    __m256 half_T_ps = _mm256_div_ps ( T_ps, _mm256_broadcast_ss(&two) );
    __m256 L1_ps = _mm256_add_ps( half_T_ps, sqr_ps);  // dominant eigenvalue
    __m256 L2_ps = _mm256_sub_ps( half_T_ps, sqr_ps);  // minor eigenvalue

    __m256 angle_ps = zero_ps;

    // Eigenvector direction: angle = atan2(b, L1 - d). When b == 0 the second
    // operand is forced to 1 so the angle comes out as atan2(0, 1) = 0.
    __m256 blend_ps = _mm256_blendv_ps( _mm256_broadcast_ss(&one), _mm256_sub_ps(L1_ps, m_d_ps),
                                        _mm256_cmp_ps(m_b_ps, zero_ps, _CMP_NEQ_OQ) );

#ifdef USE_ATAN2_APPROX
    angle_ps = atan2Approximation_AVX256_32f( m_b_ps, blend_ps);
#else
    angle_ps = _mm256_atan2_ps( m_b_ps, blend_ps);  // SVML, Intel compiler only
#endif

    // Fold negative angles into the positive half-turn by adding pi.
    angle_ps = _mm256_add_ps ( angle_ps, _mm256_blendv_ps( zero_ps, _mm256_broadcast_ss(&pi),
                                         _mm256_cmp_ps(angle_ps, zero_ps, _CMP_LT_OQ) ) );

    __m256 sqrtL1_ps = _mm256_sqrt_ps( L1_ps );
    __m256 sqrtL2_ps = _mm256_sqrt_ps( L2_ps );
    // coherence = (sqrt(L1) - sqrt(L2)) / (sqrt(L1) + sqrt(L2) + eps)
    __m256 coherence_ps = _mm256_div_ps( _mm256_sub_ps( sqrtL1_ps, sqrtL2_ps ),
                          _mm256_add_ps( _mm256_add_ps(sqrtL1_ps, sqrtL2_ps), _mm256_broadcast_ss(&near_zero) ) );
    __m256 strength_ps = L1_ps;

    // Quantize the angle (scaled by gQAngle) and clamp the bucket to
    // [0, gQuantizationAngle - 1].
    __m256i angleIdx_epi32 = _mm256_cvtps_epi32( _mm256_floor_ps(_mm256_mul_ps (angle_ps, _mm256_broadcast_ss(&gQAngle))));

    angleIdx_epi32 = _mm256_min_epi32( _mm256_sub_epi32( _mm256_set1_epi32(gQuantizationAngle), _mm256_set1_epi32(1)),
                                       _mm256_max_epi32(angleIdx_epi32, zero_epi32 ) );

    // AFAIK, today QStr & QCoh are vectors of size 2. I think searchsorted can return an index of 0,1, or 2
    float *gQStr_data, *gQCoh_data;
    if (passIdx == 0) gQStr_data = gQStr.data(); else gQStr_data = gQStr2.data();
    if (passIdx == 0) gQCoh_data = gQCoh.data(); else gQCoh_data = gQCoh2.data();
    __m256 gQStr1_ps = _mm256_broadcast_ss(gQStr_data);
    __m256 gQStr2_ps = _mm256_broadcast_ss(gQStr_data + 1);
    __m256 gQCoh1_ps = _mm256_broadcast_ss(gQCoh_data);
    __m256 gQCoh2_ps = _mm256_broadcast_ss(gQCoh_data + 1);

    // Branch-free searchsorted over the two thresholds:
    // bucket = 2 - (v <= t1) - (v <= t2), i.e. 0, 1 or 2.
    __m256i strengthIdx_epi32 = _mm256_sub_epi32(two_epi32,
                                _mm256_add_epi32(
                                _mm256_and_si256(one_epi32, _mm256_castps_si256( _mm256_cmp_ps(strength_ps, gQStr1_ps, cmp_le))),
                                _mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(strength_ps, gQStr2_ps, cmp_le)))));
    __m256i coherenceIdx_epi32 = _mm256_sub_epi32(two_epi32,
                                 _mm256_add_epi32(
                                 _mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(coherence_ps, gQCoh1_ps, cmp_le))),
                                 _mm256_and_si256(one_epi32, _mm256_castps_si256(_mm256_cmp_ps(coherence_ps, gQCoh2_ps, cmp_le)))));

    // Flattened 3D index:
    // idx = (angleIdx * gQuantizationStrength + strengthIdx) * gQuantizationCoherence + coherenceIdx
    const __m256i gQuantizationCoherence_epi32 = _mm256_set1_epi32(gQuantizationCoherence);
    __m256i idx_epi32 = _mm256_mullo_epi32(gQuantizationCoherence_epi32,
                                           _mm256_mullo_epi32(angleIdx_epi32, _mm256_set1_epi32(gQuantizationStrength)));
    idx_epi32 = _mm256_add_epi32(coherenceIdx_epi32,
                                 _mm256_add_epi32(idx_epi32, _mm256_mullo_epi32(strengthIdx_epi32, gQuantizationCoherence_epi32)));
    _mm256_storeu_si256((__m256i *)idx, idx_epi32);
}
1217+
9831218
inline int int_floor(float x)
9841219
{
9851220
int i = (int)x; /* truncate */
@@ -1345,10 +1580,10 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
13451580
computeGTWG_Segment(pSeg32f, rows, cols, rOffset, c + 2 * pix, &GTWG[2 * pix], &pixbuf[2 * pix][0], &pixbuf[2 * pix + 1][0]);
13461581
}
13471582

1348-
#pragma unroll(unrollSizePatchBased)
1583+
GetHashValue_AVX256_32f(GTWG, passIdx, hashValue);
1584+
13491585
for (pix = 0; pix < unrollSizePatchBased; pix++)
13501586
{
1351-
hashValue[pix] = GetHashValue(GTWG[pix], passIdx);
13521587
if (passIdx == 0)
13531588
fbase[pix] = gFilterBuckets[hashValue[pix]][pixelType[pix]];
13541589
else
@@ -1405,9 +1640,7 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
14051640
if (blendingMode == CountOfBitsChanged)
14061641
{
14071642
int segStart = gIppCtx.segZones[passIdx][threadIdx].scaleStartRow;
1408-
CTCountOfBitsChangedSegment<float>(pSeg32f, pRaisr32f, segRows, segStart, {gIppCtx.segZones[passIdx][threadIdx].blendingStartRow, gIppCtx.segZones[passIdx][threadIdx].blendingEndRow}, outY->pData, cols, outY->step);
1409-
// No improve with AVX2
1410-
// CTCountOfBitsChangedSegment_AVX2(pSeg32f, pRaisr32f, segRows, segStart, {gIppCtx.segZones[threadIdx].blendingStartRow, gIppCtx.segZones[threadIdx].blendingEndRow}, outY->pData, cols, outY->step);
1643+
CTCountOfBitsChangedSegment_AVX256_32f(pSeg32f, pRaisr32f, segRows, segStart, {gIppCtx.segZones[passIdx][threadIdx].blendingStartRow, gIppCtx.segZones[passIdx][threadIdx].blendingEndRow}, outY->pData, cols, outY->step);
14111644
}
14121645

14131646
threadStatus[threadIdx] = 1;

build.sh

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,17 @@ build() (
2727
*) break ;;
2828
esac
2929
done
30+
if $docker_flag; then
31+
# docker builds don't support intel cc, so reassign compiler
32+
if check_executable clang++; then
33+
CXX=$(check_executable -p clang++)
34+
elif check_executable g++; then
35+
CXX=$(check_executable -p g++)
36+
else
37+
die "No suitable cpp compiler found in path" \
38+
"Please either install one or set it via cxx=*"
39+
fi
40+
fi
3041
echo "Create folder: build, build type: $build_type"
3142
mkdir -p build > /dev/null 2>&1
3243
cd_safe build
@@ -74,11 +85,10 @@ check_executable() (
7485
done
7586
return 127
7687
)
77-
78-
if check_executable clang++; then
88+
if check_executable icpx; then
89+
CXX=$(check_executable -p icpx)
90+
elif check_executable clang++; then
7991
CXX=$(check_executable -p clang++)
80-
elif check_executable icpc "/opt/intel/bin"; then
81-
CXX=$(check_executable -p icpc "/opt/intel/bin")
8292
elif check_executable g++; then
8393
CXX=$(check_executable -p g++)
8494
else

0 commit comments

Comments
 (0)