Skip to content

Commit 4ed50d0

Browse files
cabirdmexiaoxial
authored andcommitted
cleanup avx2 and avx512 paths. added runtime cpu feature check for avx2/512 and tuned cmake build process
1 parent b74992f commit 4ed50d0

6 files changed

Lines changed: 116 additions & 13 deletions

File tree

CMakeLists.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,21 @@ else()
4040
message("Building with Intel Compiler, using SVML")
4141
endif()
4242

43+
INCLUDE(CheckCXXSourceRuns)
44+
SET(CMAKE_REQUIRED_FLAGS "-march=native")
45+
check_cxx_source_runs("
46+
#include <immintrin.h>
47+
int main(int argc, char** argv) {
48+
float data[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
49+
float output[16];
50+
__m512 a = _mm512_loadu_ps(data); // avx512f
51+
__m512 b = _mm512_loadu_ps(data);
52+
__mmask8 mask = _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ); // avx512vl
53+
__m512 c = _mm512_add_ps(a, b); // avx512f
54+
_mm512_storeu_ps(output, c); // avx512f
55+
return 0;
56+
}" HAVE_AVX512)
57+
4358
foreach(cflag ${flags_to_test})
4459
string(REGEX REPLACE "[^A-Za-z0-9]" "_" cflag_var "${cflag}")
4560
set(test_cxx_flag "CXX_FLAG${cflag_var}")

Library/CMakeLists.txt

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
1-
set(SOURCES Raisr.cpp RaisrHandler.cpp)
2-
set(HEADERS Raisr.h ThreadPool.h RaisrHandler.h RaisrDefaults.h)
1+
set(SOURCES Raisr_AVX256.cpp Raisr.cpp RaisrHandler.cpp)
2+
set(HEADERS Raisr_globals.h Raisr_AVX256.h Raisr.h ThreadPool.h RaisrHandler.h RaisrDefaults.h)
3+
4+
if( HAVE_AVX512 )
5+
message("Building AVX512 library")
6+
list(APPEND SOURCES Raisr_AVX512.cpp Raisr_AVX512.h)
7+
else()
8+
message("Building AVX2 only")
9+
endif()
310

411
add_library(raisr STATIC ${SOURCES} ${HEADERS})
512

Library/Raisr.cpp

Lines changed: 82 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@
88
#include "Raisr.h"
99
#include "Raisr_globals.h"
1010
#include "Raisr_AVX256.h"
11-
#include "Raisr_AVX512.h"
1211
#include "Raisr_AVX256.cpp"
13-
#include "Raisr_AVX512.cpp"
1412
#include <fstream>
1513
#include <iterator>
1614
#include <iostream>
@@ -23,6 +21,11 @@
2321
#include "cpuid.h"
2422
#include <chrono>
2523

24+
#ifdef __AVX512F__
25+
#include "Raisr_AVX512.h"
26+
#include "Raisr_AVX512.cpp"
27+
#endif
28+
2629
#ifndef WIN32
2730
#include <unistd.h>
2831
#endif
@@ -34,9 +37,8 @@
3437
************************************************************/
3538
#define ALIGNED_SIZE(size, align) (((size) + (align)-1) & ~((align)-1))
3639

37-
static bool is_machine_intel()
40+
static MachineVendorType get_machine_vendor()
3841
{
39-
bool ret = false;
4042

4143
unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
4244

@@ -49,7 +51,50 @@ static bool is_machine_intel()
4951
vendor_string[12] = 0;
5052

5153
if (!strcmp(vendor_string, "GenuineIntel"))
52-
ret = true;
54+
gMachineVendorType = INTEL;
55+
else if (!strcmp(vendor_string, "AuthenticAMD"))
56+
gMachineVendorType = AMD;
57+
else
58+
gMachineVendorType = VENDOR_UNSUPPORTED;
59+
return gMachineVendorType;
60+
}
61+
62+
static bool machine_supports_feature(MachineVendorType vendor, ASMType type)
63+
{
64+
bool ret = false;
65+
unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
66+
67+
if (vendor == INTEL ) {
68+
__get_cpuid_count(0x7, 0x0, &eax, &ebx, &ecx, &edx);
69+
70+
if (type == AVX512) {
71+
// check for avx512f and avx512vl flags
72+
if ( ((ebx >> 16) & 0x1)
73+
&& ((ebx >> 31) & 0x1) )
74+
{
75+
ret = true;
76+
}
77+
} else if (type == AVX2) {
78+
// check for avx2 flag
79+
if ( (ebx >> 5) & 0x1)
80+
{
81+
ret = true;
82+
}
83+
}
84+
}
85+
else if (vendor == AMD)
86+
{
87+
__get_cpuid_count(0x7, 0x0, &eax, &ebx, &ecx, &edx);
88+
89+
if (type == AVX512) {
90+
ret = false;
91+
} else if (type == AVX2) {
92+
if ( (ebx >> 5) & 0x1)
93+
{
94+
ret = true;
95+
}
96+
}
97+
}
5398
return ret;
5499
}
55100

@@ -1149,8 +1194,10 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
11491194
{
11501195
if (gAsmType == AVX2)
11511196
computeGTWG_Segment_AVX256_32f(pSeg32f, rows, cols, rOffset, c + 2 * pix, &GTWG[2 * pix], &pixbuf[2 * pix][0], &pixbuf[2 * pix + 1][0]);
1197+
#ifdef __AVX512F__
11521198
else if (gAsmType == AVX512)
11531199
computeGTWG_Segment_AVX512_32f(pSeg32f, rows, cols, rOffset, c + 2 * pix, &GTWG[2 * pix], &pixbuf[2 * pix][0], &pixbuf[2 * pix + 1][0]);
1200+
#endif
11541201
else
11551202
{
11561203
std::cout << "expected avx512 or avx2, but got " << gAsmType << std::endl;
@@ -1176,8 +1223,10 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
11761223
float curPix;
11771224
if (gAsmType == AVX2)
11781225
curPix = DotProdPatch_AVX256_32f(pixbuf[pix], fbase[pix]);
1226+
#ifdef __AVX512F__
11791227
else if (gAsmType == AVX512)
11801228
curPix = DotProdPatch_AVX512_32f(pixbuf[pix], fbase[pix]);
1229+
#endif
11811230
else
11821231
{
11831232
std::cout << "expected avx512 or avx2, but got " << gAsmType << std::endl;
@@ -1194,8 +1243,10 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
11941243
{
11951244
if (gAsmType == AVX2)
11961245
census = CTRandomness_AVX256_32f(pSeg32f, cols, rOffset, c, pix);
1246+
#ifdef __AVX512F__
11971247
else if (gAsmType == AVX512)
11981248
census = CTRandomness_AVX512_32f(pSeg32f, cols, rOffset, c, pix);
1249+
#endif
11991250
else
12001251
{
12011252
std::cout << "expected avx512 or avx2, but got " << gAsmType << std::endl;
@@ -1262,8 +1313,6 @@ RNLERRORTYPE RNLProcess(VideoDataType *inY, VideoDataType *inCr, VideoDataType *
12621313
!inY || !inY->pData || !outY || !outY->pData)
12631314
return RNLErrorBadParameter;
12641315

1265-
#ifndef DISABLE_AVX512
1266-
12671316
memset((void *)threadStatus, 0, 120 * sizeof(threadStatus[0]));
12681317

12691318
// multi-threaded patch-based approach
@@ -1299,8 +1348,6 @@ RNLERRORTYPE RNLProcess(VideoDataType *inY, VideoDataType *inCr, VideoDataType *
12991348
result.get();
13001349
}
13011350

1302-
#endif
1303-
13041351
return RNLErrorNone;
13051352
}
13061353

@@ -1317,9 +1364,10 @@ RNLERRORTYPE RNLInit(std::string &modelPath,
13171364
std::cout << "LIB Build date: " << __DATE__ << ", " << __TIME__ << std::endl;
13181365
std::cout << "-------------------------------------------\n";
13191366

1320-
if (!is_machine_intel())
1367+
gMachineVendorType = get_machine_vendor();
1368+
if (gMachineVendorType == VENDOR_UNSUPPORTED)
13211369
{
1322-
std::cout << "[RAISR ERROR] Only supported on Intel platforms. " << std::endl;
1370+
std::cout << "[RAISR ERROR] Only supported on x86 (Intel, AMD) platforms. " << std::endl;
13231371
return RNLErrorUndefined;
13241372
}
13251373

@@ -1371,6 +1419,29 @@ RNLERRORTYPE RNLInit(std::string &modelPath,
13711419
}
13721420
gRatio = ratio;
13731421
gAsmType = asmType;
1422+
#ifdef __AVX512F__
1423+
if ( gAsmType != AVX512 && gAsmType != AVX2) gAsmType = AVX512;
1424+
#else
1425+
if ( gAsmType != AVX2) gAsmType = AVX2;
1426+
#endif
1427+
#ifdef __AVX512F__
1428+
if ( gAsmType == AVX512) {
1429+
if (machine_supports_feature(gMachineVendorType, AVX512)) {
1430+
std::cout << "ASM Type: AVX512\n";
1431+
} else {
1432+
std::cout << "ASM Type: AVX512 requested, but machine does not support it. Changing to AVX2\n";
1433+
gAsmType = AVX2;
1434+
}
1435+
}
1436+
#endif
1437+
if (gAsmType == AVX2) {
1438+
if (machine_supports_feature(gMachineVendorType, AVX2)) {
1439+
std::cout << "ASM Type: AVX2\n";
1440+
} else {
1441+
std::cout << "ASM Type: AVX2 requested, but machine does not support it.\n";
1442+
return RNLErrorBadParameter;
1443+
}
1444+
}
13741445
gBitDepth = bitDepth;
13751446

13761447
// Read config file

Library/RaisrDefaults.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ typedef enum ASMType
3939
AVX512 = 2
4040
} ASMType;
4141

42+
typedef enum MachineVendorType
43+
{
44+
INTEL = 1,
45+
AMD = 2,
46+
VENDOR_UNSUPPORTED = 3
47+
} MachineVendorType;
48+
4249
typedef enum RangeType
4350
{
4451
VideoRange = 1,

Library/Raisr_AVX512.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
*/
77
#include "Raisr_globals.h"
88
#include "Raisr_AVX512.h"
9+
#include "Raisr_AVX256.h"
910
#include <immintrin.h>
1011
#include <popcntintrin.h>
1112

Library/Raisr_globals.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include <ipp.h>
1010
#include "ThreadPool.h"
11+
#include "RaisrDefaults.h"
1112

1213
/************************************************************
1314
* const variables
@@ -32,6 +33,7 @@ const int CTmargin = CTwindowSize >> 1;
3233
const int gHashingExpand = CTmargin + 1; // Segment is again expanded by CTmargin so that all the rows in the segment can be processed by CTCountOfBitsChanged(). "+1" is to make sure the resize zone is even.
3334
static unsigned int gRatio;
3435
static ASMType gAsmType;
36+
static MachineVendorType gMachineVendorType;
3537
static unsigned int gBitDepth;
3638

3739
// Process multiple columns in each pass of the loop

0 commit comments

Comments
 (0)