88#include " Raisr.h"
99#include " Raisr_globals.h"
1010#include " Raisr_AVX256.h"
11- #include " Raisr_AVX512.h"
1211#include " Raisr_AVX256.cpp"
13- #include " Raisr_AVX512.cpp"
1412#include < fstream>
1513#include < iterator>
1614#include < iostream>
2321#include " cpuid.h"
2422#include < chrono>
2523
24+ #ifdef __AVX512F__
25+ #include " Raisr_AVX512.h"
26+ #include " Raisr_AVX512.cpp"
27+ #endif
28+
2629#ifndef WIN32
2730#include < unistd.h>
2831#endif
3437 ************************************************************/
3538#define ALIGNED_SIZE (size, align ) (((size) + (align)-1 ) & ~((align)-1 ))
3639
37- static bool is_machine_intel ()
40+ static MachineVendorType get_machine_vendor ()
3841{
39- bool ret = false ;
4042
4143 unsigned int eax = 0 , ebx = 0 , ecx = 0 , edx = 0 ;
4244
@@ -49,7 +51,50 @@ static bool is_machine_intel()
4951 vendor_string[12 ] = 0 ;
5052
5153 if (!strcmp (vendor_string, " GenuineIntel" ))
52- ret = true ;
54+ gMachineVendorType = INTEL;
55+ else if (!strcmp (vendor_string, " AuthenticAMD" ))
56+ gMachineVendorType = AMD;
57+ else
58+ gMachineVendorType = VENDOR_UNSUPPORTED;
59+ return gMachineVendorType ;
60+ }
61+
62+ static bool machine_supports_feature (MachineVendorType vendor, ASMType type)
63+ {
64+ bool ret = false ;
65+ unsigned int eax = 0 , ebx = 0 , ecx = 0 , edx = 0 ;
66+
67+ if (vendor == INTEL ) {
68+ __get_cpuid_count (0x7 , 0x0 , &eax, &ebx, &ecx, &edx);
69+
70+ if (type == AVX512) {
71+ // check for avx512f and avx512vl flags
72+ if ( ((ebx >> 16 ) & 0x1 )
73+ && ((ebx >> 31 ) & 0x1 ) )
74+ {
75+ ret = true ;
76+ }
77+ } else if (type == AVX2) {
78+ // check for avx2 flag
79+ if ( (ebx >> 5 ) & 0x1 )
80+ {
81+ ret = true ;
82+ }
83+ }
84+ }
85+ else if (vendor == AMD)
86+ {
87+ __get_cpuid_count (0x7 , 0x0 , &eax, &ebx, &ecx, &edx);
88+
89+ if (type == AVX512) {
90+ ret = false ;
91+ } else if (type == AVX2) {
92+ if ( (ebx >> 5 ) & 0x1 )
93+ {
94+ ret = true ;
95+ }
96+ }
97+ }
5398 return ret;
5499}
55100
@@ -1149,8 +1194,10 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
11491194 {
11501195 if (gAsmType == AVX2)
11511196 computeGTWG_Segment_AVX256_32f (pSeg32f, rows, cols, rOffset, c + 2 * pix, >WG[2 * pix], &pixbuf[2 * pix][0 ], &pixbuf[2 * pix + 1 ][0 ]);
1197+ #ifdef __AVX512F__
11521198 else if (gAsmType == AVX512)
11531199 computeGTWG_Segment_AVX512_32f (pSeg32f, rows, cols, rOffset, c + 2 * pix, >WG[2 * pix], &pixbuf[2 * pix][0 ], &pixbuf[2 * pix + 1 ][0 ]);
1200+ #endif
11541201 else
11551202 {
11561203 std::cout << " expected avx512 or avx2, but got " << gAsmType << std::endl;
@@ -1176,8 +1223,10 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
11761223 float curPix;
11771224 if (gAsmType == AVX2)
11781225 curPix = DotProdPatch_AVX256_32f (pixbuf[pix], fbase[pix]);
1226+ #ifdef __AVX512F__
11791227 else if (gAsmType == AVX512)
11801228 curPix = DotProdPatch_AVX512_32f (pixbuf[pix], fbase[pix]);
1229+ #endif
11811230 else
11821231 {
11831232 std::cout << " expected avx512 or avx2, but got " << gAsmType << std::endl;
@@ -1194,8 +1243,10 @@ RNLERRORTYPE processSegment(VideoDataType *srcY, VideoDataType *final_outY, Blen
11941243 {
11951244 if (gAsmType == AVX2)
11961245 census = CTRandomness_AVX256_32f (pSeg32f, cols, rOffset, c, pix);
1246+ #ifdef __AVX512F__
11971247 else if (gAsmType == AVX512)
11981248 census = CTRandomness_AVX512_32f (pSeg32f, cols, rOffset, c, pix);
1249+ #endif
11991250 else
12001251 {
12011252 std::cout << " expected avx512 or avx2, but got " << gAsmType << std::endl;
@@ -1262,8 +1313,6 @@ RNLERRORTYPE RNLProcess(VideoDataType *inY, VideoDataType *inCr, VideoDataType *
12621313 !inY || !inY->pData || !outY || !outY->pData )
12631314 return RNLErrorBadParameter;
12641315
1265- #ifndef DISABLE_AVX512
1266-
12671316 memset ((void *)threadStatus, 0 , 120 * sizeof (threadStatus[0 ]));
12681317
12691318 // multi-threaded patch-based approach
@@ -1299,8 +1348,6 @@ RNLERRORTYPE RNLProcess(VideoDataType *inY, VideoDataType *inCr, VideoDataType *
12991348 result.get ();
13001349 }
13011350
1302- #endif
1303-
13041351 return RNLErrorNone;
13051352}
13061353
@@ -1317,9 +1364,10 @@ RNLERRORTYPE RNLInit(std::string &modelPath,
13171364 std::cout << " LIB Build date: " << __DATE__ << " , " << __TIME__ << std::endl;
13181365 std::cout << " -------------------------------------------\n " ;
13191366
1320- if (!is_machine_intel ())
1367+ gMachineVendorType = get_machine_vendor ();
1368+ if (gMachineVendorType == VENDOR_UNSUPPORTED)
13211369 {
1322- std::cout << " [RAISR ERROR] Only supported on Intel platforms. " << std::endl;
1370+ std::cout << " [RAISR ERROR] Only supported on x86 ( Intel, AMD) platforms. " << std::endl;
13231371 return RNLErrorUndefined;
13241372 }
13251373
@@ -1371,6 +1419,29 @@ RNLERRORTYPE RNLInit(std::string &modelPath,
13711419 }
13721420 gRatio = ratio;
13731421 gAsmType = asmType;
1422+ #ifdef __AVX512F__
1423+ if ( gAsmType != AVX512 && gAsmType != AVX2) gAsmType = AVX512;
1424+ #else
1425+ if ( gAsmType != AVX2) gAsmType = AVX2;
1426+ #endif
1427+ #ifdef __AVX512F__
1428+ if ( gAsmType == AVX512) {
1429+ if (machine_supports_feature (gMachineVendorType , AVX512)) {
1430+ std::cout << " ASM Type: AVX512\n " ;
1431+ } else {
1432+ std::cout << " ASM Type: AVX512 requested, but machine does not support it. Changing to AVX2\n " ;
1433+ gAsmType = AVX2;
1434+ }
1435+ }
1436+ #endif
1437+ if (gAsmType == AVX2) {
1438+ if (machine_supports_feature (gMachineVendorType , AVX2)) {
1439+ std::cout << " ASM Type: AVX2\n " ;
1440+ } else {
1441+ std::cout << " ASM Type: AVX2 requested, but machine does not support it.\n " ;
1442+ return RNLErrorBadParameter;
1443+ }
1444+ }
13741445 gBitDepth = bitDepth;
13751446
13761447 // Read config file
0 commit comments