Skip to content

Commit ddebf67

Browse files
authored
Merge pull request #334 from NotRequiem/dev
improved timing attacks
2 parents 2b61a55 + 982ca3d commit ddebf67

2 files changed

Lines changed: 207 additions & 81 deletions

File tree

src/vmaware.hpp

Lines changed: 104 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -27,14 +27,14 @@
2727
*
2828
*
2929
* ============================== SECTIONS ==================================
30-
* - enums for publicly accessible techniques => line 556
31-
* - struct for internal cpu operations => line 749
32-
* - struct for internal memoization => line 1220
33-
* - struct for internal utility functions => line 1344
34-
* - struct for internal core components => line 10066
35-
* - start of VM detection technique list => line 2345
36-
* - start of public VM detection functions => line 10730
37-
* - start of externally defined variables => line 11679
30+
* - enums for publicly accessible techniques => line 551
31+
* - struct for internal cpu operations => line 742
32+
* - struct for internal memoization => line 1213
33+
* - struct for internal utility functions => line 1337
34+
* - struct for internal core components => line 10026
35+
* - start of VM detection technique list => line 2338
36+
* - start of public VM detection functions => line 10690
37+
* - start of externally defined variables => line 11638
3838
*
3939
*
4040
* ============================== EXAMPLE ===================================
@@ -1563,7 +1563,7 @@ struct VM {
15631563
#pragma GCC diagnostic push
15641564
#pragma GCC diagnostic ignored "-Wignored-attributes"
15651565
#endif
1566-
std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(cmd, "r"), pclose);
1566+
std::unique_ptr<FILE, int(*)(FILE*)> pipe(popen(cmd, "r"), pclose);
15671567

15681568
#if (ARM)
15691569
#pragma GCC diagnostic pop
@@ -7831,41 +7831,53 @@ struct VM {
78317831
* @author Requiem (https://github.com/NotRequiem)
78327832
* @implements VM::TIMER
78337833
*/
7834+
#if defined(MSVC)
7835+
#pragma optimize("", off)
7836+
#elif defined(__GNUC__)
7837+
#pragma GCC push_options
7838+
#pragma GCC optimize("O0")
7839+
#endif
78347840
[[nodiscard]]
78357841
#if (LINUX)
7836-
// Disable specific sanitizers for more accurate timing measurements.
78377842
__attribute__((no_sanitize("address", "leak", "thread", "undefined")))
78387843
#endif
78397844
static bool timer() {
78407845
#if (ARM || !x86)
78417846
return false;
78427847
#else
7843-
constexpr i32 classicIterations = 10; // Number of iterations for the classic RDTSC check
7844-
constexpr u32 classicThreshold = 20000u; // Cycle threshold per iteration for classic RDTSC check
7845-
constexpr i32 requiredClassicSpikes = classicIterations / 2; // At least 50% of iterations must spike
7848+
constexpr u8 classicIterations = 10; // Number of iterations for the classic RDTSC check
7849+
constexpr u16 classicThreshold = 20000u; // Cycle threshold per iteration for classic RDTSC check
7850+
constexpr u8 requiredClassicSpikes = classicIterations / 2; // At least 50% of iterations must spike
78467851

7847-
constexpr i32 spammerIterations = 1000; // Iterations for the multi-CPU/spammer check
7848-
constexpr u32 spammerAvgThreshold = 55000u; // Average cycle threshold for the spammer check
7852+
constexpr u16 spammerIterations = 1000; // Iterations for the multi-CPU/spammer check
7853+
constexpr u16 spammerAvgThreshold = 20000u; // Average cycle threshold for the spammer check
78497854

78507855
#if (WINDOWS)
7851-
constexpr int qpcRatioThreshold = 3000; // QPC ratio threshold
7856+
constexpr u16 qpcRatioThreshold = 3000; // QPC ratio threshold
7857+
constexpr u8 tscIterations = 10; // Number of iterations for the TSC synchronization check
7858+
constexpr u16 tscSyncDiffThreshold = 500; // TSC difference threshold
78527859
#endif
78537860

7854-
constexpr i32 tscIterations = 10; // Number of iterations for the TSC synchronization check
7855-
constexpr u64 tscSyncDiffThreshold = 1000000000LL; // TSC difference threshold
7856-
78577861
// to minimize context switching/scheduling
78587862
#if (WINDOWS)
7859-
HANDLE hThread = GetCurrentThread();
7860-
int oldPriority = GetThreadPriority(hThread);
7863+
const HANDLE hThread = GetCurrentThread();
7864+
const int oldPriority = GetThreadPriority(hThread);
78617865
SetThreadPriority(hThread, THREAD_PRIORITY_TIME_CRITICAL);
78627866
#else
7863-
int oldPolicy = sched_getscheduler(0);
7864-
sched_param oldParam;
7865-
sched_getparam(0, &oldParam);
7866-
sched_param newParam{};
7867-
newParam.sched_priority = sched_get_priority_max(SCHED_FIFO);
7868-
sched_setscheduler(0, SCHED_FIFO, &newParam);
7867+
bool hasSchedPriority = (geteuid() == 0);
7868+
int oldPolicy = SCHED_OTHER;
7869+
sched_param oldParam{};
7870+
7871+
if (hasSchedPriority) {
7872+
oldPolicy = sched_getscheduler(0);
7873+
sched_getparam(0, &oldParam);
7874+
sched_param newParam{};
7875+
newParam.sched_priority = sched_get_priority_max(SCHED_FIFO);
7876+
7877+
if (sched_setscheduler(0, SCHED_FIFO, &newParam) == -1) {
7878+
hasSchedPriority = false;
7879+
}
7880+
}
78697881
#endif
78707882

78717883
auto restoreThreadPriority = [&]() {
@@ -7877,7 +7889,9 @@ struct VM {
78777889
};
78787890

78797891
// --- 1. Classic Timing Check (rdtsc + cpuid + rdtsc) ---
7892+
#ifdef __VMAWARE_DEBUG__
78807893
u64 totalCycles = 0;
7894+
#endif
78817895
int spikeCount = 0;
78827896
for (int i = 0; i < classicIterations; i++) {
78837897
u64 start = __rdtsc();
@@ -7892,12 +7906,26 @@ struct VM {
78927906
#endif
78937907
u64 end = __rdtsc();
78947908
u64 cycles = end - start;
7909+
#ifdef __VMAWARE_DEBUG__
78957910
totalCycles += cycles;
7911+
#endif
78967912
if (cycles >= classicThreshold) {
78977913
spikeCount++;
78987914
}
7899-
// Sleep to induce cache flushing
7900-
std::this_thread::sleep_for(std::chrono::microseconds(500));
7915+
// to induce cache flushing
7916+
constexpr size_t bufferSize = static_cast<size_t>(64 * 1024) * 1024;
7917+
volatile char* flushBuffer = new volatile char[bufferSize];
7918+
7919+
// better than thread sleeps
7920+
for (size_t j = 0; j < bufferSize; j += 64) {
7921+
flushBuffer[j] = static_cast<char>(j);
7922+
#if (x86 && (GCC || CLANG || MSVC))
7923+
_mm_clflush(const_cast<const void*>(
7924+
reinterpret_cast<const volatile void*>(&flushBuffer[j])));
7925+
#endif
7926+
}
7927+
7928+
delete[] flushBuffer;
79017929
}
79027930

79037931
#ifdef __VMAWARE_DEBUG__
@@ -8014,9 +8042,15 @@ struct VM {
80148042
QueryPerformanceCounter(&startQPC);
80158043
volatile int dummy = 0;
80168044
for (int i = 0; i < 100000; i++) {
8017-
dummy ^= i; // to prevent optimization
8045+
dummy ^= i;
8046+
#if (GCC || CLANG)
8047+
asm volatile("" ::: "memory"); // memory clobber
8048+
#elif (MSVC)
80188049
_ReadWriteBarrier();
8050+
_mm_mfence();
8051+
#endif
80198052
}
8053+
80208054
QueryPerformanceCounter(&endQPC);
80218055
LONGLONG dummyTime = endQPC.QuadPart - startQPC.QuadPart;
80228056

@@ -8038,33 +8072,57 @@ struct VM {
80388072
// Try reading the invariant TSC on two different cores to attempt to detect vCPU timers being shared
80398073
const bool tscSyncDetected = [&]() noexcept -> bool {
80408074
int tscIssueCount = 0;
8041-
unsigned long long tscCore1 = 0, tscCore2 = 0;
8075+
u64 tscCore1 = 0, tscCore2 = 0;
80428076
for (int i = 0; i < tscIterations; i++) {
8043-
__try {
8044-
unsigned int aux = 0;
8077+
unsigned int aux = 0;
8078+
8079+
try {
8080+
#if (WINDOWS)
80458081
DWORD_PTR oldAffinity = SetThreadAffinityMask(GetCurrentThread(), 1);
8046-
tscCore1 = __rdtscp(&aux); // the use of a serializing variant for the instruction stream is done on purpose
8082+
tscCore1 = __rdtscp(&aux);
80478083
SetThreadAffinityMask(GetCurrentThread(), 2);
80488084
tscCore2 = __rdtscp(&aux);
80498085
SetThreadAffinityMask(GetCurrentThread(), oldAffinity);
8086+
#elif (LINUX)
8087+
cpu_set_t origSet;
8088+
pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &origSet);
8089+
8090+
// Core 0
8091+
cpu_set_t set;
8092+
CPU_ZERO(&set);
8093+
CPU_SET(0, &set);
8094+
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &set);
8095+
tscCore1 = __rdtscp(&aux);
8096+
8097+
// Core 1
8098+
CPU_ZERO(&set);
8099+
CPU_SET(1, &set);
8100+
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &set);
8101+
tscCore2 = __rdtscp(&aux);
8102+
8103+
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &origSet);
8104+
#endif
80508105
}
8051-
__except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION
8052-
? EXCEPTION_EXECUTE_HANDLER
8053-
: EXCEPTION_CONTINUE_SEARCH) {
8054-
// RDTSCP is widely supported on real hardware, most likely VM
8106+
catch (...) {
80558107
tscIssueCount++;
80568108
continue;
80578109
}
8058-
if (std::llabs(static_cast<long long>(tscCore2 - tscCore1)) > tscSyncDiffThreshold) {
8110+
8111+
// hypervisors often have nearly identical TSCs across vCPUs
8112+
const u64 diff = (tscCore2 > tscCore1)
8113+
? (tscCore2 - tscCore1)
8114+
: (tscCore1 - tscCore2);
8115+
8116+
if (diff < tscSyncDiffThreshold) {
80598117
tscIssueCount++;
80608118
}
80618119
}
80628120
#ifdef __VMAWARE_DEBUG__
80638121
debug("TIMER: TSC sync check",
80648122
" - Core1: ", tscCore1,
80658123
" Core2: ", tscCore2,
8066-
" Diff: ", tscCore2 - tscCore1,
8067-
" (Threshold: ", tscSyncDiffThreshold,
8124+
" Delta: ", tscCore2 - tscCore1,
8125+
" (Threshold: <", tscSyncDiffThreshold,
80688126
')');
80698127
#endif
80708128
return (tscIssueCount >= tscIterations / 2);
@@ -8080,6 +8138,11 @@ struct VM {
80808138
return false;
80818139
#endif
80828140
}
8141+
#if defined(MSVC)
8142+
#pragma optimize("", on)
8143+
#elif defined(__GNUC__)
8144+
#pragma GCC pop_options
8145+
#endif
80838146

80848147

80858148
/**

0 commit comments

Comments
 (0)