Skip to content

Commit bec4623

Browse files
committed
<fix>[kvm]: configurable orphan skip timeout
Resolves: ZSTAC-80821 Change-Id: Ia9a9597feceb96b3e6e22259e2d0be7bde8ae499
1 parent 7a6d5d7 commit bec4623

2 files changed

Lines changed: 13 additions & 6 deletions

File tree

plugin/kvm/src/main/java/org/zstack/kvm/KVMGlobalConfig.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,10 @@ public class KVMGlobalConfig {
120120
@GlobalConfigDef(defaultValue = "false", type = Boolean.class, description = "enable install host shutdown hook")
121121
public static GlobalConfig INSTALL_HOST_SHUTDOWN_HOOK = new GlobalConfig(CATEGORY, "install.host.shutdown.hook");
122122

123+
@GlobalConfigValidation(numberGreaterThan = 0)
124+
@GlobalConfigDef(defaultValue = "600", type = Long.class, description = "timeout in seconds for orphaned VM skip entries from departed management nodes")
125+
public static GlobalConfig ORPHANED_VM_SKIP_TIMEOUT = new GlobalConfig(CATEGORY, "vm.orphanedSkipTimeout");
126+
123127
@GlobalConfigValidation(validValues = {"true", "false"})
124128
@GlobalConfigDef(defaultValue = "false", type = Boolean.class, description = "enable memory auto balloon")
125129
@BindResourceConfig({VmInstanceVO.class, HostVO.class, ClusterVO.class})

plugin/kvm/src/main/java/org/zstack/kvm/KvmVmSyncPingTask.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,13 @@ public class KvmVmSyncPingTask extends VmTracer implements KVMPingAgentNoFailure
7070
private Map<String, Integer> vmInShutdownMap = new ConcurrentHashMap<>();
7171

7272
// Orphaned skip entries from departed MN nodes. Key=vmUuid, Value=timestamp when orphaned.
73-
// These VMs remain in skip-trace state for ORPHAN_TTL_MS to avoid false HA triggers
73+
// These VMs remain in skip-trace state to avoid false HA triggers
7474
// when a MN restarts and its in-flight VM operations haven't completed yet. See ZSTAC-80821.
7575
private final ConcurrentHashMap<String, Long> orphanedSkipVms = new ConcurrentHashMap<>();
76-
private static final long ORPHAN_TTL_MS = 10 * 60 * 1000; // 10 minutes
76+
77+
private long getOrphanTtlMs() {
78+
return KVMGlobalConfig.ORPHANED_VM_SKIP_TIMEOUT.value(Long.class) * 1000;
79+
}
7780

7881
{
7982
getReflections().getTypesAnnotatedWith(SkipVmTracer.class).forEach(clz -> {
@@ -469,7 +472,7 @@ public void nodeLeft(ManagementNodeInventory inv) {
469472
for (String vmUuid : skippedVms) {
470473
orphanedSkipVms.put(vmUuid, now);
471474
logger.info(String.format("moved VM[uuid:%s] from departed MN[uuid:%s] skip list to orphaned set" +
472-
" (will expire in %d minutes)", vmUuid, inv.getUuid(), ORPHAN_TTL_MS / 60000));
475+
" (will expire in %d minutes)", vmUuid, inv.getUuid(), getOrphanTtlMs() / 60000));
473476
}
474477
}
475478
}
@@ -492,14 +495,14 @@ public boolean isVmDoNotNeedToTrace(String vmUuid) {
492495
// ZSTAC-80821: Also check orphaned skip entries from departed MN nodes
493496
Long orphanedAt = orphanedSkipVms.get(vmUuid);
494497
if (orphanedAt != null) {
495-
if (System.currentTimeMillis() - orphanedAt < ORPHAN_TTL_MS) {
498+
if (System.currentTimeMillis() - orphanedAt < getOrphanTtlMs()) {
496499
logger.debug(String.format("VM[uuid:%s] is in orphaned skip set, skipping trace", vmUuid));
497500
return true;
498501
} else {
499502
// Expired, clean up
500503
orphanedSkipVms.remove(vmUuid, orphanedAt);
501504
logger.info(String.format("orphaned skip entry for VM[uuid:%s] expired after %d minutes, resuming trace",
502-
vmUuid, ORPHAN_TTL_MS / 60000));
505+
vmUuid, getOrphanTtlMs() / 60000));
503506
}
504507
}
505508

@@ -514,7 +517,7 @@ private void cleanupExpiredOrphanedSkipVms() {
514517

515518
long now = System.currentTimeMillis();
516519
for (Map.Entry<String, Long> entry : orphanedSkipVms.entrySet()) {
517-
if (now - entry.getValue() >= ORPHAN_TTL_MS) {
520+
if (now - entry.getValue() >= getOrphanTtlMs()) {
518521
orphanedSkipVms.remove(entry.getKey(), entry.getValue());
519522
logger.info(String.format("cleaned up expired orphaned skip entry for VM[uuid:%s]", entry.getKey()));
520523
}

0 commit comments

Comments
 (0)