Skip to content

Commit 6014606

Browse files
author
gitlab
committed
Merge branch 'fix/ZSTAC-80821@@2' into '5.5.12'
<fix>[kvm]: configurable orphan skip timeout See merge request zstackio/zstack!9203
2 parents f55f04a + bec4623 commit 6014606

2 files changed

Lines changed: 67 additions & 2 deletions

File tree

plugin/kvm/src/main/java/org/zstack/kvm/KVMGlobalConfig.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,10 @@ public class KVMGlobalConfig {
120120
@GlobalConfigDef(defaultValue = "false", type = Boolean.class, description = "enable install host shutdown hook")
121121
public static GlobalConfig INSTALL_HOST_SHUTDOWN_HOOK = new GlobalConfig(CATEGORY, "install.host.shutdown.hook");
122122

123+
@GlobalConfigValidation(numberGreaterThan = 0)
124+
@GlobalConfigDef(defaultValue = "600", type = Long.class, description = "timeout in seconds for orphaned VM skip entries from departed management nodes")
125+
public static GlobalConfig ORPHANED_VM_SKIP_TIMEOUT = new GlobalConfig(CATEGORY, "vm.orphanedSkipTimeout");
126+
123127
@GlobalConfigValidation(validValues = {"true", "false"})
124128
@GlobalConfigDef(defaultValue = "false", type = Boolean.class, description = "enable memory auto balloon")
125129
@BindResourceConfig({VmInstanceVO.class, HostVO.class, ClusterVO.class})

plugin/kvm/src/main/java/org/zstack/kvm/KvmVmSyncPingTask.java

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,15 @@ public class KvmVmSyncPingTask extends VmTracer implements KVMPingAgentNoFailure
6969
private List<Class> skipVmTracerReplies = new ArrayList<>();
7070
private Map<String, Integer> vmInShutdownMap = new ConcurrentHashMap<>();
7171

72+
// Orphaned skip entries from departed MN nodes. Key=vmUuid, Value=time the entry was orphaned (System.currentTimeMillis(), in milliseconds).
73+
// These VMs remain in skip-trace state to avoid false HA triggers
74+
// when a MN restarts and its in-flight VM operations haven't completed yet. See ZSTAC-80821.
75+
private final ConcurrentHashMap<String, Long> orphanedSkipVms = new ConcurrentHashMap<>();
76+
77+
private long getOrphanTtlMs() {
78+
return KVMGlobalConfig.ORPHANED_VM_SKIP_TIMEOUT.value(Long.class) * 1000;
79+
}
80+
7281
{
7382
getReflections().getTypesAnnotatedWith(SkipVmTracer.class).forEach(clz -> {
7483
skipVmTracerMessages.add(clz.asSubclass(Message.class));
@@ -196,8 +205,13 @@ private void syncVm(final HostInventory host, final Completion completion) {
196205
// Get vms to skip before send command to host to confirm the vm will be skipped after sync command finished.
197206
// The problem is if one vm-sync skipped operation is started and finished during vm sync command's handling
198207
// vm state would still be sync to mn
208+
// ZSTAC-80821: clean up expired orphaned entries each sync cycle
209+
cleanupExpiredOrphanedSkipVms();
210+
199211
Set<String> vmsToSkipSetHostSide = new HashSet<>();
200212
vmsToSkip.values().forEach(vmsToSkipSetHostSide::addAll);
213+
// ZSTAC-80821: also skip VMs from departed MN nodes that are still within TTL
214+
vmsToSkipSetHostSide.addAll(orphanedSkipVms.keySet());
201215

202216
// if the vm is not running on host when sync command executing but started as soon as possible
203217
// before response handling of vm sync, mgmtSideStates will including the running vm but not result in
@@ -228,6 +242,8 @@ public void run(MessageReply reply) {
228242

229243
// Get vms to skip after sync result returned.
230244
vmsToSkip.values().forEach(vmsToSkipSetHostSide::addAll);
245+
// ZSTAC-80821: include orphaned entries from departed MN nodes
246+
vmsToSkipSetHostSide.addAll(orphanedSkipVms.keySet());
231247

232248
Collection<String> vmUuidsInDeleteVmGC = DeleteVmGC.queryVmInGC(host.getUuid(), ret.getStates().keySet());
233249

@@ -446,7 +462,19 @@ public void nodeJoin(ManagementNodeInventory inv) {
446462
@Override
447463
public void nodeLeft(ManagementNodeInventory inv) {
448464
vmApis.remove(inv.getUuid());
449-
vmsToSkip.remove(inv.getUuid());
465+
466+
// ZSTAC-80821: Instead of immediately removing skip list entries, move them
467+
// to the orphaned set with a TTL. This prevents false HA triggers for VMs that
468+
// are still being started by kvmagent but whose controlling MN has restarted.
469+
Set<String> skippedVms = vmsToSkip.remove(inv.getUuid());
470+
if (skippedVms != null && !skippedVms.isEmpty()) {
471+
long now = System.currentTimeMillis();
472+
for (String vmUuid : skippedVms) {
473+
orphanedSkipVms.put(vmUuid, now);
474+
logger.info(String.format("moved VM[uuid:%s] from departed MN[uuid:%s] skip list to orphaned set" +
475+
" (will expire in %d minutes)", vmUuid, inv.getUuid(), getOrphanTtlMs() / 60000));
476+
}
477+
}
450478
}
451479

452480
@Override
@@ -460,6 +488,39 @@ public void iJoin(ManagementNodeInventory inv) {
460488
}
461489

462490
public boolean isVmDoNotNeedToTrace(String vmUuid) {
463-
return vmsToSkip.values().stream().anyMatch(vmsToSkipSet -> vmsToSkipSet.contains(vmUuid));
491+
if (vmsToSkip.values().stream().anyMatch(vmsToSkipSet -> vmsToSkipSet.contains(vmUuid))) {
492+
return true;
493+
}
494+
495+
// ZSTAC-80821: Also check orphaned skip entries from departed MN nodes
496+
Long orphanedAt = orphanedSkipVms.get(vmUuid);
497+
if (orphanedAt != null) {
498+
if (System.currentTimeMillis() - orphanedAt < getOrphanTtlMs()) {
499+
logger.debug(String.format("VM[uuid:%s] is in orphaned skip set, skipping trace", vmUuid));
500+
return true;
501+
} else {
502+
// Expired, clean up
503+
orphanedSkipVms.remove(vmUuid, orphanedAt);
504+
logger.info(String.format("orphaned skip entry for VM[uuid:%s] expired after %d minutes, resuming trace",
505+
vmUuid, getOrphanTtlMs() / 60000));
506+
}
507+
}
508+
509+
return false;
510+
}
511+
512+
// Periodically clean up expired orphaned entries. Called from VM sync cycle.
513+
private void cleanupExpiredOrphanedSkipVms() {
514+
if (orphanedSkipVms.isEmpty()) {
515+
return;
516+
}
517+
518+
long now = System.currentTimeMillis();
519+
for (Map.Entry<String, Long> entry : orphanedSkipVms.entrySet()) {
520+
if (now - entry.getValue() >= getOrphanTtlMs()) {
521+
orphanedSkipVms.remove(entry.getKey(), entry.getValue());
522+
logger.info(String.format("cleaned up expired orphaned skip entry for VM[uuid:%s]", entry.getKey()));
523+
}
524+
}
464525
}
465526
}

0 commit comments

Comments
 (0)