@@ -69,6 +69,15 @@ public class KvmVmSyncPingTask extends VmTracer implements KVMPingAgentNoFailure
6969 private List <Class > skipVmTracerReplies = new ArrayList <>();
7070 private Map <String , Integer > vmInShutdownMap = new ConcurrentHashMap <>();
7171
72+ // Orphaned skip entries from departed MN nodes. Key=vmUuid, Value=timestamp when orphaned.
73+ // These VMs remain in skip-trace state to avoid false HA triggers
74+ // when a MN restarts and its in-flight VM operations haven't completed yet. See ZSTAC-80821.
75+ private final ConcurrentHashMap <String , Long > orphanedSkipVms = new ConcurrentHashMap <>();
76+
77+ private long getOrphanTtlMs () {
78+ return KVMGlobalConfig .ORPHANED_VM_SKIP_TIMEOUT .value (Long .class ) * 1000 ;
79+ }
80+
7281 {
7382 getReflections ().getTypesAnnotatedWith (SkipVmTracer .class ).forEach (clz -> {
7483 skipVmTracerMessages .add (clz .asSubclass (Message .class ));
@@ -196,8 +205,13 @@ private void syncVm(final HostInventory host, final Completion completion) {
196205 // Get vms to skip before send command to host to confirm the vm will be skipped after sync command finished.
197206 // The problem is if one vm-sync skipped operation is started and finished during vm sync command's handling
198207 // vm state would still be sync to mn
208+ // ZSTAC-80821: clean up expired orphaned entries each sync cycle
209+ cleanupExpiredOrphanedSkipVms ();
210+
199211 Set <String > vmsToSkipSetHostSide = new HashSet <>();
200212 vmsToSkip .values ().forEach (vmsToSkipSetHostSide ::addAll );
213+ // ZSTAC-80821: also skip VMs from departed MN nodes that are still within TTL
214+ vmsToSkipSetHostSide .addAll (orphanedSkipVms .keySet ());
201215
202216 // if the vm is not running on host when sync command executing but started as soon as possible
203217 // before response handling of vm sync, mgmtSideStates will including the running vm but not result in
@@ -228,6 +242,8 @@ public void run(MessageReply reply) {
228242
229243 // Get vms to skip after sync result returned.
230244 vmsToSkip .values ().forEach (vmsToSkipSetHostSide ::addAll );
245+ // ZSTAC-80821: include orphaned entries from departed MN nodes
246+ vmsToSkipSetHostSide .addAll (orphanedSkipVms .keySet ());
231247
232248 Collection <String > vmUuidsInDeleteVmGC = DeleteVmGC .queryVmInGC (host .getUuid (), ret .getStates ().keySet ());
233249
@@ -446,7 +462,19 @@ public void nodeJoin(ManagementNodeInventory inv) {
446462 @ Override
447463 public void nodeLeft (ManagementNodeInventory inv ) {
448464 vmApis .remove (inv .getUuid ());
449- vmsToSkip .remove (inv .getUuid ());
465+
466+ // ZSTAC-80821: Instead of immediately removing skip list entries, move them
467+ // to the orphaned set with a TTL. This prevents false HA triggers for VMs that
468+ // are still being started by kvmagent but whose controlling MN has restarted.
469+ Set <String > skippedVms = vmsToSkip .remove (inv .getUuid ());
470+ if (skippedVms != null && !skippedVms .isEmpty ()) {
471+ long now = System .currentTimeMillis ();
472+ for (String vmUuid : skippedVms ) {
473+ orphanedSkipVms .put (vmUuid , now );
474+ logger .info (String .format ("moved VM[uuid:%s] from departed MN[uuid:%s] skip list to orphaned set" +
475+ " (will expire in %d minutes)" , vmUuid , inv .getUuid (), getOrphanTtlMs () / 60000 ));
476+ }
477+ }
450478 }
451479
452480 @ Override
@@ -460,6 +488,39 @@ public void iJoin(ManagementNodeInventory inv) {
460488 }
461489
462490 public boolean isVmDoNotNeedToTrace (String vmUuid ) {
463- return vmsToSkip .values ().stream ().anyMatch (vmsToSkipSet -> vmsToSkipSet .contains (vmUuid ));
491+ if (vmsToSkip .values ().stream ().anyMatch (vmsToSkipSet -> vmsToSkipSet .contains (vmUuid ))) {
492+ return true ;
493+ }
494+
495+ // ZSTAC-80821: Also check orphaned skip entries from departed MN nodes
496+ Long orphanedAt = orphanedSkipVms .get (vmUuid );
497+ if (orphanedAt != null ) {
498+ if (System .currentTimeMillis () - orphanedAt < getOrphanTtlMs ()) {
499+ logger .debug (String .format ("VM[uuid:%s] is in orphaned skip set, skipping trace" , vmUuid ));
500+ return true ;
501+ } else {
502+ // Expired, clean up
503+ orphanedSkipVms .remove (vmUuid , orphanedAt );
504+ logger .info (String .format ("orphaned skip entry for VM[uuid:%s] expired after %d minutes, resuming trace" ,
505+ vmUuid , getOrphanTtlMs () / 60000 ));
506+ }
507+ }
508+
509+ return false ;
510+ }
511+
512+ // Periodically clean up expired orphaned entries. Called from VM sync cycle.
513+ private void cleanupExpiredOrphanedSkipVms () {
514+ if (orphanedSkipVms .isEmpty ()) {
515+ return ;
516+ }
517+
518+ long now = System .currentTimeMillis ();
519+ for (Map .Entry <String , Long > entry : orphanedSkipVms .entrySet ()) {
520+ if (now - entry .getValue () >= getOrphanTtlMs ()) {
521+ orphanedSkipVms .remove (entry .getKey (), entry .getValue ());
522+ logger .info (String .format ("cleaned up expired orphaned skip entry for VM[uuid:%s]" , entry .getKey ()));
523+ }
524+ }
464525 }
465526}
0 commit comments