@@ -69,6 +69,12 @@ public class KvmVmSyncPingTask extends VmTracer implements KVMPingAgentNoFailure
6969 private List <Class > skipVmTracerReplies = new ArrayList <>();
7070 private Map <String , Integer > vmInShutdownMap = new ConcurrentHashMap <>();
7171
72+ // Orphaned skip entries from departed MN nodes. Key=vmUuid, Value=timestamp when orphaned.
73+ // These VMs remain in skip-trace state for ORPHAN_TTL_MS to avoid false HA triggers
74+ // when a MN restarts and its in-flight VM operations haven't completed yet. See ZSTAC-80821.
75+ private final ConcurrentHashMap <String , Long > orphanedSkipVms = new ConcurrentHashMap <>();
76+ private static final long ORPHAN_TTL_MS = 10 * 60 * 1000 ; // 10 minutes
77+
7278 {
7379 getReflections ().getTypesAnnotatedWith (SkipVmTracer .class ).forEach (clz -> {
7480 skipVmTracerMessages .add (clz .asSubclass (Message .class ));
@@ -196,8 +202,13 @@ private void syncVm(final HostInventory host, final Completion completion) {
196202 // Get vms to skip before send command to host to confirm the vm will be skipped after sync command finished.
197203 // The problem is if one vm-sync skipped operation is started and finished during vm sync command's handling
198204 // vm state would still be sync to mn
205+ // ZSTAC-80821: clean up expired orphaned entries each sync cycle
206+ cleanupExpiredOrphanedSkipVms ();
207+
199208 Set <String > vmsToSkipSetHostSide = new HashSet <>();
200209 vmsToSkip .values ().forEach (vmsToSkipSetHostSide ::addAll );
210+ // ZSTAC-80821: also skip VMs from departed MN nodes that are still within TTL
211+ vmsToSkipSetHostSide .addAll (orphanedSkipVms .keySet ());
201212
202213 // if the vm is not running on host when sync command executing but started as soon as possible
203214 // before response handling of vm sync, mgmtSideStates will including the running vm but not result in
@@ -228,6 +239,8 @@ public void run(MessageReply reply) {
228239
229240 // Get vms to skip after sync result returned.
230241 vmsToSkip .values ().forEach (vmsToSkipSetHostSide ::addAll );
242+ // ZSTAC-80821: include orphaned entries from departed MN nodes
243+ vmsToSkipSetHostSide .addAll (orphanedSkipVms .keySet ());
231244
232245 Collection <String > vmUuidsInDeleteVmGC = DeleteVmGC .queryVmInGC (host .getUuid (), ret .getStates ().keySet ());
233246
@@ -446,7 +459,19 @@ public void nodeJoin(ManagementNodeInventory inv) {
446459 @ Override
447460 public void nodeLeft (ManagementNodeInventory inv ) {
448461 vmApis .remove (inv .getUuid ());
449- vmsToSkip .remove (inv .getUuid ());
462+
463+ // ZSTAC-80821: Instead of immediately removing skip list entries, move them
464+ // to the orphaned set with a TTL. This prevents false HA triggers for VMs that
465+ // are still being started by kvmagent but whose controlling MN has restarted.
466+ Set <String > skippedVms = vmsToSkip .remove (inv .getUuid ());
467+ if (skippedVms != null && !skippedVms .isEmpty ()) {
468+ long now = System .currentTimeMillis ();
469+ for (String vmUuid : skippedVms ) {
470+ orphanedSkipVms .put (vmUuid , now );
471+ logger .info (String .format ("moved VM[uuid:%s] from departed MN[uuid:%s] skip list to orphaned set" +
472+ " (will expire in %d minutes)" , vmUuid , inv .getUuid (), ORPHAN_TTL_MS / 60000 ));
473+ }
474+ }
450475 }
451476
452477 @ Override
@@ -460,6 +485,41 @@ public void iJoin(ManagementNodeInventory inv) {
460485 }
461486
462487 public boolean isVmDoNotNeedToTrace (String vmUuid ) {
463- return vmsToSkip .values ().stream ().anyMatch (vmsToSkipSet -> vmsToSkipSet .contains (vmUuid ));
488+ if (vmsToSkip .values ().stream ().anyMatch (vmsToSkipSet -> vmsToSkipSet .contains (vmUuid ))) {
489+ return true ;
490+ }
491+
492+ // ZSTAC-80821: Also check orphaned skip entries from departed MN nodes
493+ Long orphanedAt = orphanedSkipVms .get (vmUuid );
494+ if (orphanedAt != null ) {
495+ if (System .currentTimeMillis () - orphanedAt < ORPHAN_TTL_MS ) {
496+ logger .debug (String .format ("VM[uuid:%s] is in orphaned skip set, skipping trace" , vmUuid ));
497+ return true ;
498+ } else {
499+ // Expired, clean up
500+ orphanedSkipVms .remove (vmUuid );
501+ logger .info (String .format ("orphaned skip entry for VM[uuid:%s] expired after %d minutes, resuming trace" ,
502+ vmUuid , ORPHAN_TTL_MS / 60000 ));
503+ }
504+ }
505+
506+ return false ;
507+ }
508+
509+ // Periodically clean up expired orphaned entries. Called from VM sync cycle.
510+ private void cleanupExpiredOrphanedSkipVms () {
511+ if (orphanedSkipVms .isEmpty ()) {
512+ return ;
513+ }
514+
515+ long now = System .currentTimeMillis ();
516+ Iterator <Map .Entry <String , Long >> it = orphanedSkipVms .entrySet ().iterator ();
517+ while (it .hasNext ()) {
518+ Map .Entry <String , Long > entry = it .next ();
519+ if (now - entry .getValue () >= ORPHAN_TTL_MS ) {
520+ it .remove ();
521+ logger .info (String .format ("cleaned up expired orphaned skip entry for VM[uuid:%s]" , entry .getKey ()));
522+ }
523+ }
464524 }
465525}
0 commit comments