@@ -73,6 +73,13 @@ class EndpointLifecycleManager {
7373 */
7474 private static final int MAX_TRANSIENT_FAILURE_COUNT = 3 ;
7575
76+ private enum EvictionReason {
77+ TRANSIENT_FAILURE ,
78+ SHUTDOWN ,
79+ IDLE ,
80+ STALE
81+ }
82+
7683 /** Per-endpoint lifecycle state. */
7784 static final class EndpointState {
7885 final String address ;
@@ -95,6 +102,7 @@ static final class EndpointState {
95102
96103 private final ChannelEndpointCache endpointCache ;
97104 private final Map <String , EndpointState > endpoints = new ConcurrentHashMap <>();
105+ private final Set <String > transientFailureEvictedAddresses = ConcurrentHashMap .newKeySet ();
98106
99107 /**
100108 * Active addresses reported by each ChannelFinder, keyed by database id.
@@ -103,8 +111,8 @@ static final class EndpointState {
103111 * stable database-id key instead of a strong ChannelFinder reference. KeyAwareChannel unregisters
104112 * stale entries when a finder is cleared.
105113 *
106- * <p>All reads and writes to this map, and stale-endpoint eviction based on it, are synchronized
107- * on {@link #activeAddressLock}.
114+ * <p>All reads and writes to this map, and all updates to {@link
115+ * #transientFailureEvictedAddresses}, are synchronized on {@link #activeAddressLock}.
108116 */
109117 private final Map <String , Set <String >> activeAddressesPerFinder = new ConcurrentHashMap <>();
110118
@@ -187,6 +195,24 @@ private boolean ensureEndpointExists(String address) {
187195 return created [0 ];
188196 }
189197
198+ private void retainTransientFailureEvictionMarkers (Set <String > activeAddresses ) {
199+ synchronized (activeAddressLock ) {
200+ transientFailureEvictedAddresses .retainAll (activeAddresses );
201+ }
202+ }
203+
204+ private void markTransientFailureEvicted (String address ) {
205+ synchronized (activeAddressLock ) {
206+ transientFailureEvictedAddresses .add (address );
207+ }
208+ }
209+
210+ private void clearTransientFailureEvictionMarker (String address ) {
211+ synchronized (activeAddressLock ) {
212+ transientFailureEvictedAddresses .remove (address );
213+ }
214+ }
215+
190216 /**
191217 * Records that real (non-probe) traffic was routed to an endpoint. This refreshes the idle
192218 * eviction timer for this endpoint.
@@ -235,6 +261,7 @@ void updateActiveAddresses(String finderKey, Set<String> activeAddresses) {
235261 for (Set <String > addresses : activeAddressesPerFinder .values ()) {
236262 allActive .addAll (addresses );
237263 }
264+ retainTransientFailureEvictionMarkers (allActive );
238265
239266 // Evict managed endpoints not referenced by any finder.
240267 List <String > stale = new ArrayList <>();
@@ -276,6 +303,7 @@ void unregisterFinder(String finderKey) {
276303 for (Set <String > addresses : activeAddressesPerFinder .values ()) {
277304 allActive .addAll (addresses );
278305 }
306+ retainTransientFailureEvictionMarkers (allActive );
279307
280308 List <String > stale = new ArrayList <>();
281309 for (String address : endpoints .keySet ()) {
@@ -412,6 +440,7 @@ private void probe(String address) {
412440 case READY :
413441 state .lastReadyAt = clock .instant ();
414442 state .consecutiveTransientFailures = 0 ;
443+ clearTransientFailureEvictionMarker (address );
415444 break ;
416445
417446 case IDLE :
@@ -439,13 +468,13 @@ private void probe(String address) {
439468 Level .FINE ,
440469 "Evicting endpoint {0}: {1} consecutive TRANSIENT_FAILURE probes" ,
441470 new Object [] {address , state .consecutiveTransientFailures });
442- evictEndpoint (address );
471+ evictEndpoint (address , EvictionReason . TRANSIENT_FAILURE );
443472 }
444473 break ;
445474
446475 case SHUTDOWN :
447476 logger .log (Level .FINE , "Probe for {0}: channel SHUTDOWN, evicting endpoint" , address );
448- evictEndpoint (address );
477+ evictEndpoint (address , EvictionReason . SHUTDOWN );
449478 break ;
450479
451480 default :
@@ -482,16 +511,26 @@ void checkIdleEviction() {
482511 }
483512
484513 for (String address : toEvict ) {
485- evictEndpoint (address );
514+ evictEndpoint (address , EvictionReason . IDLE );
486515 }
487516 }
488517
489518 /** Evicts an endpoint: stops probing, removes from tracking, shuts down the channel. */
490519 private void evictEndpoint (String address ) {
520+ evictEndpoint (address , EvictionReason .STALE );
521+ }
522+
523+ /** Evicts an endpoint and records whether it should still be reported as unhealthy. */
524+ private void evictEndpoint (String address , EvictionReason reason ) {
491525 logger .log (Level .FINE , "Evicting endpoint {0}" , address );
492526
493527 stopProbing (address );
494528 endpoints .remove (address );
529+ if (reason == EvictionReason .TRANSIENT_FAILURE ) {
530+ markTransientFailureEvicted (address );
531+ } else {
532+ clearTransientFailureEvictionMarker (address );
533+ }
495534 endpointCache .evict (address );
496535 }
497536
@@ -526,6 +565,10 @@ boolean isManaged(String address) {
526565 return endpoints .containsKey (address );
527566 }
528567
568+ boolean wasRecentlyEvictedTransientFailure (String address ) {
569+ return transientFailureEvictedAddresses .contains (address );
570+ }
571+
529572 /** Returns the endpoint state for testing. */
530573 @ VisibleForTesting
531574 EndpointState getEndpointState (String address ) {
@@ -558,6 +601,7 @@ void shutdown() {
558601 }
559602 }
560603 endpoints .clear ();
604+ transientFailureEvictedAddresses .clear ();
561605
562606 scheduler .shutdown ();
563607 try {
0 commit comments