@@ -1367,14 +1367,16 @@ func expandServiceDependencies(project *types.Project, services []string) []stri
13671367// dependencies are being recreated and that have restart: true in their
13681368// depends_on config.
13691369func addCascadingRestarts (project * types.Project , observed * ObservedState , plan * ReconciliationPlan , opts ReconcileOptions ) {
1370- // Collect services being recreated and map service name -> rename op IDs.
1371- // The rename op is the point at which the old container is fully replaced,
1372- // so dependent stops should wait for it.
1370+ // Collect services being recreated.
13731371 recreatedServiceRenameOps := map [string ][]string {}
1372+ recreatedServiceEntryOps := map [string ][]string {} // emit-recreate IDs (entry points of recreate chains)
13741373 for _ , op := range plan .Operations {
13751374 if op .Type == OpRenameContainer {
13761375 recreatedServiceRenameOps [op .ServiceName ] = append (recreatedServiceRenameOps [op .ServiceName ], op .ID )
13771376 }
1377+ if op .Type == OpEmitEvent && op .EventOp != nil && op .EventOp .Text == "Recreate" {
1378+ recreatedServiceEntryOps [op .ServiceName ] = append (recreatedServiceEntryOps [op .ServiceName ], op .ID )
1379+ }
13781380 }
13791381 if len (recreatedServiceRenameOps ) == 0 {
13801382 return
@@ -1383,88 +1385,101 @@ func addCascadingRestarts(project *types.Project, observed *ObservedState, plan
13831385 // For each service in the project, check if it depends on a recreated service with restart: true
13841386 for _ , service := range project .Services {
13851387 for depName , dep := range service .DependsOn {
1386- renameOps := recreatedServiceRenameOps [depName ]
1387- if ! dep .Restart || len (renameOps ) == 0 {
1388+ if ! dep .Restart || len (recreatedServiceRenameOps [depName ]) == 0 {
13881389 continue
13891390 }
1390- // This service's dependency is being recreated and has restart: true.
1391- // Add stop+start ops for its running containers (if not already being stopped).
13921391 for _ , ctr := range observed .Containers [service .Name ] {
1393- ctrName := getCanonicalContainerName (ctr )
1394- if _ , exists := plan .Operations ["stop-container:" + ctrName ]; exists {
1395- continue
1396- }
1397- if ctr .State != container .StateRunning {
1398- continue
1399- }
1400-
1401- eventName := "Container " + ctrName
1392+ addCascadingRestartOps (service , ctr , depName , recreatedServiceEntryOps [depName ], plan , opts )
1393+ }
1394+ break // Only need to process once per service
1395+ }
1396+ }
1397+ }
14021398
1403- // Emit Stopping event (depends on rename ops of the dependency)
1404- emitStoppingID := fmt .Sprintf ("emit-stopping:%s" , ctrName )
1405- stoppingDeps := make ([]string , len (renameOps ))
1406- copy (stoppingDeps , renameOps )
1407- plan .Operations [emitStoppingID ] = emitEventOp (emitStoppingID , service .Name , ctrName , eventName , api .Working , api .StatusStopping , stoppingDeps )
1399+ // addCascadingRestartOps adds stop+start operations for a single container
1400+ // of a dependent service whose dependency is being recreated.
1401+ func addCascadingRestartOps (
1402+ service types.ServiceConfig ,
1403+ ctr container.Summary ,
1404+ depName string ,
1405+ recreateEntryOps []string ,
1406+ plan * ReconciliationPlan ,
1407+ opts ReconcileOptions ,
1408+ ) {
1409+ ctrName := getCanonicalContainerName (ctr )
1410+ if _ , exists := plan .Operations ["stop-container:" + ctrName ]; exists {
1411+ return
1412+ }
1413+ if ctr .State != container .StateRunning {
1414+ return
1415+ }
14081416
1409- stopID := fmt .Sprintf ("stop-container:%s" , ctrName )
1410- plan .Operations [stopID ] = & Operation {
1411- ID : stopID ,
1412- Type : OpStopContainer ,
1413- ServiceName : service .Name ,
1414- Resource : ctrName ,
1415- ContainerOp : & ContainerOperation {
1416- Service : service ,
1417- ContainerName : ctrName ,
1418- Existing : & ctr ,
1419- Timeout : opts .Timeout ,
1420- },
1421- DependsOn : []string {emitStoppingID },
1422- Reason : fmt .Sprintf ("dependency %q is being recreated (restart: true)" , depName ),
1423- }
1417+ eventName := "Container " + ctrName
14241418
1425- emitStoppedID := fmt .Sprintf ("emit-stopped:%s" , ctrName )
1426- plan .Operations [emitStoppedID ] = emitEventOp (emitStoppedID , service .Name , ctrName , eventName , api .Done , api .StatusStopped , []string {stopID })
1419+ // Stop chain: emit-stopping → stop → emit-stopped.
1420+ // The stop runs BEFORE the dependency recreate chain starts: the
1421+ // recreate entry point (emit-recreate) is updated to depend on
1422+ // emit-stopped, ensuring the dependent is fully stopped before the
1423+ // dependency is recreated (e.g. fluentd logging driver flushing).
1424+ emitStoppingID := fmt .Sprintf ("emit-stopping:%s" , ctrName )
1425+ plan .Operations [emitStoppingID ] = emitEventOp (emitStoppingID , service .Name , ctrName , eventName , api .Working , api .StatusStopping , nil )
1426+
1427+ stopID := fmt .Sprintf ("stop-container:%s" , ctrName )
1428+ plan .Operations [stopID ] = & Operation {
1429+ ID : stopID ,
1430+ Type : OpStopContainer ,
1431+ ServiceName : service .Name ,
1432+ Resource : ctrName ,
1433+ ContainerOp : & ContainerOperation {
1434+ Service : service ,
1435+ ContainerName : ctrName ,
1436+ Existing : & ctr ,
1437+ Timeout : opts .Timeout ,
1438+ },
1439+ DependsOn : []string {emitStoppingID },
1440+ Reason : fmt .Sprintf ("dependency %q is being recreated (restart: true)" , depName ),
1441+ }
14271442
1428- // Emit Starting event
1429- emitStartingID := fmt . Sprintf ( "emit-starting:%s" , ctrName )
1443+ emitStoppedID := fmt . Sprintf ( "emit-stopped:%s" , ctrName )
1444+ plan . Operations [ emitStoppedID ] = emitEventOp ( emitStoppedID , service . Name , ctrName , eventName , api . Done , api . StatusStopped , [] string { stopID } )
14301445
1431- startID := fmt .Sprintf ("start-container:%s" , ctrName )
1432- if existingStart , exists := plan .Operations [startID ]; exists {
1433- // A start operation already exists (e.g. from network recreation).
1434- // Add our emit-stopped as a dependency of the existing emit-starting.
1435- if existingEmitStarting , ok := plan .Operations [emitStartingID ]; ok {
1436- if ! slices .Contains (existingEmitStarting .DependsOn , emitStoppedID ) {
1437- existingEmitStarting .DependsOn = append (existingEmitStarting .DependsOn , emitStoppedID )
1438- }
1439- } else {
1440- // No emit-starting exists yet; add stop as dep to start directly.
1441- if ! slices .Contains (existingStart .DependsOn , stopID ) {
1442- existingStart .DependsOn = append (existingStart .DependsOn , stopID )
1443- }
1444- }
1445- } else {
1446- plan .Operations [emitStartingID ] = emitEventOp (emitStartingID , service .Name , ctrName , eventName , api .Working , api .StatusStarting , []string {emitStoppedID })
1447-
1448- plan .Operations [startID ] = & Operation {
1449- ID : startID ,
1450- Type : OpStartContainer ,
1451- ServiceName : service .Name ,
1452- Resource : ctrName ,
1453- ContainerOp : & ContainerOperation {
1454- Service : service ,
1455- ContainerName : ctrName ,
1456- Existing : & ctr ,
1457- },
1458- DependsOn : []string {emitStartingID },
1459- Reason : fmt .Sprintf ("restart after dependency %q recreated" , depName ),
1460- }
1446+ // Block the dependency's recreate chain until the dependent is stopped.
1447+ for _ , entryID := range recreateEntryOps {
1448+ if entryOp , ok := plan .Operations [entryID ]; ok {
1449+ if ! slices .Contains (entryOp .DependsOn , emitStoppedID ) {
1450+ entryOp .DependsOn = append (entryOp .DependsOn , emitStoppedID )
1451+ }
1452+ }
1453+ }
14611454
1462- emitStartedID := fmt .Sprintf ("emit-started:%s" , ctrName )
1463- plan .Operations [emitStartedID ] = emitEventOp (emitStartedID , service .Name , ctrName , eventName , api .Done , api .StatusStarted , []string {startID })
1464- }
1455+ // Start chain: merge with existing start or create new one.
1456+ emitStartingID := fmt .Sprintf ("emit-starting:%s" , ctrName )
1457+ startID := fmt .Sprintf ("start-container:%s" , ctrName )
1458+ if existingStart , exists := plan .Operations [startID ]; exists {
1459+ if existingEmitStarting , ok := plan .Operations [emitStartingID ]; ok {
1460+ if ! slices .Contains (existingEmitStarting .DependsOn , emitStoppedID ) {
1461+ existingEmitStarting .DependsOn = append (existingEmitStarting .DependsOn , emitStoppedID )
14651462 }
1466- break // Only need to process once per service
1463+ } else if ! slices .Contains (existingStart .DependsOn , stopID ) {
1464+ existingStart .DependsOn = append (existingStart .DependsOn , stopID )
1465+ }
1466+ } else {
1467+ plan .Operations [emitStartingID ] = emitEventOp (emitStartingID , service .Name , ctrName , eventName , api .Working , api .StatusStarting , []string {emitStoppedID })
1468+ plan .Operations [startID ] = & Operation {
1469+ ID : startID ,
1470+ Type : OpStartContainer ,
1471+ ServiceName : service .Name ,
1472+ Resource : ctrName ,
1473+ ContainerOp : & ContainerOperation {
1474+ Service : service ,
1475+ ContainerName : ctrName ,
1476+ Existing : & ctr ,
1477+ },
1478+ DependsOn : []string {emitStartingID },
1479+ Reason : fmt .Sprintf ("restart after dependency %q recreated" , depName ),
14671480 }
1481+ emitStartedID := fmt .Sprintf ("emit-started:%s" , ctrName )
1482+ plan .Operations [emitStartedID ] = emitEventOp (emitStartedID , service .Name , ctrName , eventName , api .Done , api .StatusStarted , []string {startID })
14681483 }
14691484}
14701485
0 commit comments