@@ -188,9 +188,11 @@ func newHcsTask(
188188 ownsParent bool ,
189189 req * task.CreateTaskRequest ,
190190 s * specs.Spec ) (_ shimTask , err error ) {
191- log .G (ctx ).WithFields (logrus.Fields {
192- "tid" : req .ID ,
193- "ownsParent" : ownsParent ,
191+ ctx , entry := log .SetEntry (ctx , logrus.Fields {logfields .TaskID : req .ID })
192+ entry .WithFields (logrus.Fields {
193+ "hasHost" : parent != nil ,
194+ "isVirtualPod" : s .Annotations != nil && s .Annotations [annotations .VirtualPodID ] != "" ,
195+ "ownsHost" : ownsParent ,
194196 }).Debug ("newHcsTask" )
195197
196198 owner := filepath .Base (os .Args [0 ])
@@ -495,22 +497,30 @@ func (ht *hcsTask) DeleteExec(ctx context.Context, eid string) (int, uint32, tim
495497 }
496498
497499 if eid == "" {
500+ entry := log .G (ctx ).WithFields (logrus.Fields {
501+ logfields .TaskID : ht .id ,
502+ logfields .ExecID : eid ,
503+ })
504+
498505 // We are killing the init task, so we expect the container to be
499506 // stopped after this.
500507 //
501508 // The task process may have already exited, and the status set to
502509 // shimExecStateExited, but resources may still be in the process
503510 // of being cleaned up. Wait for ht.closed to be closed. This signals
504- // that waitInitExit() has finished destroying container resources,
505- // and layers were umounted.
511+ // that [hcsTask. waitInitExit]/[hcsTask.waitForHostExit] has finished destroying
512+ // container resources and that layers were umounted.
506513 // If the shim exits before resources are cleaned up, those resources
507514 // will remain locked and untracked, which leads to lingering sandboxes
508515 // and container resources like base vhdx.
516+ const timeout = 30 * time .Second
517+ entry .WithField (logfields .Timeout , timeout ).Trace ("waiting for task to be closed" )
509518 select {
510- case <- time .After (30 * time . Second ):
511- log . G ( ctx ).Error ("timed out waiting for resource cleanup " )
519+ case <- time .After (timeout ):
520+ entry . WithField ( logfields . Timeout , timeout ).Error ("timed out waiting for task to close while deleting init exec " )
512521 return 0 , 0 , time.Time {}, errors .Wrap (hcs .ErrTimeout , "waiting for container resource cleanup" )
513522 case <- ht .closed :
523+ entry .Trace ("received task close signal" )
514524 }
515525
516526 // The init task has now exited. A ForceExit() has already been sent to
@@ -608,7 +618,10 @@ func (ht *hcsTask) Wait() *task.StateResponse {
608618func (ht * hcsTask ) waitInitExit () {
609619 ctx , span := oc .StartSpan (context .Background (), "hcsTask::waitInitExit" )
610620 defer span .End ()
611- span .AddAttributes (trace .StringAttribute ("tid" , ht .id ))
621+ span .AddAttributes (
622+ trace .StringAttribute ("tid" , ht .id ),
623+ trace .BoolAttribute ("host" , ht .host != nil ),
624+ trace .BoolAttribute ("ownsHost" , ht .ownsHost ))
612625
613626 // Wait for it to exit on its own
614627 ht .init .Wait ()
@@ -627,7 +640,10 @@ func (ht *hcsTask) waitInitExit() {
627640func (ht * hcsTask ) waitForHostExit () {
628641 ctx , span := oc .StartSpan (context .Background (), "hcsTask::waitForHostExit" )
629642 defer span .End ()
630- span .AddAttributes (trace .StringAttribute ("tid" , ht .id ))
643+ span .AddAttributes (
644+ trace .StringAttribute ("tid" , ht .id ),
645+ trace .BoolAttribute ("host" , ht .host != nil ),
646+ trace .BoolAttribute ("ownsHost" , ht .ownsHost ))
631647
632648 err := ht .host .WaitCtx (ctx )
633649 if err != nil {
@@ -655,13 +671,16 @@ func (ht *hcsTask) waitForHostExit() {
655671// == nil`.
656672func (ht * hcsTask ) close (ctx context.Context ) {
657673 ht .closeOnce .Do (func () {
658- log .G (ctx ).Debug ("hcsTask::closeOnce" )
674+ entry := log .G (ctx )
675+ entry .Debug ("hcsTask::closeOnce" )
659676
660677 // ht.c should never be nil for a real task but in testing we stub
661678 // this to avoid a nil dereference. We really should introduce a
662679 // method or interface for ht.c operations that we can stub for
663680 // testing.
664681 if ht .c != nil {
682+ const tearDownTimeout = 30 * time .Second
683+
665684 // Do our best attempt to tear down the container.
666685 // TODO: unify timeout select statements and use [ht.c.WaitCtx] and [context.WithTimeout]
667686 var werr error
@@ -670,52 +689,63 @@ func (ht *hcsTask) close(ctx context.Context) {
670689 werr = ht .c .Wait ()
671690 close (ch )
672691 }()
692+
673693 err := ht .c .Shutdown (ctx )
674694 if err != nil {
675- log . G ( ctx ) .WithError (err ).Error ("failed to shutdown container" )
695+ entry .WithError (err ).Error ("failed to shutdown container" )
676696 } else {
677- t := time .NewTimer (time . Second * 30 )
697+ t := time .NewTimer (tearDownTimeout )
678698 select {
679699 case <- ch :
680700 err = werr
681701 t .Stop ()
682702 if err != nil {
683- log . G ( ctx ) .WithError (err ).Error ("failed to wait for container shutdown" )
703+ entry .WithError (err ).Error ("failed to wait for container shutdown" )
684704 }
685705 case <- t .C :
686706 err = hcs .ErrTimeout
687- log .G (ctx ).WithError (err ).Error ("failed to wait for container shutdown" )
707+ entry .WithFields (logrus.Fields {
708+ logfields .Timeout : tearDownTimeout ,
709+ logrus .ErrorKey : err ,
710+ }).Error ("timed out while waiting for container shutdown" )
688711 }
689712 }
690713
691714 if err != nil {
692715 err = ht .c .Terminate (ctx )
693716 if err != nil {
694- log . G ( ctx ) .WithError (err ).Error ("failed to terminate container" )
717+ entry .WithError (err ).Error ("failed to terminate container" )
695718 } else {
696- t := time .NewTimer (time . Second * 30 )
719+ t := time .NewTimer (tearDownTimeout )
697720 select {
698721 case <- ch :
699722 err = werr
700723 t .Stop ()
701724 if err != nil {
702- log . G ( ctx ) .WithError (err ).Error ("failed to wait for container terminate" )
725+ entry .WithError (err ).Error ("failed to wait for container terminate" )
703726 }
704727 case <- t .C :
705- log .G (ctx ).WithError (hcs .ErrTimeout ).Error ("failed to wait for container terminate" )
728+ entry .WithFields (logrus.Fields {
729+ logfields .Timeout : tearDownTimeout ,
730+ logrus .ErrorKey : hcs .ErrTimeout ,
731+ }).Error ("timed out while waiting for container terminate" )
706732 }
707733 }
708734 }
709735
710736 // Release any resources associated with the container.
737+ entry .Trace ("starting task resource cleanup" )
738+
711739 if err := resources .ReleaseResources (ctx , ht .cr , ht .host , true ); err != nil {
712- log . G ( ctx ) .WithError (err ).Error ("failed to release container resources" )
740+ entry .WithError (err ).Error ("failed to release container resources" )
713741 }
714742
715743 // Close the container handle invalidating all future access.
716744 if err := ht .c .Close (); err != nil {
717- log . G ( ctx ) .WithError (err ).Error ("failed to close container" )
745+ entry .WithError (err ).Error ("failed to close container" )
718746 }
747+
748+ entry .Trace ("task resource cleanup completed" )
719749 }
720750 ht .closeHost (ctx )
721751 })
@@ -730,11 +760,12 @@ func (ht *hcsTask) close(ctx context.Context) {
730760// This call is idempotent and safe to call multiple times.
731761func (ht * hcsTask ) closeHost (ctx context.Context ) {
732762 ht .closeHostOnce .Do (func () {
733- log .G (ctx ).Debug ("hcsTask::closeHostOnce" )
763+ entry := log .G (ctx )
764+ entry .Debug ("hcsTask::closeHostOnce" )
734765
735766 if ht .ownsHost && ht .host != nil {
736767 if err := ht .host .Close (); err != nil {
737- log . G ( ctx ) .WithError (err ).Error ("failed host vm shutdown" )
768+ entry .WithError (err ).Error ("failed host vm shutdown" )
738769 }
739770 }
740771 // Send the `init` exec exit notification always.
@@ -750,7 +781,7 @@ func (ht *hcsTask) closeHost(ctx context.Context) {
750781 ExitStatus : exit .ExitStatus ,
751782 ExitedAt : exit .ExitedAt ,
752783 }); err != nil {
753- log . G ( ctx ) .WithError (err ).Error ("failed to publish TaskExitEventTopic" )
784+ entry .WithError (err ).Error ("failed to publish TaskExitEventTopic" )
754785 }
755786 close (ht .closed )
756787 })
0 commit comments