@@ -184,6 +184,18 @@ func (c *Controller) ReconcileComponent(comp *v1.Component) error {
184184 return nil
185185 }
186186
187+ // Advance the state machine for each VM in this component. This is
188+ // required for paths (e.g. waitForComponentHealth) that call
189+ // ReconcileComponent directly without first calling ReconcileVM.
190+ var compVMs []v1.VirtualMachine
191+ if err := db .DB .Where ("component = ? AND (observed_state IS NULL OR observed_state <> ?)" , comp .Metadata .Name , "Removing" ).Find (& compVMs ).Error ; err == nil {
192+ for _ , vm := range compVMs {
193+ if err := c .ReconcileVM (& vm ); err != nil {
194+ log .Printf ("ReconcileComponent: failed to reconcile VM %s: %v" , vm .Metadata .Name , err )
195+ }
196+ }
197+ }
198+
187199 // Check component health
188200 healthy , err := c .CheckComponentHealth (comp )
189201 if err != nil {
@@ -220,6 +232,32 @@ func (c *Controller) ReconcileVM(vm *v1.VirtualMachine) error {
220232 return err
221233 }
222234
235+ // VMNotFound: the CS VM is gone — clear the stale ID and recreate.
236+ if vm .Status .ObservedState == "VMNotFound" {
237+ log .Printf ("ReconcileVM: VM %s not found in CloudStack, clearing ID and recreating" , vm .Metadata .Name )
238+ vm .CloudStackID = ""
239+ vm .Status .ObservedState = "Created"
240+ vm .Status .Ready = false
241+ if err := db .DB .Save (vm ).Error ; err != nil {
242+ return err
243+ }
244+ }
245+
246+ // Stopped: start the VM in CloudStack.
247+ if vm .Status .ObservedState == "Stopped" {
248+ log .Printf ("ReconcileVM: VM %s is Stopped, starting it" , vm .Metadata .Name )
249+ sp := c .csClient .VirtualMachine .NewStartVirtualMachineParams (vm .CloudStackID )
250+ if _ , err := c .csClient .VirtualMachine .StartVirtualMachine (sp ); err != nil {
251+ log .Printf ("ReconcileVM: failed to start VM %s: %v" , vm .Metadata .Name , err )
252+ return err
253+ }
254+ vm .Status .ObservedState = "Starting"
255+ vm .Status .Ready = false
256+ if err := db .DB .Save (vm ).Error ; err != nil {
257+ return err
258+ }
259+ }
260+
223261 // Check if VM exists; if not, create it
224262 if vm .CloudStackID == "" {
225263 if id , err := handlers .ApplyVirtualMachineManaged (vm , true ); err != nil {
@@ -249,6 +287,21 @@ func (c *Controller) ReconcileVM(vm *v1.VirtualMachine) error {
249287 return err
250288}
251289
290+ // vmHasHealthChecks returns true if the VM has health checks defined either
291+ // on its own spec or inherited from its owning component.
292+ func vmHasHealthChecks (vm * v1.VirtualMachine ) bool {
293+ if len (vm .Spec .HealthChecks ) > 0 {
294+ return true
295+ }
296+ if vm .Component != "" {
297+ var comp v1.Component
298+ if db .DB .Where ("name = ?" , vm .Component ).First (& comp ).Error == nil {
299+ return len (comp .Spec .HealthChecks ) > 0
300+ }
301+ }
302+ return false
303+ }
304+
252305// populateObservedSpec queries CloudStack for VM details and fills ObservedSpec
253306func (c * Controller ) populateObservedSpec (vm * v1.VirtualMachine ) error {
254307 // Use SDK to list by id or name
@@ -264,6 +317,13 @@ func (c *Controller) populateObservedSpec(vm *v1.VirtualMachine) error {
264317 return err
265318 }
266319 if resp == nil || len (resp .VirtualMachines ) == 0 {
320+ // VM had a CloudStack ID but is no longer found — mark as VMNotFound.
321+ if vm .CloudStackID != "" && vm .Status .ObservedState != "Removing" {
322+ vm .Status .ObservedState = "VMNotFound"
323+ vm .Status .Ready = false
324+ vm .Status .LastChecked = time .Now ()
325+ return db .DB .Save (vm ).Error
326+ }
267327 return nil
268328 }
269329
@@ -367,9 +427,51 @@ func (c *Controller) populateObservedSpec(vm *v1.VirtualMachine) error {
367427 }
368428 }
369429
370- // Record observed state
430+ // State machine: map CloudStack hypervisor state to controller-managed states.
431+ //
432+ // "" / Created ──(CS ID assigned)──► Starting
433+ // Starting ──(CS Running)──► Started (has health checks) | Running (no checks)
434+ // Starting ──(CS Error)──► Error
435+ // Starting ──(CS Stopped)──► Stopped (reconciler will start it)
436+ // Error ──(CS Running)──► Started (has health checks) | Running (no checks) (recovered; same handling as Starting)
437+ // Started/IPNotFound/Running/Healthy/Unhealthy ──(CS Stopped)──► Stopped
438+ // Started/IPNotFound/Running/Healthy/Unhealthy ──(CS other non-Running)──► state unchanged
439+ // Removing ──── never overwritten
440+ // VMNotFound ──── never overwritten (handled above)
371441 if v .State != "" {
372- vm .Status .ObservedState = v .State
442+ current := vm .Status .ObservedState
443+ switch current {
444+ case "Removing" , "VMNotFound" :
445+ // Terminal / in-progress states — never overwrite.
446+ case "" , "Created" :
447+ // Initial: CloudStack ID just assigned.
448+ vm .Status .ObservedState = "Starting"
449+ case "Starting" , "Error" :
450+ switch v .State {
451+ case "Running" :
452+ if vmHasHealthChecks (vm ) {
453+ vm .Status .ObservedState = "Started"
454+ } else {
455+ vm .Status .ObservedState = "Running"
456+ vm .Status .Ready = true
457+ }
458+ case "Error" :
459+ vm .Status .ObservedState = "Error"
460+ vm .Status .Ready = false
461+ case "Stopped" :
462+ vm .Status .ObservedState = "Stopped"
463+ vm .Status .Ready = false
464+ // All other CS transient states ("Starting", etc.) stay in current state.
465+ }
466+ case "Started" , "IPNotFound" , "Running" , "Healthy" , "Unhealthy" :
467+ // Stable / health-check states.
468+ // Only act on Stopped — everything else (transient CS states) leaves
469+ // the controller state untouched so health checks continue normally.
470+ if v .State == "Stopped" {
471+ vm .Status .ObservedState = "Stopped"
472+ vm .Status .Ready = false
473+ }
474+ }
373475 }
374476
375477 vm .ObservedSpec = obs
0 commit comments