Skip to content

Commit c6dc36e

Browse files
committed
controller: fix vm health checks
1 parent eaefc67 commit c6dc36e

4 files changed

Lines changed: 161 additions & 65 deletions

File tree

controller/cmd_delete.go

Lines changed: 1 addition & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ func DeleteVM(name string) {
7979
if resp != nil && len(resp.VirtualMachines) > 0 {
8080
id := resp.VirtualMachines[0].Id
8181
dp := csClient.VirtualMachine.NewDestroyVirtualMachineParams(id)
82+
dp.SetExpunge(true)
8283
if _, err := csClient.VirtualMachine.DestroyVirtualMachine(dp); err != nil {
8384
log.Printf("Warning: Failed to delete VM %s from CloudStack: %v", name, err)
8485
}
@@ -106,53 +107,3 @@ func DeleteVM(name string) {
106107

107108
log.Printf("VM %s deleted successfully", name)
108109
}
109-
110-
// DeleteNetwork deletes the Network resource called name.
//
// Two paths are handled:
//   - db.DB == nil (standalone, or the DB was never initialized): the network
//     is looked up in CloudStack by name and the first match is deleted. Every
//     failure on this path goes through log.Fatalf, which terminates the
//     process after logging.
//   - db.DB != nil: the record is loaded from the database by name, the
//     CloudStack network referenced by Status.CloudStackID is deleted on a
//     best-effort basis (failures are only logged as warnings), and the
//     database record is then deleted. A missing record or a failed database
//     delete is fatal (log.Fatalf).
111-
func DeleteNetwork(name string) {
112-
if db.DB == nil {
113-
// standalone or DB not initialized: try CloudStack directly
114-
cs, err := cloudstack.NewClient()
115-
if err != nil {
116-
log.Fatalf("CloudStack client unavailable: %v", err)
117-
}
118-
params := cs.Network.NewListNetworksParams()
119-
params.SetName(name)
120-
resp, err := cs.Network.ListNetworks(params)
121-
if err != nil {
122-
log.Fatalf("CloudStack network lookup failed: %v", err)
123-
}
124-
if resp == nil || len(resp.Networks) == 0 {
125-
log.Fatalf("Network %s not found in CloudStack", name)
126-
}
// Only the first network returned for this name is deleted; any further
// matches in the response are ignored.
127-
nid := resp.Networks[0].Id
128-
delp := cs.Network.NewDeleteNetworkParams(nid)
129-
if _, err := cs.Network.DeleteNetwork(delp); err != nil {
130-
log.Fatalf("Failed to delete Network %s from CloudStack: %v", name, err)
131-
}
132-
log.Printf("Network %s deleted from CloudStack (id=%s)", name, nid)
133-
return
134-
}
135-
136-
// DB-backed path: the stored record supplies the CloudStack ID to delete.
var n v1.Network
137-
if err := db.DB.Where("name = ?", name).First(&n).Error; err != nil {
138-
log.Fatalf("Network %s not found: %v", name, err)
139-
}
140-
141-
// The external delete is best-effort: client or API failures are logged as
// warnings and the database record is still removed below.
if n.Status.CloudStackID != "" {
142-
cs, err := cloudstack.NewClient()
143-
if err != nil {
144-
log.Printf("Warning: CloudStack client unavailable, skipping external delete: %v", err)
145-
} else {
146-
delp := cs.Network.NewDeleteNetworkParams(n.Status.CloudStackID)
147-
if _, err := cs.Network.DeleteNetwork(delp); err != nil {
148-
log.Printf("Warning: Failed to delete network %s from CloudStack: %v", name, err)
149-
}
150-
}
151-
}
152-
153-
if err := db.DB.Delete(&n).Error; err != nil {
154-
log.Fatalf("Failed to delete network %s from database: %v", name, err)
155-
}
156-
157-
log.Printf("Network %s deleted successfully", name)
158-
}

controller/controller.go

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ func (c *Controller) handleApply(w http.ResponseWriter, r *http.Request) {
8282
kind, _ := meta["kind"].(string)
8383
var applyErr error
8484
var appliedID string
85+
var appliedOp string // "created" | "updated" | "accepted"
8586

8687
switch kind {
8788
case "VirtualMachineSpec":
@@ -90,6 +91,17 @@ func (c *Controller) handleApply(w http.ResponseWriter, r *http.Request) {
9091
http.Error(w, "failed to parse VirtualMachineSpec", http.StatusBadRequest)
9192
return
9293
}
94+
// detect create vs update
95+
if db.DB == nil {
96+
appliedOp = "accepted"
97+
} else {
98+
var existing v1.VirtualMachineSpecResource
99+
if db.DB.Where("name = ?", vs.Metadata.Name).First(&existing).Error != nil {
100+
appliedOp = "created"
101+
} else {
102+
appliedOp = "updated"
103+
}
104+
}
93105
applyErr = c.applyVMSpec(&vs)
94106
case "Application":
95107
var app v1.Application
@@ -153,6 +165,17 @@ func (c *Controller) handleApply(w http.ResponseWriter, r *http.Request) {
153165
}
154166
}
155167

168+
// detect create vs update
169+
if db.DB == nil {
170+
appliedOp = "accepted"
171+
} else {
172+
var existing v1.Component
173+
if db.DB.Where("name = ?", comp.Metadata.Name).First(&existing).Error != nil {
174+
appliedOp = "created"
175+
} else {
176+
appliedOp = "updated"
177+
}
178+
}
156179
applyErr = c.applyComponent(&comp)
157180
case "VirtualMachine":
158181
var vm v1.VirtualMachine
@@ -179,6 +202,15 @@ func (c *Controller) handleApply(w http.ResponseWriter, r *http.Request) {
179202

180203
// Build response including created/applied resource id when available
181204
respMap := map[string]string{"status": "success", "message": "resource accepted for reconciliation"}
205+
if appliedOp != "" {
206+
respMap["action"] = appliedOp
207+
switch appliedOp {
208+
case "created":
209+
respMap["message"] = "resource created"
210+
case "updated":
211+
respMap["message"] = "resource updated"
212+
}
213+
}
182214
if appliedID != "" {
183215
respMap["id"] = appliedID
184216
respMap["kind"] = kind
@@ -524,6 +556,18 @@ func (c *Controller) handleDelete(w http.ResponseWriter, r *http.Request) {
524556
w.WriteHeader(http.StatusOK)
525557
w.Write([]byte(`{"status":"deleted"}`))
526558
return
559+
case "VirtualMachineSpec":
560+
var spec v1.VirtualMachineSpecResource
561+
if db.DB == nil || db.DB.Where("name = ?", name).First(&spec).Error != nil {
562+
http.Error(w, "virtualmachinespec not found", http.StatusNotFound)
563+
return
564+
}
565+
db.DB.Delete(&spec)
566+
respMap := map[string]string{"status": "deleted", "kind": "VirtualMachineSpec", "name": name}
567+
b, _ := json.Marshal(respMap)
568+
w.WriteHeader(http.StatusOK)
569+
w.Write(b)
570+
return
527571
case "VirtualMachine":
528572
var vm v1.VirtualMachine
529573
if db.DB == nil || db.DB.Where("name = ?", name).First(&vm).Error != nil {
@@ -533,6 +577,7 @@ func (c *Controller) handleDelete(w http.ResponseWriter, r *http.Request) {
533577
if resp != nil && len(resp.VirtualMachines) > 0 {
534578
id := resp.VirtualMachines[0].Id
535579
dp := c.csClient.VirtualMachine.NewDestroyVirtualMachineParams(id)
580+
dp.SetExpunge(true)
536581
c.csClient.VirtualMachine.DestroyVirtualMachine(dp)
537582
w.WriteHeader(http.StatusOK)
538583
w.Write([]byte(`{"status":"deleted"}`))
@@ -682,7 +727,7 @@ func (c *Controller) applyVM(vm *v1.VirtualMachine) error {
682727

683728
// Persist desired state to DB (create or update)
684729
var existing v1.VirtualMachine
685-
if err := db.DB.Where("metadata ->> 'name' = ?", vm.Metadata.Name).First(&existing).Error; err != nil {
730+
if err := db.DB.Where("name = ?", vm.Metadata.Name).First(&existing).Error; err != nil {
686731
// record not found: create new record with observed CloudStack info
687732
if err := db.DB.Save(vm).Error; err != nil {
688733
return err

controller/health.go

Lines changed: 95 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,26 @@
11
package controller
22

33
import (
4+
"context"
5+
"fmt"
6+
"os/exec"
7+
"time"
8+
49
v1 "cloudstackctl/apis/v1"
510
"cloudstackctl/db"
611
"log"
712
"net"
8-
"time"
913
)
1014

15+
// fmtTimeoutSeconds converts d to a whole number of seconds, formatted as a
// decimal string for use as the argument of the Linux `ping -W` flag.
//
// Fractional seconds are rounded up rather than truncated, so a configured
// timeout such as 1500ms yields "2" instead of cutting the wait short at "1".
// Because the rounded value is always strictly less than d+1s, a caller that
// bounds the command with a d+1s context deadline still outlasts ping's own
// timeout. Zero and negative durations are clamped to "1".
func fmtTimeoutSeconds(d time.Duration) string {
	secs := int64(d / time.Second)
	// Integer division truncates toward zero; bump when a positive remainder
	// was dropped so the result is the ceiling for positive durations.
	if d%time.Second > 0 {
		secs++
	}
	if secs < 1 {
		secs = 1
	}
	return fmt.Sprintf("%d", secs)
}
23+
1124
// CheckComponentHealth verifies all VMs in a component are healthy
1225
func (c *Controller) CheckComponentHealth(component *v1.Component) (bool, error) {
1326
log.Printf("Checking health for component: %s", component.Metadata.Name)
@@ -51,23 +64,93 @@ func (c *Controller) CheckVMHealth(vm *v1.VirtualMachine) (bool, error) {
5164
if vm.Status.CloudStackID == "" {
5265
return false, nil
5366
}
67+
// Query CloudStack for the VM to obtain its IP(s)
68+
params := c.csClient.VirtualMachine.NewListVirtualMachinesParams()
69+
params.SetId(vm.Status.CloudStackID)
70+
resp, err := c.csClient.VirtualMachine.ListVirtualMachines(params)
71+
if err != nil {
72+
log.Printf("failed to describe VM %s: %v", vm.Metadata.Name, err)
73+
vm.Status.Ready = false
74+
vm.Status.LastChecked = time.Now()
75+
return false, db.DB.Save(vm).Error
76+
}
77+
if resp == nil || len(resp.VirtualMachines) == 0 {
78+
log.Printf("no CloudStack VM found for %s (id=%s)", vm.Metadata.Name, vm.Status.CloudStackID)
79+
vm.Status.Ready = false
80+
vm.Status.LastChecked = time.Now()
81+
return false, db.DB.Save(vm).Error
82+
}
5483

55-
// Get VM IP from CloudStack (implement with SDK)
56-
vmIP := vm.Status.ObservedState // Replace with actual IP retrieval
84+
v := resp.VirtualMachines[0]
5785

58-
// 1. TCP ping to port 22 (SSH)
59-
conn, err := net.DialTimeout("tcp", vmIP+":22", 5*time.Second)
60-
if err != nil {
61-
log.Printf("VM %s SSH check failed: %v", vm.Metadata.Name, err)
86+
// use the first NIC that reports a non-empty Ipaddress (no preference or fallback between address fields is applied)
87+
vmIP := ""
88+
for _, n := range v.Nic {
89+
if n.Ipaddress != "" {
90+
vmIP = n.Ipaddress
91+
break
92+
}
93+
}
94+
if vmIP == "" {
95+
log.Printf("no IP address found for VM %s (id=%s)", vm.Metadata.Name, vm.Status.CloudStackID)
6296
vm.Status.Ready = false
97+
vm.Status.LastChecked = time.Now()
6398
return false, db.DB.Save(vm).Error
6499
}
65-
defer conn.Close()
66100

67-
// 2. Additional health checks (HTTP/ping/custom) can be added here
101+
// Determine health checks to run: use Spec.HealthChecks if present, otherwise return healthy (no checks to run)
102+
checks := vm.Spec.HealthChecks
103+
if len(checks) == 0 {
104+
return true, nil
105+
}
106+
107+
overallHealthy := true
108+
for _, hc := range checks {
109+
timeout := 5 * time.Second
110+
if hc.Timeout != "" {
111+
if d, err := time.ParseDuration(hc.Timeout); err == nil {
112+
timeout = d
113+
}
114+
}
115+
116+
switch hc.Type {
117+
case "ping":
118+
// Use system ping command (platform: Linux). Run with context timeout.
119+
ctx, cancel := context.WithTimeout(context.Background(), timeout+1*time.Second)
120+
defer cancel()
121+
// `-c 1` send one packet, `-W` sets timeout in seconds for Linux ping
122+
cmd := exec.CommandContext(ctx, "ping", "-c", "1", "-W", fmtTimeoutSeconds(timeout), vmIP)
123+
if err := cmd.Run(); err != nil {
124+
log.Printf("VM %s ping check to %s failed: %v", vm.Metadata.Name, vmIP, err)
125+
overallHealthy = false
126+
}
127+
case "ssh":
128+
// TCP connect to SSH port (default 22)
129+
port := "22"
130+
if hc.Port != 0 {
131+
port = fmt.Sprintf("%d", hc.Port)
132+
}
133+
conn, err := net.DialTimeout("tcp", net.JoinHostPort(vmIP, port), timeout)
134+
if err != nil {
135+
log.Printf("VM %s SSH check to %s:%s failed: %v", vm.Metadata.Name, vmIP, port, err)
136+
overallHealthy = false
137+
} else {
138+
conn.Close()
139+
}
140+
default:
141+
// Unknown check type: mark as not healthy and log
142+
log.Printf("Unknown health check type %s for VM %s", hc.Type, vm.Metadata.Name)
143+
overallHealthy = false
144+
}
145+
}
68146

69-
// Update VM status
70-
vm.Status.Ready = true
147+
vm.Status.Ready = overallHealthy
71148
vm.Status.LastChecked = time.Now()
72-
return true, db.DB.Save(vm).Error
149+
if overallHealthy {
150+
vm.Status.ObservedState = "Healthy"
151+
} else {
152+
vm.Status.ObservedState = "Unhealthy"
153+
}
154+
155+
return overallHealthy, db.DB.Save(vm).Error
73156
}

controller/reconcile.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ func (c *Controller) ReconcileAll() {
1717

1818
// Reconcile Applications
1919
var apps []v1.Application
20-
if err := db.DB.Find(&apps).Error; err != nil {
20+
if err := db.DB.Where("deleted_at IS NULL").Find(&apps).Error; err != nil {
2121
log.Printf("Failed to list applications: %v", err)
2222
return
2323
}
@@ -29,7 +29,7 @@ func (c *Controller) ReconcileAll() {
2929
}
3030
// Reconcile VMs
3131
var vms []v1.VirtualMachine
32-
if err := db.DB.Find(&vms).Error; err != nil {
32+
if err := db.DB.Where("deleted_at IS NULL").Find(&vms).Error; err != nil {
3333
log.Printf("Failed to list VMs: %v", err)
3434
return
3535
}
@@ -81,6 +81,18 @@ func (c *Controller) ReconcileVM(vm *v1.VirtualMachine) error {
8181
return err
8282
}
8383

84+
// Check if VM exists; if not, create it
85+
if vm.Status.CloudStackID == "" {
86+
if id, err := handlers.ApplyVirtualMachineManaged(vm, true); err != nil {
87+
return err
88+
} else {
89+
if id != "" {
90+
vm.Status.CloudStackID = id
91+
db.DB.Save(vm)
92+
}
93+
}
94+
}
95+
8496
// Check for drift
8597
if err := c.DetectDrift(vm); err != nil {
8698
return err
@@ -116,6 +128,11 @@ func (c *Controller) populateObservedSpec(vm *v1.VirtualMachine) error {
116128

117129
v := resp.VirtualMachines[0]
118130

131+
// if vm.Status.CloudStackID is not set, set it from the observed VM
132+
if vm.Status.CloudStackID == "" && v.Id != "" {
133+
vm.Status.CloudStackID = v.Id
134+
}
135+
119136
// Map some observed fields into ObservedSpec using SDK types directly
120137
obs := vm.ObservedSpec
121138
if v.Templateid != "" {

0 commit comments

Comments
 (0)