@@ -129,6 +129,10 @@ const (
129129 authzLabelValueInline = "inline"
130130)
131131
132+ const defaultTerminationGracePeriodSeconds = int64 (30 )
133+
134+ const stdioTransport = "stdio"
135+
132136// detectPlatform detects the Kubernetes platform type (Kubernetes vs OpenShift)
133137// It uses the shared platform detector to ensure detection is only performed once and cached
134138func (r * MCPServerReconciler ) detectPlatform (ctx context.Context ) (kubernetes.Platform , error ) {
@@ -189,6 +193,10 @@ func (r *MCPServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
189193 // Validate CABundleRef if specified
190194 r .validateCABundleRef (ctx , mcpServer )
191195
196+ // Validate stdio replica cap and session storage requirements
197+ r .validateStdioReplicaCap (ctx , mcpServer )
198+ r .validateSessionStorageForReplicas (ctx , mcpServer )
199+
192200 // Validate PodTemplateSpec early - before other validations
193201 // This ensures we fail fast if the spec is invalid
194202 if ! r .validateAndUpdatePodTemplateStatus (ctx , mcpServer ) {
@@ -392,7 +400,7 @@ func (r *MCPServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
392400 // Enforce stdio transport replica cap: stdio requires 1:1 proxy-to-backend
393401 // connections and cannot scale beyond 1. Other transports are hands-off
394402 // to allow HPAs, KEDA, or manual kubectl scale to manage replicas freely.
395- if mcpServer .Spec .Transport == "stdio" &&
403+ if mcpServer .Spec .Transport == stdioTransport &&
396404 deployment .Spec .Replicas != nil && * deployment .Spec .Replicas > 1 {
397405 deployment .Spec .Replicas = int32Ptr (1 )
398406 err = r .Update (ctx , deployment )
@@ -450,13 +458,18 @@ func (r *MCPServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
450458
451459 // Check if the deployment spec changed
452460 if r .deploymentNeedsUpdate (ctx , deployment , mcpServer , runConfigChecksum ) {
453- // Update template and metadata only — preserve Spec.Replicas so that
454- // HPAs, KEDA, and manual scaling are not overwritten by the controller.
461+ // Update template and metadata. Also sync Spec.Replicas when spec.replicas is
462+ // explicitly set — this makes the operator authoritative for spec-driven scaling.
463+ // When spec.replicas is nil, preserve the live count so HPAs, KEDA, and manual
464+ // kubectl scale remain in control.
455465 newDeployment := r .deploymentForMCPServer (ctx , mcpServer , runConfigChecksum )
456466 deployment .Spec .Template = newDeployment .Spec .Template
457467 deployment .Spec .Selector = newDeployment .Spec .Selector
458468 deployment .Labels = newDeployment .Labels
459469 deployment .Annotations = ctrlutil .MergeAnnotations (newDeployment .Annotations , deployment .Annotations )
470+ if newDeployment .Spec .Replicas != nil {
471+ deployment .Spec .Replicas = newDeployment .Spec .Replicas
472+ }
460473 err = r .Update (ctx , deployment )
461474 if err != nil {
462475 ctxLogger .Error (err , "Failed to update Deployment" ,
@@ -931,7 +944,6 @@ func (r *MCPServerReconciler) deploymentForMCPServer(
931944 ctx context.Context , m * mcpv1alpha1.MCPServer , runConfigChecksum string ,
932945) * appsv1.Deployment {
933946 ls := labelsForMCPServer (m .Name )
934- replicas := int32 (1 )
935947
936948 // Prepare container args
937949 args := []string {"run" }
@@ -1196,7 +1208,7 @@ func (r *MCPServerReconciler) deploymentForMCPServer(
11961208 Annotations : deploymentAnnotations ,
11971209 },
11981210 Spec : appsv1.DeploymentSpec {
1199- Replicas : & replicas ,
1211+ Replicas : resolveDeploymentReplicas ( m . Spec . Transport , m . Spec . Replicas ) ,
12001212 Selector : & metav1.LabelSelector {
12011213 MatchLabels : ls , // Keep original labels for selector
12021214 },
@@ -1206,8 +1218,9 @@ func (r *MCPServerReconciler) deploymentForMCPServer(
12061218 Annotations : deploymentTemplateAnnotations ,
12071219 },
12081220 Spec : corev1.PodSpec {
1209- ServiceAccountName : ctrlutil .ProxyRunnerServiceAccountName (m .Name ),
1210- ImagePullSecrets : imagePullSecrets ,
1221+ ServiceAccountName : ctrlutil .ProxyRunnerServiceAccountName (m .Name ),
1222+ ImagePullSecrets : imagePullSecrets ,
1223+ TerminationGracePeriodSeconds : int64Ptr (defaultTerminationGracePeriodSeconds ),
12111224 Containers : []corev1.Container {{
12121225 Image : getToolhiveRunnerImage (),
12131226 Name : "toolhive" ,
@@ -1694,6 +1707,15 @@ func (r *MCPServerReconciler) deploymentNeedsUpdate(
16941707 return true
16951708 }
16961709
1710+ // Check if spec.replicas has changed. Only compare when spec.replicas is non-nil;
1711+ // nil means hands-off mode (HPA/KEDA manages replicas) and the live count is authoritative.
1712+ expectedReplicas := resolveDeploymentReplicas (mcpServer .Spec .Transport , mcpServer .Spec .Replicas )
1713+ if expectedReplicas != nil {
1714+ if deployment .Spec .Replicas == nil || * deployment .Spec .Replicas != * expectedReplicas {
1715+ return true
1716+ }
1717+ }
1718+
16971719 return false
16981720}
16991721
@@ -1879,6 +1901,88 @@ func int32Ptr(i int32) *int32 {
18791901 return & i
18801902}
18811903
1904+ // int64Ptr returns a pointer to an int64
1905+ func int64Ptr (i int64 ) * int64 {
1906+ return & i
1907+ }
1908+
1909+ // resolveDeploymentReplicas returns the replica count to set on Deployment.Spec.Replicas.
1910+ // Returns nil when spec.replicas is nil (hands-off mode for HPA/KEDA).
1911+ // Enforces stdio cap at 1 as defense-in-depth (reconciler also enforces this via status condition).
1912+ func resolveDeploymentReplicas (mcpTransport string , specReplicas * int32 ) * int32 {
1913+ if specReplicas == nil {
1914+ return nil
1915+ }
1916+ if mcpTransport == stdioTransport && * specReplicas > 1 {
1917+ return int32Ptr (1 )
1918+ }
1919+ return specReplicas
1920+ }
1921+
1922+ // setStdioReplicaCappedCondition sets the StdioReplicaCapped status condition
1923+ func setStdioReplicaCappedCondition (mcpServer * mcpv1alpha1.MCPServer , status metav1.ConditionStatus , reason , message string ) {
1924+ meta .SetStatusCondition (& mcpServer .Status .Conditions , metav1.Condition {
1925+ Type : mcpv1alpha1 .ConditionStdioReplicaCapped ,
1926+ Status : status ,
1927+ Reason : reason ,
1928+ Message : message ,
1929+ ObservedGeneration : mcpServer .Generation ,
1930+ })
1931+ }
1932+
1933+ // validateStdioReplicaCap checks if spec.replicas > 1 for stdio transport and sets a warning condition.
1934+ // The deployment builder enforces the cap at 1 as defense-in-depth.
1935+ // Clears the condition when transport or replica count no longer violates the cap.
1936+ func (r * MCPServerReconciler ) validateStdioReplicaCap (ctx context.Context , mcpServer * mcpv1alpha1.MCPServer ) {
1937+ if mcpServer .Spec .Transport == stdioTransport && mcpServer .Spec .Replicas != nil && * mcpServer .Spec .Replicas > 1 {
1938+ setStdioReplicaCappedCondition (mcpServer , metav1 .ConditionTrue ,
1939+ mcpv1alpha1 .ConditionReasonStdioReplicaCapped ,
1940+ "stdio transport requires exactly 1 replica; deployment will use 1 regardless of spec.replicas" )
1941+ } else {
1942+ setStdioReplicaCappedCondition (mcpServer , metav1 .ConditionFalse ,
1943+ mcpv1alpha1 .ConditionReasonStdioReplicaCapNotActive ,
1944+ "stdio replica cap is not active" )
1945+ }
1946+ if err := r .Status ().Update (ctx , mcpServer ); err != nil {
1947+ log .FromContext (ctx ).Error (err , "Failed to update MCPServer status after stdio replica cap validation" )
1948+ }
1949+ }
1950+
1951+ // setSessionStorageCondition sets the SessionStorageWarning status condition
1952+ func setSessionStorageCondition (mcpServer * mcpv1alpha1.MCPServer , status metav1.ConditionStatus , reason , message string ) {
1953+ meta .SetStatusCondition (& mcpServer .Status .Conditions , metav1.Condition {
1954+ Type : mcpv1alpha1 .ConditionSessionStorageWarning ,
1955+ Status : status ,
1956+ Reason : reason ,
1957+ Message : message ,
1958+ ObservedGeneration : mcpServer .Generation ,
1959+ })
1960+ }
1961+
1962+ // validateSessionStorageForReplicas emits a Warning condition when replicas > 1 but session storage
1963+ // is not configured with a Redis backend. The deployment still proceeds; this is advisory only.
1964+ // Clears the condition when replicas drop back to nil or <= 1.
1965+ func (r * MCPServerReconciler ) validateSessionStorageForReplicas (ctx context.Context , mcpServer * mcpv1alpha1.MCPServer ) {
1966+ if mcpServer .Spec .Replicas != nil && * mcpServer .Spec .Replicas > 1 {
1967+ if mcpServer .Spec .SessionStorage == nil || mcpServer .Spec .SessionStorage .Provider != "redis" {
1968+ setSessionStorageCondition (mcpServer , metav1 .ConditionTrue ,
1969+ mcpv1alpha1 .ConditionReasonSessionStorageMissing ,
1970+ "replicas > 1 but sessionStorage.provider is not redis; sessions are not shared across replicas" )
1971+ } else {
1972+ setSessionStorageCondition (mcpServer , metav1 .ConditionFalse ,
1973+ mcpv1alpha1 .ConditionReasonSessionStorageConfigured ,
1974+ "Redis session storage is configured" )
1975+ }
1976+ } else {
1977+ setSessionStorageCondition (mcpServer , metav1 .ConditionFalse ,
1978+ mcpv1alpha1 .ConditionReasonSessionStorageNotApplicable ,
1979+ "session storage warning is not active" )
1980+ }
1981+ if err := r .Status ().Update (ctx , mcpServer ); err != nil {
1982+ log .FromContext (ctx ).Error (err , "Failed to update MCPServer status after session storage validation" )
1983+ }
1984+ }
1985+
18821986// SetupWithManager sets up the controller with the Manager.
18831987func (r * MCPServerReconciler ) SetupWithManager (mgr ctrl.Manager ) error {
18841988 // Create a handler that maps MCPExternalAuthConfig changes to MCPServer reconciliation requests
0 commit comments