Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 60 additions & 49 deletions packages/cmd/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -1092,7 +1092,6 @@
newAccessTokenNotificationChan chan bool
cachedUniversalAuthClientSecret string
templateFirstRenderOnce map[int]*sync.Once // Track first render per template
certificateFirstIssueOnce map[int]*sync.Once // Track first issue per certificate
exitAfterAuth bool
revokeCredentialsOnShutdown bool

Expand Down Expand Up @@ -1131,22 +1130,18 @@

certificates := make([]CertificateWithID, len(options.Certificates))
certificateStates := make(map[int]*CertificateState)
certificateFirstIssueOnce := make(map[int]*sync.Once)
for i, certificate := range options.Certificates {
certificates[i] = CertificateWithID{ID: i + 1, Certificate: certificate}
certificateStates[i+1] = &CertificateState{
Status: "pending",
}
certificateFirstIssueOnce[i+1] = &sync.Once{}
}

agentManager := &AgentManager{
filePaths: options.FileDeposits,
templates: templates,
certificates: certificates,
certificateStates: certificateStates,
certificateFirstIssueOnce: certificateFirstIssueOnce,

authConfigBytes: options.AuthConfigBytes,
authStrategy: options.AuthStrategy,
retryConfig: options.RetryConfig,
Expand Down Expand Up @@ -2507,13 +2502,33 @@
time.Sleep(1 * time.Second)
}

const maxIssuanceAttempts = 3
for _, cert := range tm.certificates {
tm.certificateFirstIssueOnce[cert.ID].Do(func() {
cert := cert
displayName := tm.getCertificateDisplayName(cert.ID, &cert.Certificate)
var lastErr error
for attempt := 1; attempt <= maxIssuanceAttempts; attempt++ {
if err := tm.IssueCertificate(cert.ID, &cert.Certificate); err != nil {
displayName := tm.getCertificateDisplayName(cert.ID, &cert.Certificate)
log.Error().Str("Certificate", displayName).Msgf("initial certificate issuance failed: %v", err)
lastErr = err
log.Error().
Str("Certificate", displayName).
Int("attempt", attempt).
Int("maxAttempts", maxIssuanceAttempts).
Msgf("initial certificate issuance failed: %v", err)
if attempt < maxIssuanceAttempts {
backoff := time.Duration(1<<uint(attempt-1)) * 2 * time.Second
time.Sleep(backoff)
}
} else {
lastErr = nil
break
}
})
}
if lastErr != nil {
log.Error().
Str("Certificate", displayName).
Msgf("all %d issuance attempts failed, will retry on next renewal check", maxIssuanceAttempts)
}

Check failure on line 2531 in packages/cmd/agent.go

View check run for this annotation

Claude / Claude Code Review

Misleading retry log and missing error in issuance failure summary

The final issuance failure summary log has two bugs: it incorrectly states the certificate 'will retry on next renewal check' (it will not — the cert is permanently stuck in `failed` status until the agent restarts), and it omits `lastErr` so operators see no root cause in the summary. Fix by updating the message to say no automatic retry will occur and adding `.Err(lastErr)` to the log call.
Comment thread
jdoss marked this conversation as resolved.
Outdated
}

for {
Expand Down Expand Up @@ -3116,38 +3131,6 @@

go tm.ManageTokenLifecycle()

if len(agentConfig.Certificates) > 0 {
go func() {
for {
if tm.getTokenUnsafe() != "" {
break
}
time.Sleep(100 * time.Millisecond)
}

httpClient, err := tm.createAuthenticatedClient()
if err != nil {
log.Error().Msgf("failed to create authenticated client for name resolution: %v", err)
return
}

err = resolveCertificateNameReferences(&agentConfig.Certificates, httpClient)
if err != nil {
log.Error().Msgf("failed to resolve certificate name references: %v", err)
return
}

for i := range tm.certificates {
for j := range agentConfig.Certificates {
if tm.certificates[i].ID == j+1 {
tm.certificates[i].Certificate = agentConfig.Certificates[j]
break
}
}
}
}()
}

var monitoredTemplatesFinished atomic.Int32

// when all templates have finished rendering once, we delete the unused leases from the cache.
Expand Down Expand Up @@ -3178,8 +3161,38 @@
}

if len(tm.certificates) > 0 {
log.Info().Msg("certificate management engine starting...")
go tm.MonitorCertificates(ctx)
go func() {
for {
if tm.getTokenUnsafe() != "" {
break
}
time.Sleep(100 * time.Millisecond)
}

httpClient, err := tm.createAuthenticatedClient()
if err != nil {
log.Error().Msgf("failed to create authenticated client for name resolution: %v", err)
return
}

err = resolveCertificateNameReferences(&agentConfig.Certificates, httpClient)
if err != nil {
log.Error().Msgf("failed to resolve certificate name references: %v", err)
return
}

for i := range tm.certificates {
for j := range agentConfig.Certificates {
if tm.certificates[i].ID == j+1 {
tm.certificates[i].Certificate = agentConfig.Certificates[j]
break
}
}
}

log.Info().Msg("certificate management engine starting...")
tm.MonitorCertificates(ctx)
}()
}

for {
Expand Down Expand Up @@ -3366,7 +3379,7 @@

go tm.ManageTokenLifecycle()

if len(agentConfig.Certificates) > 0 {
if len(tm.certificates) > 0 {
go func() {
for {
if tm.getTokenUnsafe() != "" {
Expand Down Expand Up @@ -3395,12 +3408,10 @@
}
}
}
}()
}

if len(tm.certificates) > 0 {
log.Info().Msg("certificate management engine starting...")
go tm.MonitorCertificates(ctx)
log.Info().Msg("certificate management engine starting...")
tm.MonitorCertificates(ctx)
}()
}

for {
Expand Down
Loading
Loading