Skip to content

Commit 9012aa2

Browse files
Don't continue processing on client error (#20)
* Update builder image to latest
* Don't retry request on client errors
* Added metric for client errors
* Spelling error
1 parent cb60273 commit 9012aa2

4 files changed

Lines changed: 40 additions & 3 deletions

File tree

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ The metrics exposed beyond the default Prometheus metrics are:
154154
* `deptracker_post_record_hard_fail`: the number of failures to
155155
persist a record via the HTTP API (either an irrecoverable error or
156156
all retries are exhausted).
157+
* `deptracker_post_record_client_error`: the number of client errors;
158+
these are never retried or reprocessed.
157159

158160
## License
159161

internal/controller/controller.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,20 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta
323323
)
324324

325325
if err := c.apiClient.PostOne(ctx, record); err != nil {
326+
// Make sure to not retry on client error messages
327+
var clientErr *deploymentrecord.ClientError
328+
if errors.As(err, &clientErr) {
329+
slog.Warn("Failed to post record",
330+
"event_type", eventType,
331+
"name", record.Name,
332+
"deployment_name", record.DeploymentName,
333+
"status", record.Status,
334+
"digest", record.Digest,
335+
"error", err,
336+
)
337+
return nil
338+
}
339+
326340
slog.Error("Failed to post record",
327341
"event_type", eventType,
328342
"name", record.Name,

pkg/deploymentrecord/client.go

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,19 @@ func WithAPIToken(token string) ClientOption {
6565
}
6666
}
6767

68+
// ClientError represents a client error that cannot be retried.
69+
type ClientError struct {
70+
err error
71+
}
72+
73+
func (c *ClientError) Error() string {
74+
return fmt.Sprintf("client_error: %s", c.err.Error())
75+
}
76+
77+
func (c *ClientError) Unwrap() error {
78+
return c.err
79+
}
80+
6881
// PostOne posts a single deployment record to the GitHub deployment
6982
// records API.
7083
func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error {
@@ -129,11 +142,11 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error {
129142
// Don't retry on client errors (4xx) except for 429
130143
// (rate limit)
131144
if resp.StatusCode >= 400 && resp.StatusCode < 500 && resp.StatusCode != 429 {
132-
metrics.PostDeploymentRecordHardFail.Inc()
133-
slog.Error("irrecoverable error, aborting",
145+
metrics.PostDeploymentRecordClientError.Inc()
146+
slog.Warn("client error, aborting",
134147
"attempt", attempt,
135148
"error", lastErr)
136-
return lastErr
149+
return &ClientError{err: lastErr}
137150
}
138151
metrics.PostDeploymentRecordSoftFail.Inc()
139152
}

pkg/metrics/prom.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,12 @@ var (
6464
Help: "The total number of hard post failures",
6565
},
6666
)
67+
68+
//nolint: revive
69+
PostDeploymentRecordClientError = promauto.NewCounter(
70+
prometheus.CounterOpts{
71+
Name: "deptracker_post_record_client_error",
72+
Help: "The total number of non-retryable client failures",
73+
},
74+
)
6775
)

0 commit comments

Comments
 (0)