Skip to content

Commit bca1752

Browse files
authored
fix: Retry on 408 and respect Retry-After header (#426)
* fix: Retry on 408 and respect Retry-After header 408 (Request Timeout) was incorrectly treated as a non-retryable client error. Retry-After response headers were ignored during backoff. Replace backoff library usage with a manual retry loop that honours Retry-After when present and falls back to exponential backoff otherwise. * fix: Parse HTTP-date Retry-After values Retry-After can be seconds or an HTTP-date per RFC 7231. Fall back to email.utils.parsedate_to_datetime when the numeric parse fails. * fix: Don't retry on unclassifiable APIError status When APIError.status is "N/A" (no HTTP status), treat it as non-retryable to avoid unexpected retry loops on errors the SDK cannot classify. * test: Add retry delay tests for Retry-After and exponential backoff Verify time.sleep is called with the Retry-After value when present, uses exponential backoff (2^attempt) when absent, and that 408 is retried.
1 parent fe3a9bb commit bca1752

3 files changed

Lines changed: 114 additions & 25 deletions

File tree

posthog/consumer.py

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import time
44
from threading import Thread
55

6-
import backoff
7-
86
from posthog.request import APIError, DatetimeSerializer, batch_post
97

108
try:
@@ -128,29 +126,41 @@ def next(self):
128126
def request(self, batch):
129127
"""Attempt to upload the batch and retry before raising an error"""
130128

131-
def fatal_exception(exc):
129+
def is_retryable(exc):
132130
if isinstance(exc, APIError):
133131
# retry on server errors and client errors
134-
# with 429 status code (rate limited),
132+
# with 408 (request timeout) or 429 (rate limited),
135133
# don't retry on other client errors
136134
if exc.status == "N/A":
137135
return False
138-
return (400 <= exc.status < 500) and exc.status != 429
136+
return not ((400 <= exc.status < 500) and exc.status not in (408, 429))
139137
else:
140138
# retry on all other errors (eg. network)
141-
return False
142-
143-
@backoff.on_exception(
144-
backoff.expo, Exception, max_tries=self.retries + 1, giveup=fatal_exception
145-
)
146-
def send_request():
147-
batch_post(
148-
self.api_key,
149-
self.host,
150-
gzip=self.gzip,
151-
timeout=self.timeout,
152-
batch=batch,
153-
historical_migration=self.historical_migration,
154-
)
155-
156-
send_request()
139+
return True
140+
141+
last_exc = None
142+
for attempt in range(self.retries + 1):
143+
try:
144+
batch_post(
145+
self.api_key,
146+
self.host,
147+
gzip=self.gzip,
148+
timeout=self.timeout,
149+
batch=batch,
150+
historical_migration=self.historical_migration,
151+
)
152+
return
153+
except Exception as e:
154+
last_exc = e
155+
if not is_retryable(e):
156+
raise
157+
if attempt < self.retries:
158+
# Respect Retry-After header if present, otherwise use exponential backoff
159+
retry_after = getattr(e, "retry_after", None)
160+
if retry_after and retry_after > 0:
161+
time.sleep(retry_after)
162+
else:
163+
time.sleep(min(2**attempt, 30))
164+
165+
if last_exc:
166+
raise last_exc

posthog/request.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import re
44
import socket
55
from dataclasses import dataclass
6-
from datetime import date, datetime
6+
from datetime import date, datetime, timezone
77
from gzip import GzipFile
88
from io import BytesIO
99
from typing import Any, List, Optional, Tuple, Union
@@ -235,12 +235,31 @@ def _process_response(
235235
)
236236
raise QuotaLimitError(res.status_code, "Feature flags quota limited")
237237
return response
238+
retry_after = None
239+
retry_after_header = res.headers.get("Retry-After")
240+
if retry_after_header:
241+
try:
242+
retry_after = float(retry_after_header)
243+
except (ValueError, TypeError):
244+
try:
245+
from email.utils import parsedate_to_datetime
246+
247+
retry_after = max(
248+
0.0,
249+
(
250+
parsedate_to_datetime(retry_after_header)
251+
- datetime.now(timezone.utc)
252+
).total_seconds(),
253+
)
254+
except (ValueError, TypeError):
255+
pass
256+
238257
try:
239258
payload = res.json()
240259
log.debug("received response: %s", payload)
241-
raise APIError(res.status_code, payload["detail"])
260+
raise APIError(res.status_code, payload["detail"], retry_after=retry_after)
242261
except (KeyError, ValueError):
243-
raise APIError(res.status_code, res.text)
262+
raise APIError(res.status_code, res.text, retry_after=retry_after)
244263

245264

246265
def decide(
@@ -348,9 +367,12 @@ def get(
348367

349368

350369
class APIError(Exception):
351-
def __init__(self, status: Union[int, str], message: str):
370+
def __init__(
371+
self, status: Union[int, str], message: str, retry_after: Optional[float] = None
372+
):
352373
self.message = message
353374
self.status = status
375+
self.retry_after = retry_after
354376

355377
def __str__(self):
356378
msg = "[PostHog] {0} ({1})"

posthog/test/test_consumer.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,63 @@ def mock_post_fn(_: str, data: str, **kwargs: Any) -> mock.Mock:
167167
q.join()
168168
self.assertEqual(mock_post.call_count, 2)
169169

170+
def test_request_sleeps_with_retry_after(self) -> None:
171+
error = APIError(429, "Too Many Requests", retry_after=5.0)
172+
call_count = [0]
173+
174+
def mock_post(*args: Any, **kwargs: Any) -> None:
175+
call_count[0] += 1
176+
if call_count[0] <= 1:
177+
raise error
178+
179+
consumer = Consumer(None, TEST_API_KEY, retries=3)
180+
with (
181+
mock.patch("posthog.consumer.batch_post", side_effect=mock_post),
182+
mock.patch("posthog.consumer.time.sleep") as mock_sleep,
183+
):
184+
consumer.request([_track_event()])
185+
mock_sleep.assert_called_once_with(5.0)
186+
187+
def test_request_uses_exponential_backoff_without_retry_after(self) -> None:
188+
error = APIError(503, "Service Unavailable")
189+
call_count = [0]
190+
191+
def mock_post(*args: Any, **kwargs: Any) -> None:
192+
call_count[0] += 1
193+
if call_count[0] <= 3:
194+
raise error
195+
196+
consumer = Consumer(None, TEST_API_KEY, retries=3)
197+
with (
198+
mock.patch("posthog.consumer.batch_post", side_effect=mock_post),
199+
mock.patch("posthog.consumer.time.sleep") as mock_sleep,
200+
):
201+
consumer.request([_track_event()])
202+
self.assertEqual(
203+
mock_sleep.call_args_list,
204+
[
205+
mock.call(1), # 2^0
206+
mock.call(2), # 2^1
207+
mock.call(4), # 2^2
208+
],
209+
)
210+
211+
def test_request_retries_on_408(self) -> None:
212+
call_count = [0]
213+
214+
def mock_post(*args: Any, **kwargs: Any) -> None:
215+
call_count[0] += 1
216+
if call_count[0] <= 1:
217+
raise APIError(408, "Request Timeout")
218+
219+
consumer = Consumer(None, TEST_API_KEY, retries=3)
220+
with (
221+
mock.patch("posthog.consumer.batch_post", side_effect=mock_post),
222+
mock.patch("posthog.consumer.time.sleep"),
223+
):
224+
consumer.request([_track_event()])
225+
self.assertEqual(call_count[0], 2)
226+
170227
@parameterized.expand(
171228
[
172229
("on_error_succeeds", False),

0 commit comments

Comments
 (0)