Skip to content

Commit 3dbfa28

Browse files
committed
feat(poller): honor Retry-After on HTTP 429 with bounded retries
Replace the single-shot 4xx raise in fetch_page with up to 3 attempts. Honor server Retry-After (RFC 9110 §10.2.3, both delta-seconds and all three HTTP-date forms, never capped) and fall back to exponential backoff capped at 60s when the header is missing or unparseable. Final give-up still raises NEMPException so existing error handling keeps working. Per-attempt cool-downs log at INFO with attempt counter; exhaustion logs at WARNING. Fixes recurring HTTP 429 polling failures for the seven raw.githubusercontent.com hosted mods.
1 parent 5b821fb commit 3dbfa28

2 files changed

Lines changed: 202 additions & 11 deletions

File tree

mod_polling/poller.py

Lines changed: 61 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
import logging
55
import re
66
import urllib.parse
7+
from datetime import UTC, datetime
78

89
import aiohttp
910
import yaml
11+
from aiohttp.helpers import parse_http_date
1012
from jinja2 import Environment, FileSystemLoader, select_autoescape
1113

1214
logger = logging.getLogger("mod_polling.poller")
@@ -21,6 +23,29 @@ def __str__(self):
2123
return repr(self.args[0])
2224

2325

26+
MAX_FETCH_ATTEMPTS = 3
27+
RETRY_AFTER_FALLBACK_CAP = 60.0
28+
29+
30+
def _retry_after_seconds(value: str | None) -> float | None:
31+
"""Parse a Retry-After header value (RFC 9110 §10.2.3) into a non-negative
32+
seconds delta, or ``None`` if missing/unparseable.
33+
34+
Accepts delta-seconds or HTTP-date in any of the three forms allowed by
35+
§5.6.7 (IMF-fixdate, RFC 850, or asctime).
36+
"""
37+
if not value:
38+
return None
39+
try:
40+
return max(0.0, float(value))
41+
except ValueError:
42+
pass
43+
dt = parse_http_date(value)
44+
if dt is None:
45+
return None
46+
return max(0.0, (dt - datetime.now(UTC)).total_seconds())
47+
48+
2449
class ModPoller:
2550
def __init__(self):
2651
self.nem_versions = []
@@ -48,19 +73,44 @@ async def fetch_page(self, url, timeout=10, decode_json=False, ratelimit=True, *
4873
lock = self._host_locks[host] if ratelimit else contextlib.nullcontext()
4974

5075
async with lock:
51-
async with self.session.get(url, timeout=aiohttp.ClientTimeout(total=timeout), **kwargs) as response:
52-
if 400 <= response.status < 500:
53-
raise NEMPException(f"HTTP {response.status} for {url}")
54-
response.raise_for_status()
76+
for attempt in range(MAX_FETCH_ATTEMPTS):
77+
delay = None
78+
async with self.session.get(url, timeout=aiohttp.ClientTimeout(total=timeout), **kwargs) as response:
79+
if response.status == 429:
80+
if attempt == MAX_FETCH_ATTEMPTS - 1:
81+
logger.warning(
82+
"Got HTTP 429 for %s and exhausted %d attempts; giving up",
83+
url,
84+
MAX_FETCH_ATTEMPTS,
85+
)
86+
raise NEMPException(f"HTTP 429 for {url}")
87+
delay = _retry_after_seconds(response.headers.get("Retry-After"))
88+
if delay is None:
89+
delay = min(self._host_delay * (2**attempt), RETRY_AFTER_FALLBACK_CAP)
90+
logger.info(
91+
"Got HTTP 429 for %s; sleeping %.1fs before retry (attempt %d/%d)",
92+
url,
93+
delay,
94+
attempt + 1,
95+
MAX_FETCH_ATTEMPTS,
96+
)
97+
elif 400 <= response.status < 500:
98+
raise NEMPException(f"HTTP {response.status} for {url}")
99+
else:
100+
response.raise_for_status()
55101

56-
if decode_json:
57-
result = await response.json(content_type=None)
58-
else:
59-
result = await response.text()
102+
if decode_json:
103+
result = await response.json(content_type=None)
104+
else:
105+
result = await response.text()
60106

61-
if ratelimit:
62-
await asyncio.sleep(self._host_delay)
63-
return result
107+
if ratelimit:
108+
await asyncio.sleep(self._host_delay)
109+
return result
110+
111+
# Outside response context (socket returned to pool); only
112+
# reached via the 429 branch above, which always sets `delay`.
113+
await asyncio.sleep(delay)
64114

65115
async def fetch_json(self, *args, **kwargs):
66116
return await self.fetch_page(*args, decode_json=True, **kwargs)

tests/test_poller_fetch.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
from datetime import UTC, datetime, timedelta
2+
from email.utils import format_datetime
3+
from unittest.mock import AsyncMock, patch
4+
15
import aiohttp
26
import pytest
37
from aioresponses import aioresponses
@@ -45,6 +49,143 @@ async def test_timeout_propagation(self, poller_with_session):
4549
await poller_with_session.fetch_page("https://example.com/slow", timeout=1)
4650

4751

52+
class TestFetchPageRetryAfter:
53+
async def test_429_with_integer_retry_after_waits_and_retries(self, poller_with_session):
54+
url = "https://example.com/throttled"
55+
with aioresponses() as mocked:
56+
mocked.get(url, status=429, headers={"Retry-After": "5"})
57+
mocked.get(url, body="recovered")
58+
with patch("mod_polling.poller.asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
59+
result = await poller_with_session.fetch_page(url)
60+
assert result == "recovered"
61+
mock_sleep.assert_any_await(5.0)
62+
63+
async def test_429_with_http_date_retry_after_waits_and_retries(self, poller_with_session):
64+
url = "https://example.com/throttled-date"
65+
future = datetime.now(UTC) + timedelta(seconds=30)
66+
http_date = format_datetime(future, usegmt=True)
67+
with aioresponses() as mocked:
68+
mocked.get(url, status=429, headers={"Retry-After": http_date})
69+
mocked.get(url, body="recovered")
70+
with patch("mod_polling.poller.asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
71+
result = await poller_with_session.fetch_page(url)
72+
assert result == "recovered"
73+
slept = [call.args[0] for call in mock_sleep.await_args_list]
74+
assert any(29 <= v <= 31 for v in slept), f"Expected ~30s sleep, got {slept}"
75+
76+
async def test_429_with_asctime_obs_date_retry_after(self, poller_with_session):
77+
"""RFC 9110 §5.6.7 obs-date allows asctime form, which has no timezone."""
78+
url = "https://example.com/throttled-asctime"
79+
future = datetime.now(UTC) + timedelta(seconds=20)
80+
asctime_str = future.strftime("%a %b %d %H:%M:%S %Y")
81+
with aioresponses() as mocked:
82+
mocked.get(url, status=429, headers={"Retry-After": asctime_str})
83+
mocked.get(url, body="recovered")
84+
with patch("mod_polling.poller.asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
85+
result = await poller_with_session.fetch_page(url)
86+
assert result == "recovered"
87+
slept = [call.args[0] for call in mock_sleep.await_args_list]
88+
assert any(18 <= v <= 22 for v in slept), f"Expected ~20s sleep, got {slept}"
89+
90+
async def test_429_without_retry_after_uses_exponential_backoff_fallback(self, poller_with_session):
91+
poller_with_session._host_delay = 0.5
92+
url = "https://example.com/throttled-no-header"
93+
with aioresponses() as mocked:
94+
mocked.get(url, status=429)
95+
mocked.get(url, body="recovered")
96+
with patch("mod_polling.poller.asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
97+
result = await poller_with_session.fetch_page(url)
98+
assert result == "recovered"
99+
# Backoff fires BEFORE the retry, so it's the first sleep call.
100+
# attempt 0 fallback: min(0.5 * 2^0, 60) = 0.5
101+
slept = [c.args[0] for c in mock_sleep.await_args_list]
102+
assert slept[0] == 0.5, f"Expected backoff 0.5 as first sleep, got: {slept}"
103+
104+
async def test_429_with_garbled_retry_after_uses_exponential_backoff_fallback(self, poller_with_session):
105+
poller_with_session._host_delay = 0.5
106+
url = "https://example.com/throttled-garbled"
107+
with aioresponses() as mocked:
108+
mocked.get(url, status=429, headers={"Retry-After": "soon-ish"})
109+
mocked.get(url, body="recovered")
110+
with patch("mod_polling.poller.asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
111+
result = await poller_with_session.fetch_page(url)
112+
assert result == "recovered"
113+
slept = [c.args[0] for c in mock_sleep.await_args_list]
114+
assert slept[0] == 0.5, f"Expected backoff 0.5 as first sleep, got: {slept}"
115+
116+
async def test_429_fallback_caps_at_60s_with_large_host_delay(self, poller_with_session):
117+
poller_with_session._host_delay = 1000
118+
url = "https://example.com/throttled-big-delay"
119+
with aioresponses() as mocked:
120+
mocked.get(url, status=429)
121+
mocked.get(url, body="recovered")
122+
with patch("mod_polling.poller.asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
123+
result = await poller_with_session.fetch_page(url)
124+
assert result == "recovered"
125+
# min(1000 * 2^0, 60) = 60 — the cap kicks in
126+
mock_sleep.assert_any_await(60.0)
127+
128+
async def test_429_then_429_then_success(self, poller_with_session):
129+
url = "https://example.com/twice-throttled"
130+
with aioresponses() as mocked:
131+
mocked.get(url, status=429, headers={"Retry-After": "5"})
132+
mocked.get(url, status=429, headers={"Retry-After": "3"})
133+
mocked.get(url, body="finally")
134+
with patch("mod_polling.poller.asyncio.sleep", new_callable=AsyncMock) as mock_sleep:
135+
result = await poller_with_session.fetch_page(url)
136+
assert result == "finally"
137+
mock_sleep.assert_any_await(5.0)
138+
mock_sleep.assert_any_await(3.0)
139+
140+
async def test_429_three_times_raises_after_two_retries(self, poller_with_session):
141+
url = "https://example.com/persistently-throttled"
142+
with aioresponses() as mocked:
143+
mocked.get(url, status=429, headers={"Retry-After": "1"})
144+
mocked.get(url, status=429, headers={"Retry-After": "1"})
145+
mocked.get(url, status=429, headers={"Retry-After": "1"})
146+
with (
147+
patch("mod_polling.poller.asyncio.sleep", new_callable=AsyncMock),
148+
pytest.raises(NEMPException, match="HTTP 429"),
149+
):
150+
await poller_with_session.fetch_page(url)
151+
152+
async def test_429_exhausted_logs_warning(self, poller_with_session, caplog):
153+
url = "https://example.com/exhausted"
154+
with aioresponses() as mocked:
155+
mocked.get(url, status=429, headers={"Retry-After": "1"})
156+
mocked.get(url, status=429, headers={"Retry-After": "1"})
157+
mocked.get(url, status=429, headers={"Retry-After": "1"})
158+
with (
159+
patch("mod_polling.poller.asyncio.sleep", new_callable=AsyncMock),
160+
caplog.at_level("WARNING", logger="mod_polling.poller"),
161+
pytest.raises(NEMPException),
162+
):
163+
await poller_with_session.fetch_page(url)
164+
matching = [
165+
r
166+
for r in caplog.records
167+
if r.levelname == "WARNING" and url in r.getMessage() and "exhausted" in r.getMessage().lower()
168+
]
169+
messages = [r.getMessage() for r in caplog.records]
170+
assert matching, f"Expected WARNING log mentioning exhausted attempts, got: {messages}"
171+
172+
async def test_429_logs_cooldown_at_info(self, poller_with_session, caplog):
173+
url = "https://example.com/throttled-logged"
174+
with aioresponses() as mocked:
175+
mocked.get(url, status=429, headers={"Retry-After": "7"})
176+
mocked.get(url, body="recovered")
177+
with (
178+
patch("mod_polling.poller.asyncio.sleep", new_callable=AsyncMock),
179+
caplog.at_level("INFO", logger="mod_polling.poller"),
180+
):
181+
await poller_with_session.fetch_page(url)
182+
matching = [
183+
r for r in caplog.records if r.levelname == "INFO" and url in r.getMessage() and "7" in r.getMessage()
184+
]
185+
messages = [r.getMessage() for r in caplog.records]
186+
assert matching, f"Expected INFO log mentioning URL and duration, got: {messages}"
187+
188+
48189
class TestFetchJson:
49190
async def test_delegates_to_fetch_page(self, poller_with_session):
50191
with aioresponses() as mocked:

0 commit comments

Comments
 (0)