6868FETCH_LIMIT = 5
6969
7070
71+ class UpstreamRateLimit (RuntimeError ):
72+ def __init__ (self , message : str , retry_after : float | None = None ):
73+ super ().__init__ (message )
74+ self .retry_after = retry_after
75+
76+
77+ def _retry_after_seconds (error : HTTPError ) -> float | None :
78+ raw = error .headers .get ("Retry-After" ) if error .headers else None
79+ if not raw :
80+ return None
81+ try :
82+ return max (0.0 , float (raw ))
83+ except ValueError :
84+ return None
85+
86+
7187def _load_stations () -> list [dict ]:
7288 here = os .path .dirname (os .path .abspath (__file__ ))
7389 with open (os .path .join (here , "stations.json" )) as f :
@@ -106,6 +122,11 @@ def fetch_continuous_values(nwis_id: str, parameter_code: str,
106122 except HTTPError as e :
107123 if e .code == 404 :
108124 return []
125+ if e .code == 429 :
126+ raise UpstreamRateLimit (
127+ f"HTTP 429 Too Many Requests for { nwis_id } /{ parameter_code } " ,
128+ _retry_after_seconds (e ),
129+ ) from e
109130 raise
110131 except Exception as e :
111132 print (f" [WARN] USGS fetch failed for { nwis_id } /{ parameter_code } : { e } " )
@@ -185,6 +206,9 @@ def __init__(self, station_filter: list[str] | None = None):
185206
186207 # Track last observation timestamp per (station, param) to avoid duplicates
187208 self ._last_obs_ts : dict [str , float ] = {}
209+ self ._usgs_cooldown_until = 0.0
210+ self ._request_delay = float (os .environ .get ("USGS_REQUEST_DELAY" , "2.0" ))
211+ self ._rate_limit_backoff = float (os .environ .get ("USGS_429_BACKOFF" , "900" ))
188212
189213 # REST config
190214 self ._base_url = os .environ .get (
@@ -327,6 +351,12 @@ def publish_cycle(self, dry_run: bool = False) -> int:
327351 station_ds = self ._ds_ids .get (nwis_id , {})
328352
329353 for param_code in st .get ("parameterCodes" , []):
354+ cooldown_remaining = self ._usgs_cooldown_until - time .time ()
355+ if cooldown_remaining > 0 :
356+ self .stats ["skipped" ] += 1
357+ print (f" [{ ts_label } ] USGS cooldown active; skipping fetches for { cooldown_remaining :.0f} s" )
358+ return published
359+
330360 ds_id = station_ds .get (param_code )
331361 if ds_id is None and not dry_run :
332362 continue
@@ -338,6 +368,12 @@ def publish_cycle(self, dry_run: bool = False) -> int:
338368 values = fetch_continuous_values (
339369 nwis_id , param_code ,
340370 api_key = self .api_key , limit = 1 )
371+ except UpstreamRateLimit as e :
372+ backoff = e .retry_after or self ._rate_limit_backoff
373+ self ._usgs_cooldown_until = time .time () + backoff
374+ self .stats ["skipped" ] += 1
375+ print (f" [{ ts_label } ] { nwis_id } /{ param_code } : RATE LIMITED; backing off { backoff :.0f} s" )
376+ return published
341377 except Exception as e :
342378 self .stats ["errors" ] += 1
343379 print (f" [{ ts_label } ] { nwis_id } /{ param_code } : FETCH ERR { e } " )
@@ -389,8 +425,7 @@ def publish_cycle(self, dry_run: bool = False) -> int:
389425 self .stats ["errors" ] += 1
390426 print (f" [{ ts_label } ] { nwis_id } /{ param_code } : ERR { e } " )
391427
392- # Be polite to USGS API
393- time .sleep (0.3 )
428+ time .sleep (self ._request_delay )
394429
395430 return published
396431
0 commit comments