fix: add Overpass mirror fallback to eliminate 504 failures
Adds an OVERPASS_MIRRORS list (overpass-api.de, overpass.kumi.systems, overpass.openstreetmap.ru) and a post_overpass() helper in _shared.py. The helper tries the mirrors in order, logs a warning on each failure, and re-raises the last RequestException if every mirror fails. Both overpass.py and overpass_tennis.py now call post_overpass() instead of hard-coding the primary URL.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,15 @@ LANDING_DIR = Path(os.environ.get("LANDING_DIR", "data/landing"))
|
||||
HTTP_TIMEOUT_SECONDS = 30
|
||||
OVERPASS_TIMEOUT_SECONDS = 90 # Overpass can be slow on global queries
|
||||
|
||||
# Public mirrors running the same Overpass API software — tried in order on failure
|
||||
OVERPASS_MIRRORS = [
|
||||
"https://overpass-api.de/api/interpreter",
|
||||
"https://overpass.kumi.systems/api/interpreter",
|
||||
"https://overpass.openstreetmap.ru/api/interpreter",
|
||||
]
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
# Realistic browser User-Agent — avoids bot detection on all extractors
|
||||
USER_AGENT = (
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||
@@ -27,6 +36,30 @@ USER_AGENT = (
|
||||
)
|
||||
|
||||
|
||||
def post_overpass(
    session: niquests.Session,
    query: str,
    timeout_seconds: int,
) -> niquests.Response:
    """POST an Overpass QL query, trying each entry of OVERPASS_MIRRORS in order.

    Args:
        session: HTTP session used to send the request.
        query: Overpass QL query text, sent as the ``data`` form field.
        timeout_seconds: Timeout passed to each mirror attempt separately.

    Returns:
        The first response for which ``raise_for_status()`` does not raise.

    Raises:
        niquests.exceptions.RequestException: The failure from the last mirror
            tried when every mirror fails, or a fresh instance when
            OVERPASS_MIRRORS is empty. Callers with their own retry logic
            (e.g. overpass_tennis) can keep catching this single type.
    """
    last_exc: Exception | None = None
    last_index = len(OVERPASS_MIRRORS) - 1
    for index, url in enumerate(OVERPASS_MIRRORS):
        try:
            resp = session.post(url, data={"data": query}, timeout=timeout_seconds)
            resp.raise_for_status()
        except niquests.exceptions.RequestException as exc:
            last_exc = exc
            if index < last_index:
                _log.warning("Overpass mirror %s failed: %s — trying next", url, exc)
            else:
                # Nothing left to fall back to; do not promise another attempt.
                _log.warning("Overpass mirror %s failed: %s — no mirrors left", url, exc)
        else:
            return resp
    if last_exc is None:
        # Only reachable when OVERPASS_MIRRORS is empty. An explicit raise is
        # used instead of `assert`, which is stripped under `python -O` and
        # would otherwise let `raise None` surface as a TypeError.
        raise niquests.exceptions.RequestException("No Overpass mirrors configured")
    raise last_exc
|
||||
|
||||
|
||||
def setup_logging(name: str) -> logging.Logger:
|
||||
"""Configure and return a logger for the given extractor module."""
|
||||
logging.basicConfig(
|
||||
|
||||
@@ -11,13 +11,12 @@ from pathlib import Path
|
||||
|
||||
import niquests
|
||||
|
||||
from ._shared import OVERPASS_TIMEOUT_SECONDS, run_extractor, setup_logging
|
||||
from ._shared import OVERPASS_TIMEOUT_SECONDS, post_overpass, run_extractor, setup_logging
|
||||
from .utils import landing_path, write_gzip_atomic
|
||||
|
||||
logger = setup_logging("padelnomics.extract.overpass")
|
||||
|
||||
EXTRACTOR_NAME = "overpass"
|
||||
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
|
||||
|
||||
OVERPASS_QUERY = (
|
||||
"[out:json][timeout:60];\n"
|
||||
@@ -41,13 +40,8 @@ def extract(
|
||||
dest_dir = landing_path(landing_dir, "overpass", year, month)
|
||||
dest = dest_dir / "courts.json.gz"
|
||||
|
||||
logger.info("POST %s", OVERPASS_URL)
|
||||
resp = session.post(
|
||||
OVERPASS_URL,
|
||||
data={"data": OVERPASS_QUERY},
|
||||
timeout=OVERPASS_TIMEOUT_SECONDS,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
logger.info("POST Overpass (with mirror fallback)")
|
||||
resp = post_overpass(session, OVERPASS_QUERY, OVERPASS_TIMEOUT_SECONDS)
|
||||
|
||||
size_bytes = len(resp.content)
|
||||
logger.info("%s bytes received", f"{size_bytes:,}")
|
||||
|
||||
@@ -21,13 +21,12 @@ from pathlib import Path
|
||||
|
||||
import niquests
|
||||
|
||||
from ._shared import run_extractor, setup_logging
|
||||
from ._shared import post_overpass, run_extractor, setup_logging
|
||||
from .utils import compress_jsonl_atomic, landing_path, load_partial_results
|
||||
|
||||
logger = setup_logging("padelnomics.extract.overpass_tennis")
|
||||
|
||||
EXTRACTOR_NAME = "overpass_tennis"
|
||||
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
|
||||
|
||||
# Each region is [south, west, north, east] — Overpass bbox format
|
||||
REGIONS = [
|
||||
@@ -63,14 +62,9 @@ def _region_query(bbox: str) -> str:
|
||||
|
||||
|
||||
def _query_region(session: niquests.Session, region: dict) -> list[dict]:
|
||||
"""POST one regional Overpass query. Returns list of OSM elements."""
|
||||
"""POST one regional Overpass query, with mirror fallback. Returns OSM elements."""
|
||||
query = _region_query(region["bbox"])
|
||||
resp = session.post(
|
||||
OVERPASS_URL,
|
||||
data={"data": query},
|
||||
timeout=REGION_TIMEOUT_SECONDS,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
resp = post_overpass(session, query, REGION_TIMEOUT_SECONDS)
|
||||
return resp.json().get("elements", [])
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user