fix: add Overpass mirror fallback to eliminate 504 failures

Adds OVERPASS_MIRRORS list (overpass-api.de, kumi.systems, openstreetmap.ru)
and a post_overpass() helper in _shared.py that tries mirrors in order,
logging a warning on each failure and re-raising the last RequestException
if all mirrors fail. Both overpass.py and overpass_tennis.py now call
post_overpass() instead of hard-coding the primary URL.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-25 21:29:51 +01:00
parent 61a3335197
commit 73330b1aaa
3 changed files with 39 additions and 18 deletions

View File

@@ -19,6 +19,15 @@ LANDING_DIR = Path(os.environ.get("LANDING_DIR", "data/landing"))
HTTP_TIMEOUT_SECONDS = 30
OVERPASS_TIMEOUT_SECONDS = 90 # Overpass can be slow on global queries
# Public mirrors running the same Overpass API software — tried in order on failure
OVERPASS_MIRRORS = [
"https://overpass-api.de/api/interpreter",
"https://overpass.kumi.systems/api/interpreter",
"https://overpass.openstreetmap.ru/api/interpreter",
]
_log = logging.getLogger(__name__)
# Realistic browser User-Agent — avoids bot detection on all extractors
USER_AGENT = (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
@@ -27,6 +36,30 @@ USER_AGENT = (
)
def post_overpass(
    session: niquests.Session,
    query: str,
    timeout_seconds: int,
) -> niquests.Response:
    """POST an Overpass QL query, trying each URL in OVERPASS_MIRRORS in order.

    Args:
        session: Session used to issue the POST requests.
        query: Overpass QL query text, sent as the ``data`` form field.
        timeout_seconds: Timeout applied to each individual mirror request
            (not to the whole fallback sequence).

    Returns:
        The first response that passes ``raise_for_status()``.

    Raises:
        niquests.exceptions.RequestException: The error from the last mirror
            tried, when every mirror fails. Re-raising it unchanged lets
            callers with their own retry logic (e.g. overpass_tennis) keep
            working as before.
        RuntimeError: If OVERPASS_MIRRORS is empty, so no request was made.
    """
    last_exc: niquests.exceptions.RequestException | None = None
    final_index = len(OVERPASS_MIRRORS) - 1

    for index, url in enumerate(OVERPASS_MIRRORS):
        try:
            resp = session.post(url, data={"data": query}, timeout=timeout_seconds)
            # HTTP error statuses (e.g. 504) must also trigger the fallback.
            resp.raise_for_status()
        except niquests.exceptions.RequestException as exc:
            last_exc = exc
            if index < final_index:
                _log.warning("Overpass mirror %s failed: %s — trying next", url, exc)
            else:
                # Nothing left to fall back to; don't claim a retry is coming.
                _log.warning("Overpass mirror %s failed: %s — no mirrors left", url, exc)
        else:
            return resp

    # Explicit check instead of `assert`: asserts are stripped under `python -O`,
    # which would turn an empty mirror list into a confusing `raise None`.
    if last_exc is None:
        raise RuntimeError("OVERPASS_MIRRORS is empty — no Overpass endpoint to query")
    raise last_exc
def setup_logging(name: str) -> logging.Logger:
"""Configure and return a logger for the given extractor module."""
logging.basicConfig(

View File

@@ -11,13 +11,12 @@ from pathlib import Path
import niquests
from ._shared import OVERPASS_TIMEOUT_SECONDS, run_extractor, setup_logging
from ._shared import OVERPASS_TIMEOUT_SECONDS, post_overpass, run_extractor, setup_logging
from .utils import landing_path, write_gzip_atomic
logger = setup_logging("padelnomics.extract.overpass")
EXTRACTOR_NAME = "overpass"
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
OVERPASS_QUERY = (
"[out:json][timeout:60];\n"
@@ -41,13 +40,8 @@ def extract(
dest_dir = landing_path(landing_dir, "overpass", year, month)
dest = dest_dir / "courts.json.gz"
logger.info("POST %s", OVERPASS_URL)
resp = session.post(
OVERPASS_URL,
data={"data": OVERPASS_QUERY},
timeout=OVERPASS_TIMEOUT_SECONDS,
)
resp.raise_for_status()
logger.info("POST Overpass (with mirror fallback)")
resp = post_overpass(session, OVERPASS_QUERY, OVERPASS_TIMEOUT_SECONDS)
size_bytes = len(resp.content)
logger.info("%s bytes received", f"{size_bytes:,}")

View File

@@ -21,13 +21,12 @@ from pathlib import Path
import niquests
from ._shared import run_extractor, setup_logging
from ._shared import post_overpass, run_extractor, setup_logging
from .utils import compress_jsonl_atomic, landing_path, load_partial_results
logger = setup_logging("padelnomics.extract.overpass_tennis")
EXTRACTOR_NAME = "overpass_tennis"
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
# Each region is [south, west, north, east] — Overpass bbox format
REGIONS = [
@@ -63,14 +62,9 @@ def _region_query(bbox: str) -> str:
def _query_region(session: niquests.Session, region: dict) -> list[dict]:
"""POST one regional Overpass query. Returns list of OSM elements."""
"""POST one regional Overpass query, with mirror fallback. Returns OSM elements."""
query = _region_query(region["bbox"])
resp = session.post(
OVERPASS_URL,
data={"data": query},
timeout=REGION_TIMEOUT_SECONDS,
)
resp.raise_for_status()
resp = post_overpass(session, query, REGION_TIMEOUT_SECONDS)
return resp.json().get("elements", [])