From a055660cd258f0a0ec3a3cd040fc02100908e995 Mon Sep 17 00:00:00 2001 From: Deeman Date: Mon, 23 Feb 2026 01:16:35 +0100 Subject: [PATCH] fix: replace broken bbox pagination with global page-based extraction Playtomic API ignores bbox params (min_latitude, etc.) and offset param. Discovered that `page` param works correctly for global enumeration. Result: 14,202 venues across 82 countries (was 100 with bbox approach). Co-Authored-By: Claude Opus 4.6 --- .../padelnomics_extract/playtomic_tenants.py | 172 ++++-------------- 1 file changed, 38 insertions(+), 134 deletions(-) diff --git a/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py b/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py index cd8b343..a80636a 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py @@ -1,7 +1,14 @@ """Playtomic tenants extractor — venue listings via unauthenticated API. -Iterates over target-market bounding boxes with pagination, deduplicates -on tenant_id, and writes a single consolidated JSON to the landing zone. +Paginates through the global tenant list (sorted by UUID) using the `page` +parameter. Deduplicates on tenant_id and writes a single consolidated JSON +to the landing zone. + +API notes (discovered 2026-02): + - bbox params (min_latitude etc.) are silently ignored by the API + - `offset` param is ignored; `page` param works correctly + - `size=100` is the maximum effective page size + - ~14K venues globally as of Feb 2026 Rate: 1 req / 2 s (see docs/data-sources-inventory.md §1.2). @@ -25,89 +32,7 @@ PLAYTOMIC_TENANTS_URL = "https://api.playtomic.io/v1/tenants" THROTTLE_SECONDS = 2 PAGE_SIZE = 100 -MAX_PAGES_PER_BBOX = 500 # safety bound — prevents infinite pagination -MAX_STALE_PAGES = 3 # stop after N consecutive pages with zero new results - -# Global padel markets — bounding boxes sized to stay under API's internal result cap. -# Large countries (Spain, Italy, USA) are split into sub-regions. -BBOXES = [ - # Spain — south (Andalusia, Murcia, Valencia) - {"min_latitude": 35.95, "min_longitude": -9.39, "max_latitude": 39.87, "max_longitude": 4.33}, - # Spain — north (Madrid, Catalonia, Basque Country) - {"min_latitude": 39.87, "min_longitude": -9.39, "max_latitude": 43.79, "max_longitude": 4.33}, - # UK & Ireland - {"min_latitude": 49.90, "min_longitude": -8.62, "max_latitude": 60.85, "max_longitude": 1.77}, - # Germany - {"min_latitude": 47.27, "min_longitude": 5.87, "max_latitude": 55.06, "max_longitude": 15.04}, - # France - {"min_latitude": 41.36, "min_longitude": -5.14, "max_latitude": 51.09, "max_longitude": 9.56}, - # Italy — south (Rome, Naples, Sicily, Sardinia) - {"min_latitude": 36.35, "min_longitude": 6.62, "max_latitude": 42.00, "max_longitude": 18.51}, - # Italy — north (Milan, Turin, Venice, Bologna) - {"min_latitude": 42.00, "min_longitude": 6.62, "max_latitude": 47.09, "max_longitude": 18.51}, - # Portugal - {"min_latitude": 37.00, "min_longitude": -9.50, "max_latitude": 42.15, "max_longitude": -6.19}, - # Netherlands - {"min_latitude": 50.75, "min_longitude": 3.37, "max_latitude": 53.47, "max_longitude": 7.21}, - # Belgium - {"min_latitude": 49.50, "min_longitude": 2.55, "max_latitude": 51.50, "max_longitude": 6.40}, - # Austria - {"min_latitude": 46.37, "min_longitude": 9.53, "max_latitude": 49.02, "max_longitude": 17.16}, - # Switzerland - {"min_latitude": 45.82, "min_longitude": 5.96, "max_latitude": 47.80, "max_longitude": 10.49}, - # Sweden - {"min_latitude": 55.34, "min_longitude": 11.11, "max_latitude": 69.06, "max_longitude": 24.16}, - # Denmark - {"min_latitude": 54.56, "min_longitude": 8.09, "max_latitude": 57.75, "max_longitude": 12.69}, - # Norway - {"min_latitude": 57.97, "min_longitude": 4.50, "max_latitude": 71.19, "max_longitude": 31.17}, - # Finland - {"min_latitude": 59.81, "min_longitude": 20.55, "max_latitude": 70.09, "max_longitude": 31.59}, - # Mexico - { - "min_latitude": 14.53, - "min_longitude": -118.37, - "max_latitude": 32.72, - "max_longitude": -86.71, - }, - # Argentina - { - "min_latitude": -55.06, - "min_longitude": -73.56, - "max_latitude": -21.78, - "max_longitude": -53.63, - }, - # Middle East (UAE, Qatar, Saudi Arabia, Bahrain) - {"min_latitude": 21.00, "min_longitude": 38.00, "max_latitude": 32.00, "max_longitude": 56.50}, - # USA — southwest (California, Arizona, Texas west) - { - "min_latitude": 24.50, - "min_longitude": -125.00, - "max_latitude": 37.00, - "max_longitude": -100.00, - }, - # USA — southeast (Florida, Texas east, Georgia) - { - "min_latitude": 24.50, - "min_longitude": -100.00, - "max_latitude": 37.00, - "max_longitude": -66.95, - }, - # USA — northwest - { - "min_latitude": 37.00, - "min_longitude": -125.00, - "max_latitude": 49.38, - "max_longitude": -100.00, - }, - # USA — northeast (New York, Chicago, Boston) - { - "min_latitude": 37.00, - "min_longitude": -100.00, - "max_latitude": 49.38, - "max_longitude": -66.95, - }, -] +MAX_PAGES = 500 # safety bound — ~50K venues max, well above current ~14K def extract( @@ -116,7 +41,7 @@ def extract( conn: sqlite3.Connection, session: niquests.Session, ) -> dict: - """Fetch all Playtomic venues across target markets. Returns run metrics.""" + """Fetch all Playtomic venues via global pagination. Returns run metrics.""" year, month = year_month.split("/") dest_dir = landing_path(landing_dir, "playtomic", year, month) dest = dest_dir / "tenants.json.gz" @@ -124,61 +49,40 @@ def extract( all_tenants: list[dict] = [] seen_ids: set[str] = set() - for bbox in BBOXES: - stale_pages = 0 - for page in range(MAX_PAGES_PER_BBOX): - params = { - "sport_ids": "PADEL", - "min_latitude": bbox["min_latitude"], - "min_longitude": bbox["min_longitude"], - "max_latitude": bbox["max_latitude"], - "max_longitude": bbox["max_longitude"], - "offset": page * PAGE_SIZE, - "size": PAGE_SIZE, - } + for page in range(MAX_PAGES): + params = { + "sport_ids": "PADEL", + "size": PAGE_SIZE, + "page": page, + } - logger.info( - "GET page=%d bbox=(%.1f,%.1f,%.1f,%.1f)", - page, - bbox["min_latitude"], - bbox["min_longitude"], - bbox["max_latitude"], - bbox["max_longitude"], - ) + logger.info("GET page=%d (total so far: %d)", page, len(all_tenants)) - resp = session.get(PLAYTOMIC_TENANTS_URL, params=params, timeout=HTTP_TIMEOUT_SECONDS) - resp.raise_for_status() + resp = session.get(PLAYTOMIC_TENANTS_URL, params=params, timeout=HTTP_TIMEOUT_SECONDS) + resp.raise_for_status() - tenants = resp.json() - assert isinstance(tenants, list), ( - f"Expected list from Playtomic API, got {type(tenants)}" - ) + tenants = resp.json() + assert isinstance(tenants, list), ( + f"Expected list from Playtomic API, got {type(tenants)}" + ) - new_count = 0 - for tenant in tenants: - tid = tenant.get("tenant_id") or tenant.get("id") - if tid and tid not in seen_ids: - seen_ids.add(tid) - all_tenants.append(tenant) - new_count += 1 + new_count = 0 + for tenant in tenants: + tid = tenant.get("tenant_id") or tenant.get("id") + if tid and tid not in seen_ids: + seen_ids.add(tid) + all_tenants.append(tenant) + new_count += 1 - logger.info( - "page=%d got=%d new=%d total=%d", page, len(tenants), new_count, len(all_tenants) - ) + logger.info( + "page=%d got=%d new=%d total=%d", page, len(tenants), new_count, len(all_tenants) + ) - if len(tenants) < PAGE_SIZE: - break + # Last page — fewer than PAGE_SIZE results means we've exhausted the list + if len(tenants) < PAGE_SIZE: + break - # API recycles results past its internal limit — stop early - if new_count == 0: - stale_pages += 1 - if stale_pages >= MAX_STALE_PAGES: - logger.info("stopping bbox after %d stale pages", stale_pages) - break - else: - stale_pages = 0 - - time.sleep(THROTTLE_SECONDS) + time.sleep(THROTTLE_SECONDS) payload = json.dumps({"tenants": all_tenants, "count": len(all_tenants)}).encode() bytes_written = write_gzip_atomic(dest, payload)