fix: replace broken bbox pagination with global page-based extraction

The Playtomic API silently ignores the bbox params (min_latitude, etc.) and the `offset` param.
The `page` param, however, works correctly for global enumeration.

Result: 14,202 venues across 82 countries (was 100 venues with the bbox approach).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-23 01:16:35 +01:00
parent 4e8d94de47
commit a055660cd2

View File

@@ -1,7 +1,14 @@
"""Playtomic tenants extractor — venue listings via unauthenticated API.
Iterates over target-market bounding boxes with pagination, deduplicates
on tenant_id, and writes a single consolidated JSON to the landing zone.
Paginates through the global tenant list (sorted by UUID) using the `page`
parameter. Deduplicates on tenant_id and writes a single consolidated JSON
to the landing zone.
API notes (discovered 2026-02):
- bbox params (min_latitude, etc.) are silently ignored by the API
- `offset` param is ignored; `page` param works correctly
- `size=100` is the maximum effective page size
- ~14K venues globally as of Feb 2026
Rate: 1 req / 2 s (see docs/data-sources-inventory.md §1.2).
@@ -25,89 +32,7 @@ PLAYTOMIC_TENANTS_URL = "https://api.playtomic.io/v1/tenants"
THROTTLE_SECONDS = 2
PAGE_SIZE = 100
MAX_PAGES_PER_BBOX = 500 # safety bound — prevents infinite pagination
MAX_STALE_PAGES = 3 # stop after N consecutive pages with zero new results
# Global padel markets — bounding boxes sized to stay under API's internal result cap.
# Large countries (Spain, Italy, USA) are split into sub-regions.
BBOXES = [
# Spain — south (Andalusia, Murcia, Valencia)
{"min_latitude": 35.95, "min_longitude": -9.39, "max_latitude": 39.87, "max_longitude": 4.33},
# Spain — north (Madrid, Catalonia, Basque Country)
{"min_latitude": 39.87, "min_longitude": -9.39, "max_latitude": 43.79, "max_longitude": 4.33},
# UK & Ireland
{"min_latitude": 49.90, "min_longitude": -8.62, "max_latitude": 60.85, "max_longitude": 1.77},
# Germany
{"min_latitude": 47.27, "min_longitude": 5.87, "max_latitude": 55.06, "max_longitude": 15.04},
# France
{"min_latitude": 41.36, "min_longitude": -5.14, "max_latitude": 51.09, "max_longitude": 9.56},
# Italy — south (Rome, Naples, Sicily, Sardinia)
{"min_latitude": 36.35, "min_longitude": 6.62, "max_latitude": 42.00, "max_longitude": 18.51},
# Italy — north (Milan, Turin, Venice, Bologna)
{"min_latitude": 42.00, "min_longitude": 6.62, "max_latitude": 47.09, "max_longitude": 18.51},
# Portugal
{"min_latitude": 37.00, "min_longitude": -9.50, "max_latitude": 42.15, "max_longitude": -6.19},
# Netherlands
{"min_latitude": 50.75, "min_longitude": 3.37, "max_latitude": 53.47, "max_longitude": 7.21},
# Belgium
{"min_latitude": 49.50, "min_longitude": 2.55, "max_latitude": 51.50, "max_longitude": 6.40},
# Austria
{"min_latitude": 46.37, "min_longitude": 9.53, "max_latitude": 49.02, "max_longitude": 17.16},
# Switzerland
{"min_latitude": 45.82, "min_longitude": 5.96, "max_latitude": 47.80, "max_longitude": 10.49},
# Sweden
{"min_latitude": 55.34, "min_longitude": 11.11, "max_latitude": 69.06, "max_longitude": 24.16},
# Denmark
{"min_latitude": 54.56, "min_longitude": 8.09, "max_latitude": 57.75, "max_longitude": 12.69},
# Norway
{"min_latitude": 57.97, "min_longitude": 4.50, "max_latitude": 71.19, "max_longitude": 31.17},
# Finland
{"min_latitude": 59.81, "min_longitude": 20.55, "max_latitude": 70.09, "max_longitude": 31.59},
# Mexico
{
"min_latitude": 14.53,
"min_longitude": -118.37,
"max_latitude": 32.72,
"max_longitude": -86.71,
},
# Argentina
{
"min_latitude": -55.06,
"min_longitude": -73.56,
"max_latitude": -21.78,
"max_longitude": -53.63,
},
# Middle East (UAE, Qatar, Saudi Arabia, Bahrain)
{"min_latitude": 21.00, "min_longitude": 38.00, "max_latitude": 32.00, "max_longitude": 56.50},
# USA — southwest (California, Arizona, Texas west)
{
"min_latitude": 24.50,
"min_longitude": -125.00,
"max_latitude": 37.00,
"max_longitude": -100.00,
},
# USA — southeast (Florida, Texas east, Georgia)
{
"min_latitude": 24.50,
"min_longitude": -100.00,
"max_latitude": 37.00,
"max_longitude": -66.95,
},
# USA — northwest
{
"min_latitude": 37.00,
"min_longitude": -125.00,
"max_latitude": 49.38,
"max_longitude": -100.00,
},
# USA — northeast (New York, Chicago, Boston)
{
"min_latitude": 37.00,
"min_longitude": -100.00,
"max_latitude": 49.38,
"max_longitude": -66.95,
},
]
MAX_PAGES = 500 # safety bound — ~50K venues max, well above current ~14K
def extract(
@@ -116,7 +41,7 @@ def extract(
conn: sqlite3.Connection,
session: niquests.Session,
) -> dict:
"""Fetch all Playtomic venues across target markets. Returns run metrics."""
"""Fetch all Playtomic venues via global pagination. Returns run metrics."""
year, month = year_month.split("/")
dest_dir = landing_path(landing_dir, "playtomic", year, month)
dest = dest_dir / "tenants.json.gz"
@@ -124,27 +49,14 @@ def extract(
all_tenants: list[dict] = []
seen_ids: set[str] = set()
for bbox in BBOXES:
stale_pages = 0
for page in range(MAX_PAGES_PER_BBOX):
for page in range(MAX_PAGES):
params = {
"sport_ids": "PADEL",
"min_latitude": bbox["min_latitude"],
"min_longitude": bbox["min_longitude"],
"max_latitude": bbox["max_latitude"],
"max_longitude": bbox["max_longitude"],
"offset": page * PAGE_SIZE,
"size": PAGE_SIZE,
"page": page,
}
logger.info(
"GET page=%d bbox=(%.1f,%.1f,%.1f,%.1f)",
page,
bbox["min_latitude"],
bbox["min_longitude"],
bbox["max_latitude"],
bbox["max_longitude"],
)
logger.info("GET page=%d (total so far: %d)", page, len(all_tenants))
resp = session.get(PLAYTOMIC_TENANTS_URL, params=params, timeout=HTTP_TIMEOUT_SECONDS)
resp.raise_for_status()
@@ -166,18 +78,10 @@ def extract(
"page=%d got=%d new=%d total=%d", page, len(tenants), new_count, len(all_tenants)
)
# Last page — fewer than PAGE_SIZE results means we've exhausted the list
if len(tenants) < PAGE_SIZE:
break
# API recycles results past its internal limit — stop early
if new_count == 0:
stale_pages += 1
if stale_pages >= MAX_STALE_PAGES:
logger.info("stopping bbox after %d stale pages", stale_pages)
break
else:
stale_pages = 0
time.sleep(THROTTLE_SECONDS)
payload = json.dumps({"tenants": all_tenants, "count": len(all_tenants)}).encode()