fix: replace broken bbox pagination with global page-based extraction
The Playtomic API silently ignores the bbox params (min_latitude, etc.) and the `offset` param. We discovered that the `page` param works correctly for global enumeration. Result: 14,202 venues across 82 countries (previously 100 with the bbox approach). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,14 @@
|
||||
"""Playtomic tenants extractor — venue listings via unauthenticated API.
|
||||
|
||||
Iterates over target-market bounding boxes with pagination, deduplicates
|
||||
on tenant_id, and writes a single consolidated JSON to the landing zone.
|
||||
Paginates through the global tenant list (sorted by UUID) using the `page`
|
||||
parameter. Deduplicates on tenant_id and writes a single consolidated JSON
|
||||
to the landing zone.
|
||||
|
||||
API notes (discovered 2026-02):
|
||||
- bbox params (min_latitude etc.) are silently ignored by the API
|
||||
- `offset` param is ignored; `page` param works correctly
|
||||
- `size=100` is the maximum effective page size
|
||||
- ~14K venues globally as of Feb 2026
|
||||
|
||||
Rate: 1 req / 2 s (see docs/data-sources-inventory.md §1.2).
|
||||
|
||||
@@ -25,89 +32,7 @@ PLAYTOMIC_TENANTS_URL = "https://api.playtomic.io/v1/tenants"
|
||||
|
||||
THROTTLE_SECONDS = 2
|
||||
PAGE_SIZE = 100
|
||||
MAX_PAGES_PER_BBOX = 500 # safety bound — prevents infinite pagination
|
||||
MAX_STALE_PAGES = 3 # stop after N consecutive pages with zero new results
|
||||
|
||||
# Global padel markets — bounding boxes sized to stay under API's internal result cap.
|
||||
# Large countries (Spain, Italy, USA) are split into sub-regions.
|
||||
BBOXES = [
|
||||
# Spain — south (Andalusia, Murcia, Valencia)
|
||||
{"min_latitude": 35.95, "min_longitude": -9.39, "max_latitude": 39.87, "max_longitude": 4.33},
|
||||
# Spain — north (Madrid, Catalonia, Basque Country)
|
||||
{"min_latitude": 39.87, "min_longitude": -9.39, "max_latitude": 43.79, "max_longitude": 4.33},
|
||||
# UK & Ireland
|
||||
{"min_latitude": 49.90, "min_longitude": -8.62, "max_latitude": 60.85, "max_longitude": 1.77},
|
||||
# Germany
|
||||
{"min_latitude": 47.27, "min_longitude": 5.87, "max_latitude": 55.06, "max_longitude": 15.04},
|
||||
# France
|
||||
{"min_latitude": 41.36, "min_longitude": -5.14, "max_latitude": 51.09, "max_longitude": 9.56},
|
||||
# Italy — south (Rome, Naples, Sicily, Sardinia)
|
||||
{"min_latitude": 36.35, "min_longitude": 6.62, "max_latitude": 42.00, "max_longitude": 18.51},
|
||||
# Italy — north (Milan, Turin, Venice, Bologna)
|
||||
{"min_latitude": 42.00, "min_longitude": 6.62, "max_latitude": 47.09, "max_longitude": 18.51},
|
||||
# Portugal
|
||||
{"min_latitude": 37.00, "min_longitude": -9.50, "max_latitude": 42.15, "max_longitude": -6.19},
|
||||
# Netherlands
|
||||
{"min_latitude": 50.75, "min_longitude": 3.37, "max_latitude": 53.47, "max_longitude": 7.21},
|
||||
# Belgium
|
||||
{"min_latitude": 49.50, "min_longitude": 2.55, "max_latitude": 51.50, "max_longitude": 6.40},
|
||||
# Austria
|
||||
{"min_latitude": 46.37, "min_longitude": 9.53, "max_latitude": 49.02, "max_longitude": 17.16},
|
||||
# Switzerland
|
||||
{"min_latitude": 45.82, "min_longitude": 5.96, "max_latitude": 47.80, "max_longitude": 10.49},
|
||||
# Sweden
|
||||
{"min_latitude": 55.34, "min_longitude": 11.11, "max_latitude": 69.06, "max_longitude": 24.16},
|
||||
# Denmark
|
||||
{"min_latitude": 54.56, "min_longitude": 8.09, "max_latitude": 57.75, "max_longitude": 12.69},
|
||||
# Norway
|
||||
{"min_latitude": 57.97, "min_longitude": 4.50, "max_latitude": 71.19, "max_longitude": 31.17},
|
||||
# Finland
|
||||
{"min_latitude": 59.81, "min_longitude": 20.55, "max_latitude": 70.09, "max_longitude": 31.59},
|
||||
# Mexico
|
||||
{
|
||||
"min_latitude": 14.53,
|
||||
"min_longitude": -118.37,
|
||||
"max_latitude": 32.72,
|
||||
"max_longitude": -86.71,
|
||||
},
|
||||
# Argentina
|
||||
{
|
||||
"min_latitude": -55.06,
|
||||
"min_longitude": -73.56,
|
||||
"max_latitude": -21.78,
|
||||
"max_longitude": -53.63,
|
||||
},
|
||||
# Middle East (UAE, Qatar, Saudi Arabia, Bahrain)
|
||||
{"min_latitude": 21.00, "min_longitude": 38.00, "max_latitude": 32.00, "max_longitude": 56.50},
|
||||
# USA — southwest (California, Arizona, Texas west)
|
||||
{
|
||||
"min_latitude": 24.50,
|
||||
"min_longitude": -125.00,
|
||||
"max_latitude": 37.00,
|
||||
"max_longitude": -100.00,
|
||||
},
|
||||
# USA — southeast (Florida, Texas east, Georgia)
|
||||
{
|
||||
"min_latitude": 24.50,
|
||||
"min_longitude": -100.00,
|
||||
"max_latitude": 37.00,
|
||||
"max_longitude": -66.95,
|
||||
},
|
||||
# USA — northwest
|
||||
{
|
||||
"min_latitude": 37.00,
|
||||
"min_longitude": -125.00,
|
||||
"max_latitude": 49.38,
|
||||
"max_longitude": -100.00,
|
||||
},
|
||||
# USA — northeast (New York, Chicago, Boston)
|
||||
{
|
||||
"min_latitude": 37.00,
|
||||
"min_longitude": -100.00,
|
||||
"max_latitude": 49.38,
|
||||
"max_longitude": -66.95,
|
||||
},
|
||||
]
|
||||
MAX_PAGES = 500 # safety bound — ~50K venues max, well above current ~14K
|
||||
|
||||
|
||||
def extract(
|
||||
@@ -116,7 +41,7 @@ def extract(
|
||||
conn: sqlite3.Connection,
|
||||
session: niquests.Session,
|
||||
) -> dict:
|
||||
"""Fetch all Playtomic venues across target markets. Returns run metrics."""
|
||||
"""Fetch all Playtomic venues via global pagination. Returns run metrics."""
|
||||
year, month = year_month.split("/")
|
||||
dest_dir = landing_path(landing_dir, "playtomic", year, month)
|
||||
dest = dest_dir / "tenants.json.gz"
|
||||
@@ -124,27 +49,14 @@ def extract(
|
||||
all_tenants: list[dict] = []
|
||||
seen_ids: set[str] = set()
|
||||
|
||||
for bbox in BBOXES:
|
||||
stale_pages = 0
|
||||
for page in range(MAX_PAGES_PER_BBOX):
|
||||
for page in range(MAX_PAGES):
|
||||
params = {
|
||||
"sport_ids": "PADEL",
|
||||
"min_latitude": bbox["min_latitude"],
|
||||
"min_longitude": bbox["min_longitude"],
|
||||
"max_latitude": bbox["max_latitude"],
|
||||
"max_longitude": bbox["max_longitude"],
|
||||
"offset": page * PAGE_SIZE,
|
||||
"size": PAGE_SIZE,
|
||||
"page": page,
|
||||
}
|
||||
|
||||
logger.info(
|
||||
"GET page=%d bbox=(%.1f,%.1f,%.1f,%.1f)",
|
||||
page,
|
||||
bbox["min_latitude"],
|
||||
bbox["min_longitude"],
|
||||
bbox["max_latitude"],
|
||||
bbox["max_longitude"],
|
||||
)
|
||||
logger.info("GET page=%d (total so far: %d)", page, len(all_tenants))
|
||||
|
||||
resp = session.get(PLAYTOMIC_TENANTS_URL, params=params, timeout=HTTP_TIMEOUT_SECONDS)
|
||||
resp.raise_for_status()
|
||||
@@ -166,18 +78,10 @@ def extract(
|
||||
"page=%d got=%d new=%d total=%d", page, len(tenants), new_count, len(all_tenants)
|
||||
)
|
||||
|
||||
# Last page — fewer than PAGE_SIZE results means we've exhausted the list
|
||||
if len(tenants) < PAGE_SIZE:
|
||||
break
|
||||
|
||||
# API recycles results past its internal limit — stop early
|
||||
if new_count == 0:
|
||||
stale_pages += 1
|
||||
if stale_pages >= MAX_STALE_PAGES:
|
||||
logger.info("stopping bbox after %d stale pages", stale_pages)
|
||||
break
|
||||
else:
|
||||
stale_pages = 0
|
||||
|
||||
time.sleep(THROTTLE_SECONDS)
|
||||
|
||||
payload = json.dumps({"tenants": all_tenants, "count": len(all_tenants)}).encode()
|
||||
|
||||
Reference in New Issue
Block a user