fix: replace broken bbox pagination with global page-based extraction
The Playtomic API silently ignores the bbox params (min_latitude, etc.) and the `offset` param. The `page` param, however, works correctly for global enumeration.

Result: 14,202 venues across 82 countries (was 100 with the bbox approach).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,14 @@
|
|||||||
"""Playtomic tenants extractor — venue listings via unauthenticated API.
|
"""Playtomic tenants extractor — venue listings via unauthenticated API.
|
||||||
|
|
||||||
Iterates over target-market bounding boxes with pagination, deduplicates
|
Paginates through the global tenant list (sorted by UUID) using the `page`
|
||||||
on tenant_id, and writes a single consolidated JSON to the landing zone.
|
parameter. Deduplicates on tenant_id and writes a single consolidated JSON
|
||||||
|
to the landing zone.
|
||||||
|
|
||||||
|
API notes (discovered 2026-02):
|
||||||
|
- bbox params (min_latitude etc.) are silently ignored by the API
|
||||||
|
- `offset` param is ignored; `page` param works correctly
|
||||||
|
- `size=100` is the maximum effective page size
|
||||||
|
- ~14K venues globally as of Feb 2026
|
||||||
|
|
||||||
Rate: 1 req / 2 s (see docs/data-sources-inventory.md §1.2).
|
Rate: 1 req / 2 s (see docs/data-sources-inventory.md §1.2).
|
||||||
|
|
||||||
@@ -25,89 +32,7 @@ PLAYTOMIC_TENANTS_URL = "https://api.playtomic.io/v1/tenants"
|
|||||||
|
|
||||||
THROTTLE_SECONDS = 2
|
THROTTLE_SECONDS = 2
|
||||||
PAGE_SIZE = 100
|
PAGE_SIZE = 100
|
||||||
MAX_PAGES_PER_BBOX = 500 # safety bound — prevents infinite pagination
|
MAX_PAGES = 500 # safety bound — ~50K venues max, well above current ~14K
|
||||||
MAX_STALE_PAGES = 3 # stop after N consecutive pages with zero new results
|
|
||||||
|
|
||||||
# Global padel markets — bounding boxes sized to stay under API's internal result cap.
|
|
||||||
# Large countries (Spain, Italy, USA) are split into sub-regions.
|
|
||||||
BBOXES = [
|
|
||||||
# Spain — south (Andalusia, Murcia, Valencia)
|
|
||||||
{"min_latitude": 35.95, "min_longitude": -9.39, "max_latitude": 39.87, "max_longitude": 4.33},
|
|
||||||
# Spain — north (Madrid, Catalonia, Basque Country)
|
|
||||||
{"min_latitude": 39.87, "min_longitude": -9.39, "max_latitude": 43.79, "max_longitude": 4.33},
|
|
||||||
# UK & Ireland
|
|
||||||
{"min_latitude": 49.90, "min_longitude": -8.62, "max_latitude": 60.85, "max_longitude": 1.77},
|
|
||||||
# Germany
|
|
||||||
{"min_latitude": 47.27, "min_longitude": 5.87, "max_latitude": 55.06, "max_longitude": 15.04},
|
|
||||||
# France
|
|
||||||
{"min_latitude": 41.36, "min_longitude": -5.14, "max_latitude": 51.09, "max_longitude": 9.56},
|
|
||||||
# Italy — south (Rome, Naples, Sicily, Sardinia)
|
|
||||||
{"min_latitude": 36.35, "min_longitude": 6.62, "max_latitude": 42.00, "max_longitude": 18.51},
|
|
||||||
# Italy — north (Milan, Turin, Venice, Bologna)
|
|
||||||
{"min_latitude": 42.00, "min_longitude": 6.62, "max_latitude": 47.09, "max_longitude": 18.51},
|
|
||||||
# Portugal
|
|
||||||
{"min_latitude": 37.00, "min_longitude": -9.50, "max_latitude": 42.15, "max_longitude": -6.19},
|
|
||||||
# Netherlands
|
|
||||||
{"min_latitude": 50.75, "min_longitude": 3.37, "max_latitude": 53.47, "max_longitude": 7.21},
|
|
||||||
# Belgium
|
|
||||||
{"min_latitude": 49.50, "min_longitude": 2.55, "max_latitude": 51.50, "max_longitude": 6.40},
|
|
||||||
# Austria
|
|
||||||
{"min_latitude": 46.37, "min_longitude": 9.53, "max_latitude": 49.02, "max_longitude": 17.16},
|
|
||||||
# Switzerland
|
|
||||||
{"min_latitude": 45.82, "min_longitude": 5.96, "max_latitude": 47.80, "max_longitude": 10.49},
|
|
||||||
# Sweden
|
|
||||||
{"min_latitude": 55.34, "min_longitude": 11.11, "max_latitude": 69.06, "max_longitude": 24.16},
|
|
||||||
# Denmark
|
|
||||||
{"min_latitude": 54.56, "min_longitude": 8.09, "max_latitude": 57.75, "max_longitude": 12.69},
|
|
||||||
# Norway
|
|
||||||
{"min_latitude": 57.97, "min_longitude": 4.50, "max_latitude": 71.19, "max_longitude": 31.17},
|
|
||||||
# Finland
|
|
||||||
{"min_latitude": 59.81, "min_longitude": 20.55, "max_latitude": 70.09, "max_longitude": 31.59},
|
|
||||||
# Mexico
|
|
||||||
{
|
|
||||||
"min_latitude": 14.53,
|
|
||||||
"min_longitude": -118.37,
|
|
||||||
"max_latitude": 32.72,
|
|
||||||
"max_longitude": -86.71,
|
|
||||||
},
|
|
||||||
# Argentina
|
|
||||||
{
|
|
||||||
"min_latitude": -55.06,
|
|
||||||
"min_longitude": -73.56,
|
|
||||||
"max_latitude": -21.78,
|
|
||||||
"max_longitude": -53.63,
|
|
||||||
},
|
|
||||||
# Middle East (UAE, Qatar, Saudi Arabia, Bahrain)
|
|
||||||
{"min_latitude": 21.00, "min_longitude": 38.00, "max_latitude": 32.00, "max_longitude": 56.50},
|
|
||||||
# USA — southwest (California, Arizona, Texas west)
|
|
||||||
{
|
|
||||||
"min_latitude": 24.50,
|
|
||||||
"min_longitude": -125.00,
|
|
||||||
"max_latitude": 37.00,
|
|
||||||
"max_longitude": -100.00,
|
|
||||||
},
|
|
||||||
# USA — southeast (Florida, Texas east, Georgia)
|
|
||||||
{
|
|
||||||
"min_latitude": 24.50,
|
|
||||||
"min_longitude": -100.00,
|
|
||||||
"max_latitude": 37.00,
|
|
||||||
"max_longitude": -66.95,
|
|
||||||
},
|
|
||||||
# USA — northwest
|
|
||||||
{
|
|
||||||
"min_latitude": 37.00,
|
|
||||||
"min_longitude": -125.00,
|
|
||||||
"max_latitude": 49.38,
|
|
||||||
"max_longitude": -100.00,
|
|
||||||
},
|
|
||||||
# USA — northeast (New York, Chicago, Boston)
|
|
||||||
{
|
|
||||||
"min_latitude": 37.00,
|
|
||||||
"min_longitude": -100.00,
|
|
||||||
"max_latitude": 49.38,
|
|
||||||
"max_longitude": -66.95,
|
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def extract(
|
def extract(
|
||||||
@@ -116,7 +41,7 @@ def extract(
|
|||||||
conn: sqlite3.Connection,
|
conn: sqlite3.Connection,
|
||||||
session: niquests.Session,
|
session: niquests.Session,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Fetch all Playtomic venues across target markets. Returns run metrics."""
|
"""Fetch all Playtomic venues via global pagination. Returns run metrics."""
|
||||||
year, month = year_month.split("/")
|
year, month = year_month.split("/")
|
||||||
dest_dir = landing_path(landing_dir, "playtomic", year, month)
|
dest_dir = landing_path(landing_dir, "playtomic", year, month)
|
||||||
dest = dest_dir / "tenants.json.gz"
|
dest = dest_dir / "tenants.json.gz"
|
||||||
@@ -124,61 +49,40 @@ def extract(
|
|||||||
all_tenants: list[dict] = []
|
all_tenants: list[dict] = []
|
||||||
seen_ids: set[str] = set()
|
seen_ids: set[str] = set()
|
||||||
|
|
||||||
for bbox in BBOXES:
|
for page in range(MAX_PAGES):
|
||||||
stale_pages = 0
|
params = {
|
||||||
for page in range(MAX_PAGES_PER_BBOX):
|
"sport_ids": "PADEL",
|
||||||
params = {
|
"size": PAGE_SIZE,
|
||||||
"sport_ids": "PADEL",
|
"page": page,
|
||||||
"min_latitude": bbox["min_latitude"],
|
}
|
||||||
"min_longitude": bbox["min_longitude"],
|
|
||||||
"max_latitude": bbox["max_latitude"],
|
|
||||||
"max_longitude": bbox["max_longitude"],
|
|
||||||
"offset": page * PAGE_SIZE,
|
|
||||||
"size": PAGE_SIZE,
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(
|
logger.info("GET page=%d (total so far: %d)", page, len(all_tenants))
|
||||||
"GET page=%d bbox=(%.1f,%.1f,%.1f,%.1f)",
|
|
||||||
page,
|
|
||||||
bbox["min_latitude"],
|
|
||||||
bbox["min_longitude"],
|
|
||||||
bbox["max_latitude"],
|
|
||||||
bbox["max_longitude"],
|
|
||||||
)
|
|
||||||
|
|
||||||
resp = session.get(PLAYTOMIC_TENANTS_URL, params=params, timeout=HTTP_TIMEOUT_SECONDS)
|
resp = session.get(PLAYTOMIC_TENANTS_URL, params=params, timeout=HTTP_TIMEOUT_SECONDS)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
|
||||||
tenants = resp.json()
|
tenants = resp.json()
|
||||||
assert isinstance(tenants, list), (
|
assert isinstance(tenants, list), (
|
||||||
f"Expected list from Playtomic API, got {type(tenants)}"
|
f"Expected list from Playtomic API, got {type(tenants)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
new_count = 0
|
new_count = 0
|
||||||
for tenant in tenants:
|
for tenant in tenants:
|
||||||
tid = tenant.get("tenant_id") or tenant.get("id")
|
tid = tenant.get("tenant_id") or tenant.get("id")
|
||||||
if tid and tid not in seen_ids:
|
if tid and tid not in seen_ids:
|
||||||
seen_ids.add(tid)
|
seen_ids.add(tid)
|
||||||
all_tenants.append(tenant)
|
all_tenants.append(tenant)
|
||||||
new_count += 1
|
new_count += 1
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"page=%d got=%d new=%d total=%d", page, len(tenants), new_count, len(all_tenants)
|
"page=%d got=%d new=%d total=%d", page, len(tenants), new_count, len(all_tenants)
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(tenants) < PAGE_SIZE:
|
# Last page — fewer than PAGE_SIZE results means we've exhausted the list
|
||||||
break
|
if len(tenants) < PAGE_SIZE:
|
||||||
|
break
|
||||||
|
|
||||||
# API recycles results past its internal limit — stop early
|
time.sleep(THROTTLE_SECONDS)
|
||||||
if new_count == 0:
|
|
||||||
stale_pages += 1
|
|
||||||
if stale_pages >= MAX_STALE_PAGES:
|
|
||||||
logger.info("stopping bbox after %d stale pages", stale_pages)
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
stale_pages = 0
|
|
||||||
|
|
||||||
time.sleep(THROTTLE_SECONDS)
|
|
||||||
|
|
||||||
payload = json.dumps({"tenants": all_tenants, "count": len(all_tenants)}).encode()
|
payload = json.dumps({"tenants": all_tenants, "count": len(all_tenants)}).encode()
|
||||||
bytes_written = write_gzip_atomic(dest, payload)
|
bytes_written = write_gzip_atomic(dest, payload)
|
||||||
|
|||||||
Reference in New Issue
Block a user