feat(extract): weekly tenant snapshots via ISO week partition

Tenants extractor now partitions by ISO week (e.g. 2026/W09) instead of by month (e.g. 2026/02), so each weekly run writes a fresh file rather than being skipped for the rest of the month.

_load_tenant_ids() in playtomic_availability already globs */*/tenants.jsonl.gz and sorts the matches in reverse order. 'W09' > '02' lexicographically ('W' sorts after every digit), so within the same year directory weekly files automatically take priority over the old monthly ones.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-28 17:19:19 +01:00
parent adf22924f6
commit 9116625884
1 changed file with 15 additions and 8 deletions
--- a/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py
+++ b/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py
@@ -25,6 +25,7 @@ import json
 import sqlite3
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import UTC, datetime
 from pathlib import Path

 import niquests
@@ -69,17 +70,23 @@ def _fetch_pages_parallel(pages: list[int], next_proxy) -> list[tuple[int, list[

 def extract(
    landing_dir: Path,
-    year_month: str,
+    year_month: str,  # noqa: ARG001 — unused; tenants uses ISO week partition instead
    conn: sqlite3.Connection,
    session: niquests.Session,
 ) -> dict:
-    """Fetch all Playtomic venues via global pagination. Returns run metrics."""
-    year, month = year_month.split("/")
-    dest_dir = landing_path(landing_dir, "playtomic", year, month)
+    """Fetch all Playtomic venues via global pagination. Returns run metrics.
+
+    Partitioned by ISO week (e.g. 2026/W09) so each weekly run produces a
+    fresh file. _load_tenant_ids() in playtomic_availability globs across all
+    partitions and picks the most recent one.
+    """
+    today = datetime.now(UTC)
+    year = today.strftime("%G")   # ISO year (matches ISO week, differs from calendar year on week boundaries)
+    week = today.strftime("W%V")  # ISO week: W01 … W53
+    dest_dir = landing_path(landing_dir, "playtomic", year, week)
    dest = dest_dir / "tenants.jsonl.gz"
-    old_blob = dest_dir / "tenants.json.gz"
-    if dest.exists() or old_blob.exists():
-        logger.info("Already have tenants for %s/%s — skipping", year, month)
+    if dest.exists():
+        logger.info("Already have tenants for %s/%s — skipping", year, week)
        return {"files_written": 0, "files_skipped": 1, "bytes_written": 0}

    tiers = load_proxy_tiers()
@@ -155,7 +162,7 @@ def extract(
        "files_written": 1,
        "files_skipped": 0,
        "bytes_written": bytes_written,
-        "cursor_value": year_month,
+        "cursor_value": f"{year}/{week}",
    }