feat(extract): weekly tenant snapshots via ISO week partition
The tenants extractor now partitions by ISO week (e.g. 2026/W09) instead of by month (e.g. 2026/02), so each weekly run writes a fresh file rather than being skipped for the rest of the month.

_load_tenant_ids() in playtomic_availability already globs */*/tenants.jsonl.gz and sorts the matches in reverse order. 'W09' sorts after '02' lexicographically (the letter 'W' compares greater than any digit), so weekly files automatically take priority over old monthly ones.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -25,6 +25,7 @@ import json
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import time
|
import time
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from datetime import UTC, datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import niquests
|
import niquests
|
||||||
@@ -69,17 +70,23 @@ def _fetch_pages_parallel(pages: list[int], next_proxy) -> list[tuple[int, list[
|
|||||||
|
|
||||||
def extract(
|
def extract(
|
||||||
landing_dir: Path,
|
landing_dir: Path,
|
||||||
year_month: str,
|
year_month: str, # noqa: ARG001 — unused; tenants uses ISO week partition instead
|
||||||
conn: sqlite3.Connection,
|
conn: sqlite3.Connection,
|
||||||
session: niquests.Session,
|
session: niquests.Session,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Fetch all Playtomic venues via global pagination. Returns run metrics."""
|
"""Fetch all Playtomic venues via global pagination. Returns run metrics.
|
||||||
year, month = year_month.split("/")
|
|
||||||
dest_dir = landing_path(landing_dir, "playtomic", year, month)
|
Partitioned by ISO week (e.g. 2026/W09) so each weekly run produces a
|
||||||
|
fresh file. _load_tenant_ids() in playtomic_availability globs across all
|
||||||
|
partitions and picks the most recent one.
|
||||||
|
"""
|
||||||
|
today = datetime.now(UTC)
|
||||||
|
year = today.strftime("%G") # ISO year (matches ISO week, differs from calendar year on week boundaries)
|
||||||
|
week = today.strftime("W%V") # ISO week: W01 … W53
|
||||||
|
dest_dir = landing_path(landing_dir, "playtomic", year, week)
|
||||||
dest = dest_dir / "tenants.jsonl.gz"
|
dest = dest_dir / "tenants.jsonl.gz"
|
||||||
old_blob = dest_dir / "tenants.json.gz"
|
if dest.exists():
|
||||||
if dest.exists() or old_blob.exists():
|
logger.info("Already have tenants for %s/%s — skipping", year, week)
|
||||||
logger.info("Already have tenants for %s/%s — skipping", year, month)
|
|
||||||
return {"files_written": 0, "files_skipped": 1, "bytes_written": 0}
|
return {"files_written": 0, "files_skipped": 1, "bytes_written": 0}
|
||||||
|
|
||||||
tiers = load_proxy_tiers()
|
tiers = load_proxy_tiers()
|
||||||
@@ -155,7 +162,7 @@ def extract(
|
|||||||
"files_written": 1,
|
"files_written": 1,
|
||||||
"files_skipped": 0,
|
"files_skipped": 0,
|
||||||
"bytes_written": bytes_written,
|
"bytes_written": bytes_written,
|
||||||
"cursor_value": year_month,
|
"cursor_value": f"{year}/{week}",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user