diff --git a/extract/padelnomics_extract/src/padelnomics_extract/playtomic_availability.py b/extract/padelnomics_extract/src/padelnomics_extract/playtomic_availability.py index 575a98f..9ca3694 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/playtomic_availability.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/playtomic_availability.py @@ -80,8 +80,10 @@ def _load_tenant_ids(landing_dir: Path) -> list[str]: if not playtomic_dir.exists(): return [] - # Prefer JSONL (new format), fall back to blob (old format) - tenant_files = sorted(playtomic_dir.glob("*/*/tenants.jsonl.gz"), reverse=True) + # Prefer daily partition (YYYY/MM/DD), fall back to older monthly/weekly partitions + tenant_files = sorted(playtomic_dir.glob("*/*/*/tenants.jsonl.gz"), reverse=True) + if not tenant_files: + tenant_files = sorted(playtomic_dir.glob("*/*/tenants.jsonl.gz"), reverse=True) if not tenant_files: tenant_files = sorted(playtomic_dir.glob("*/*/tenants.json.gz"), reverse=True) if not tenant_files: diff --git a/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py b/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py index c4c0d06..e09102b 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py @@ -81,12 +81,11 @@ def extract( partitions and picks the most recent one. """ today = datetime.now(UTC) - year = today.strftime("%G") # ISO year (matches ISO week, differs from calendar year on week boundaries) - week = today.strftime("W%V") # ISO week: W01 … W53 - dest_dir = landing_path(landing_dir, "playtomic", year, week) + year, month, day = today.strftime("%Y"), today.strftime("%m"), today.strftime("%d") + dest_dir = landing_path(landing_dir, "playtomic", year, month, day) dest = dest_dir / "tenants.jsonl.gz" if dest.exists(): - logger.info("Already have tenants for %s/%s — skipping", year, week) + logger.info("Already have tenants for %s/%s/%s — skipping", year, month, day) return {"files_written": 0, "files_skipped": 1, "bytes_written": 0} tiers = load_proxy_tiers() @@ -162,7 +161,7 @@ def extract( "files_written": 1, "files_skipped": 0, "bytes_written": bytes_written, - "cursor_value": f"{year}/{week}", + "cursor_value": f"{year}/{month}/{day}", } diff --git a/infra/supervisor/workflows.toml b/infra/supervisor/workflows.toml index 5f5c43d..3dcf055 100644 --- a/infra/supervisor/workflows.toml +++ b/infra/supervisor/workflows.toml @@ -23,7 +23,7 @@ schedule = "monthly" [playtomic_tenants] module = "padelnomics_extract.playtomic_tenants" -schedule = "weekly" +schedule = "daily" [playtomic_availability] module = "padelnomics_extract.playtomic_availability"