From ad48f23cfc80386d0452d40546f2df06b589b00c Mon Sep 17 00:00:00 2001 From: Deeman Date: Tue, 24 Feb 2026 20:42:11 +0100 Subject: [PATCH] fix: add precondition assertions in extract pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assert landing_dir.is_dir() and year_month format (YYYY/MM) at the entry point of each extract function — turning silent wrong-path bugs into immediate AssertionError with a descriptive message. Files changed: - playtomic_availability.py: assert in _load_tenant_ids(), extract(), extract_recheck() - eurostat.py: assert in extract() Co-Authored-By: Claude Opus 4.6 --- .../padelnomics_extract/src/padelnomics_extract/eurostat.py | 2 ++ .../src/padelnomics_extract/playtomic_availability.py | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/extract/padelnomics_extract/src/padelnomics_extract/eurostat.py b/extract/padelnomics_extract/src/padelnomics_extract/eurostat.py index 6e45cd8..c7dcfe0 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/eurostat.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/eurostat.py @@ -180,6 +180,8 @@ def extract( session: niquests.Session, ) -> dict: """Fetch all Eurostat datasets. Returns run metrics.""" + assert landing_dir.is_dir(), f"landing_dir must exist: {landing_dir}" + assert "/" in year_month and len(year_month) == 7, f"year_month must be YYYY/MM: {year_month!r}" year, month = year_month.split("/") files_written = 0 files_skipped = 0 diff --git a/extract/padelnomics_extract/src/padelnomics_extract/playtomic_availability.py b/extract/padelnomics_extract/src/padelnomics_extract/playtomic_availability.py index 4c73b80..cb1e41c 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/playtomic_availability.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/playtomic_availability.py @@ -59,6 +59,7 @@ _thread_local = threading.local() def _load_tenant_ids(landing_dir: Path) -> list[str]: """Read tenant IDs from the most recent tenants.json.gz file.""" + assert landing_dir.is_dir(), f"landing_dir must exist: {landing_dir}" playtomic_dir = landing_dir / "playtomic" if not playtomic_dir.exists(): return [] @@ -243,6 +244,8 @@ def extract( session: niquests.Session, ) -> dict: """Fetch next-day availability for all known Playtomic venues.""" + assert landing_dir.is_dir(), f"landing_dir must exist: {landing_dir}" + assert "/" in year_month and len(year_month) == 7, f"year_month must be YYYY/MM: {year_month!r}" tenant_ids = _load_tenant_ids(landing_dir) if not tenant_ids: logger.warning("No tenant IDs found — run extract-playtomic-tenants first") @@ -385,6 +388,8 @@ def extract_recheck( session: niquests.Session, ) -> dict: """Re-query venues with slots starting soon for accurate occupancy data.""" + assert landing_dir.is_dir(), f"landing_dir must exist: {landing_dir}" + assert "/" in year_month and len(year_month) == 7, f"year_month must be YYYY/MM: {year_month!r}" now = datetime.now(UTC) target_date = now.strftime("%Y-%m-%d")