From 85b6aa0d0a452b40f2d370741b81b13dc8c0eeee Mon Sep 17 00:00:00 2001 From: Deeman Date: Sat, 28 Feb 2026 18:50:51 +0100 Subject: [PATCH] fix(seeds): update init_landing_seeds.py to write JSONL format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Old script wrote blob json.gz seeds; staging models now only read jsonl.gz. Seeds are empty JSONL gzip files — zero rows, satisfies DuckDB file-not-found check. Co-Authored-By: Claude Sonnet 4.6 --- web/scripts/init_landing_seeds.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/web/scripts/init_landing_seeds.py b/web/scripts/init_landing_seeds.py index aeca9dd..e9c4ba7 100644 --- a/web/scripts/init_landing_seeds.py +++ b/web/scripts/init_landing_seeds.py @@ -1,22 +1,19 @@ -"""Create minimal seed files for SQLMesh staging models that require landing data.""" +"""Create minimal seed files for SQLMesh staging models that require landing data. + +Seeds are empty JSONL gzip files — they satisfy DuckDB's file-not-found check +while contributing zero rows to the staging models. +""" import gzip -import json from pathlib import Path -seed = { - "date": "1970-01-01", - "captured_at_utc": "1970-01-01T00:00:00Z", - "venue_count": 0, - "venues_errored": 0, - "venues": [], -} -morning = Path("data/landing/playtomic/1970/01/availability_1970-01-01.json.gz") -recheck = Path("data/landing/playtomic/1970/01/availability_1970-01-01_recheck_00.json.gz") +# stg_playtomic_availability requires at least one morning and one recheck file +morning = Path("data/landing/playtomic/1970/01/availability_1970-01-01.jsonl.gz") +recheck = Path("data/landing/playtomic/1970/01/availability_1970-01-01_recheck_00.jsonl.gz") morning.parent.mkdir(parents=True, exist_ok=True) for p in [morning, recheck]: if not p.exists(): - with gzip.open(p, "wt") as f: - json.dump(seed, f) + with gzip.open(p, "wb") as f: + pass # empty JSONL — 0 rows, no error print("created", p) else: print("exists ", p)