fix(supervisor): use version-sorted tag list for current_deployed_tag

`git describe --exact-match` returns the first tag alphabetically when multiple tags point to the same commit. This caused an infinite redeploy loop when Gitea CI created a sequential tag (`v11`) on the same commit as our date-based tag (`v202602281745`): `v11` < `v202602281745` alphabetically, but the deploy check uses version sort, where `v202602281745` > `v11`.

Fix: use `git tag --points-at HEAD --sort=-version:refname` to pick the highest-version tag at HEAD, matching the sort order of `latest_remote_tag()`.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
fix(seeds): update init_landing_seeds.py to write JSONL format
2026-02-28 20:55:44 +01:00 · 2026-02-28 18:50:51 +01:00 · 2026-02-28 18:40:15 +01:00
3 changed files with 41 additions and 87 deletions
--- a/src/padelnomics/supervisor.py
+++ b/src/padelnomics/supervisor.py
@@ -279,12 +279,18 @@ def web_code_changed() -> bool:
 def current_deployed_tag() -> str | None:
-    """Return the tag currently checked out, or None if not on a tag."""
+    """Return the highest-version tag pointing at HEAD, or None.
    Uses the same sort order as latest_remote_tag() so that when multiple
    tags point to the same commit (e.g. a date-based tag and a CI integer
    tag), we always compare apples-to-apples.
    """
    result = subprocess.run(
-        ["git", "describe", "--tags", "--exact-match", "HEAD"],
+        ["git", "tag", "--list", "--sort=-version:refname", "--points-at", "HEAD", "v*"],
        capture_output=True, text=True, timeout=10,
    )
-    return result.stdout.strip() or None
+    tags = result.stdout.strip().splitlines()
    return tags[0] if tags else None
 def latest_remote_tag() -> str | None:
--- a/transform/sqlmesh_padelnomics/models/staging/stg_population_geonames.sql
+++ b/transform/sqlmesh_padelnomics/models/staging/stg_population_geonames.sql
@@ -3,11 +3,7 @@
 -- Broad coverage (140K+ locations) enables Gemeinde-level market intelligence.
 -- One row per geoname_id (GeoNames stable numeric identifier).
 --
-- Supports two landing formats (UNION ALL during migration):
+-- Source: data/landing/geonames/{year}/{month}/cities_global.jsonl.gz
 --   New: cities_global.jsonl.gz — one city per line, columns directly accessible
 --   Old: cities_global.json.gz  — {"rows": [...]} blob (UNNEST required)
 --
 -- Source: data/landing/geonames/{year}/{month}/cities_global.{jsonl,json}.gz
 MODEL (
  name staging.stg_population_geonames,
@@ -16,74 +12,29 @@ MODEL (
  grain geoname_id
 );
 WITH
 -- New format: one city per JSONL line
 jsonl_rows AS (
  SELECT
    TRY_CAST(geoname_id  AS INTEGER)  AS geoname_id,
    city_name,
    country_code,
    TRY_CAST(lat         AS DOUBLE)   AS lat,
    TRY_CAST(lon         AS DOUBLE)   AS lon,
    admin1_code,
    admin2_code,
    TRY_CAST(population  AS BIGINT)   AS population,
    TRY_CAST(ref_year    AS INTEGER)  AS ref_year,
    CURRENT_DATE                      AS extracted_date
  FROM read_json(
    @LANDING_DIR || '/geonames/*/*/cities_global.jsonl.gz',
    format = 'newline_delimited',
    columns = {
      geoname_id: 'INTEGER', city_name: 'VARCHAR', country_code: 'VARCHAR',
      lat: 'DOUBLE', lon: 'DOUBLE', admin1_code: 'VARCHAR', admin2_code: 'VARCHAR',
      population: 'BIGINT', ref_year: 'INTEGER'
    }
  )
  WHERE geoname_id IS NOT NULL
 ),
 -- Old format: {"rows": [...]} blob — kept for transition
 blob_rows AS (
  SELECT
    TRY_CAST(row ->> 'geoname_id'  AS INTEGER)  AS geoname_id,
    row ->> 'city_name'                          AS city_name,
    row ->> 'country_code'                       AS country_code,
    TRY_CAST(row ->> 'lat'         AS DOUBLE)    AS lat,
    TRY_CAST(row ->> 'lon'         AS DOUBLE)    AS lon,
    row ->> 'admin1_code'                        AS admin1_code,
    row ->> 'admin2_code'                        AS admin2_code,
    TRY_CAST(row ->> 'population'  AS BIGINT)    AS population,
    TRY_CAST(row ->> 'ref_year'    AS INTEGER)   AS ref_year,
    CURRENT_DATE                                 AS extracted_date
  FROM (
    SELECT UNNEST(rows) AS row
    FROM read_json(
      @LANDING_DIR || '/geonames/*/*/cities_global.json.gz',
      auto_detect = true,
      maximum_object_size = 40000000
    )
  )
  WHERE (row ->> 'geoname_id') IS NOT NULL
 ),
 all_rows AS (
  SELECT * FROM jsonl_rows
  UNION ALL
  SELECT * FROM blob_rows
 )
 SELECT
-  geoname_id,
+  TRY_CAST(geoname_id  AS INTEGER)  AS geoname_id,
-  TRIM(city_name)                AS city_name,
+  TRIM(city_name)                   AS city_name,
-  UPPER(country_code)            AS country_code,
+  UPPER(country_code)               AS country_code,
-  lat,
+  TRY_CAST(lat         AS DOUBLE)   AS lat,
-  lon,
+  TRY_CAST(lon         AS DOUBLE)   AS lon,
-  NULLIF(TRIM(admin1_code), '')  AS admin1_code,
+  NULLIF(TRIM(admin1_code), '')     AS admin1_code,
-  NULLIF(TRIM(admin2_code), '')  AS admin2_code,
+  NULLIF(TRIM(admin2_code), '')     AS admin2_code,
-  population,
+  TRY_CAST(population  AS BIGINT)   AS population,
-  ref_year,
+  TRY_CAST(ref_year    AS INTEGER)  AS ref_year,
-  extracted_date
+  CURRENT_DATE                      AS extracted_date
-FROM all_rows
+FROM read_json(
-WHERE population IS NOT NULL
+  @LANDING_DIR || '/geonames/*/*/cities_global.jsonl.gz',
  format = 'newline_delimited',
  columns = {
    geoname_id: 'INTEGER', city_name: 'VARCHAR', country_code: 'VARCHAR',
    lat: 'DOUBLE', lon: 'DOUBLE', admin1_code: 'VARCHAR', admin2_code: 'VARCHAR',
    population: 'BIGINT', ref_year: 'INTEGER'
  }
 )
 WHERE geoname_id IS NOT NULL
  AND population IS NOT NULL
  AND population > 0
  AND geoname_id IS NOT NULL
  AND city_name IS NOT NULL
  AND lat IS NOT NULL
  AND lon IS NOT NULL
--- a/web/scripts/init_landing_seeds.py
+++ b/web/scripts/init_landing_seeds.py
@@ -1,22 +1,19 @@
-"""Create minimal seed files for SQLMesh staging models that require landing data."""
+"""Create minimal seed files for SQLMesh staging models that require landing data.
 Seeds are empty JSONL gzip files — they satisfy DuckDB's file-not-found check
 while contributing zero rows to the staging models.
 """
 import gzip
 import json
 from pathlib import Path
-seed = {
+# stg_playtomic_availability requires at least one morning and one recheck file
-    "date": "1970-01-01",
+morning = Path("data/landing/playtomic/1970/01/availability_1970-01-01.jsonl.gz")
-    "captured_at_utc": "1970-01-01T00:00:00Z",
+recheck = Path("data/landing/playtomic/1970/01/availability_1970-01-01_recheck_00.jsonl.gz")
    "venue_count": 0,
    "venues_errored": 0,
    "venues": [],
 }
 morning = Path("data/landing/playtomic/1970/01/availability_1970-01-01.json.gz")
 recheck = Path("data/landing/playtomic/1970/01/availability_1970-01-01_recheck_00.json.gz")
 morning.parent.mkdir(parents=True, exist_ok=True)
 for p in [morning, recheck]:
    if not p.exists():
-        with gzip.open(p, "wt") as f:
+        with gzip.open(p, "wb") as f:
-            json.dump(seed, f)
+            pass  # empty JSONL — 0 rows, no error
        print("created", p)
    else:
        print("exists ", p)