feat(sqlmesh): add country code macros, apply across models

Task 4/6: Add 5 macros to compress repeated country code patterns:
- @country_name / @country_slug: 20-country CASE in dim_cities, dim_locations
- @normalize_eurostat_country / @normalize_eurostat_nuts: EL→GR, UK→GB
- @infer_country_from_coords: bounding box for 8 markets
Net: +91 lines in macros, -135 lines in models = -44 lines total.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-03-02 07:45:52 +01:00
parent e87a7fc9d6
commit 6774254cb0
8 changed files with 100 additions and 135 deletions

View File

@@ -16,5 +16,92 @@ def padelnomics_glob(evaluator) -> str:
return f"'{landing_dir}/padelnomics/**/*.csv.gz'"
# Add one macro per landing zone subdirectory you create.
# Pattern: def {source}_glob(evaluator) → f"'{landing_dir}/{source}/**/*.csv.gz'"
# ── Country code helpers ─────────────────────────────────────────────────────
# Shared lookup used by dim_cities and dim_locations.
_COUNTRY_NAMES = {
"DE": "Germany", "ES": "Spain", "GB": "United Kingdom",
"FR": "France", "IT": "Italy", "PT": "Portugal",
"AT": "Austria", "CH": "Switzerland", "NL": "Netherlands",
"BE": "Belgium", "SE": "Sweden", "NO": "Norway",
"DK": "Denmark", "FI": "Finland", "US": "United States",
"AR": "Argentina", "MX": "Mexico", "AE": "UAE",
"AU": "Australia", "IE": "Ireland",
}
def _country_case(col: str) -> str:
"""Build a CASE expression mapping ISO 3166-1 alpha-2 → English name."""
whens = "\n ".join(
f"WHEN '{code}' THEN '{name}'" for code, name in _COUNTRY_NAMES.items()
)
return f"CASE {col}\n {whens}\n ELSE {col}\n END"
@macro()
def country_name(evaluator, code_col) -> str:
"""CASE expression: country code → English name.
Usage in SQL: @country_name(vc.country_code) AS country_name_en
"""
return _country_case(str(code_col))
@macro()
def country_slug(evaluator, code_col) -> str:
"""CASE expression: country code → URL-safe slug (lowercased, spaces → dashes).
Usage in SQL: @country_slug(vc.country_code) AS country_slug
"""
return f"LOWER(REGEXP_REPLACE({_country_case(str(code_col))}, '[^a-zA-Z0-9]+', '-'))"
@macro()
def normalize_eurostat_country(evaluator, code_col) -> str:
"""Normalize Eurostat country codes to ISO 3166-1 alpha-2: EL→GR, UK→GB.
Usage in SQL: @normalize_eurostat_country(geo_code) AS country_code
"""
col = str(code_col)
return f"CASE {col} WHEN 'EL' THEN 'GR' WHEN 'UK' THEN 'GB' ELSE {col} END"
@macro()
def normalize_eurostat_nuts(evaluator, code_col) -> str:
"""Normalize NUTS code prefix: EL→GR, UK→GB, preserving the suffix.
Usage in SQL: @normalize_eurostat_nuts(geo_code) AS nuts_code
"""
col = str(code_col)
return (
f"CASE"
f" WHEN {col} LIKE 'EL%' THEN 'GR' || SUBSTR({col}, 3)"
f" WHEN {col} LIKE 'UK%' THEN 'GB' || SUBSTR({col}, 3)"
f" ELSE {col}"
f" END"
)
@macro()
def infer_country_from_coords(evaluator, lat_col, lon_col) -> str:
"""Infer ISO country code from lat/lon using bounding boxes for 8 European markets.
Usage in SQL:
COALESCE(NULLIF(TRIM(UPPER(country_code)), ''),
@infer_country_from_coords(lat, lon)) AS country_code
"""
lat = str(lat_col)
lon = str(lon_col)
return (
f"CASE"
f" WHEN {lat} BETWEEN 47.27 AND 55.06 AND {lon} BETWEEN 5.87 AND 15.04 THEN 'DE'"
f" WHEN {lat} BETWEEN 35.95 AND 43.79 AND {lon} BETWEEN -9.39 AND 4.33 THEN 'ES'"
f" WHEN {lat} BETWEEN 49.90 AND 60.85 AND {lon} BETWEEN -8.62 AND 1.77 THEN 'GB'"
f" WHEN {lat} BETWEEN 41.36 AND 51.09 AND {lon} BETWEEN -5.14 AND 9.56 THEN 'FR'"
f" WHEN {lat} BETWEEN 45.46 AND 47.80 AND {lon} BETWEEN 5.96 AND 10.49 THEN 'CH'"
f" WHEN {lat} BETWEEN 46.37 AND 49.02 AND {lon} BETWEEN 9.53 AND 17.16 THEN 'AT'"
f" WHEN {lat} BETWEEN 36.35 AND 47.09 AND {lon} BETWEEN 6.62 AND 18.51 THEN 'IT'"
f" WHEN {lat} BETWEEN 37.00 AND 42.15 AND {lon} BETWEEN -9.50 AND -6.19 THEN 'PT'"
f" ELSE NULL"
f" END"
)