feat(sqlmesh): add country code macros, apply across models

Task 4/6: Add 5 macros to compress repeated country code patterns:
- @country_name / @country_slug: 20-country CASE in dim_cities, dim_locations
- @normalize_eurostat_country / @normalize_eurostat_nuts: EL→GR, UK→GB
- @infer_country_from_coords: bounding box for 8 markets
Net: +91 lines in macros, -135 lines in models = -44 lines total.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-03-02 07:45:52 +01:00
parent e87a7fc9d6
commit 6774254cb0
8 changed files with 100 additions and 135 deletions

View File

@@ -16,5 +16,92 @@ def padelnomics_glob(evaluator) -> str:
return f"'{landing_dir}/padelnomics/**/*.csv.gz'"
# Add one macro per landing zone subdirectory you create.
# Pattern: def {source}_glob(evaluator) → f"'{landing_dir}/{source}/**/*.csv.gz'"
# ── Country code helpers ─────────────────────────────────────────────────────
# Shared lookup used by dim_cities and dim_locations.
_COUNTRY_NAMES = {
"DE": "Germany", "ES": "Spain", "GB": "United Kingdom",
"FR": "France", "IT": "Italy", "PT": "Portugal",
"AT": "Austria", "CH": "Switzerland", "NL": "Netherlands",
"BE": "Belgium", "SE": "Sweden", "NO": "Norway",
"DK": "Denmark", "FI": "Finland", "US": "United States",
"AR": "Argentina", "MX": "Mexico", "AE": "UAE",
"AU": "Australia", "IE": "Ireland",
}
def _country_case(col: str) -> str:
"""Build a CASE expression mapping ISO 3166-1 alpha-2 → English name."""
whens = "\n ".join(
f"WHEN '{code}' THEN '{name}'" for code, name in _COUNTRY_NAMES.items()
)
return f"CASE {col}\n {whens}\n ELSE {col}\n END"
@macro()
def country_name(evaluator, code_col) -> str:
"""CASE expression: country code → English name.
Usage in SQL: @country_name(vc.country_code) AS country_name_en
"""
return _country_case(str(code_col))
@macro()
def country_slug(evaluator, code_col) -> str:
"""CASE expression: country code → URL-safe slug (lowercased, spaces → dashes).
Usage in SQL: @country_slug(vc.country_code) AS country_slug
"""
return f"LOWER(REGEXP_REPLACE({_country_case(str(code_col))}, '[^a-zA-Z0-9]+', '-'))"
@macro()
def normalize_eurostat_country(evaluator, code_col) -> str:
"""Normalize Eurostat country codes to ISO 3166-1 alpha-2: EL→GR, UK→GB.
Usage in SQL: @normalize_eurostat_country(geo_code) AS country_code
"""
col = str(code_col)
return f"CASE {col} WHEN 'EL' THEN 'GR' WHEN 'UK' THEN 'GB' ELSE {col} END"
@macro()
def normalize_eurostat_nuts(evaluator, code_col) -> str:
"""Normalize NUTS code prefix: EL→GR, UK→GB, preserving the suffix.
Usage in SQL: @normalize_eurostat_nuts(geo_code) AS nuts_code
"""
col = str(code_col)
return (
f"CASE"
f" WHEN {col} LIKE 'EL%' THEN 'GR' || SUBSTR({col}, 3)"
f" WHEN {col} LIKE 'UK%' THEN 'GB' || SUBSTR({col}, 3)"
f" ELSE {col}"
f" END"
)
@macro()
def infer_country_from_coords(evaluator, lat_col, lon_col) -> str:
"""Infer ISO country code from lat/lon using bounding boxes for 8 European markets.
Usage in SQL:
COALESCE(NULLIF(TRIM(UPPER(country_code)), ''),
@infer_country_from_coords(lat, lon)) AS country_code
"""
lat = str(lat_col)
lon = str(lon_col)
return (
f"CASE"
f" WHEN {lat} BETWEEN 47.27 AND 55.06 AND {lon} BETWEEN 5.87 AND 15.04 THEN 'DE'"
f" WHEN {lat} BETWEEN 35.95 AND 43.79 AND {lon} BETWEEN -9.39 AND 4.33 THEN 'ES'"
f" WHEN {lat} BETWEEN 49.90 AND 60.85 AND {lon} BETWEEN -8.62 AND 1.77 THEN 'GB'"
f" WHEN {lat} BETWEEN 41.36 AND 51.09 AND {lon} BETWEEN -5.14 AND 9.56 THEN 'FR'"
f" WHEN {lat} BETWEEN 45.46 AND 47.80 AND {lon} BETWEEN 5.96 AND 10.49 THEN 'CH'"
f" WHEN {lat} BETWEEN 46.37 AND 49.02 AND {lon} BETWEEN 9.53 AND 17.16 THEN 'AT'"
f" WHEN {lat} BETWEEN 36.35 AND 47.09 AND {lon} BETWEEN 6.62 AND 18.51 THEN 'IT'"
f" WHEN {lat} BETWEEN 37.00 AND 42.15 AND {lon} BETWEEN -9.50 AND -6.19 THEN 'PT'"
f" ELSE NULL"
f" END"
)

View File

@@ -110,55 +110,9 @@ SELECT
vc.city_slug,
vc.city_name,
-- Human-readable country name for pSEO templates and internal linking
CASE vc.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE vc.country_code
END AS country_name_en,
@country_name(vc.country_code) AS country_name_en,
-- URL-safe country slug
LOWER(REGEXP_REPLACE(
CASE vc.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE vc.country_code
END, '[^a-zA-Z0-9]+', '-'
)) AS country_slug,
@country_slug(vc.country_code) AS country_slug,
vc.centroid_lat AS lat,
vc.centroid_lon AS lon,
-- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames string > GeoNames spatial > 0.

View File

@@ -215,55 +215,9 @@ SELECT
l.geoname_id,
l.country_code,
-- Human-readable country name (consistent with dim_cities)
CASE l.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE l.country_code
END AS country_name_en,
@country_name(l.country_code) AS country_name_en,
-- URL-safe country slug
LOWER(REGEXP_REPLACE(
CASE l.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE l.country_code
END, '[^a-zA-Z0-9]+', '-'
)) AS country_slug,
@country_slug(l.country_code) AS country_slug,
l.location_name,
l.location_slug,
l.lat,

View File

@@ -30,11 +30,7 @@ parsed AS (
)
SELECT
-- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
CASE geo_code
WHEN 'EL' THEN 'GR'
WHEN 'UK' THEN 'GB'
ELSE geo_code
END AS country_code,
@normalize_eurostat_country(geo_code) AS country_code,
ref_year,
median_income_pps,
extracted_date

View File

@@ -28,11 +28,7 @@ WITH raw AS (
SELECT
NUTS_ID AS nuts2_code,
-- Normalise country prefix to ISO 3166-1 alpha-2: EL→GR, UK→GB
CASE CNTR_CODE
WHEN 'EL' THEN 'GR'
WHEN 'UK' THEN 'GB'
ELSE CNTR_CODE
END AS country_code,
@normalize_eurostat_country(CNTR_CODE) AS country_code,
NAME_LATN AS region_name,
geom AS geometry,
-- Pre-compute bounding box for efficient spatial pre-filter in dim_locations.

View File

@@ -48,17 +48,8 @@ deduped AS (
with_country AS (
SELECT
osm_id, lat, lon,
COALESCE(NULLIF(TRIM(UPPER(country_code)), ''), CASE
WHEN lat BETWEEN 47.27 AND 55.06 AND lon BETWEEN 5.87 AND 15.04 THEN 'DE'
WHEN lat BETWEEN 35.95 AND 43.79 AND lon BETWEEN -9.39 AND 4.33 THEN 'ES'
WHEN lat BETWEEN 49.90 AND 60.85 AND lon BETWEEN -8.62 AND 1.77 THEN 'GB'
WHEN lat BETWEEN 41.36 AND 51.09 AND lon BETWEEN -5.14 AND 9.56 THEN 'FR'
WHEN lat BETWEEN 45.46 AND 47.80 AND lon BETWEEN 5.96 AND 10.49 THEN 'CH'
WHEN lat BETWEEN 46.37 AND 49.02 AND lon BETWEEN 9.53 AND 17.16 THEN 'AT'
WHEN lat BETWEEN 36.35 AND 47.09 AND lon BETWEEN 6.62 AND 18.51 THEN 'IT'
WHEN lat BETWEEN 37.00 AND 42.15 AND lon BETWEEN -9.50 AND -6.19 THEN 'PT'
ELSE NULL
END) AS country_code,
COALESCE(NULLIF(TRIM(UPPER(country_code)), ''),
@infer_country_from_coords(lat, lon)) AS country_code,
NULLIF(TRIM(name), '') AS name,
NULLIF(TRIM(city_tag), '') AS city,
postcode, operator_name, opening_hours, fee, extracted_date

View File

@@ -30,11 +30,7 @@ parsed AS (
)
SELECT
-- Normalise to ISO 3166-1 alpha-2 prefix: EL→GR, UK→GB
CASE
WHEN geo_code LIKE 'EL%' THEN 'GR' || SUBSTR(geo_code, 3)
WHEN geo_code LIKE 'UK%' THEN 'GB' || SUBSTR(geo_code, 3)
ELSE geo_code
END AS nuts_code,
@normalize_eurostat_nuts(geo_code) AS nuts_code,
-- NUTS level: 3-char = NUTS-1, 4-char = NUTS-2
LENGTH(geo_code) - 2 AS nuts_level,
ref_year,

View File

@@ -54,17 +54,8 @@ deduped AS (
with_country AS (
SELECT
osm_id, lat, lon,
COALESCE(NULLIF(TRIM(UPPER(country_code)), ''), CASE
WHEN lat BETWEEN 47.27 AND 55.06 AND lon BETWEEN 5.87 AND 15.04 THEN 'DE'
WHEN lat BETWEEN 35.95 AND 43.79 AND lon BETWEEN -9.39 AND 4.33 THEN 'ES'
WHEN lat BETWEEN 49.90 AND 60.85 AND lon BETWEEN -8.62 AND 1.77 THEN 'GB'
WHEN lat BETWEEN 41.36 AND 51.09 AND lon BETWEEN -5.14 AND 9.56 THEN 'FR'
WHEN lat BETWEEN 45.46 AND 47.80 AND lon BETWEEN 5.96 AND 10.49 THEN 'CH'
WHEN lat BETWEEN 46.37 AND 49.02 AND lon BETWEEN 9.53 AND 17.16 THEN 'AT'
WHEN lat BETWEEN 36.35 AND 47.09 AND lon BETWEEN 6.62 AND 18.51 THEN 'IT'
WHEN lat BETWEEN 37.00 AND 42.15 AND lon BETWEEN -9.50 AND -6.19 THEN 'PT'
ELSE NULL
END) AS country_code,
COALESCE(NULLIF(TRIM(UPPER(country_code)), ''),
@infer_country_from_coords(lat, lon)) AS country_code,
NULLIF(TRIM(name), '') AS name,
NULLIF(TRIM(city_tag), '') AS city,
extracted_date