feat(sqlmesh): add country code macros, apply across models

Task 4/6: Add 5 macros to compress repeated country code patterns:
- @country_name / @country_slug: 20-country CASE in dim_cities, dim_locations
- @normalize_eurostat_country / @normalize_eurostat_nuts: EL→GR, UK→GB
- @infer_country_from_coords: bounding box for 8 markets
Net: +91 lines in macros, -135 lines in models = -44 lines total.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-03-02 07:45:52 +01:00
parent e87a7fc9d6
commit 6774254cb0
8 changed files with 100 additions and 135 deletions

View File

@@ -110,55 +110,9 @@ SELECT
vc.city_slug,
vc.city_name,
-- Human-readable country name for pSEO templates and internal linking
CASE vc.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE vc.country_code
END AS country_name_en,
@country_name(vc.country_code) AS country_name_en,
-- URL-safe country slug
LOWER(REGEXP_REPLACE(
CASE vc.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE vc.country_code
END, '[^a-zA-Z0-9]+', '-'
)) AS country_slug,
@country_slug(vc.country_code) AS country_slug,
vc.centroid_lat AS lat,
vc.centroid_lon AS lon,
-- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames string > GeoNames spatial > 0.

View File

@@ -215,55 +215,9 @@ SELECT
l.geoname_id,
l.country_code,
-- Human-readable country name (consistent with dim_cities)
CASE l.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE l.country_code
END AS country_name_en,
@country_name(l.country_code) AS country_name_en,
-- URL-safe country slug
LOWER(REGEXP_REPLACE(
CASE l.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE l.country_code
END, '[^a-zA-Z0-9]+', '-'
)) AS country_slug,
@country_slug(l.country_code) AS country_slug,
l.location_name,
l.location_slug,
l.lat,

View File

@@ -30,11 +30,7 @@ parsed AS (
)
SELECT
-- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
CASE geo_code
WHEN 'EL' THEN 'GR'
WHEN 'UK' THEN 'GB'
ELSE geo_code
END AS country_code,
@normalize_eurostat_country(geo_code) AS country_code,
ref_year,
median_income_pps,
extracted_date

View File

@@ -28,11 +28,7 @@ WITH raw AS (
SELECT
NUTS_ID AS nuts2_code,
-- Normalise country prefix to ISO 3166-1 alpha-2: EL→GR, UK→GB
CASE CNTR_CODE
WHEN 'EL' THEN 'GR'
WHEN 'UK' THEN 'GB'
ELSE CNTR_CODE
END AS country_code,
@normalize_eurostat_country(CNTR_CODE) AS country_code,
NAME_LATN AS region_name,
geom AS geometry,
-- Pre-compute bounding box for efficient spatial pre-filter in dim_locations.

View File

@@ -48,17 +48,8 @@ deduped AS (
with_country AS (
SELECT
osm_id, lat, lon,
COALESCE(NULLIF(TRIM(UPPER(country_code)), ''), CASE
WHEN lat BETWEEN 47.27 AND 55.06 AND lon BETWEEN 5.87 AND 15.04 THEN 'DE'
WHEN lat BETWEEN 35.95 AND 43.79 AND lon BETWEEN -9.39 AND 4.33 THEN 'ES'
WHEN lat BETWEEN 49.90 AND 60.85 AND lon BETWEEN -8.62 AND 1.77 THEN 'GB'
WHEN lat BETWEEN 41.36 AND 51.09 AND lon BETWEEN -5.14 AND 9.56 THEN 'FR'
WHEN lat BETWEEN 45.46 AND 47.80 AND lon BETWEEN 5.96 AND 10.49 THEN 'CH'
WHEN lat BETWEEN 46.37 AND 49.02 AND lon BETWEEN 9.53 AND 17.16 THEN 'AT'
WHEN lat BETWEEN 36.35 AND 47.09 AND lon BETWEEN 6.62 AND 18.51 THEN 'IT'
WHEN lat BETWEEN 37.00 AND 42.15 AND lon BETWEEN -9.50 AND -6.19 THEN 'PT'
ELSE NULL
END) AS country_code,
COALESCE(NULLIF(TRIM(UPPER(country_code)), ''),
@infer_country_from_coords(lat, lon)) AS country_code,
NULLIF(TRIM(name), '') AS name,
NULLIF(TRIM(city_tag), '') AS city,
postcode, operator_name, opening_hours, fee, extracted_date

View File

@@ -30,11 +30,7 @@ parsed AS (
)
SELECT
-- Normalise to ISO 3166-1 alpha-2 prefix: EL→GR, UK→GB
CASE
WHEN geo_code LIKE 'EL%' THEN 'GR' || SUBSTR(geo_code, 3)
WHEN geo_code LIKE 'UK%' THEN 'GB' || SUBSTR(geo_code, 3)
ELSE geo_code
END AS nuts_code,
@normalize_eurostat_nuts(geo_code) AS nuts_code,
-- NUTS level: 3-char = NUTS-1, 4-char = NUTS-2
LENGTH(geo_code) - 2 AS nuts_level,
ref_year,

View File

@@ -54,17 +54,8 @@ deduped AS (
with_country AS (
SELECT
osm_id, lat, lon,
COALESCE(NULLIF(TRIM(UPPER(country_code)), ''), CASE
WHEN lat BETWEEN 47.27 AND 55.06 AND lon BETWEEN 5.87 AND 15.04 THEN 'DE'
WHEN lat BETWEEN 35.95 AND 43.79 AND lon BETWEEN -9.39 AND 4.33 THEN 'ES'
WHEN lat BETWEEN 49.90 AND 60.85 AND lon BETWEEN -8.62 AND 1.77 THEN 'GB'
WHEN lat BETWEEN 41.36 AND 51.09 AND lon BETWEEN -5.14 AND 9.56 THEN 'FR'
WHEN lat BETWEEN 45.46 AND 47.80 AND lon BETWEEN 5.96 AND 10.49 THEN 'CH'
WHEN lat BETWEEN 46.37 AND 49.02 AND lon BETWEEN 9.53 AND 17.16 THEN 'AT'
WHEN lat BETWEEN 36.35 AND 47.09 AND lon BETWEEN 6.62 AND 18.51 THEN 'IT'
WHEN lat BETWEEN 37.00 AND 42.15 AND lon BETWEEN -9.50 AND -6.19 THEN 'PT'
ELSE NULL
END) AS country_code,
COALESCE(NULLIF(TRIM(UPPER(country_code)), ''),
@infer_country_from_coords(lat, lon)) AS country_code,
NULLIF(TRIM(name), '') AS name,
NULLIF(TRIM(city_tag), '') AS city,
extracted_date