-- City dimension: canonical city records with venue count and country metadata. -- Built from venue locations (dim_venues) as the primary source — padelnomics -- tracks cities where padel venues actually exist, not an administrative city list. -- -- Conformed dimension: used by city_market_profile and all pSEO serving models. -- Integrates two sources: -- dim_venues → city list, venue count, coordinates (Playtomic + OSM) -- stg_income → country-level median income (Eurostat) -- -- Population note: Eurostat uses coded identifiers (e.g. DE001C = Berlin) with no -- city name column in the dataset we extract. City-level population requires a -- separate code→name lookup extract (future improvement). Population is set to 0 -- until that source is available; market_score degrades gracefully. -- -- Grain: (country_code, city_slug) — two cities in different countries can share a -- city name. QUALIFY enforces no duplicate (country_code, city_slug) pairs. MODEL ( name foundation.dim_cities, kind FULL, cron '@daily', grain (country_code, city_slug) ); WITH -- Primary: distinct cities from dim_venues (canonical padel city list) venue_cities AS ( SELECT country_code, city AS city_name, -- Lowercase before regex so uppercase letters aren't stripped to '-' LOWER(REGEXP_REPLACE(LOWER(city), '[^a-z0-9]+', '-')) AS city_slug, COUNT(*) AS padel_venue_count, AVG(lat) AS centroid_lat, AVG(lon) AS centroid_lon FROM foundation.dim_venues WHERE city IS NOT NULL AND LENGTH(city) > 0 GROUP BY country_code, city ), -- Latest country income per country country_income AS ( SELECT country_code, median_income_pps, ref_year AS income_year FROM staging.stg_income QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1 ) SELECT vc.country_code, vc.city_slug, vc.city_name, -- Human-readable country name for pSEO templates and internal linking CASE vc.country_code WHEN 'DE' THEN 'Germany' WHEN 'ES' THEN 'Spain' WHEN 'GB' THEN 'United Kingdom' WHEN 'FR' THEN 'France' WHEN 'IT' THEN 'Italy' WHEN 'PT' THEN 'Portugal' WHEN 'AT' THEN 'Austria' WHEN 'CH' THEN 'Switzerland' WHEN 'NL' THEN 'Netherlands' WHEN 'BE' THEN 'Belgium' WHEN 'SE' THEN 'Sweden' WHEN 'NO' THEN 'Norway' WHEN 'DK' THEN 'Denmark' WHEN 'FI' THEN 'Finland' WHEN 'US' THEN 'United States' WHEN 'AR' THEN 'Argentina' WHEN 'MX' THEN 'Mexico' WHEN 'AE' THEN 'UAE' WHEN 'AU' THEN 'Australia' WHEN 'IE' THEN 'Ireland' ELSE vc.country_code END AS country_name_en, -- URL-safe country slug LOWER(REGEXP_REPLACE( CASE vc.country_code WHEN 'DE' THEN 'Germany' WHEN 'ES' THEN 'Spain' WHEN 'GB' THEN 'United Kingdom' WHEN 'FR' THEN 'France' WHEN 'IT' THEN 'Italy' WHEN 'PT' THEN 'Portugal' WHEN 'AT' THEN 'Austria' WHEN 'CH' THEN 'Switzerland' WHEN 'NL' THEN 'Netherlands' WHEN 'BE' THEN 'Belgium' WHEN 'SE' THEN 'Sweden' WHEN 'NO' THEN 'Norway' WHEN 'DK' THEN 'Denmark' WHEN 'FI' THEN 'Finland' WHEN 'US' THEN 'United States' WHEN 'AR' THEN 'Argentina' WHEN 'MX' THEN 'Mexico' WHEN 'AE' THEN 'UAE' WHEN 'AU' THEN 'Australia' WHEN 'IE' THEN 'Ireland' ELSE vc.country_code END, '[^a-zA-Z0-9]+', '-' )) AS country_slug, vc.centroid_lat AS lat, vc.centroid_lon AS lon, -- Population: requires code→name Eurostat lookup (not yet extracted); defaults to 0. -- market_score uses LOG(GREATEST(population, 1)) so 0 degrades score gracefully. 0::BIGINT AS population, 0::INTEGER AS population_year, vc.padel_venue_count, ci.median_income_pps, ci.income_year FROM venue_cities vc LEFT JOIN country_income ci ON vc.country_code = ci.country_code -- Enforce grain: if two cities in the same country have the same slug -- (e.g. 'São Paulo' and 'Sao Paulo'), keep the one with more venues QUALIFY ROW_NUMBER() OVER ( PARTITION BY vc.country_code, vc.city_slug ORDER BY vc.padel_venue_count DESC NULLS LAST ) = 1