-- City dimension: canonical city records with venue count and country metadata. -- Built from venue locations (dim_venues) as the primary source — padelnomics -- tracks cities where padel venues actually exist, not an administrative city list. -- -- Conformed dimension: used by city_market_profile and all pSEO serving models. -- Integrates four sources: -- dim_venues → city list, venue count, coordinates (Playtomic + OSM) -- stg_income → country-level median income (Eurostat) -- stg_city_labels → Eurostat city_code → city_name mapping (EU cities) -- stg_population → Eurostat city-level population (EU, joined via city code) -- stg_population_usa → US Census ACS place population -- stg_population_uk → ONS LAD population -- stg_population_geonames → GeoNames global fallback -- -- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames > 0. -- City name matching is case/whitespace-insensitive within each country. -- -- Grain: (country_code, city_slug) — two cities in different countries can share a -- city name. QUALIFY enforces no duplicate (country_code, city_slug) pairs. MODEL ( name foundation.dim_cities, kind FULL, cron '@daily', grain (country_code, city_slug) ); WITH -- Primary: distinct cities from dim_venues (canonical padel city list) venue_cities AS ( SELECT country_code, city AS city_name, -- Lowercase before regex so uppercase letters aren't stripped to '-' LOWER(REGEXP_REPLACE(LOWER(city), '[^a-z0-9]+', '-')) AS city_slug, COUNT(*) AS padel_venue_count, AVG(lat) AS centroid_lat, AVG(lon) AS centroid_lon FROM foundation.dim_venues WHERE city IS NOT NULL AND LENGTH(city) > 0 GROUP BY country_code, city ), -- Latest country income per country country_income AS ( SELECT country_code, median_income_pps, ref_year AS income_year FROM staging.stg_income QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1 ), -- Eurostat EU population: join city labels (code→name) with population values. -- QUALIFY keeps only the most recent year per (country, city name). eurostat_pop AS ( SELECT cl.country_code, cl.city_name, p.population, p.ref_year FROM staging.stg_city_labels cl JOIN staging.stg_population p ON cl.city_code = p.city_code QUALIFY ROW_NUMBER() OVER ( PARTITION BY cl.country_code, cl.city_name ORDER BY p.ref_year DESC ) = 1 ), -- US Census ACS population (place-level, filtered to ≥50K) us_pop AS ( SELECT city_name, country_code, population, ref_year FROM staging.stg_population_usa QUALIFY ROW_NUMBER() OVER (PARTITION BY place_fips ORDER BY ref_year DESC) = 1 ), -- ONS UK Local Authority District population uk_pop AS ( SELECT lad_name AS city_name, country_code, population, ref_year FROM staging.stg_population_uk QUALIFY ROW_NUMBER() OVER (PARTITION BY lad_code ORDER BY ref_year DESC) = 1 ), -- GeoNames global fallback (all cities ≥50K) geonames_pop AS ( SELECT city_name, country_code, population, ref_year FROM staging.stg_population_geonames QUALIFY ROW_NUMBER() OVER (PARTITION BY geoname_id ORDER BY ref_year DESC) = 1 ) SELECT vc.country_code, vc.city_slug, vc.city_name, -- Human-readable country name for pSEO templates and internal linking CASE vc.country_code WHEN 'DE' THEN 'Germany' WHEN 'ES' THEN 'Spain' WHEN 'GB' THEN 'United Kingdom' WHEN 'FR' THEN 'France' WHEN 'IT' THEN 'Italy' WHEN 'PT' THEN 'Portugal' WHEN 'AT' THEN 'Austria' WHEN 'CH' THEN 'Switzerland' WHEN 'NL' THEN 'Netherlands' WHEN 'BE' THEN 'Belgium' WHEN 'SE' THEN 'Sweden' WHEN 'NO' THEN 'Norway' WHEN 'DK' THEN 'Denmark' WHEN 'FI' THEN 'Finland' WHEN 'US' THEN 'United States' WHEN 'AR' THEN 'Argentina' WHEN 'MX' THEN 'Mexico' WHEN 'AE' THEN 'UAE' WHEN 'AU' THEN 'Australia' WHEN 'IE' THEN 'Ireland' ELSE vc.country_code END AS country_name_en, -- URL-safe country slug LOWER(REGEXP_REPLACE( CASE vc.country_code WHEN 'DE' THEN 'Germany' WHEN 'ES' THEN 'Spain' WHEN 'GB' THEN 'United Kingdom' WHEN 'FR' THEN 'France' WHEN 'IT' THEN 'Italy' WHEN 'PT' THEN 'Portugal' WHEN 'AT' THEN 'Austria' WHEN 'CH' THEN 'Switzerland' WHEN 'NL' THEN 'Netherlands' WHEN 'BE' THEN 'Belgium' WHEN 'SE' THEN 'Sweden' WHEN 'NO' THEN 'Norway' WHEN 'DK' THEN 'Denmark' WHEN 'FI' THEN 'Finland' WHEN 'US' THEN 'United States' WHEN 'AR' THEN 'Argentina' WHEN 'MX' THEN 'Mexico' WHEN 'AE' THEN 'UAE' WHEN 'AU' THEN 'Australia' WHEN 'IE' THEN 'Ireland' ELSE vc.country_code END, '[^a-zA-Z0-9]+', '-' )) AS country_slug, vc.centroid_lat AS lat, vc.centroid_lon AS lon, -- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames > 0. -- City name match is case/whitespace-insensitive within each country. COALESCE( ep.population, usa.population, uk.population, gn.population, 0 )::BIGINT AS population, COALESCE( ep.ref_year, usa.ref_year, uk.ref_year, gn.ref_year, 0 )::INTEGER AS population_year, vc.padel_venue_count, ci.median_income_pps, ci.income_year FROM venue_cities vc LEFT JOIN country_income ci ON vc.country_code = ci.country_code -- Eurostat EU population (via city code→name lookup) LEFT JOIN eurostat_pop ep ON vc.country_code = ep.country_code AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(ep.city_name)) -- US Census population LEFT JOIN us_pop usa ON vc.country_code = usa.country_code AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(usa.city_name)) -- ONS UK population LEFT JOIN uk_pop uk ON vc.country_code = uk.country_code AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(uk.city_name)) -- GeoNames global fallback LEFT JOIN geonames_pop gn ON vc.country_code = gn.country_code AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(gn.city_name)) -- Enforce grain: if two cities in the same country have the same slug -- (e.g. 'São Paulo' and 'Sao Paulo'), keep the one with more venues QUALIFY ROW_NUMBER() OVER ( PARTITION BY vc.country_code, vc.city_slug ORDER BY vc.padel_venue_count DESC NULLS LAST ) = 1