-- City dimension: canonical city records with population and venue count. -- Built from Eurostat Urban Audit codes joined to venue locations. -- Cities without Eurostat coverage (US, non-EU) are derived from venue clusters. MODEL ( name foundation.dim_cities, kind FULL, cron '@daily', grain city_code ); WITH -- Eurostat cities: latest population per city code eurostat_cities AS ( SELECT city_code, country_code, population, ref_year FROM staging.stg_population QUALIFY ROW_NUMBER() OVER (PARTITION BY city_code ORDER BY ref_year DESC) = 1 ), -- Venue counts per (country_code, city) from dim_venues venue_counts AS ( SELECT country_code, city, COUNT(*) AS venue_count, AVG(lat) AS centroid_lat, AVG(lon) AS centroid_lon FROM foundation.dim_venues WHERE city IS NOT NULL AND city != '' GROUP BY country_code, city ), -- Eurostat city label mapping to canonical city names -- (Eurostat uses codes like DE001C → Berlin; we keep both) eurostat_labels AS ( SELECT DISTINCT city_code, country_code, -- Derive a slug-friendly city name from the code as fallback LOWER(REPLACE(city_code, country_code, '')) AS city_slug_raw FROM eurostat_cities ) SELECT ec.city_code, ec.country_code, COALESCE(vc.city, ec.city_code) AS city_name, LOWER(REGEXP_REPLACE( COALESCE(vc.city, ec.city_slug_raw), '[^a-z0-9]+', '-' )) AS city_slug, COALESCE(vc.centroid_lat, 0::DOUBLE) AS lat, COALESCE(vc.centroid_lon, 0::DOUBLE) AS lon, ec.population, ec.ref_year AS population_year, COALESCE(vc.venue_count, 0) AS padel_venue_count FROM eurostat_cities ec LEFT JOIN venue_counts vc ON ec.country_code = vc.country_code AND LOWER(TRIM(vc.city)) LIKE '%' || LOWER(LEFT(ec.city_code, 2)) || '%'