Files
padelnomics/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql
Deeman 13c86ebf84 Merge branch 'worktree-extraction-overhaul'
# Conflicts:
#	transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql
#	transform/sqlmesh_padelnomics/models/staging/stg_playtomic_venues.sql
2026-02-23 01:01:26 +01:00

71 lines
2.3 KiB
SQL

-- City dimension: canonical city records with population and venue count.
-- Built from Eurostat Urban Audit codes joined to venue locations.
-- Cities without Eurostat coverage (US, non-EU) are derived from venue clusters.
MODEL (
name foundation.dim_cities,
kind FULL,
cron '@daily',
grain city_code
);
WITH -- Eurostat cities: latest population per city code
eurostat_cities AS (
SELECT
city_code,
country_code,
population,
ref_year,
LOWER(REPLACE(city_code, country_code, '')) AS city_slug_raw
FROM staging.stg_population
QUALIFY ROW_NUMBER() OVER (PARTITION BY city_code ORDER BY ref_year DESC) = 1
),
-- Venue counts per (country_code, city) from dim_venues
venue_counts AS (
SELECT
country_code,
city,
COUNT(*) AS venue_count,
AVG(lat) AS centroid_lat,
AVG(lon) AS centroid_lon
FROM foundation.dim_venues
WHERE city IS NOT NULL AND city != ''
GROUP BY country_code, city
),
-- Eurostat city label mapping to canonical city names
-- (Eurostat uses codes like DE001C → Berlin; we keep both)
eurostat_labels AS (
SELECT DISTINCT
city_code,
country_code,
-- Derive a slug-friendly city name from the code as fallback
LOWER(REPLACE(city_code, country_code, '')) AS city_slug_raw
FROM eurostat_cities
),
-- Country-level median income (latest year per country)
country_income AS (
SELECT country_code, median_income_pps, ref_year AS income_year
FROM staging.stg_income
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
)
SELECT
ec.city_code,
ec.country_code,
COALESCE(vc.city, ec.city_code) AS city_name,
LOWER(REGEXP_REPLACE(
COALESCE(vc.city, ec.city_slug_raw), '[^a-z0-9]+', '-'
)) AS city_slug,
COALESCE(vc.centroid_lat, 0::DOUBLE) AS lat,
COALESCE(vc.centroid_lon, 0::DOUBLE) AS lon,
ec.population,
ec.ref_year AS population_year,
COALESCE(vc.venue_count, 0) AS padel_venue_count,
ci.median_income_pps,
ci.income_year
FROM eurostat_cities ec
LEFT JOIN venue_counts vc
ON ec.country_code = vc.country_code
AND LOWER(TRIM(vc.city)) LIKE '%' || LOWER(LEFT(ec.city_code, 2)) || '%'
LEFT JOIN country_income ci
ON ec.country_code = ci.country_code