- dim_cities: add geoname_id to geonames_pop CTE and final SELECT
Creates FK between dim_cities (city-with-padel-venues) and dim_locations (all GeoNames),
enabling joins to location_opportunity_profile for the first time.
- city_market_profile: pass geoname_id through base CTE and final SELECT
- pseo_city_costs_de: LEFT JOIN location_opportunity_profile on (country_code, geoname_id),
add opportunity_score to output columns
- pseo_country_overview: add avg_opportunity_score, top_opportunity_score, top_opportunity_slugs,
top_opportunity_names aggregates
Cities with no GeoNames name match get opportunity_score = NULL; templates guard with
{% if opportunity_score %}.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
184 lines
6.8 KiB
SQL
184 lines
6.8 KiB
SQL
-- City dimension: canonical city records with venue count and country metadata.
|
|
-- Built from venue locations (dim_venues) as the primary source — padelnomics
|
|
-- tracks cities where padel venues actually exist, not an administrative city list.
|
|
--
|
|
-- Conformed dimension: used by city_market_profile and all pSEO serving models.
|
|
-- Integrates four sources:
|
|
-- dim_venues → city list, venue count, coordinates (Playtomic + OSM)
|
|
-- stg_income → country-level median income (Eurostat)
|
|
-- stg_city_labels → Eurostat city_code → city_name mapping (EU cities)
|
|
-- stg_population → Eurostat city-level population (EU, joined via city code)
|
|
-- stg_population_usa → US Census ACS place population
|
|
-- stg_population_uk → ONS LAD population
|
|
-- stg_population_geonames → GeoNames global fallback
|
|
--
|
|
-- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames > 0.
|
|
-- City name matching is case/whitespace-insensitive within each country.
|
|
--
|
|
-- Grain: (country_code, city_slug) — two cities in different countries can share a
|
|
-- city name. QUALIFY enforces no duplicate (country_code, city_slug) pairs.
|
|
|
|
MODEL (
|
|
name foundation.dim_cities,
|
|
kind FULL,
|
|
cron '@daily',
|
|
grain (country_code, city_slug)
|
|
);
|
|
|
|
WITH
|
|
-- Primary: distinct cities from dim_venues (canonical padel city list)
|
|
venue_cities AS (
|
|
SELECT
|
|
country_code,
|
|
city AS city_name,
|
|
-- Lowercase before regex so uppercase letters aren't stripped to '-'
|
|
LOWER(REGEXP_REPLACE(LOWER(city), '[^a-z0-9]+', '-')) AS city_slug,
|
|
COUNT(*) AS padel_venue_count,
|
|
AVG(lat) AS centroid_lat,
|
|
AVG(lon) AS centroid_lon
|
|
FROM foundation.dim_venues
|
|
WHERE city IS NOT NULL AND LENGTH(city) > 0
|
|
GROUP BY country_code, city
|
|
),
|
|
-- Latest country income per country
|
|
country_income AS (
|
|
SELECT country_code, median_income_pps, ref_year AS income_year
|
|
FROM staging.stg_income
|
|
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
|
|
),
|
|
-- Eurostat EU population: join city labels (code→name) with population values.
|
|
-- QUALIFY keeps only the most recent year per (country, city name).
|
|
eurostat_pop AS (
|
|
SELECT
|
|
cl.country_code,
|
|
cl.city_name,
|
|
p.population,
|
|
p.ref_year
|
|
FROM staging.stg_city_labels cl
|
|
JOIN staging.stg_population p ON cl.city_code = p.city_code
|
|
QUALIFY ROW_NUMBER() OVER (
|
|
PARTITION BY cl.country_code, cl.city_name
|
|
ORDER BY p.ref_year DESC
|
|
) = 1
|
|
),
|
|
-- US Census ACS population (place-level, filtered to ≥50K)
|
|
us_pop AS (
|
|
SELECT city_name, country_code, population, ref_year
|
|
FROM staging.stg_population_usa
|
|
QUALIFY ROW_NUMBER() OVER (PARTITION BY place_fips ORDER BY ref_year DESC) = 1
|
|
),
|
|
-- ONS UK Local Authority District population
|
|
uk_pop AS (
|
|
SELECT lad_name AS city_name, country_code, population, ref_year
|
|
FROM staging.stg_population_uk
|
|
QUALIFY ROW_NUMBER() OVER (PARTITION BY lad_code ORDER BY ref_year DESC) = 1
|
|
),
|
|
-- GeoNames global fallback (all cities ≥50K)
|
|
geonames_pop AS (
|
|
SELECT geoname_id, city_name, country_code, population, ref_year
|
|
FROM staging.stg_population_geonames
|
|
QUALIFY ROW_NUMBER() OVER (PARTITION BY geoname_id ORDER BY ref_year DESC) = 1
|
|
)
|
|
SELECT
|
|
vc.country_code,
|
|
vc.city_slug,
|
|
vc.city_name,
|
|
-- Human-readable country name for pSEO templates and internal linking
|
|
CASE vc.country_code
|
|
WHEN 'DE' THEN 'Germany'
|
|
WHEN 'ES' THEN 'Spain'
|
|
WHEN 'GB' THEN 'United Kingdom'
|
|
WHEN 'FR' THEN 'France'
|
|
WHEN 'IT' THEN 'Italy'
|
|
WHEN 'PT' THEN 'Portugal'
|
|
WHEN 'AT' THEN 'Austria'
|
|
WHEN 'CH' THEN 'Switzerland'
|
|
WHEN 'NL' THEN 'Netherlands'
|
|
WHEN 'BE' THEN 'Belgium'
|
|
WHEN 'SE' THEN 'Sweden'
|
|
WHEN 'NO' THEN 'Norway'
|
|
WHEN 'DK' THEN 'Denmark'
|
|
WHEN 'FI' THEN 'Finland'
|
|
WHEN 'US' THEN 'United States'
|
|
WHEN 'AR' THEN 'Argentina'
|
|
WHEN 'MX' THEN 'Mexico'
|
|
WHEN 'AE' THEN 'UAE'
|
|
WHEN 'AU' THEN 'Australia'
|
|
WHEN 'IE' THEN 'Ireland'
|
|
ELSE vc.country_code
|
|
END AS country_name_en,
|
|
-- URL-safe country slug
|
|
LOWER(REGEXP_REPLACE(
|
|
CASE vc.country_code
|
|
WHEN 'DE' THEN 'Germany'
|
|
WHEN 'ES' THEN 'Spain'
|
|
WHEN 'GB' THEN 'United Kingdom'
|
|
WHEN 'FR' THEN 'France'
|
|
WHEN 'IT' THEN 'Italy'
|
|
WHEN 'PT' THEN 'Portugal'
|
|
WHEN 'AT' THEN 'Austria'
|
|
WHEN 'CH' THEN 'Switzerland'
|
|
WHEN 'NL' THEN 'Netherlands'
|
|
WHEN 'BE' THEN 'Belgium'
|
|
WHEN 'SE' THEN 'Sweden'
|
|
WHEN 'NO' THEN 'Norway'
|
|
WHEN 'DK' THEN 'Denmark'
|
|
WHEN 'FI' THEN 'Finland'
|
|
WHEN 'US' THEN 'United States'
|
|
WHEN 'AR' THEN 'Argentina'
|
|
WHEN 'MX' THEN 'Mexico'
|
|
WHEN 'AE' THEN 'UAE'
|
|
WHEN 'AU' THEN 'Australia'
|
|
WHEN 'IE' THEN 'Ireland'
|
|
ELSE vc.country_code
|
|
END, '[^a-zA-Z0-9]+', '-'
|
|
)) AS country_slug,
|
|
vc.centroid_lat AS lat,
|
|
vc.centroid_lon AS lon,
|
|
-- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames > 0.
|
|
-- City name match is case/whitespace-insensitive within each country.
|
|
COALESCE(
|
|
ep.population,
|
|
usa.population,
|
|
uk.population,
|
|
gn.population,
|
|
0
|
|
)::BIGINT AS population,
|
|
COALESCE(
|
|
ep.ref_year,
|
|
usa.ref_year,
|
|
uk.ref_year,
|
|
gn.ref_year,
|
|
0
|
|
)::INTEGER AS population_year,
|
|
vc.padel_venue_count,
|
|
ci.median_income_pps,
|
|
ci.income_year,
|
|
-- GeoNames ID: FK to dim_locations / location_opportunity_profile.
|
|
-- NULL when city name doesn't match any GeoNames entry.
|
|
gn.geoname_id
|
|
FROM venue_cities vc
|
|
LEFT JOIN country_income ci ON vc.country_code = ci.country_code
|
|
-- Eurostat EU population (via city code→name lookup)
|
|
LEFT JOIN eurostat_pop ep
|
|
ON vc.country_code = ep.country_code
|
|
AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(ep.city_name))
|
|
-- US Census population
|
|
LEFT JOIN us_pop usa
|
|
ON vc.country_code = usa.country_code
|
|
AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(usa.city_name))
|
|
-- ONS UK population
|
|
LEFT JOIN uk_pop uk
|
|
ON vc.country_code = uk.country_code
|
|
AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(uk.city_name))
|
|
-- GeoNames global fallback
|
|
LEFT JOIN geonames_pop gn
|
|
ON vc.country_code = gn.country_code
|
|
AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(gn.city_name))
|
|
-- Enforce grain: if two cities in the same country have the same slug
|
|
-- (e.g. 'São Paulo' and 'Sao Paulo'), keep the one with more venues
|
|
QUALIFY ROW_NUMBER() OVER (
|
|
PARTITION BY vc.country_code, vc.city_slug
|
|
ORDER BY vc.padel_venue_count DESC NULLS LAST
|
|
) = 1
|