-- Location dimension: all known populated places globally (GeoNames cities1000). -- This is the opportunity-scoring root — NOT filtered to places with padel courts. -- Grain: (country_code, geoname_id) — stable GeoNames numeric ID per location. -- -- Unlike dim_cities (seeded from dim_venues / existing padel markets), dim_locations -- covers all locations with population ≥ 1K so zero-court Gemeinden score fully. -- -- Enriched with: -- stg_income → country-level median income PPS (fallback) -- stg_regional_income → NUTS-1 regional income PPS (preferred where mapped) -- stg_padel_courts → padel venue count + nearest court distance (km) -- stg_tennis_courts → tennis court count within 25km radius -- -- Income resolution: COALESCE(regional_income, country_income) AS median_income_pps. -- Germany: all 16 Bundesländer mapped via admin1_code → NUTS-1. -- All other countries fall back to country-level income (stg_income). -- -- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension). -- A bounding-box pre-filter (~0.5°, ≈55km) reduces the cross-join before the -- exact sphere distance is computed. MODEL ( name foundation.dim_locations, kind FULL, cron '@daily', grain (country_code, geoname_id) ); WITH -- Base: all GeoNames locations with valid coordinates locations AS ( SELECT geoname_id, city_name AS location_name, -- URL-safe location slug LOWER(REGEXP_REPLACE(LOWER(city_name), '[^a-z0-9]+', '-')) AS location_slug, country_code, lat, lon, admin1_code, admin2_code, population, ref_year FROM staging.stg_population_geonames WHERE lat IS NOT NULL AND lon IS NOT NULL ), -- Country income (same source and pattern as dim_cities) — fallback for unmapped regions country_income AS ( SELECT country_code, median_income_pps, ref_year AS income_year FROM staging.stg_income QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1 ), -- Static mapping: GeoNames admin1_code → NUTS-1 code (Germany only; extend by adding rows) admin1_to_nuts1 (country_code, admin1_code, nuts1_code) AS ( VALUES ('DE', '01', 'DE1'), -- Baden-Württemberg ('DE', '02', 'DE2'), -- Bayern ('DE', '03', 'DE5'), -- Bremen ('DE', '04', 'DE6'), -- Hamburg ('DE', '05', 'DE7'), -- Hessen ('DE', '06', 'DE9'), -- Niedersachsen ('DE', '07', 'DEA'), -- Nordrhein-Westfalen ('DE', '08', 'DEB'), -- Rheinland-Pfalz ('DE', '09', 'DEC'), -- Saarland ('DE', '10', 'DEF'), -- Schleswig-Holstein ('DE', '11', 'DE4'), -- Brandenburg ('DE', '12', 'DE8'), -- Mecklenburg-Vorpommern ('DE', '13', 'DED'), -- Sachsen ('DE', '14', 'DEE'), -- Sachsen-Anhalt ('DE', '15', 'DEG'), -- Thüringen ('DE', '16', 'DE3') -- Berlin ), -- Latest NUTS-1 regional income per region regional_income AS ( SELECT m.country_code, m.admin1_code, r.regional_income_pps, r.ref_year AS regional_income_year FROM admin1_to_nuts1 m JOIN staging.stg_regional_income r ON r.nuts1_code = m.nuts1_code QUALIFY ROW_NUMBER() OVER ( PARTITION BY m.country_code, m.admin1_code ORDER BY r.ref_year DESC ) = 1 ), -- Padel court lat/lon for distance and density calculations padel_courts AS ( SELECT lat, lon, country_code FROM staging.stg_padel_courts WHERE lat IS NOT NULL AND lon IS NOT NULL ), -- Nearest padel court distance per location (bbox pre-filter → exact sphere distance) nearest_padel AS ( SELECT l.geoname_id, MIN( ST_Distance_Sphere( ST_Point(l.lon, l.lat), ST_Point(p.lon, p.lat) ) / 1000.0 ) AS nearest_padel_court_km FROM locations l JOIN padel_courts p -- ~55km bounding box pre-filter to limit cross-join before sphere calc ON ABS(l.lat - p.lat) < 0.5 AND ABS(l.lon - p.lon) < 0.5 GROUP BY l.geoname_id ), -- Padel venues within 5km of each location (counts as "local padel supply") padel_local AS ( SELECT l.geoname_id, COUNT(*) AS padel_venue_count FROM locations l JOIN padel_courts p ON ABS(l.lat - p.lat) < 0.05 -- ~5km bbox pre-filter AND ABS(l.lon - p.lon) < 0.05 WHERE ST_Distance_Sphere( ST_Point(l.lon, l.lat), ST_Point(p.lon, p.lat) ) / 1000.0 <= 5.0 GROUP BY l.geoname_id ), -- Tennis courts within 25km of each location (sports culture proxy) tennis_nearby AS ( SELECT l.geoname_id, COUNT(*) AS tennis_courts_within_25km FROM locations l JOIN staging.stg_tennis_courts t ON ABS(l.lat - t.lat) < 0.23 -- ~25km bbox pre-filter AND ABS(l.lon - t.lon) < 0.23 WHERE ST_Distance_Sphere( ST_Point(l.lon, l.lat), ST_Point(t.lon, t.lat) ) / 1000.0 <= 25.0 GROUP BY l.geoname_id ) SELECT l.geoname_id, l.country_code, -- Human-readable country name (consistent with dim_cities) CASE l.country_code WHEN 'DE' THEN 'Germany' WHEN 'ES' THEN 'Spain' WHEN 'GB' THEN 'United Kingdom' WHEN 'FR' THEN 'France' WHEN 'IT' THEN 'Italy' WHEN 'PT' THEN 'Portugal' WHEN 'AT' THEN 'Austria' WHEN 'CH' THEN 'Switzerland' WHEN 'NL' THEN 'Netherlands' WHEN 'BE' THEN 'Belgium' WHEN 'SE' THEN 'Sweden' WHEN 'NO' THEN 'Norway' WHEN 'DK' THEN 'Denmark' WHEN 'FI' THEN 'Finland' WHEN 'US' THEN 'United States' WHEN 'AR' THEN 'Argentina' WHEN 'MX' THEN 'Mexico' WHEN 'AE' THEN 'UAE' WHEN 'AU' THEN 'Australia' WHEN 'IE' THEN 'Ireland' ELSE l.country_code END AS country_name_en, -- URL-safe country slug LOWER(REGEXP_REPLACE( CASE l.country_code WHEN 'DE' THEN 'Germany' WHEN 'ES' THEN 'Spain' WHEN 'GB' THEN 'United Kingdom' WHEN 'FR' THEN 'France' WHEN 'IT' THEN 'Italy' WHEN 'PT' THEN 'Portugal' WHEN 'AT' THEN 'Austria' WHEN 'CH' THEN 'Switzerland' WHEN 'NL' THEN 'Netherlands' WHEN 'BE' THEN 'Belgium' WHEN 'SE' THEN 'Sweden' WHEN 'NO' THEN 'Norway' WHEN 'DK' THEN 'Denmark' WHEN 'FI' THEN 'Finland' WHEN 'US' THEN 'United States' WHEN 'AR' THEN 'Argentina' WHEN 'MX' THEN 'Mexico' WHEN 'AE' THEN 'UAE' WHEN 'AU' THEN 'Australia' WHEN 'IE' THEN 'Ireland' ELSE l.country_code END, '[^a-zA-Z0-9]+', '-' )) AS country_slug, l.location_name, l.location_slug, l.lat, l.lon, l.admin1_code, l.admin2_code, l.population, l.ref_year AS population_year, COALESCE(ri.regional_income_pps, ci.median_income_pps) AS median_income_pps, COALESCE(ri.regional_income_year, ci.income_year) AS income_year, COALESCE(pl.padel_venue_count, 0)::INTEGER AS padel_venue_count, -- Venues per 100K residents (NULL if population = 0) CASE WHEN l.population > 0 THEN ROUND(COALESCE(pl.padel_venue_count, 0)::DOUBLE / l.population * 100000, 2) ELSE NULL END AS padel_venues_per_100k, np.nearest_padel_court_km, COALESCE(tn.tennis_courts_within_25km, 0)::INTEGER AS tennis_courts_within_25km, CURRENT_DATE AS refreshed_date FROM locations l LEFT JOIN country_income ci ON l.country_code = ci.country_code LEFT JOIN regional_income ri ON l.country_code = ri.country_code AND l.admin1_code = ri.admin1_code LEFT JOIN nearest_padel np ON l.geoname_id = np.geoname_id LEFT JOIN padel_local pl ON l.geoname_id = pl.geoname_id LEFT JOIN tennis_nearby tn ON l.geoname_id = tn.geoname_id -- Enforce grain: deduplicate if city slug collides within same country QUALIFY ROW_NUMBER() OVER ( PARTITION BY l.country_code, l.geoname_id ORDER BY l.population DESC NULLS LAST ) = 1