-- Location dimension: all known populated places globally (GeoNames cities1000). -- This is the opportunity-scoring root — NOT filtered to places with padel courts. -- Grain: (country_code, geoname_id) — stable GeoNames numeric ID per location. -- -- Unlike dim_cities (seeded from dim_venues / existing padel markets), dim_locations -- covers all locations with population ≥ 1K so zero-court Gemeinden score fully. -- -- Enriched with: -- stg_income → country-level median income PPS -- stg_padel_courts → padel venue count + nearest court distance (km) -- stg_tennis_courts → tennis court count within 25km radius -- -- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension). -- A bounding-box pre-filter (~0.5°, ≈55km) reduces the cross-join before the -- exact sphere distance is computed. MODEL ( name foundation.dim_locations, kind FULL, cron '@daily', grain (country_code, geoname_id) ); WITH -- Base: all GeoNames locations with valid coordinates locations AS ( SELECT geoname_id, city_name AS location_name, -- URL-safe location slug LOWER(REGEXP_REPLACE(LOWER(city_name), '[^a-z0-9]+', '-')) AS location_slug, country_code, lat, lon, admin1_code, admin2_code, population, population_year AS population_year, ref_year FROM staging.stg_population_geonames WHERE lat IS NOT NULL AND lon IS NOT NULL ), -- Country income (same source and pattern as dim_cities) country_income AS ( SELECT country_code, median_income_pps, ref_year AS income_year FROM staging.stg_income QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1 ), -- Padel court lat/lon for distance and density calculations padel_courts AS ( SELECT lat, lon, country_code FROM staging.stg_padel_courts WHERE lat IS NOT NULL AND lon IS NOT NULL ), -- Nearest padel court distance per location (bbox pre-filter → exact sphere distance) nearest_padel AS ( SELECT l.geoname_id, MIN( ST_Distance_Sphere( ST_Point(l.lon, l.lat), ST_Point(p.lon, p.lat) ) / 1000.0 ) AS nearest_padel_court_km FROM locations l JOIN padel_courts p -- ~55km bounding box pre-filter to limit cross-join before sphere calc ON ABS(l.lat - p.lat) < 0.5 AND ABS(l.lon - p.lon) < 0.5 GROUP BY l.geoname_id ), -- Padel venues within 5km of each location (counts as "local padel supply") padel_local AS ( SELECT l.geoname_id, COUNT(*) AS padel_venue_count FROM locations l JOIN padel_courts p ON ABS(l.lat - p.lat) < 0.05 -- ~5km bbox pre-filter AND ABS(l.lon - p.lon) < 0.05 WHERE ST_Distance_Sphere( ST_Point(l.lon, l.lat), ST_Point(p.lon, p.lat) ) / 1000.0 <= 5.0 GROUP BY l.geoname_id ), -- Tennis courts within 25km of each location (sports culture proxy) tennis_nearby AS ( SELECT l.geoname_id, COUNT(*) AS tennis_courts_within_25km FROM locations l JOIN staging.stg_tennis_courts t ON ABS(l.lat - t.lat) < 0.23 -- ~25km bbox pre-filter AND ABS(l.lon - t.lon) < 0.23 WHERE ST_Distance_Sphere( ST_Point(l.lon, l.lat), ST_Point(t.lon, t.lat) ) / 1000.0 <= 25.0 GROUP BY l.geoname_id ) SELECT l.geoname_id, l.country_code, -- Human-readable country name (consistent with dim_cities) CASE l.country_code WHEN 'DE' THEN 'Germany' WHEN 'ES' THEN 'Spain' WHEN 'GB' THEN 'United Kingdom' WHEN 'FR' THEN 'France' WHEN 'IT' THEN 'Italy' WHEN 'PT' THEN 'Portugal' WHEN 'AT' THEN 'Austria' WHEN 'CH' THEN 'Switzerland' WHEN 'NL' THEN 'Netherlands' WHEN 'BE' THEN 'Belgium' WHEN 'SE' THEN 'Sweden' WHEN 'NO' THEN 'Norway' WHEN 'DK' THEN 'Denmark' WHEN 'FI' THEN 'Finland' WHEN 'US' THEN 'United States' WHEN 'AR' THEN 'Argentina' WHEN 'MX' THEN 'Mexico' WHEN 'AE' THEN 'UAE' WHEN 'AU' THEN 'Australia' WHEN 'IE' THEN 'Ireland' ELSE l.country_code END AS country_name_en, -- URL-safe country slug LOWER(REGEXP_REPLACE( CASE l.country_code WHEN 'DE' THEN 'Germany' WHEN 'ES' THEN 'Spain' WHEN 'GB' THEN 'United Kingdom' WHEN 'FR' THEN 'France' WHEN 'IT' THEN 'Italy' WHEN 'PT' THEN 'Portugal' WHEN 'AT' THEN 'Austria' WHEN 'CH' THEN 'Switzerland' WHEN 'NL' THEN 'Netherlands' WHEN 'BE' THEN 'Belgium' WHEN 'SE' THEN 'Sweden' WHEN 'NO' THEN 'Norway' WHEN 'DK' THEN 'Denmark' WHEN 'FI' THEN 'Finland' WHEN 'US' THEN 'United States' WHEN 'AR' THEN 'Argentina' WHEN 'MX' THEN 'Mexico' WHEN 'AE' THEN 'UAE' WHEN 'AU' THEN 'Australia' WHEN 'IE' THEN 'Ireland' ELSE l.country_code END, '[^a-zA-Z0-9]+', '-' )) AS country_slug, l.location_name, l.location_slug, l.lat, l.lon, l.admin1_code, l.admin2_code, l.population, l.ref_year AS population_year, ci.median_income_pps, ci.income_year, COALESCE(pl.padel_venue_count, 0)::INTEGER AS padel_venue_count, -- Venues per 100K residents (NULL if population = 0) CASE WHEN l.population > 0 THEN ROUND(COALESCE(pl.padel_venue_count, 0)::DOUBLE / l.population * 100000, 2) ELSE NULL END AS padel_venues_per_100k, np.nearest_padel_court_km, COALESCE(tn.tennis_courts_within_25km, 0)::INTEGER AS tennis_courts_within_25km, CURRENT_DATE AS refreshed_date FROM locations l LEFT JOIN country_income ci ON l.country_code = ci.country_code LEFT JOIN nearest_padel np ON l.geoname_id = np.geoname_id LEFT JOIN padel_local pl ON l.geoname_id = pl.geoname_id LEFT JOIN tennis_nearby tn ON l.geoname_id = tn.geoname_id -- Enforce grain: deduplicate if city slug collides within same country QUALIFY ROW_NUMBER() OVER ( PARTITION BY l.country_code, l.geoname_id ORDER BY l.population DESC NULLS LAST ) = 1