feat(transform): add dim_locations + dual market scoring models
dim_locations (foundation):
- Seeded from stg_population_geonames (all locations, not venue-dependent)
- Grain: (country_code, geoname_id)
- Enriched with: padel venues within 5km, nearest court distance (ST_Distance_Sphere),
tennis courts within 25km, country income
- Covers zero-court Gemeinden for opportunity scoring
location_opportunity_profile (serving) — Padelnomics Marktpotenzial-Score:
- Answers "Where should I build?" — no padel_venue_count filter
- Formula: population (25) + income (20) + supply gap inverted (30) +
catchment gap (15) + tennis culture (10) = 100pts
- Sorted by opportunity_score DESC
city_market_profile (serving) — Padelnomics Marktreife-Score:
- Add saturation discount (×0.85 when venues_per_100k > 8)
- Update header comment to reference Marktreife-Score branding
- Kept WHERE padel_venue_count > 0 (established markets only)
- column name market_score unchanged (avoids downstream breakage)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,183 @@
|
||||
-- Location dimension: all known populated places globally (GeoNames cities1000).
|
||||
-- This is the opportunity-scoring root — NOT filtered to places with padel courts.
|
||||
-- Grain: (country_code, geoname_id) — stable GeoNames numeric ID per location.
|
||||
--
|
||||
-- Unlike dim_cities (seeded from dim_venues / existing padel markets), dim_locations
|
||||
-- covers all locations with population ≥ 1K so zero-court Gemeinden score fully.
|
||||
--
|
||||
-- Enriched with:
|
||||
-- stg_income → country-level median income PPS
|
||||
-- stg_padel_courts → padel venue count + nearest court distance (km)
|
||||
-- stg_tennis_courts → tennis court count within 25km radius
|
||||
--
|
||||
-- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension).
|
||||
-- A bounding-box pre-filter (~0.5°, ≈55km) reduces the cross-join before the
|
||||
-- exact sphere distance is computed.
|
||||
|
||||
MODEL (
|
||||
name foundation.dim_locations,
|
||||
kind FULL,
|
||||
cron '@daily',
|
||||
grain (country_code, geoname_id)
|
||||
);
|
||||
|
||||
WITH
|
||||
-- Base: all GeoNames locations with valid coordinates
|
||||
locations AS (
|
||||
SELECT
|
||||
geoname_id,
|
||||
city_name AS location_name,
|
||||
-- URL-safe location slug
|
||||
LOWER(REGEXP_REPLACE(LOWER(city_name), '[^a-z0-9]+', '-')) AS location_slug,
|
||||
country_code,
|
||||
lat,
|
||||
lon,
|
||||
admin1_code,
|
||||
admin2_code,
|
||||
population,
|
||||
population_year AS population_year,
|
||||
ref_year
|
||||
FROM staging.stg_population_geonames
|
||||
WHERE lat IS NOT NULL AND lon IS NOT NULL
|
||||
),
|
||||
-- Country income (same source and pattern as dim_cities)
|
||||
country_income AS (
|
||||
SELECT country_code, median_income_pps, ref_year AS income_year
|
||||
FROM staging.stg_income
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
|
||||
),
|
||||
-- Padel court lat/lon for distance and density calculations
|
||||
padel_courts AS (
|
||||
SELECT lat, lon, country_code
|
||||
FROM staging.stg_padel_courts
|
||||
WHERE lat IS NOT NULL AND lon IS NOT NULL
|
||||
),
|
||||
-- Nearest padel court distance per location (bbox pre-filter → exact sphere distance)
|
||||
nearest_padel AS (
|
||||
SELECT
|
||||
l.geoname_id,
|
||||
MIN(
|
||||
ST_Distance_Sphere(
|
||||
ST_Point(l.lon, l.lat),
|
||||
ST_Point(p.lon, p.lat)
|
||||
) / 1000.0
|
||||
) AS nearest_padel_court_km
|
||||
FROM locations l
|
||||
JOIN padel_courts p
|
||||
-- ~55km bounding box pre-filter to limit cross-join before sphere calc
|
||||
ON ABS(l.lat - p.lat) < 0.5
|
||||
AND ABS(l.lon - p.lon) < 0.5
|
||||
GROUP BY l.geoname_id
|
||||
),
|
||||
-- Padel venues within 5km of each location (counts as "local padel supply")
|
||||
padel_local AS (
|
||||
SELECT
|
||||
l.geoname_id,
|
||||
COUNT(*) AS padel_venue_count
|
||||
FROM locations l
|
||||
JOIN padel_courts p
|
||||
ON ABS(l.lat - p.lat) < 0.05 -- ~5km bbox pre-filter
|
||||
AND ABS(l.lon - p.lon) < 0.05
|
||||
WHERE ST_Distance_Sphere(
|
||||
ST_Point(l.lon, l.lat),
|
||||
ST_Point(p.lon, p.lat)
|
||||
) / 1000.0 <= 5.0
|
||||
GROUP BY l.geoname_id
|
||||
),
|
||||
-- Tennis courts within 25km of each location (sports culture proxy)
|
||||
tennis_nearby AS (
|
||||
SELECT
|
||||
l.geoname_id,
|
||||
COUNT(*) AS tennis_courts_within_25km
|
||||
FROM locations l
|
||||
JOIN staging.stg_tennis_courts t
|
||||
ON ABS(l.lat - t.lat) < 0.23 -- ~25km bbox pre-filter
|
||||
AND ABS(l.lon - t.lon) < 0.23
|
||||
WHERE ST_Distance_Sphere(
|
||||
ST_Point(l.lon, l.lat),
|
||||
ST_Point(t.lon, t.lat)
|
||||
) / 1000.0 <= 25.0
|
||||
GROUP BY l.geoname_id
|
||||
)
|
||||
SELECT
|
||||
l.geoname_id,
|
||||
l.country_code,
|
||||
-- Human-readable country name (consistent with dim_cities)
|
||||
CASE l.country_code
|
||||
WHEN 'DE' THEN 'Germany'
|
||||
WHEN 'ES' THEN 'Spain'
|
||||
WHEN 'GB' THEN 'United Kingdom'
|
||||
WHEN 'FR' THEN 'France'
|
||||
WHEN 'IT' THEN 'Italy'
|
||||
WHEN 'PT' THEN 'Portugal'
|
||||
WHEN 'AT' THEN 'Austria'
|
||||
WHEN 'CH' THEN 'Switzerland'
|
||||
WHEN 'NL' THEN 'Netherlands'
|
||||
WHEN 'BE' THEN 'Belgium'
|
||||
WHEN 'SE' THEN 'Sweden'
|
||||
WHEN 'NO' THEN 'Norway'
|
||||
WHEN 'DK' THEN 'Denmark'
|
||||
WHEN 'FI' THEN 'Finland'
|
||||
WHEN 'US' THEN 'United States'
|
||||
WHEN 'AR' THEN 'Argentina'
|
||||
WHEN 'MX' THEN 'Mexico'
|
||||
WHEN 'AE' THEN 'UAE'
|
||||
WHEN 'AU' THEN 'Australia'
|
||||
WHEN 'IE' THEN 'Ireland'
|
||||
ELSE l.country_code
|
||||
END AS country_name_en,
|
||||
-- URL-safe country slug
|
||||
LOWER(REGEXP_REPLACE(
|
||||
CASE l.country_code
|
||||
WHEN 'DE' THEN 'Germany'
|
||||
WHEN 'ES' THEN 'Spain'
|
||||
WHEN 'GB' THEN 'United Kingdom'
|
||||
WHEN 'FR' THEN 'France'
|
||||
WHEN 'IT' THEN 'Italy'
|
||||
WHEN 'PT' THEN 'Portugal'
|
||||
WHEN 'AT' THEN 'Austria'
|
||||
WHEN 'CH' THEN 'Switzerland'
|
||||
WHEN 'NL' THEN 'Netherlands'
|
||||
WHEN 'BE' THEN 'Belgium'
|
||||
WHEN 'SE' THEN 'Sweden'
|
||||
WHEN 'NO' THEN 'Norway'
|
||||
WHEN 'DK' THEN 'Denmark'
|
||||
WHEN 'FI' THEN 'Finland'
|
||||
WHEN 'US' THEN 'United States'
|
||||
WHEN 'AR' THEN 'Argentina'
|
||||
WHEN 'MX' THEN 'Mexico'
|
||||
WHEN 'AE' THEN 'UAE'
|
||||
WHEN 'AU' THEN 'Australia'
|
||||
WHEN 'IE' THEN 'Ireland'
|
||||
ELSE l.country_code
|
||||
END, '[^a-zA-Z0-9]+', '-'
|
||||
)) AS country_slug,
|
||||
l.location_name,
|
||||
l.location_slug,
|
||||
l.lat,
|
||||
l.lon,
|
||||
l.admin1_code,
|
||||
l.admin2_code,
|
||||
l.population,
|
||||
l.ref_year AS population_year,
|
||||
ci.median_income_pps,
|
||||
ci.income_year,
|
||||
COALESCE(pl.padel_venue_count, 0)::INTEGER AS padel_venue_count,
|
||||
-- Venues per 100K residents (NULL if population = 0)
|
||||
CASE WHEN l.population > 0
|
||||
THEN ROUND(COALESCE(pl.padel_venue_count, 0)::DOUBLE / l.population * 100000, 2)
|
||||
ELSE NULL
|
||||
END AS padel_venues_per_100k,
|
||||
np.nearest_padel_court_km,
|
||||
COALESCE(tn.tennis_courts_within_25km, 0)::INTEGER AS tennis_courts_within_25km,
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM locations l
|
||||
LEFT JOIN country_income ci ON l.country_code = ci.country_code
|
||||
LEFT JOIN nearest_padel np ON l.geoname_id = np.geoname_id
|
||||
LEFT JOIN padel_local pl ON l.geoname_id = pl.geoname_id
|
||||
LEFT JOIN tennis_nearby tn ON l.geoname_id = tn.geoname_id
|
||||
-- Enforce grain: deduplicate if city slug collides within same country
|
||||
QUALIFY ROW_NUMBER() OVER (
|
||||
PARTITION BY l.country_code, l.geoname_id
|
||||
ORDER BY l.population DESC NULLS LAST
|
||||
) = 1
|
||||
@@ -1,11 +1,16 @@
|
||||
-- One Big Table: per-city padel market intelligence.
|
||||
-- Consumed by: SEO article generation, planner city-select pre-fill, API endpoints.
|
||||
--
|
||||
-- Market score v2 (0–100):
|
||||
-- 30 pts population — log-scaled to 1M+ city ceiling (was 40pts/500K)
|
||||
-- Padelnomics Marktreife-Score v2 (0–100):
|
||||
-- Answers "How mature/established is this padel market?"
|
||||
-- Only computed for cities with ≥1 padel venue (padel_venue_count > 0).
|
||||
-- For white-space opportunity scoring, see serving.location_opportunity_profile.
|
||||
--
|
||||
-- 30 pts population — log-scaled to 1M+ city ceiling
|
||||
-- 25 pts income PPS — normalised to 200 ceiling (covers CH/NO/LU outliers)
|
||||
-- 30 pts demand — observed occupancy if available, else venue density
|
||||
-- 15 pts data quality — completeness discount, not a market signal
|
||||
-- ×0.85 saturation — discount when venues_per_100k > 8 (oversupplied market)
|
||||
|
||||
MODEL (
|
||||
name serving.city_market_profile,
|
||||
@@ -73,7 +78,11 @@ scored AS (
|
||||
-- Data quality (15 pts): measures completeness, not market quality.
|
||||
-- Reduced from 20pts — kept as confidence discount, not market signal.
|
||||
+ 15.0 * data_confidence
|
||||
, 1) AS market_score
|
||||
, 1)
|
||||
-- Saturation discount: venues_per_100k > 8 signals oversupply.
|
||||
-- ~8/100K ≈ Spain-tier density; above this marginal return decreases.
|
||||
* CASE WHEN venues_per_100k > 8 THEN 0.85 ELSE 1.0 END
|
||||
AS market_score
|
||||
FROM base
|
||||
)
|
||||
SELECT
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
-- Per-location padel investment opportunity intelligence.
|
||||
-- Consumed by: Gemeinde-level pSEO pages, opportunity map, "top markets" lists.
|
||||
--
|
||||
-- Padelnomics Marktpotenzial-Score (0–100):
|
||||
-- Answers "Where should I build a padel court?"
|
||||
-- Covers ALL GeoNames locations (pop ≥ 1K) — NOT filtered to existing padel markets.
|
||||
-- Zero-court locations score highest on supply gap component (white space = opportunity).
|
||||
--
|
||||
-- 25 pts addressable market — log-scaled population, ceiling 500K
|
||||
-- (opportunity peaks in mid-size cities; megacities already served)
|
||||
-- 20 pts economic power — country income PPS, normalised to 200
|
||||
-- 30 pts supply gap — INVERTED venue density; 0 courts/100K = full marks
|
||||
-- 15 pts catchment gap — distance to nearest padel court (>30km = full marks)
|
||||
-- 10 pts sports culture — tennis courts within 25km (≥10 = full marks)
|
||||
|
||||
MODEL (
|
||||
name serving.location_opportunity_profile,
|
||||
kind FULL,
|
||||
cron '@daily',
|
||||
grain (country_code, geoname_id)
|
||||
);
|
||||
|
||||
SELECT
|
||||
l.geoname_id,
|
||||
l.country_code,
|
||||
l.country_name_en,
|
||||
l.country_slug,
|
||||
l.location_name,
|
||||
l.location_slug,
|
||||
l.lat,
|
||||
l.lon,
|
||||
l.admin1_code,
|
||||
l.admin2_code,
|
||||
l.population,
|
||||
l.population_year,
|
||||
l.median_income_pps,
|
||||
l.income_year,
|
||||
l.padel_venue_count,
|
||||
l.padel_venues_per_100k,
|
||||
l.nearest_padel_court_km,
|
||||
l.tennis_courts_within_25km,
|
||||
ROUND(
|
||||
-- Addressable market (25 pts): log-scaled to 500K ceiling.
|
||||
-- Lower ceiling than Marktreife (1M) — opportunity peaks in mid-size cities
|
||||
-- that can support a court but aren't already saturated by large-city operators.
|
||||
25.0 * LEAST(1.0, LN(GREATEST(l.population, 1)) / LN(500000))
|
||||
|
||||
-- Economic power (20 pts): country-level income PPS normalised to 200.
|
||||
-- Drives willingness-to-pay for court fees (€20-35/hr target range).
|
||||
+ 20.0 * LEAST(1.0, COALESCE(l.median_income_pps, 100) / 200.0)
|
||||
|
||||
-- Supply gap (30 pts): INVERTED venue density.
|
||||
-- 0 courts/100K = full 30 pts (white space); ≥4/100K = 0 pts (served market).
|
||||
-- This is the key signal that separates Marktpotenzial from Marktreife.
|
||||
+ 30.0 * GREATEST(0.0, 1.0 - COALESCE(l.padel_venues_per_100k, 0) / 4.0)
|
||||
|
||||
-- Catchment gap (15 pts): distance to nearest existing padel court.
|
||||
-- >30km = full 15 pts (underserved catchment area).
|
||||
-- NULL = no courts found anywhere (rare edge case) → neutral 0.5.
|
||||
+ 15.0 * COALESCE(LEAST(1.0, l.nearest_padel_court_km / 30.0), 0.5)
|
||||
|
||||
-- Sports culture proxy (10 pts): tennis courts within 25km.
|
||||
-- ≥10 courts = full 10 pts (proven racket sport market = faster padel adoption).
|
||||
-- 0 courts = 0 pts. Many new padel courts open inside existing tennis clubs.
|
||||
+ 10.0 * LEAST(1.0, l.tennis_courts_within_25km / 10.0)
|
||||
, 1) AS opportunity_score,
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM foundation.dim_locations l
|
||||
ORDER BY opportunity_score DESC
|
||||
Reference in New Issue
Block a user