merge: Opportunity Score v7 calibration fix

This commit is contained in:
Deeman
2026-03-09 18:12:47 +01:00
2 changed files with 34 additions and 11 deletions

View File

@@ -9,7 +9,7 @@
-- foundation.dim_countries → country_name_en, country_slug, median_income_pps -- foundation.dim_countries → country_name_en, country_slug, median_income_pps
-- stg_nuts2_boundaries + stg_regional_income → EU NUTS-2/NUTS-1 income (spatial join) -- stg_nuts2_boundaries + stg_regional_income → EU NUTS-2/NUTS-1 income (spatial join)
-- stg_income_usa → US state-level income (PPS-normalised) -- stg_income_usa → US state-level income (PPS-normalised)
-- stg_padel_courts → padel venue count + nearest court distance (km) -- foundation.dim_venues → padel venue count + nearest court distance (km)
-- stg_tennis_courts → tennis court count within 25km radius -- stg_tennis_courts → tennis court count within 25km radius
-- --
-- Income resolution cascade: -- Income resolution cascade:
@@ -137,10 +137,12 @@ us_income AS (
PARTITION BY m.admin1_code ORDER BY s.ref_year DESC PARTITION BY m.admin1_code ORDER BY s.ref_year DESC
) = 1 ) = 1
), ),
-- Padel court lat/lon for distance and density calculations -- Padel venue lat/lon for distance and density calculations.
-- Uses dim_venues (deduplicated OSM + Playtomic) instead of stg_padel_courts (OSM only)
-- so Playtomic-only venues are visible to spatial lookups.
padel_courts AS ( padel_courts AS (
SELECT lat, lon, country_code SELECT lat, lon, country_code
FROM staging.stg_padel_courts FROM foundation.dim_venues
WHERE lat IS NOT NULL AND lon IS NOT NULL WHERE lat IS NOT NULL AND lon IS NOT NULL
), ),
-- Nearest padel court distance per location (bbox pre-filter → exact sphere distance) -- Nearest padel court distance per location (bbox pre-filter → exact sphere distance)

View File

@@ -19,19 +19,20 @@
-- 10 pts economic context — income PPS normalised to 25,000 ceiling -- 10 pts economic context — income PPS normalised to 25,000 ceiling
-- 10 pts data quality — completeness discount -- 10 pts data quality — completeness discount
-- --
-- Padelnomics Opportunity Score (Marktpotenzial-Score v6, 0–100): -- Padelnomics Opportunity Score (Marktpotenzial-Score v7, 0–100):
-- "Where should I build a padel court?" -- "Where should I build a padel court?"
-- Computed for ALL locations — zero-court locations score highest on supply deficit. -- Computed for ALL locations — zero-court locations score highest on supply deficit.
-- H3 catchment methodology: addressable market and supply deficit use a regional -- H3 catchment methodology: addressable market and supply deficit use a regional
-- H3 catchment (res-5 cell + 6 neighbours, ~24km radius). -- H3 catchment (res-5 cell + 6 neighbours, ~24km radius).
-- --
-- v6 changes: lower density ceiling 8→5/100k (saturated markets hit zero-gap sooner), -- v7 changes: country-level supply saturation dampener on supply deficit.
-- increase supply deficit weight 35→40 pts, reduce addressable market 25→20 pts, -- Saturated countries (Spain 7.4/100k) get dampened supply deficit (×0.30 → 12 pts max).
-- invert market validation (high country maturity = LESS opportunity). -- Emerging markets (Germany 0.24/100k) are nearly unaffected (×0.98 → ~39 pts).
-- Floor at 0.3 so supply deficit never fully vanishes.
-- --
-- 20 pts addressable market — log-scaled catchment population, ceiling 500K -- 20 pts addressable market — log-scaled catchment population, ceiling 500K
-- 15 pts economic power — income PPS, normalised to 35,000 -- 15 pts economic power — income PPS, normalised to 35,000
-- 40 pts supply deficit — max(density gap, distance gap); eliminates double-count -- 40 pts supply deficit — max(density gap, distance gap) × country dampener
-- 10 pts sports culture — tennis court density as racquet-sport adoption proxy -- 10 pts sports culture — tennis court density as racquet-sport adoption proxy
-- 5 pts construction affordability — income relative to construction costs (PLI) -- 5 pts construction affordability — income relative to construction costs (PLI)
-- 10 pts market headroom — inverse country-level avg market maturity -- 10 pts market headroom — inverse country-level avg market maturity
@@ -209,17 +210,34 @@ country_market AS (
WHERE market_score > 0 WHERE market_score > 0
GROUP BY country_code GROUP BY country_code
), ),
-- Step 3: add opportunity_score using country market validation signal. -- Step 3: country-level supply saturation — venues per 100K at the country level.
-- Used to dampen supply deficit in saturated markets (Spain, Sweden).
country_supply AS (
    -- One row per country_code: summed venue count and summed population over
    -- the cities in foundation.dim_cities that report a positive population,
    -- plus the resulting venue density per 100,000 inhabitants.
    SELECT
        totals.country_code,
        totals.country_venues,
        totals.country_pop,
        -- Density is venues * 100000.0 / population; the guard yields 0 instead
        -- of dividing when the summed population is not positive.
        CASE WHEN totals.country_pop > 0
            THEN totals.country_venues * 100000.0 / totals.country_pop
            ELSE 0
        END AS venues_per_100k
    FROM (
        -- Aggregate once so the two sums are not repeated in the ratio above.
        SELECT
            country_code,
            SUM(city_padel_venue_count) AS country_venues,
            SUM(population) AS country_pop
        FROM foundation.dim_cities
        WHERE population > 0
        GROUP BY country_code
    ) AS totals
),
-- Step 4: add opportunity_score using country market validation + supply saturation.
scored AS ( scored AS (
SELECT ms.*, SELECT ms.*,
-- ── Opportunity Score (Marktpotenzial-Score v6, H3 catchment) ────────── -- ── Opportunity Score (Marktpotenzial-Score v7, H3 catchment) ──────────
ROUND( ROUND(
-- Addressable market (20 pts): log-scaled catchment population, ceiling 500K -- Addressable market (20 pts): log-scaled catchment population, ceiling 500K
20.0 * LEAST(1.0, LN(GREATEST(catchment_population, 1)) / LN(500000)) 20.0 * LEAST(1.0, LN(GREATEST(catchment_population, 1)) / LN(500000))
-- Economic power (15 pts): income PPS normalised to 35,000 -- Economic power (15 pts): income PPS normalised to 35,000
+ 15.0 * LEAST(1.0, COALESCE(median_income_pps, 15000) / 35000.0) + 15.0 * LEAST(1.0, COALESCE(median_income_pps, 15000) / 35000.0)
-- Supply deficit (40 pts): max of density gap and distance gap. -- Supply deficit (40 pts): max of density gap and distance gap.
-- Ceiling 5/100k (down from 8): Spain at 6-16/100k now hits zero-gap. -- Dampened by country-level supply saturation:
-- Spain (7.4/100k) → dampener 0.30 → 12 pts max
-- Germany (0.24/100k) → dampener 0.98 → ~39 pts max
+ 40.0 * GREATEST( + 40.0 * GREATEST(
-- density-based gap (H3 catchment): 0 courts = 1.0, 5/100k = 0.0 -- density-based gap (H3 catchment): 0 courts = 1.0, 5/100k = 0.0
GREATEST(0.0, 1.0 - COALESCE( GREATEST(0.0, 1.0 - COALESCE(
@@ -230,6 +248,8 @@ scored AS (
-- distance-based gap: 30km+ = 1.0, 0km = 0.0; NULL = 0.5 -- distance-based gap: 30km+ = 1.0, 0km = 0.0; NULL = 0.5
COALESCE(LEAST(1.0, nearest_padel_court_km / 30.0), 0.5) COALESCE(LEAST(1.0, nearest_padel_court_km / 30.0), 0.5)
) )
-- Country supply dampener: floor 0.3 so deficit never fully vanishes
* GREATEST(0.3, 1.0 - COALESCE(cs.venues_per_100k, 0.0) / 10.0)
-- Sports culture (10 pts): tennis density as racquet-sport adoption proxy. -- Sports culture (10 pts): tennis density as racquet-sport adoption proxy.
-- Ceiling 50 courts within 25km. Harmless when tennis data is zero (contributes 0). -- Ceiling 50 courts within 25km. Harmless when tennis data is zero (contributes 0).
+ 10.0 * LEAST(1.0, COALESCE(tennis_courts_within_25km, 0) / 50.0) + 10.0 * LEAST(1.0, COALESCE(tennis_courts_within_25km, 0) / 50.0)
@@ -247,6 +267,7 @@ scored AS (
, 1) AS opportunity_score , 1) AS opportunity_score
FROM market_scored ms FROM market_scored ms
LEFT JOIN country_market cm ON ms.country_code = cm.country_code LEFT JOIN country_market cm ON ms.country_code = cm.country_code
LEFT JOIN country_supply cs ON ms.country_code = cs.country_code
) )
SELECT SELECT
s.geoname_id, s.geoname_id,