feat(scoring): Opportunity Score v4 → v5 — fix correlated components

- Merge supply gap (30pts) + catchment gap (15pts) → supply deficit (35pts, GREATEST)
  Eliminates double-counting: the two components were ~80% correlated, i.e. effectively one signal scored twice.
- Add sports culture signal (10pts): tennis court density as racquet-sport adoption proxy.
  Ceiling 50 courts/25km. Harmless when tennis data is zero (contributes 0).
- Add construction affordability (5pts): income relative to PLI construction costs.
  Joins foundation.dim_countries to read pli_construction (price level index, EU27 = 100). High income + low build cost = high score.
- Reduce economic power from 20 → 15pts to make room.

New weights: addressable market 25, economic power 15, supply deficit 35,
sports culture 10, construction affordability 5, market validation 10.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-03-08 15:30:04 +01:00
parent cd6d950233
commit 118c2c0fc7

View File

@@ -19,19 +19,22 @@
-- 10 pts economic context — income PPS normalised to 25,000 ceiling -- 10 pts economic context — income PPS normalised to 25,000 ceiling
-- 10 pts data quality — completeness discount -- 10 pts data quality — completeness discount
-- --
-- Padelnomics Opportunity Score (Marktpotenzial-Score v4, 0–100): -- Padelnomics Opportunity Score (Marktpotenzial-Score v5, 0–100):
-- "Where should I build a padel court?" -- "Where should I build a padel court?"
-- Computed for ALL locations — zero-court locations score highest on supply gap. -- Computed for ALL locations — zero-court locations score highest on supply deficit.
-- H3 catchment methodology: addressable market and supply gap use a regional -- H3 catchment methodology: addressable market and supply deficit use a regional
-- H3 catchment (res-5 cell + 6 neighbours, ~24km radius). -- H3 catchment (res-5 cell + 6 neighbours, ~24km radius).
-- --
-- 25 pts addressable market — log-scaled catchment population, ceiling 500K -- v5 changes: merge supply gap + catchment gap → single supply deficit (35 pts),
-- 20 pts economic power — income PPS, normalised to 35,000 -- add sports culture proxy (10 pts, tennis density), add construction affordability (5 pts),
-- 30 pts supply gap — inverted catchment venue density; 0 courts = full marks -- reduce economic power from 20 → 15 pts.
-- 15 pts catchment gap — distance to nearest padel court --
-- 10 pts market validation — country-level avg market maturity (from market_scored CTE). -- 25 pts addressable market — log-scaled catchment population, ceiling 500K
-- Replaces sports culture proxy (v3: tennis data was all zeros). -- 15 pts economic power — income PPS, normalised to 35,000
-- ES (~60/100) → ~6 pts, SE (~35/100) → ~3.5 pts, unknown → 5 pts. -- 35 pts supply deficit — max(density gap, distance gap); eliminates double-count
-- 10 pts sports culture — tennis court density as racquet-sport adoption proxy
-- 5 pts construction affordability — income relative to construction costs (PLI)
-- 10 pts market validation — country-level avg market maturity (from market_scored CTE)
-- --
-- Consumers query directly with WHERE filters: -- Consumers query directly with WHERE filters:
-- cities API: WHERE country_slug = ? AND city_slug IS NOT NULL -- cities API: WHERE country_slug = ? AND city_slug IS NOT NULL
@@ -110,7 +113,7 @@ city_match AS (
ORDER BY c.padel_venue_count DESC ORDER BY c.padel_venue_count DESC
) = 1 ) = 1
), ),
-- Pricing / occupancy from Playtomic (via city_slug) + H3 catchment -- Pricing / occupancy from Playtomic (via city_slug) + H3 catchment + country PLI
with_pricing AS ( with_pricing AS (
SELECT SELECT
b.*, b.*,
@@ -123,6 +126,7 @@ with_pricing AS (
vpb.median_occupancy_rate, vpb.median_occupancy_rate,
vpb.median_daily_revenue_per_venue, vpb.median_daily_revenue_per_venue,
vpb.price_currency, vpb.price_currency,
dc.pli_construction,
COALESCE(ct.catchment_population, b.population)::BIGINT AS catchment_population, COALESCE(ct.catchment_population, b.population)::BIGINT AS catchment_population,
COALESCE(ct.catchment_padel_courts, b.padel_venue_count)::INTEGER AS catchment_padel_courts COALESCE(ct.catchment_padel_courts, b.padel_venue_count)::INTEGER AS catchment_padel_courts
FROM base b FROM base b
@@ -134,6 +138,8 @@ with_pricing AS (
AND cm.city_slug = vpb.city_slug AND cm.city_slug = vpb.city_slug
LEFT JOIN catchment ct LEFT JOIN catchment ct
ON b.geoname_id = ct.geoname_id ON b.geoname_id = ct.geoname_id
LEFT JOIN foundation.dim_countries dc
ON b.country_code = dc.country_code
), ),
-- Step 1: market score only — needed first so we can aggregate country averages. -- Step 1: market score only — needed first so we can aggregate country averages.
market_scored AS ( market_scored AS (
@@ -206,23 +212,35 @@ country_market AS (
-- Step 3: add opportunity_score using country market validation signal. -- Step 3: add opportunity_score using country market validation signal.
scored AS ( scored AS (
SELECT ms.*, SELECT ms.*,
-- ── Opportunity Score (Marktpotenzial-Score v4, H3 catchment) ────────── -- ── Opportunity Score (Marktpotenzial-Score v5, H3 catchment) ──────────
ROUND( ROUND(
-- Addressable market (25 pts): log-scaled catchment population, ceiling 500K -- Addressable market (25 pts): log-scaled catchment population, ceiling 500K
25.0 * LEAST(1.0, LN(GREATEST(catchment_population, 1)) / LN(500000)) 25.0 * LEAST(1.0, LN(GREATEST(catchment_population, 1)) / LN(500000))
-- Economic power (20 pts): income PPS normalised to 35,000 -- Economic power (15 pts): income PPS normalised to 35,000
+ 20.0 * LEAST(1.0, COALESCE(median_income_pps, 15000) / 35000.0) + 15.0 * LEAST(1.0, COALESCE(median_income_pps, 15000) / 35000.0)
-- Supply gap (30 pts): inverted catchment venue density -- Supply deficit (35 pts): max of density gap and distance gap.
+ 30.0 * GREATEST(0.0, 1.0 - COALESCE( -- Merges old supply gap (30) + catchment gap (15) which were ~80% correlated.
CASE WHEN catchment_population > 0 + 35.0 * GREATEST(
THEN GREATEST(catchment_padel_courts, COALESCE(city_padel_venue_count, 0))::DOUBLE / catchment_population * 100000 -- density-based gap (H3 catchment): 0 courts = 1.0, 8/100k = 0.0
ELSE 0.0 GREATEST(0.0, 1.0 - COALESCE(
END, 0.0) / 8.0) CASE WHEN catchment_population > 0
-- Catchment gap (15 pts): distance to nearest court THEN GREATEST(catchment_padel_courts, COALESCE(city_padel_venue_count, 0))::DOUBLE / catchment_population * 100000
+ 15.0 * COALESCE(LEAST(1.0, nearest_padel_court_km / 30.0), 0.5) ELSE 0.0
END, 0.0) / 8.0),
-- distance-based gap: 30km+ = 1.0, 0km = 0.0; NULL = 0.5
COALESCE(LEAST(1.0, nearest_padel_court_km / 30.0), 0.5)
)
-- Sports culture (10 pts): tennis density as racquet-sport adoption proxy.
-- Ceiling 50 courts within 25km. Harmless when tennis data is zero (contributes 0).
+ 10.0 * LEAST(1.0, COALESCE(tennis_courts_within_25km, 0) / 50.0)
-- Construction affordability (5 pts): income purchasing power relative to build costs.
-- PLI construction is EU27=100 index. High income + low construction cost = high score.
+ 5.0 * LEAST(1.0,
COALESCE(median_income_pps, 15000) / 35000.0
/ GREATEST(0.5, COALESCE(pli_construction, 100.0) / 100.0)
)
-- Market validation (10 pts): country-level avg market maturity. -- Market validation (10 pts): country-level avg market maturity.
-- Replaces sports culture (v3 tennis data was all zeros = dead code). -- ES (~70/100): proven demand → ~7 pts. SE (~35/100): emerging → ~3.5 pts.
-- ES (~60/100): proven demand → ~6 pts. SE (~35/100): struggling → ~3.5 pts.
-- NULL (no courts in country yet): 0.5 neutral → 5 pts (untested, not penalised). -- NULL (no courts in country yet): 0.5 neutral → 5 pts (untested, not penalised).
+ 10.0 * COALESCE(cm.country_avg_market_score / 100.0, 0.5) + 10.0 * COALESCE(cm.country_avg_market_score / 100.0, 0.5)
, 1) AS opportunity_score , 1) AS opportunity_score