From 118c2c0fc759c87887f69d56f40c7434602a7b95 Mon Sep 17 00:00:00 2001 From: Deeman Date: Sun, 8 Mar 2026 15:30:04 +0100 Subject: [PATCH] =?UTF-8?q?feat(scoring):=20Opportunity=20Score=20v4=20?= =?UTF-8?q?=E2=86=92=20v5=20=E2=80=94=20fix=20correlated=20components?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Merge supply gap (30pts) + catchment gap (15pts) → supply deficit (35pts, GREATEST) Eliminates ~80% correlated double-count on a single signal. - Add sports culture signal (10pts): tennis court density as racquet-sport adoption proxy. Ceiling 50 courts/25km. Harmless when tennis data is zero (contributes 0). - Add construction affordability (5pts): income relative to PLI construction costs. Joins dim_countries.pli_construction. High income + low build cost = high score. - Reduce economic power from 20 → 15pts to make room. New weights: addressable market 25, economic power 15, supply deficit 35, sports culture 10, construction affordability 5, market validation 10. Co-Authored-By: Claude Opus 4.6 --- .../models/serving/location_profiles.sql | 66 ++++++++++++------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql b/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql index 0fd5b9a..5d5f36e 100644 --- a/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql +++ b/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql @@ -19,19 +19,22 @@ -- 10 pts economic context — income PPS normalised to 25,000 ceiling -- 10 pts data quality — completeness discount -- --- Padelnomics Opportunity Score (Marktpotenzial-Score v4, 0–100): +-- Padelnomics Opportunity Score (Marktpotenzial-Score v5, 0–100): -- "Where should I build a padel court?" --- Computed for ALL locations — zero-court locations score highest on supply gap. --- H3 catchment methodology: addressable market and supply gap use a regional +-- Computed for ALL locations — zero-court locations score highest on supply deficit. +-- H3 catchment methodology: addressable market and supply deficit use a regional -- H3 catchment (res-5 cell + 6 neighbours, ~24km radius). -- --- 25 pts addressable market — log-scaled catchment population, ceiling 500K --- 20 pts economic power — income PPS, normalised to 35,000 --- 30 pts supply gap — inverted catchment venue density; 0 courts = full marks --- 15 pts catchment gap — distance to nearest padel court --- 10 pts market validation — country-level avg market maturity (from market_scored CTE). --- Replaces sports culture proxy (v3: tennis data was all zeros). --- ES (~60/100) → ~6 pts, SE (~35/100) → ~3.5 pts, unknown → 5 pts. +-- v5 changes: merge supply gap + catchment gap → single supply deficit (35 pts), +-- add sports culture proxy (10 pts, tennis density), add construction affordability (5 pts), +-- reduce economic power from 20 → 15 pts. +-- +-- 25 pts addressable market — log-scaled catchment population, ceiling 500K +-- 15 pts economic power — income PPS, normalised to 35,000 +-- 35 pts supply deficit — max(density gap, distance gap); eliminates double-count +-- 10 pts sports culture — tennis court density as racquet-sport adoption proxy +-- 5 pts construction affordability — income relative to construction costs (PLI) +-- 10 pts market validation — country-level avg market maturity (from market_scored CTE) -- -- Consumers query directly with WHERE filters: -- cities API: WHERE country_slug = ? AND city_slug IS NOT NULL @@ -110,7 +113,7 @@ city_match AS ( ORDER BY c.padel_venue_count DESC ) = 1 ), --- Pricing / occupancy from Playtomic (via city_slug) + H3 catchment +-- Pricing / occupancy from Playtomic (via city_slug) + H3 catchment + country PLI with_pricing AS ( SELECT b.*, @@ -123,6 +126,7 @@ with_pricing AS ( vpb.median_occupancy_rate, vpb.median_daily_revenue_per_venue, vpb.price_currency, + dc.pli_construction, COALESCE(ct.catchment_population, b.population)::BIGINT AS catchment_population, COALESCE(ct.catchment_padel_courts, b.padel_venue_count)::INTEGER AS catchment_padel_courts FROM base b @@ -134,6 +138,8 @@ with_pricing AS ( AND cm.city_slug = vpb.city_slug LEFT JOIN catchment ct ON b.geoname_id = ct.geoname_id + LEFT JOIN foundation.dim_countries dc + ON b.country_code = dc.country_code ), -- Step 1: market score only — needed first so we can aggregate country averages. market_scored AS ( @@ -206,23 +212,35 @@ country_market AS ( -- Step 3: add opportunity_score using country market validation signal. scored AS ( SELECT ms.*, - -- ── Opportunity Score (Marktpotenzial-Score v4, H3 catchment) ────────── + -- ── Opportunity Score (Marktpotenzial-Score v5, H3 catchment) ────────── ROUND( -- Addressable market (25 pts): log-scaled catchment population, ceiling 500K 25.0 * LEAST(1.0, LN(GREATEST(catchment_population, 1)) / LN(500000)) - -- Economic power (20 pts): income PPS normalised to 35,000 - + 20.0 * LEAST(1.0, COALESCE(median_income_pps, 15000) / 35000.0) - -- Supply gap (30 pts): inverted catchment venue density - + 30.0 * GREATEST(0.0, 1.0 - COALESCE( - CASE WHEN catchment_population > 0 - THEN GREATEST(catchment_padel_courts, COALESCE(city_padel_venue_count, 0))::DOUBLE / catchment_population * 100000 - ELSE 0.0 - END, 0.0) / 8.0) - -- Catchment gap (15 pts): distance to nearest court - + 15.0 * COALESCE(LEAST(1.0, nearest_padel_court_km / 30.0), 0.5) + -- Economic power (15 pts): income PPS normalised to 35,000 + + 15.0 * LEAST(1.0, COALESCE(median_income_pps, 15000) / 35000.0) + -- Supply deficit (35 pts): max of density gap and distance gap. + -- Merges old supply gap (30) + catchment gap (15) which were ~80% correlated. + + 35.0 * GREATEST( + -- density-based gap (H3 catchment): 0 courts = 1.0, 8/100k = 0.0 + GREATEST(0.0, 1.0 - COALESCE( + CASE WHEN catchment_population > 0 + THEN GREATEST(catchment_padel_courts, COALESCE(city_padel_venue_count, 0))::DOUBLE / catchment_population * 100000 + ELSE 0.0 + END, 0.0) / 8.0), + -- distance-based gap: 30km+ = 1.0, 0km = 0.0; NULL = 0.5 + COALESCE(LEAST(1.0, nearest_padel_court_km / 30.0), 0.5) + ) + -- Sports culture (10 pts): tennis density as racquet-sport adoption proxy. + -- Ceiling 50 courts within 25km. Harmless when tennis data is zero (contributes 0). + + 10.0 * LEAST(1.0, COALESCE(tennis_courts_within_25km, 0) / 50.0) + -- Construction affordability (5 pts): income purchasing power relative to build costs. + -- PLI construction is EU27=100 index. High income + low construction cost = high score. + + 5.0 * LEAST(1.0, + COALESCE(median_income_pps, 15000) / 35000.0 + / GREATEST(0.5, COALESCE(pli_construction, 100.0) / 100.0) + ) -- Market validation (10 pts): country-level avg market maturity. - -- Replaces sports culture (v3 tennis data was all zeros = dead code). - -- ES (~60/100): proven demand → ~6 pts. SE (~35/100): struggling → ~3.5 pts. + -- ES (~70/100): proven demand → ~7 pts. SE (~35/100): emerging → ~3.5 pts. -- NULL (no courts in country yet): 0.5 neutral → 5 pts (untested, not penalised). + 10.0 * COALESCE(cm.country_avg_market_score / 100.0, 0.5) , 1) AS opportunity_score