From e39dd4ec0ba84dc3413d8a94ef753f11ef36757a Mon Sep 17 00:00:00 2001 From: Deeman Date: Mon, 9 Mar 2026 18:03:14 +0100 Subject: [PATCH] =?UTF-8?q?fix(score):=20Opportunity=20Score=20v7=20?= =?UTF-8?q?=E2=80=94=20calibration=20fix=20for=20saturated=20markets?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes: 1. dim_locations now sources venues from dim_venues (deduplicated OSM + Playtomic) instead of stg_padel_courts (OSM only). Playtomic-only venues are no longer invisible to spatial lookups. 2. Country-level supply saturation dampener on supply deficit component. Saturated countries (Spain 7.4/100k) get dampened supply deficit (x0.30 → 12 pts max). Emerging markets (Germany 0.24/100k) nearly unaffected (x0.98 → ~39 pts). Co-Authored-By: Claude Opus 4.6 --- .../models/foundation/dim_locations.sql | 8 ++-- .../models/serving/location_profiles.sql | 37 +++++++++++++++---- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql index 98d0fd0..28978f1 100644 --- a/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql +++ b/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql @@ -9,7 +9,7 @@ -- foundation.dim_countries → country_name_en, country_slug, median_income_pps -- stg_nuts2_boundaries + stg_regional_income → EU NUTS-2/NUTS-1 income (spatial join) -- stg_income_usa → US state-level income (PPS-normalised) --- stg_padel_courts → padel venue count + nearest court distance (km) +-- foundation.dim_venues → padel venue count + nearest court distance (km) -- stg_tennis_courts → tennis court count within 25km radius -- -- Income resolution cascade: @@ -137,10 +137,12 @@ us_income AS ( PARTITION BY m.admin1_code ORDER BY s.ref_year DESC ) = 1 ), --- Padel court lat/lon for distance and density calculations +-- Padel venue lat/lon for distance and density 
calculations. +-- Uses dim_venues (deduplicated OSM + Playtomic) instead of stg_padel_courts (OSM only) +-- so Playtomic-only venues are visible to spatial lookups. padel_courts AS ( SELECT lat, lon, country_code - FROM staging.stg_padel_courts + FROM foundation.dim_venues WHERE lat IS NOT NULL AND lon IS NOT NULL ), -- Nearest padel court distance per location (bbox pre-filter → exact sphere distance) diff --git a/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql b/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql index d7645bd..84995ec 100644 --- a/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql +++ b/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql @@ -19,19 +19,20 @@ -- 10 pts economic context — income PPS normalised to 25,000 ceiling -- 10 pts data quality — completeness discount -- --- Padelnomics Opportunity Score (Marktpotenzial-Score v6, 0–100): +-- Padelnomics Opportunity Score (Marktpotenzial-Score v7, 0–100): -- "Where should I build a padel court?" -- Computed for ALL locations — zero-court locations score highest on supply deficit. -- H3 catchment methodology: addressable market and supply deficit use a regional -- H3 catchment (res-5 cell + 6 neighbours, ~24km radius). -- --- v6 changes: lower density ceiling 8→5/100k (saturated markets hit zero-gap sooner), --- increase supply deficit weight 35→40 pts, reduce addressable market 25→20 pts, --- invert market validation (high country maturity = LESS opportunity). +-- v7 changes: country-level supply saturation dampener on supply deficit. +-- Saturated countries (Spain 7.4/100k) get dampened supply deficit (×0.30 → 12 pts max). +-- Emerging markets (Germany 0.24/100k) are nearly unaffected (×0.98 → ~39 pts). +-- Floor at 0.3 so supply deficit never fully vanishes. 
-- -- 20 pts addressable market — log-scaled catchment population, ceiling 500K -- 15 pts economic power — income PPS, normalised to 35,000 --- 40 pts supply deficit — max(density gap, distance gap); eliminates double-count +-- 40 pts supply deficit — max(density gap, distance gap) × country dampener -- 10 pts sports culture — tennis court density as racquet-sport adoption proxy -- 5 pts construction affordability — income relative to construction costs (PLI) -- 10 pts market headroom — inverse country-level avg market maturity @@ -209,17 +210,34 @@ country_market AS ( WHERE market_score > 0 GROUP BY country_code ), --- Step 3: add opportunity_score using country market validation signal. +-- Step 3: country-level supply saturation — padel venues per 100K population, summed over dim_cities rows with population > 0. +-- Used to dampen supply deficit in saturated markets (Spain, Sweden). +country_supply AS ( + SELECT + country_code, + SUM(city_padel_venue_count) AS country_venues, + SUM(population) AS country_pop, + CASE WHEN SUM(population) > 0 + THEN SUM(city_padel_venue_count) * 100000.0 / SUM(population) + ELSE 0 + END AS venues_per_100k + FROM foundation.dim_cities + WHERE population > 0 + GROUP BY country_code +), +-- Step 4: add opportunity_score using country market validation + supply saturation. scored AS ( SELECT ms.*, - -- ── Opportunity Score (Marktpotenzial-Score v6, H3 catchment) ────────── + -- ── Opportunity Score (Marktpotenzial-Score v7, H3 catchment) ────────── ROUND( -- Addressable market (20 pts): log-scaled catchment population, ceiling 500K 20.0 * LEAST(1.0, LN(GREATEST(catchment_population, 1)) / LN(500000)) -- Economic power (15 pts): income PPS normalised to 35,000 + 15.0 * LEAST(1.0, COALESCE(median_income_pps, 15000) / 35000.0) -- Supply deficit (40 pts): max of density gap and distance gap. - -- Ceiling 5/100k (down from 8): Spain at 6-16/100k now hits zero-gap. 
+ -- Dampened by country-level supply saturation: + -- Spain (7.4/100k) → dampener 0.30 → 12 pts max + -- Germany (0.24/100k) → dampener 0.98 → ~39 pts max + 40.0 * GREATEST( -- density-based gap (H3 catchment): 0 courts = 1.0, 5/100k = 0.0 GREATEST(0.0, 1.0 - COALESCE( @@ -230,6 +248,8 @@ scored AS ( -- distance-based gap: 30km+ = 1.0, 0km = 0.0; NULL = 0.5 COALESCE(LEAST(1.0, nearest_padel_court_km / 30.0), 0.5) ) + -- Country supply dampener: 1.0 - venues_per_100k / 10, floored at 0.3 (reached at 7/100k) so deficit never fully vanishes + * GREATEST(0.3, 1.0 - COALESCE(cs.venues_per_100k, 0.0) / 10.0) -- Sports culture (10 pts): tennis density as racquet-sport adoption proxy. -- Ceiling 50 courts within 25km. Harmless when tennis data is zero (contributes 0). + 10.0 * LEAST(1.0, COALESCE(tennis_courts_within_25km, 0) / 50.0) @@ -247,6 +267,7 @@ scored AS ( , 1) AS opportunity_score FROM market_scored ms LEFT JOIN country_market cm ON ms.country_code = cm.country_code + LEFT JOIN country_supply cs ON ms.country_code = cs.country_code ) SELECT s.geoname_id,