diff --git a/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql b/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql index e91c97d..c35f767 100644 --- a/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql +++ b/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql @@ -16,7 +16,7 @@ -- 10 pts economic context — income PPS normalised to 200 ceiling -- 10 pts data quality — completeness discount -- --- Padelnomics Opportunity Score (Marktpotenzial-Score v3, 0–100): +-- Padelnomics Opportunity Score (Marktpotenzial-Score v4, 0–100): -- "Where should I build a padel court?" -- Computed for ALL locations — zero-court locations score highest on supply gap. -- H3 catchment methodology: addressable market and supply gap use a regional @@ -26,7 +26,9 @@ -- 20 pts economic power — income PPS, normalised to 35,000 -- 30 pts supply gap — inverted catchment venue density; 0 courts = full marks -- 15 pts catchment gap — distance to nearest padel court --- 10 pts sports culture — tennis courts within 25km +-- 10 pts market validation — country-level avg market maturity (mean market_score over locations with market_score > 0, via country_market CTE). +-- Replaces sports culture proxy (v3: tennis data was all zeros). +-- ES (~60/100) → ~6 pts, SE (~35/100) → ~3.5 pts, country with no such locations → neutral 5 pts. -- -- Consumers query directly with WHERE filters: -- cities API: WHERE country_slug = ? AND city_slug IS NOT NULL @@ -130,8 +132,8 @@ with_pricing AS ( LEFT JOIN catchment ct ON b.geoname_id = ct.geoname_id ), --- Both scores computed from the enriched base -scored AS ( +-- Step 1: market score only — needed first so we can aggregate country averages. 
+market_scored AS ( SELECT *, -- City-level venue density (from dim_cities exact count, not dim_locations spatial 5km) CASE WHEN population > 0 @@ -180,8 +182,24 @@ scored AS ( END , 1) ELSE 0 - END AS market_score, - -- ── Opportunity Score (Marktpotenzial-Score v3, H3 catchment) ────────── + END AS market_score + FROM with_pricing +), +-- Step 2: country-level avg market maturity — used as market validation signal (10 pts). +-- Filter to market_score > 0 (cities with padel courts only) so zero-court locations +-- don't dilute the country signal. ES proven demand → ~60, SE struggling → ~35. Unweighted mean: every location with market_score > 0 counts equally, regardless of population; countries with no such location produce no row here. +country_market AS ( + SELECT + country_code, + ROUND(AVG(market_score), 1) AS country_avg_market_score + FROM market_scored + WHERE market_score > 0 + GROUP BY country_code +), +-- Step 3: add opportunity_score using country market validation signal (one country_market row per country_code, so the join does not multiply location rows). +scored AS ( + SELECT ms.*, + -- ── Opportunity Score (Marktpotenzial-Score v4, H3 catchment) ────────── ROUND( -- Addressable market (25 pts): log-scaled catchment population, ceiling 500K 25.0 * LEAST(1.0, LN(GREATEST(catchment_population, 1)) / LN(500000)) @@ -195,10 +213,14 @@ scored AS ( END, 0.0) / 8.0) -- Catchment gap (15 pts): distance to nearest court + 15.0 * COALESCE(LEAST(1.0, nearest_padel_court_km / 30.0), 0.5) - -- Sports culture (10 pts): tennis courts within 25km - + 10.0 * LEAST(1.0, tennis_courts_within_25km / 10.0) + -- Market validation (10 pts): country-level avg market maturity. + -- Replaces sports culture (v3 tennis data was all zeros = dead code). + -- ES (~60/100): proven demand → ~6 pts. SE (~35/100): struggling → ~3.5 pts. + -- NULL (no location with market_score > 0 in the country, i.e. no courts yet): 0.5 neutral → 5 pts (untested, not penalised). 
+ + 10.0 * COALESCE(cm.country_avg_market_score / 100.0, 0.5) , 1) AS opportunity_score - FROM with_pricing + FROM market_scored ms + LEFT JOIN country_market cm ON ms.country_code = cm.country_code ) SELECT s.geoname_id, diff --git a/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql b/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql index fcff9bd..a876295 100644 --- a/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql +++ b/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql @@ -18,13 +18,14 @@ SELECT country_slug, COUNT(*) AS city_count, SUM(padel_venue_count) AS total_venues, - ROUND(AVG(market_score), 1) AS avg_market_score, + -- Population-weighted: large cities (Madrid, Barcelona) dominate, not hundreds of small towns. Denominator sums population only over rows with a non-NULL market_score, so unscored cities do not drag the mean down. + ROUND(SUM(market_score * population) / NULLIF(SUM(population) FILTER (WHERE market_score IS NOT NULL), 0), 1) AS avg_market_score, MAX(market_score) AS top_city_market_score, -- Top 5 cities by venue count (prominence), then score for internal linking LIST(city_slug ORDER BY padel_venue_count DESC, market_score DESC NULLS LAST)[1:5] AS top_city_slugs, LIST(city_name ORDER BY padel_venue_count DESC, market_score DESC NULLS LAST)[1:5] AS top_city_names, - -- Opportunity score aggregates (NULL-safe: cities without geoname_id match excluded from AVG) - ROUND(AVG(opportunity_score), 1) AS avg_opportunity_score, + -- Opportunity score aggregates (population-weighted: saturated megacities dominate, not hundreds of small towns; NULL-safe: cities with a NULL opportunity_score are excluded from both numerator and denominator) + ROUND(SUM(opportunity_score * population) / NULLIF(SUM(population) FILTER (WHERE opportunity_score IS NOT NULL), 0), 1) AS avg_opportunity_score, MAX(opportunity_score) AS top_opportunity_score, -- Top 5 opportunity cities by population (prominence), then opportunity score LIST(city_slug ORDER BY population DESC, opportunity_score DESC NULLS LAST)[1:5] AS top_opportunity_slugs,