diff --git a/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql b/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql index bca33af..2d4443d 100644 --- a/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql +++ b/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql @@ -1,7 +1,7 @@ -- Per-location padel investment opportunity intelligence. -- Consumed by: Gemeinde-level pSEO pages, opportunity map, "top markets" lists. -- --- Padelnomics Marktpotenzial-Score v3 (0–100): +-- Padelnomics Marktpotenzial-Score v4 (0–100): -- Answers "Where should I build a padel court?" -- Covers ALL GeoNames locations (pop ≥ 1K) — NOT filtered to existing padel markets. -- Zero-court locations score highest on supply gap component (white space = opportunity). @@ -30,9 +30,10 @@ -- DuckDB LEAST ignores NULLs: LEAST(1.0, NULL/30) = 1.0, -- so NULL nearest_km = full marks (no court in bounding box -- = high opportunity). COALESCE fallback is dead code. --- 10 pts sports culture — tennis courts within 25km (≥10 = full marks). --- NOTE: dim_locations tennis data is empty (all 0 rows). --- Component contributes 0 pts everywhere until data lands. +-- 10 pts market validation — country-level avg market maturity score (from city_market_profile). +-- Replaces sports culture proxy (v3: tennis data was all zeros). +-- ES (~60/100) → 6 pts, SE (~35/100) → 3.5 pts, unknown → 5 pts (neutral). +-- Distinguishes proven demand (ES white-space) from failing markets (SE). MODEL ( name serving.location_opportunity_profile, @@ -53,6 +54,16 @@ hex_stats AS ( FROM foundation.dim_locations GROUP BY h3_cell_res5 ), +-- Country-level average market maturity — used as market validation signal (10 pts). +-- Replaces sports culture proxy (tennis data was all zeros). +-- Proven demand markets (ES ~60) reward white-space towns more than failing markets (SE ~35). 
+country_market AS ( + SELECT + country_code, + ROUND(AVG(market_score), 1) AS country_avg_market_score + FROM serving.city_market_profile + GROUP BY country_code +), -- For each location, sum hex_stats across the cell + 6 neighbours (k_ring=1). -- Effective catchment: ~24km radius — realistic driving distance. catchment AS ( @@ -124,12 +135,14 @@ SELECT -- NULL = no courts found anywhere (rare edge case) → neutral 0.5. + 15.0 * COALESCE(LEAST(1.0, l.nearest_padel_court_km / 30.0), 0.5) - -- Sports culture proxy (10 pts): tennis courts within 25km. - -- ≥10 courts = full 10 pts (proven racket sport market = faster padel adoption). - -- 0 courts = 0 pts. Many new padel courts open inside existing tennis clubs. - + 10.0 * LEAST(1.0, l.tennis_courts_within_25km / 10.0) + -- Market validation (10 pts): country-level avg market maturity (from city_market_profile). + -- Replaces sports culture proxy (v3 tennis data was all zeros = dead code). + -- ES (~60/100): proven demand → ~6 pts. SE (~35/100): struggling → ~3.5 pts. + -- NULL (country has no scored rows in city_market_profile, so the LEFT JOIN yields no average): COALESCE falls back to 0.5 → 5 pts (untested, not penalised). 
+ + 10.0 * COALESCE(cm.country_avg_market_score / 100.0, 0.5) , 1) AS opportunity_score, CURRENT_DATE AS refreshed_date FROM foundation.dim_locations l LEFT JOIN catchment c ON c.geoname_id = l.geoname_id +LEFT JOIN country_market cm ON cm.country_code = l.country_code ORDER BY opportunity_score DESC diff --git a/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql b/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql index fcff9bd..a876295 100644 --- a/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql +++ b/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql @@ -18,13 +18,14 @@ SELECT country_slug, COUNT(*) AS city_count, SUM(padel_venue_count) AS total_venues, - ROUND(AVG(market_score), 1) AS avg_market_score, + -- Population-weighted: large cities (Madrid, Barcelona) dominate, not hundreds of small towns; the weight total counts only cities with a non-NULL market_score so unscored cities cannot dilute the average + ROUND(SUM(market_score * population) / NULLIF(SUM(population) FILTER (WHERE market_score IS NOT NULL), 0), 1) AS avg_market_score, MAX(market_score) AS top_city_market_score, -- Top 5 cities by venue count (prominence), then score for internal linking LIST(city_slug ORDER BY padel_venue_count DESC, market_score DESC NULLS LAST)[1:5] AS top_city_slugs, LIST(city_name ORDER BY padel_venue_count DESC, market_score DESC NULLS LAST)[1:5] AS top_city_names, - -- Opportunity score aggregates (NULL-safe: cities without geoname_id match excluded from AVG) - ROUND(AVG(opportunity_score), 1) AS avg_opportunity_score, + -- Opportunity score aggregates (population-weighted: saturated megacities dominate, not hundreds of small towns; NULL-safe: cities without geoname_id match are excluded from both the weighted sum and the weight total) + ROUND(SUM(opportunity_score * population) / NULLIF(SUM(population) FILTER (WHERE opportunity_score IS NOT NULL), 0), 1) AS avg_opportunity_score, MAX(opportunity_score) AS top_opportunity_score, -- Top 5 opportunity cities by population (prominence), then opportunity score LIST(city_slug ORDER BY population DESC, opportunity_score DESC NULLS LAST)[1:5] AS top_opportunity_slugs,