From cda94c9ee47735b22a32bdf3dc5eef559c3ec194 Mon Sep 17 00:00:00 2001
From: Deeman
Date: Fri, 6 Mar 2026 11:36:36 +0100
Subject: [PATCH] feat(serving): add unified location_profiles model

Combines city_market_profile and location_opportunity_profile into a
single serving model at (country_code, geoname_id) grain. Both Market
Score and Opportunity Score computed per location. City data enriched
via LEFT JOIN dim_cities on geoname_id.

Subtask 1/5: create new model (old models not yet removed).

Co-Authored-By: Claude Sonnet 4.6
---
 .../models/serving/location_profiles.sql      | 230 ++++++++++++++++++
 1 file changed, 230 insertions(+)
 create mode 100644 transform/sqlmesh_padelnomics/models/serving/location_profiles.sql

diff --git a/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql b/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql
new file mode 100644
index 0000000..2efd3cc
--- /dev/null
+++ b/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql
@@ -0,0 +1,230 @@
+-- Unified location profile: both scores at (country_code, geoname_id) grain.
+-- Base: dim_locations (ALL GeoNames locations, pop ≥ 1K, ~140K rows).
+-- Enriched with dim_cities (city_slug, city_name, exact venue count) and
+-- venue_pricing_benchmarks (Playtomic pricing/occupancy).
+--
+-- Two scores per location:
+--
+-- Padelnomics Market Score (Marktreife-Score v3, 0–100):
+--   "How mature/established is this padel market?"
+--   Only meaningful for locations matched to a dim_cities row (city_slug IS NOT NULL)
+--   with padel venues. 0 for all other locations.
+--
+--   40 pts supply development — log-scaled density (LN ceiling 20/100k) × count gate
+--   25 pts demand evidence    — occupancy when available; 40% density proxy otherwise
+--   15 pts addressable market — log-scaled population, ceiling 1M
+--   10 pts economic context   — income PPS normalised to 200 ceiling
+--   10 pts data quality       — completeness discount
+--
+-- Padelnomics Opportunity Score (Marktpotenzial-Score v2, 0–100):
+--   "Where should I build a padel court?"
+--   Computed for ALL locations — zero-court locations score highest on supply gap.
+--
+--   25 pts addressable market — log-scaled population, ceiling 500K
+--   20 pts economic power     — income PPS, normalised to 35,000
+--   30 pts supply gap         — inverted venue density; 0 courts = full marks
+--   15 pts catchment gap      — distance to nearest padel court
+--   10 pts sports culture     — tennis courts within 25km
+--
+-- Missing inputs (all fallbacks are applied to the input, before LEAST/GREATEST,
+-- because those functions skip NULLs on some engines and propagate them on others):
+--   population                → treated as 1 (LN = 0, no addressable-market points)
+--   median_income_pps         → 100 (market score) / 15000 (opportunity score)
+--   padel_venues_per_100k     → 0 (full supply-gap marks)
+--   nearest_padel_court_km    → neutral 0.5 of the catchment-gap points
+--   tennis_courts_within_25km → 0 (no sports-culture points)
+--
+-- Consumers query directly with WHERE filters:
+--   cities API:       WHERE country_slug = ? AND city_slug IS NOT NULL
+--   opportunity API:  WHERE country_slug = ? AND opportunity_score > 0
+--   planner_defaults: WHERE city_slug IS NOT NULL
+--   pseo_*:           WHERE city_slug IS NOT NULL AND city_padel_venue_count > 0
+
+MODEL (
+    name serving.location_profiles,
+    kind FULL,
+    cron '@daily',
+    grain (country_code, geoname_id)
+);
+
+WITH
+-- All locations from dim_locations (superset)
+base AS (
+    SELECT
+        l.geoname_id,
+        l.country_code,
+        l.country_name_en,
+        l.country_slug,
+        l.location_name,
+        l.location_slug,
+        l.lat,
+        l.lon,
+        l.admin1_code,
+        l.admin2_code,
+        l.population,
+        l.population_year,
+        l.median_income_pps,
+        l.income_year,
+        l.padel_venue_count,
+        l.padel_venues_per_100k,
+        l.nearest_padel_court_km,
+        l.tennis_courts_within_25km
+    FROM foundation.dim_locations l
+),
+-- Match dim_cities via (country_code, geoname_id) to get city_slug + exact venue count.
+-- QUALIFY handles rare multi-city-per-geoname collisions (keep highest venue count).
+-- NULLS LAST keeps a city with an unknown count from outranking one with a known
+-- count; city_slug breaks exact ties so every rebuild picks the same city.
+city_match AS (
+    SELECT
+        c.country_code,
+        c.geoname_id,
+        c.city_slug,
+        c.city_name,
+        c.padel_venue_count AS city_padel_venue_count
+    FROM foundation.dim_cities c
+    WHERE c.geoname_id IS NOT NULL
+    QUALIFY ROW_NUMBER() OVER (
+        PARTITION BY c.country_code, c.geoname_id
+        ORDER BY c.padel_venue_count DESC NULLS LAST, c.city_slug
+    ) = 1
+),
+-- Pricing / occupancy from Playtomic (via city_slug)
+-- NOTE(review): assumes venue_pricing_benchmarks has at most one row per
+-- (country_code, city_slug); otherwise this join breaks the model grain — confirm.
+with_pricing AS (
+    SELECT
+        b.*,
+        cm.city_slug,
+        cm.city_name,
+        cm.city_padel_venue_count,
+        vpb.median_hourly_rate,
+        vpb.median_peak_rate,
+        vpb.median_offpeak_rate,
+        vpb.median_occupancy_rate,
+        vpb.median_daily_revenue_per_venue,
+        vpb.price_currency
+    FROM base b
+    LEFT JOIN city_match cm
+        ON b.country_code = cm.country_code
+        AND b.geoname_id = cm.geoname_id
+    LEFT JOIN serving.venue_pricing_benchmarks vpb
+        ON cm.country_code = vpb.country_code
+        AND cm.city_slug = vpb.city_slug
+),
+-- Unrounded city-level venue density per 100k residents (from dim_cities exact count,
+-- not dim_locations spatial 5km). Defined once so the exported column and both
+-- Market Score terms that use it cannot drift apart. 0 when population is missing
+-- or non-positive.
+density AS (
+    SELECT
+        wp.*,
+        CASE WHEN wp.population > 0
+            THEN COALESCE(wp.city_padel_venue_count, 0)::DOUBLE / wp.population * 100000
+            ELSE 0
+        END AS city_density_raw
+    FROM with_pricing wp
+),
+-- Building blocks shared by the exported columns and the Market Score
+metrics AS (
+    SELECT
+        d.*,
+        -- Exported density: rounded, and NULL (not 0) when population is unusable
+        CASE WHEN d.population > 0
+            THEN ROUND(d.city_density_raw, 2)
+            ELSE NULL
+        END AS city_venues_per_100k,
+        -- Data confidence (for market_score)
+        CASE
+            WHEN d.population > 0 AND COALESCE(d.city_padel_venue_count, 0) > 0 THEN 1.0
+            WHEN d.population > 0 OR COALESCE(d.city_padel_venue_count, 0) > 0 THEN 0.5
+            ELSE 0.0
+        END AS data_confidence,
+        -- Supply factor in [0, 1]: log-scaled density (saturates at 20 venues/100k,
+        -- since LN(20 + 1) / LN(21) = 1) × count gate (saturates at 5 venues)
+        CASE WHEN COALESCE(d.city_padel_venue_count, 0) > 0
+            THEN LEAST(1.0, LN(d.city_density_raw + 1) / LN(21))
+                * LEAST(1.0, d.city_padel_venue_count / 5.0)
+            ELSE 0.0
+        END AS supply_factor
+    FROM density d
+),
+-- Both scores computed from the enriched base
+scored AS (
+    SELECT
+        m.*,
+        -- ── Market Score (Marktreife-Score v3) ──────────────────────────────────
+        -- 0 when no city match or no venues (city_padel_venue_count NULL or 0)
+        CASE WHEN COALESCE(m.city_padel_venue_count, 0) > 0 THEN
+            ROUND(
+                -- Supply development (40 pts)
+                40.0 * m.supply_factor
+                -- Demand evidence (25 pts): occupancy relative to 0.65, else 40% of supply
+                + 25.0 * CASE
+                    WHEN m.median_occupancy_rate IS NOT NULL
+                        THEN LEAST(1.0, m.median_occupancy_rate / 0.65)
+                    ELSE 0.4 * m.supply_factor
+                END
+                -- Addressable market (15 pts)
+                + 15.0 * LEAST(1.0, LN(GREATEST(COALESCE(m.population, 0), 1)) / LN(1000000))
+                -- Economic context (10 pts)
+                -- NOTE(review): this term divides median_income_pps by 200 while the
+                -- Opportunity Score divides the same column by 35,000. If the column is
+                -- in absolute PPS, this term is always 1.0 — confirm the intended unit.
+                + 10.0 * LEAST(1.0, COALESCE(m.median_income_pps, 100) / 200.0)
+                -- Data quality (10 pts)
+                + 10.0 * m.data_confidence
+            , 1)
+            ELSE 0
+        END AS market_score,
+        -- ── Opportunity Score (Marktpotenzial-Score v2) ────────────────────────
+        ROUND(
+            -- Addressable market (25 pts): ceiling 500K
+            25.0 * LEAST(1.0, LN(GREATEST(COALESCE(m.population, 0), 1)) / LN(500000))
+            -- Economic power (20 pts): income PPS normalised to 35,000
+            + 20.0 * LEAST(1.0, COALESCE(m.median_income_pps, 15000) / 35000.0)
+            -- Supply gap (30 pts): inverted venue density
+            + 30.0 * GREATEST(0.0, 1.0 - COALESCE(m.padel_venues_per_100k, 0) / 8.0)
+            -- Catchment gap (15 pts): distance to nearest court; neutral 0.5 when unknown
+            + 15.0 * LEAST(1.0, COALESCE(m.nearest_padel_court_km / 30.0, 0.5))
+            -- Sports culture (10 pts): tennis courts within 25km; none when unknown
+            + 10.0 * LEAST(1.0, COALESCE(m.tennis_courts_within_25km, 0) / 10.0)
+        , 1) AS opportunity_score
+    FROM metrics m
+)
+SELECT
+    s.geoname_id,
+    s.country_code,
+    s.country_name_en,
+    s.country_slug,
+    s.location_name,
+    s.location_slug,
+    s.city_slug,
+    s.city_name,
+    s.lat,
+    s.lon,
+    s.admin1_code,
+    s.admin2_code,
+    s.population,
+    s.population_year,
+    s.median_income_pps,
+    s.income_year,
+    s.padel_venue_count,
+    s.padel_venues_per_100k,
+    s.nearest_padel_court_km,
+    s.tennis_courts_within_25km,
+    s.city_padel_venue_count,
+    s.city_venues_per_100k,
+    s.data_confidence,
+    s.market_score,
+    s.opportunity_score,
+    s.median_hourly_rate,
+    s.median_peak_rate,
+    s.median_offpeak_rate,
+    s.median_occupancy_rate,
+    s.median_daily_revenue_per_venue,
+    s.price_currency,
+    CURRENT_DATE AS refreshed_date
+FROM scored s
+-- country_code + geoname_id (the model grain) make the row order deterministic on ties
+ORDER BY s.market_score DESC, s.opportunity_score DESC, s.country_code, s.geoname_id