From cda94c9ee47735b22a32bdf3dc5eef559c3ec194 Mon Sep 17 00:00:00 2001
From: Deeman
Date: Fri, 6 Mar 2026 11:36:36 +0100
Subject: [PATCH 1/5] feat(serving): add unified location_profiles model

Combines city_market_profile and location_opportunity_profile into a
single serving model at (country_code, geoname_id) grain. Both Market
Score and Opportunity Score computed per location. City data enriched
via LEFT JOIN dim_cities on geoname_id.

Subtask 1/5: create new model (old models not yet removed).

Co-Authored-By: Claude Sonnet 4.6
---
 .../models/serving/location_profiles.sql      | 203 ++++++++++++++++++
 1 file changed, 203 insertions(+)
 create mode 100644 transform/sqlmesh_padelnomics/models/serving/location_profiles.sql

diff --git a/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql b/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql
new file mode 100644
index 0000000..2efd3cc
--- /dev/null
+++ b/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql
@@ -0,0 +1,203 @@
+-- Unified location profile: both scores at (country_code, geoname_id) grain.
+-- Base: dim_locations (ALL GeoNames locations, pop ≥ 1K, ~140K rows).
+-- Enriched with dim_cities (city_slug, city_name, exact venue count) and
+-- venue_pricing_benchmarks (Playtomic pricing/occupancy).
+--
+-- Two scores per location:
+--
+-- Padelnomics Market Score (Marktreife-Score v3, 0–100):
+--   "How mature/established is this padel market?"
+--   Only meaningful for locations matched to a dim_cities row (city_slug IS NOT NULL)
+--   with padel venues. 0 for all other locations.
+--
+--   40 pts supply development — log-scaled density (LN ceiling 20/100k) × count gate
+--   25 pts demand evidence — occupancy when available; 40% density proxy otherwise
+--   15 pts addressable market — log-scaled population, ceiling 1M
+--   10 pts economic context — income PPS normalised to 200 ceiling
+--   10 pts data quality — completeness discount
+--
+-- Padelnomics Opportunity Score (Marktpotenzial-Score v2, 0–100):
+--   "Where should I build a padel court?"
+--   Computed for ALL locations — zero-court locations score highest on supply gap.
+--
+--   25 pts addressable market — log-scaled population, ceiling 500K
+--   20 pts economic power — income PPS, normalised to 35,000
+--   30 pts supply gap — inverted venue density; 0 courts = full marks
+--   15 pts catchment gap — distance to nearest padel court
+--   10 pts sports culture — tennis courts within 25km
+--
+-- Consumers query directly with WHERE filters:
+--   cities API: WHERE country_slug = ? AND city_slug IS NOT NULL
+--   opportunity API: WHERE country_slug = ? AND opportunity_score > 0
+--   planner_defaults: WHERE city_slug IS NOT NULL
+--   pseo_*: WHERE city_slug IS NOT NULL AND city_padel_venue_count > 0
+
+MODEL (
+    name serving.location_profiles,
+    kind FULL,
+    cron '@daily',
+    grain (country_code, geoname_id)
+);
+
+WITH
+-- All locations from dim_locations (superset)
+base AS (
+    SELECT
+        l.geoname_id,
+        l.country_code,
+        l.country_name_en,
+        l.country_slug,
+        l.location_name,
+        l.location_slug,
+        l.lat,
+        l.lon,
+        l.admin1_code,
+        l.admin2_code,
+        l.population,
+        l.population_year,
+        l.median_income_pps,
+        l.income_year,
+        l.padel_venue_count,
+        l.padel_venues_per_100k,
+        l.nearest_padel_court_km,
+        l.tennis_courts_within_25km
+    FROM foundation.dim_locations l
+),
+-- Match dim_cities via (country_code, geoname_id) for city_slug + exact venue count; cities without a geoname_id are excluded.
+-- QUALIFY keeps one city per geoname (highest venue count; city_slug breaks ties so reruns are deterministic).
+city_match AS (
+    SELECT
+        c.country_code,
+        c.geoname_id,
+        c.city_slug,
+        c.city_name,
+        c.padel_venue_count AS city_padel_venue_count
+    FROM foundation.dim_cities c
+    WHERE c.geoname_id IS NOT NULL
+    QUALIFY ROW_NUMBER() OVER (
+        PARTITION BY c.country_code, c.geoname_id
+        ORDER BY c.padel_venue_count DESC NULLS LAST, c.city_slug
+    ) = 1
+),
+-- Pricing / occupancy from Playtomic (via city_slug)
+with_pricing AS (
+    SELECT
+        b.*,
+        cm.city_slug,
+        cm.city_name,
+        cm.city_padel_venue_count,
+        vpb.median_hourly_rate,
+        vpb.median_peak_rate,
+        vpb.median_offpeak_rate,
+        vpb.median_occupancy_rate,
+        vpb.median_daily_revenue_per_venue,
+        vpb.price_currency
+    FROM base b
+    LEFT JOIN city_match cm
+        ON b.country_code = cm.country_code
+        AND b.geoname_id = cm.geoname_id
+    LEFT JOIN serving.venue_pricing_benchmarks vpb
+        ON cm.country_code = vpb.country_code
+        AND cm.city_slug = vpb.city_slug
+),
+-- Both scores computed from the enriched base
+scored AS (
+    SELECT *,
+        -- City-level venue density (from dim_cities exact count, not dim_locations spatial 5km)
+        CASE WHEN population > 0
+             THEN ROUND(COALESCE(city_padel_venue_count, 0)::DOUBLE / population * 100000, 2)
+             ELSE NULL
+        END AS city_venues_per_100k,
+        -- Data confidence (for market_score)
+        CASE
+            WHEN population > 0 AND COALESCE(city_padel_venue_count, 0) > 0 THEN 1.0
+            WHEN population > 0 OR COALESCE(city_padel_venue_count, 0) > 0 THEN 0.5
+            ELSE 0.0
+        END AS data_confidence,
+        -- ── Market Score (Marktreife-Score v3) ──────────────────────────────────
+        -- 0 when no city match or no venues (city_padel_venue_count NULL or 0)
+        CASE WHEN COALESCE(city_padel_venue_count, 0) > 0 THEN
+            ROUND(
+                -- Supply development (40 pts)
+                40.0 * LEAST(1.0, LN(
+                    COALESCE(
+                        CASE WHEN population > 0
+                             THEN COALESCE(city_padel_venue_count, 0)::DOUBLE / population * 100000
+                             ELSE 0 END
+                    , 0) + 1) / LN(21))
+                    * LEAST(1.0, COALESCE(city_padel_venue_count, 0) / 5.0)
+                -- Demand evidence (25 pts)
+                + 25.0 * CASE
+                    WHEN median_occupancy_rate IS NOT NULL
+                    THEN LEAST(1.0, median_occupancy_rate / 0.65)
+                    ELSE 0.4 * LEAST(1.0, LN(
+                        COALESCE(
+                            CASE WHEN population > 0
+                                 THEN COALESCE(city_padel_venue_count, 0)::DOUBLE / population * 100000
+                                 ELSE 0 END
+                        , 0) + 1) / LN(21))
+                        * LEAST(1.0, COALESCE(city_padel_venue_count, 0) / 5.0)
+                END
+                -- Addressable market (15 pts)
+                + 15.0 * LEAST(1.0, LN(GREATEST(population, 1)) / LN(1000000))
+                -- Economic context (10 pts). NOTE(review): the opportunity score scales this same column by 35,000, so /200 likely saturates at 1.0 — confirm intended.
+                + 10.0 * LEAST(1.0, COALESCE(median_income_pps, 100) / 200.0)
+                -- Data quality (10 pts)
+                + 10.0 * CASE
+                    WHEN population > 0 AND COALESCE(city_padel_venue_count, 0) > 0 THEN 1.0
+                    WHEN population > 0 OR COALESCE(city_padel_venue_count, 0) > 0 THEN 0.5
+                    ELSE 0.0
+                END
+            , 1)
+            ELSE 0
+        END AS market_score,
+        -- ── Opportunity Score (Marktpotenzial-Score v2) ────────────────────────
+        ROUND(
+            -- Addressable market (25 pts): ceiling 500K
+            25.0 * LEAST(1.0, LN(GREATEST(population, 1)) / LN(500000))
+            -- Economic power (20 pts): income PPS normalised to 35,000
+            + 20.0 * LEAST(1.0, COALESCE(median_income_pps, 15000) / 35000.0)
+            -- Supply gap (30 pts): inverted venue density
+            + 30.0 * GREATEST(0.0, 1.0 - COALESCE(padel_venues_per_100k, 0) / 8.0)
+            -- Catchment gap (15 pts): distance to nearest court. NOTE(review): DuckDB LEAST skips NULL arguments, so a NULL distance likely scores 1.0 and the 0.5 fallback never applies — confirm intended.
+            + 15.0 * COALESCE(LEAST(1.0, nearest_padel_court_km / 30.0), 0.5)
+            -- Sports culture (10 pts): tennis courts within 25km; a NULL count is treated as 0 courts so missing data cannot earn full marks
+            + 10.0 * LEAST(1.0, COALESCE(tennis_courts_within_25km, 0) / 10.0)
+        , 1) AS opportunity_score
+    FROM with_pricing
+)
+SELECT
+    s.geoname_id,
+    s.country_code,
+    s.country_name_en,
+    s.country_slug,
+    s.location_name,
+    s.location_slug,
+    s.city_slug,
+    s.city_name,
+    s.lat,
+    s.lon,
+    s.admin1_code,
+    s.admin2_code,
+    s.population,
+    s.population_year,
+    s.median_income_pps,
+    s.income_year,
+    s.padel_venue_count,
+    s.padel_venues_per_100k,
+    s.nearest_padel_court_km,
+    s.tennis_courts_within_25km,
+    s.city_padel_venue_count,
+    s.city_venues_per_100k,
+    s.data_confidence,
+    s.market_score,
+    s.opportunity_score,
+    s.median_hourly_rate,
+    s.median_peak_rate,
+    s.median_offpeak_rate,
+    s.median_occupancy_rate,
+    s.median_daily_revenue_per_venue,
+    s.price_currency,
+    CURRENT_DATE AS refreshed_date
+FROM scored s
+ORDER BY s.market_score DESC, s.opportunity_score DESC

From 81b556b205d95c4fc84f4659bdccd1b4a61dfbd3 Mon Sep 17 00:00:00 2001
From: Deeman
Date: Fri, 6 Mar 2026 11:39:52 +0100
Subject: [PATCH 2/5] refactor(serving): replace old models with location_profiles

Delete city_market_profile.sql and location_opportunity_profile.sql.
Update downstream models (planner_defaults, pseo_city_costs_de,
pseo_city_pricing) to read from location_profiles instead.

Subtask 2/5: delete old models + update downstream SQL.

Co-Authored-By: Claude Sonnet 4.6
---
 .../models/serving/city_market_profile.sql    | 117 ------------------
 .../serving/location_opportunity_profile.sql  | 86 -------------
 .../models/serving/planner_defaults.sql       | 7 +-
 .../models/serving/pseo_city_costs_de.sql     | 14 +--
 .../models/serving/pseo_city_pricing.sql      | 13 +-
 5 files changed, 17 insertions(+), 220 deletions(-)
 delete mode 100644 transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql
 delete mode 100644 transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql

diff --git a/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql b/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql
deleted file mode 100644
index 963ea8a..0000000
--- a/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql
+++ /dev/null
@@ -1,117 +0,0 @@
--- One Big Table: per-city padel market intelligence.
--- Consumed by: SEO article generation, planner city-select pre-fill, API endpoints.
---
--- Padelnomics Marktreife-Score v3 (0–100):
--- Answers "How mature/established is this padel market?"
--- Only computed for cities with ≥1 padel venue (padel_venue_count > 0).
--- For white-space opportunity scoring, see serving.location_opportunity_profile.
--- --- 40 pts supply development — log-scaled density (LN ceiling 20/100k) × count gate --- (min(1, count/5) kills small-town inflation) --- 25 pts demand evidence — occupancy when available; 40% density proxy otherwise --- 15 pts addressable market — log-scaled population, ceiling 1M (context only) --- 10 pts economic context — income PPS normalised to 200 ceiling --- 10 pts data quality — completeness discount --- No saturation discount: high density = maturity, not a penalty - -MODEL ( - name serving.city_market_profile, - kind FULL, - cron '@daily', - grain (country_code, city_slug) -); - -WITH base AS ( - SELECT - c.country_code, - c.country_name_en, - c.country_slug, - c.city_name, - c.city_slug, - c.lat, - c.lon, - c.population, - c.population_year, - c.padel_venue_count, - c.median_income_pps, - c.income_year, - c.geoname_id, - -- Venue density: padel venues per 100K residents - CASE WHEN c.population > 0 - THEN ROUND(c.padel_venue_count::DOUBLE / c.population * 100000, 2) - ELSE NULL - END AS venues_per_100k, - -- Data confidence: 1.0 if both population and venues are present - CASE - WHEN c.population > 0 AND c.padel_venue_count > 0 THEN 1.0 - WHEN c.population > 0 OR c.padel_venue_count > 0 THEN 0.5 - ELSE 0.0 - END AS data_confidence, - -- Pricing / occupancy from Playtomic (NULL when no availability data) - vpb.median_hourly_rate, - vpb.median_peak_rate, - vpb.median_offpeak_rate, - vpb.median_occupancy_rate, - vpb.median_daily_revenue_per_venue, - vpb.price_currency - FROM foundation.dim_cities c - LEFT JOIN serving.venue_pricing_benchmarks vpb - ON c.country_code = vpb.country_code - AND c.city_slug = vpb.city_slug - WHERE c.padel_venue_count > 0 -), -scored AS ( - SELECT *, - ROUND( - -- Supply development (40 pts): THE maturity signal. - -- Log-scaled density: LN(density+1)/LN(21) → 20/100k ≈ full marks. - -- Count gate: min(1, count/5) — 1 venue=20%, 5+ venues=100%. 
- -- Kills small-town inflation (1 court / 5k pop = 20/100k) without hard cutoffs. - 40.0 * LEAST(1.0, LN(COALESCE(venues_per_100k, 0) + 1) / LN(21)) - * LEAST(1.0, padel_venue_count / 5.0) - -- Demand evidence (25 pts): occupancy when Playtomic data available. - -- Fallback: 40% of density score (avoids double-counting with supply component). - + 25.0 * CASE - WHEN median_occupancy_rate IS NOT NULL - THEN LEAST(1.0, median_occupancy_rate / 0.65) - ELSE 0.4 * LEAST(1.0, LN(COALESCE(venues_per_100k, 0) + 1) / LN(21)) - * LEAST(1.0, padel_venue_count / 5.0) - END - -- Addressable market (15 pts): population as context, not maturity signal. - -- LN(1) = 0 so zero-pop cities score 0 here. - + 15.0 * LEAST(1.0, LN(GREATEST(population, 1)) / LN(1000000)) - -- Economic context (10 pts): country-level income PPS. - -- Flat per country — kept as context modifier, not primary signal. - + 10.0 * LEAST(1.0, COALESCE(median_income_pps, 100) / 200.0) - -- Data quality (10 pts): completeness discount. - + 10.0 * data_confidence - , 1) - AS market_score - FROM base -) -SELECT - s.country_code, - s.country_name_en, - s.country_slug, - s.city_name, - s.city_slug, - s.lat, - s.lon, - s.population, - s.population_year, - s.padel_venue_count, - s.venues_per_100k, - s.data_confidence, - s.market_score, - s.median_income_pps, - s.income_year, - s.median_hourly_rate, - s.median_peak_rate, - s.median_offpeak_rate, - s.median_occupancy_rate, - s.median_daily_revenue_per_venue, - s.price_currency, - s.geoname_id, - CURRENT_DATE AS refreshed_date -FROM scored s -ORDER BY s.market_score DESC diff --git a/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql b/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql deleted file mode 100644 index b746cab..0000000 --- a/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql +++ /dev/null @@ -1,86 +0,0 @@ --- Per-location padel investment opportunity intelligence. 
--- Consumed by: Gemeinde-level pSEO pages, opportunity map, "top markets" lists. --- --- Padelnomics Marktpotenzial-Score v2 (0–100): --- Answers "Where should I build a padel court?" --- Covers ALL GeoNames locations (pop ≥ 1K) — NOT filtered to existing padel markets. --- Zero-court locations score highest on supply gap component (white space = opportunity). --- --- 25 pts addressable market — log-scaled population, ceiling 500K --- (opportunity peaks in mid-size cities; megacities already served) --- 20 pts economic power — country income PPS, normalised to 35,000 --- EU PPS values range 18k-37k; /35k gives real spread. --- DE ≈ 13.2pts, ES ≈ 10.7pts, SE ≈ 14.3pts. --- Previously /200 caused all countries to saturate at 20/20. --- 30 pts supply gap — INVERTED venue density; 0 courts/100K = full marks. --- Ceiling raised to 8/100K (was 4) for a gentler gradient --- and to account for ~87% data undercount vs FIP totals. --- Linear: GREATEST(0, 1 - density/8) --- 15 pts catchment gap — distance to nearest padel court. --- DuckDB LEAST ignores NULLs: LEAST(1.0, NULL/30) = 1.0, --- so NULL nearest_km = full marks (no court in bounding box --- = high opportunity). COALESCE fallback is dead code. --- 10 pts sports culture — tennis courts within 25km (≥10 = full marks). --- NOTE: dim_locations tennis data is empty (all 0 rows). --- Component contributes 0 pts everywhere until data lands. - -MODEL ( - name serving.location_opportunity_profile, - kind FULL, - cron '@daily', - grain (country_code, geoname_id) -); - -SELECT - l.geoname_id, - l.country_code, - l.country_name_en, - l.country_slug, - l.location_name, - l.location_slug, - l.lat, - l.lon, - l.admin1_code, - l.admin2_code, - l.population, - l.population_year, - l.median_income_pps, - l.income_year, - l.padel_venue_count, - l.padel_venues_per_100k, - l.nearest_padel_court_km, - l.tennis_courts_within_25km, - ROUND( - -- Addressable market (25 pts): log-scaled to 500K ceiling. 
- -- Lower ceiling than Marktreife (1M) — opportunity peaks in mid-size cities - -- that can support a court but aren't already saturated by large-city operators. - 25.0 * LEAST(1.0, LN(GREATEST(l.population, 1)) / LN(500000)) - - -- Economic power (20 pts): country-level income PPS normalised to 35,000. - -- Drives willingness-to-pay for court fees (€20-35/hr target range). - -- EU PPS values range 18k-37k; ceiling 35k gives meaningful spread. - -- v1 used /200 which caused LEAST(1.0, 115) = 1.0 for ALL countries (flat, no differentiation). - -- v2: /35000 → DE 0.66×20=13.2pts, ES 0.53×20=10.7pts, SE 0.71×20=14.3pts. - -- Default 15000 for missing data = reasonable developing-market assumption (~0.43). - + 20.0 * LEAST(1.0, COALESCE(l.median_income_pps, 15000) / 35000.0) - - -- Supply gap (30 pts): INVERTED venue density. - -- 0 courts/100K = full 30 pts (white space); ≥8/100K = 0 pts (served market). - -- Ceiling raised from 4→8/100K for a gentler gradient and to account for data - -- undercount (~87% of real courts not in our data). - -- This is the key signal that separates Marktpotenzial from Marktreife. - + 30.0 * GREATEST(0.0, 1.0 - COALESCE(l.padel_venues_per_100k, 0) / 8.0) - - -- Catchment gap (15 pts): distance to nearest existing padel court. - -- >30km = full 15 pts (underserved catchment area). - -- NULL = no courts found anywhere (rare edge case) → neutral 0.5. - + 15.0 * COALESCE(LEAST(1.0, l.nearest_padel_court_km / 30.0), 0.5) - - -- Sports culture proxy (10 pts): tennis courts within 25km. - -- ≥10 courts = full 10 pts (proven racket sport market = faster padel adoption). - -- 0 courts = 0 pts. Many new padel courts open inside existing tennis clubs. 
- + 10.0 * LEAST(1.0, l.tennis_courts_within_25km / 10.0) - , 1) AS opportunity_score, - CURRENT_DATE AS refreshed_date -FROM foundation.dim_locations l -ORDER BY opportunity_score DESC diff --git a/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql b/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql index eb0fcb3..29c27cd 100644 --- a/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql +++ b/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql @@ -76,11 +76,12 @@ city_profiles AS ( city_slug, country_code, city_name, - padel_venue_count, + city_padel_venue_count AS padel_venue_count, population, market_score, - venues_per_100k - FROM serving.city_market_profile + city_venues_per_100k AS venues_per_100k + FROM serving.location_profiles + WHERE city_slug IS NOT NULL ) SELECT cp.city_slug, diff --git a/transform/sqlmesh_padelnomics/models/serving/pseo_city_costs_de.sql b/transform/sqlmesh_padelnomics/models/serving/pseo_city_costs_de.sql index 69db995..c71cdf6 100644 --- a/transform/sqlmesh_padelnomics/models/serving/pseo_city_costs_de.sql +++ b/transform/sqlmesh_padelnomics/models/serving/pseo_city_costs_de.sql @@ -31,10 +31,10 @@ SELECT c.lon, -- Market metrics c.population, - c.padel_venue_count, - c.venues_per_100k, + c.city_padel_venue_count AS padel_venue_count, + c.city_venues_per_100k AS venues_per_100k, c.market_score, - lop.opportunity_score, + c.opportunity_score, c.data_confidence, -- Pricing (from Playtomic, NULL when no coverage) c.median_hourly_rate, @@ -85,15 +85,13 @@ SELECT cc.working_capital AS "workingCapital", cc.permits_compliance AS "permitsCompliance", CURRENT_DATE AS refreshed_date -FROM serving.city_market_profile c +FROM serving.location_profiles c LEFT JOIN serving.planner_defaults p ON c.country_code = p.country_code AND c.city_slug = p.city_slug -LEFT JOIN serving.location_opportunity_profile lop - ON c.country_code = lop.country_code - AND c.geoname_id = lop.geoname_id LEFT JOIN 
foundation.dim_countries cc ON c.country_code = cc.country_code -- Only cities with actual padel presence and at least some rate data -WHERE c.padel_venue_count > 0 +WHERE c.city_slug IS NOT NULL + AND c.city_padel_venue_count > 0 AND (p.rate_peak IS NOT NULL OR c.median_peak_rate IS NOT NULL) diff --git a/transform/sqlmesh_padelnomics/models/serving/pseo_city_pricing.sql b/transform/sqlmesh_padelnomics/models/serving/pseo_city_pricing.sql index ad306c1..aee7c2c 100644 --- a/transform/sqlmesh_padelnomics/models/serving/pseo_city_pricing.sql +++ b/transform/sqlmesh_padelnomics/models/serving/pseo_city_pricing.sql @@ -1,6 +1,6 @@ -- pSEO article data: per-city padel court pricing. -- One row per city — consumed by the city-pricing.md.jinja template. --- Joins venue_pricing_benchmarks (real Playtomic data) with city_market_profile +-- Joins venue_pricing_benchmarks (real Playtomic data) with location_profiles -- (population, venue count, country metadata). -- -- Stricter filter than pseo_city_costs_de: requires >= 2 venues with real @@ -16,7 +16,7 @@ MODEL ( SELECT -- Composite natural key: country_slug + city_slug ensures uniqueness across countries c.country_slug || '-' || c.city_slug AS city_key, - -- City identity (from city_market_profile, which has the canonical city_slug) + -- City identity (from location_profiles, which has the canonical city_slug) c.city_slug, c.city_name, c.country_code, @@ -24,8 +24,8 @@ SELECT c.country_slug, -- Market context c.population, - c.padel_venue_count, - c.venues_per_100k, + c.city_padel_venue_count AS padel_venue_count, + c.city_venues_per_100k AS venues_per_100k, c.market_score, -- Pricing benchmarks (from Playtomic availability data) vpb.median_hourly_rate, @@ -38,9 +38,10 @@ SELECT vpb.price_currency, CURRENT_DATE AS refreshed_date FROM serving.venue_pricing_benchmarks vpb --- Join city_market_profile to get the canonical city_slug and country metadata -INNER JOIN serving.city_market_profile c +-- Join location_profiles to 
get canonical city metadata +INNER JOIN serving.location_profiles c ON vpb.country_code = c.country_code AND vpb.city_slug = c.city_slug + AND c.city_slug IS NOT NULL -- Only cities with enough venues for meaningful pricing statistics WHERE vpb.venue_count >= 2 From 688f2dd1eeb0d3e9c7a4b53d3a56703fdb378dd1 Mon Sep 17 00:00:00 2001 From: Deeman Date: Fri, 6 Mar 2026 11:41:42 +0100 Subject: [PATCH 3/5] refactor(web): update all references to location_profiles Update api.py (3 endpoints), public/routes.py, analytics.py docstring, pipeline_routes.py DAG, pipeline_query.html placeholder, and test_pipeline.py fixtures to use the new unified model. Subtask 3/5: web app references. Co-Authored-By: Claude Sonnet 4.6 --- web/src/padelnomics/admin/pipeline_routes.py | 9 +++--- .../admin/partials/pipeline_query.html | 2 +- web/src/padelnomics/analytics.py | 2 +- web/src/padelnomics/api.py | 22 ++++++++------ web/src/padelnomics/public/routes.py | 3 +- web/tests/test_pipeline.py | 30 +++++++++---------- 6 files changed, 36 insertions(+), 32 deletions(-) diff --git a/web/src/padelnomics/admin/pipeline_routes.py b/web/src/padelnomics/admin/pipeline_routes.py index f477e85..3189da0 100644 --- a/web/src/padelnomics/admin/pipeline_routes.py +++ b/web/src/padelnomics/admin/pipeline_routes.py @@ -111,13 +111,12 @@ _DAG: dict[str, list[str]] = { "fct_daily_availability": ["fct_availability_slot", "dim_venue_capacity"], # Serving "venue_pricing_benchmarks": ["fct_daily_availability"], - "city_market_profile": ["dim_cities", "venue_pricing_benchmarks"], - "planner_defaults": ["venue_pricing_benchmarks", "city_market_profile"], - "location_opportunity_profile": ["dim_locations"], + "location_profiles": ["dim_locations", "dim_cities", "venue_pricing_benchmarks"], + "planner_defaults": ["venue_pricing_benchmarks", "location_profiles"], "pseo_city_costs_de": [ - "city_market_profile", "planner_defaults", "location_opportunity_profile", + "location_profiles", "planner_defaults", ], - 
"pseo_city_pricing": ["venue_pricing_benchmarks", "city_market_profile"], + "pseo_city_pricing": ["venue_pricing_benchmarks", "location_profiles"], "pseo_country_overview": ["pseo_city_costs_de"], } diff --git a/web/src/padelnomics/admin/templates/admin/partials/pipeline_query.html b/web/src/padelnomics/admin/templates/admin/partials/pipeline_query.html index 212b423..1a8087d 100644 --- a/web/src/padelnomics/admin/templates/admin/partials/pipeline_query.html +++ b/web/src/padelnomics/admin/templates/admin/partials/pipeline_query.html @@ -171,7 +171,7 @@ autocomplete="off" autocorrect="off" autocapitalize="off" - placeholder="-- SELECT * FROM serving.city_market_profile -- WHERE country_code = 'DE' -- ORDER BY marktreife_score DESC -- LIMIT 20" + placeholder="-- SELECT * FROM serving.location_profiles -- WHERE country_code = 'DE' AND city_slug IS NOT NULL -- ORDER BY market_score DESC -- LIMIT 20" >
diff --git a/web/src/padelnomics/analytics.py b/web/src/padelnomics/analytics.py index b00c955..9333f57 100644 --- a/web/src/padelnomics/analytics.py +++ b/web/src/padelnomics/analytics.py @@ -13,7 +13,7 @@ Usage: rows = await fetch_analytics("SELECT * FROM serving.planner_defaults WHERE city_slug = ?", ["berlin"]) - cols, rows, error, elapsed_ms = await execute_user_query("SELECT city_slug FROM serving.city_market_profile LIMIT 5") + cols, rows, error, elapsed_ms = await execute_user_query("SELECT city_slug FROM serving.location_profiles LIMIT 5") """ import asyncio import logging diff --git a/web/src/padelnomics/api.py b/web/src/padelnomics/api.py index 8db5bae..5ba4fb9 100644 --- a/web/src/padelnomics/api.py +++ b/web/src/padelnomics/api.py @@ -32,12 +32,14 @@ async def countries(): rows = await fetch_analytics(""" SELECT country_code, country_name_en, country_slug, COUNT(*) AS city_count, - SUM(padel_venue_count) AS total_venues, + SUM(city_padel_venue_count) AS total_venues, ROUND(AVG(market_score), 1) AS avg_market_score, + ROUND(AVG(opportunity_score), 1) AS avg_opportunity_score, AVG(lat) AS lat, AVG(lon) AS lon - FROM serving.city_market_profile + FROM serving.location_profiles + WHERE city_slug IS NOT NULL GROUP BY country_code, country_name_en, country_slug - HAVING SUM(padel_venue_count) > 0 + HAVING SUM(city_padel_venue_count) > 0 ORDER BY total_venues DESC """) return jsonify(rows), 200, _CACHE_HEADERS @@ -51,10 +53,11 @@ async def country_cities(country_slug: str): rows = await fetch_analytics( """ SELECT city_name, city_slug, lat, lon, - padel_venue_count, market_score, population - FROM serving.city_market_profile - WHERE country_slug = ? - ORDER BY padel_venue_count DESC + city_padel_venue_count AS padel_venue_count, + market_score, opportunity_score, population + FROM serving.location_profiles + WHERE country_slug = ? 
AND city_slug IS NOT NULL + ORDER BY city_padel_venue_count DESC LIMIT 200 """, [country_slug], @@ -102,9 +105,10 @@ async def opportunity(country_slug: str): rows = await fetch_analytics( """ SELECT location_name, location_slug, lat, lon, - opportunity_score, nearest_padel_court_km, + opportunity_score, market_score, + nearest_padel_court_km, padel_venue_count, population - FROM serving.location_opportunity_profile + FROM serving.location_profiles WHERE country_slug = ? AND opportunity_score > 0 ORDER BY opportunity_score DESC LIMIT 500 diff --git a/web/src/padelnomics/public/routes.py b/web/src/padelnomics/public/routes.py index 018bf74..1a8ba03 100644 --- a/web/src/padelnomics/public/routes.py +++ b/web/src/padelnomics/public/routes.py @@ -80,7 +80,8 @@ async def opportunity_map(): abort(404) countries = await fetch_analytics(""" SELECT DISTINCT country_slug, country_name_en - FROM serving.city_market_profile + FROM serving.location_profiles + WHERE city_slug IS NOT NULL ORDER BY country_name_en """) return await render_template("opportunity_map.html", countries=countries) diff --git a/web/tests/test_pipeline.py b/web/tests/test_pipeline.py index bdd4b96..8f4ffec 100644 --- a/web/tests/test_pipeline.py +++ b/web/tests/test_pipeline.py @@ -64,7 +64,7 @@ def serving_meta_dir(): meta = { "exported_at_utc": "2026-02-25T08:30:00+00:00", "tables": { - "city_market_profile": {"row_count": 612}, + "location_profiles": {"row_count": 612}, "planner_defaults": {"row_count": 612}, "pseo_city_costs_de": {"row_count": 487}, }, @@ -78,16 +78,16 @@ def serving_meta_dir(): # ── Schema + query mocks ────────────────────────────────────────────────────── _MOCK_SCHEMA_ROWS = [ - {"table_name": "city_market_profile", "column_name": "city_slug", "data_type": "VARCHAR", "ordinal_position": 1}, - {"table_name": "city_market_profile", "column_name": "country_code", "data_type": "VARCHAR", "ordinal_position": 2}, - {"table_name": "city_market_profile", "column_name": "marktreife_score", 
"data_type": "DOUBLE", "ordinal_position": 3}, + {"table_name": "location_profiles", "column_name": "city_slug", "data_type": "VARCHAR", "ordinal_position": 1}, + {"table_name": "location_profiles", "column_name": "country_code", "data_type": "VARCHAR", "ordinal_position": 2}, + {"table_name": "location_profiles", "column_name": "market_score", "data_type": "DOUBLE", "ordinal_position": 3}, {"table_name": "planner_defaults", "column_name": "city_slug", "data_type": "VARCHAR", "ordinal_position": 1}, ] _MOCK_TABLE_EXISTS = [{"1": 1}] _MOCK_SAMPLE_ROWS = [ - {"city_slug": "berlin", "country_code": "DE", "marktreife_score": 82.5}, - {"city_slug": "munich", "country_code": "DE", "marktreife_score": 77.0}, + {"city_slug": "berlin", "country_code": "DE", "market_score": 82.5}, + {"city_slug": "munich", "country_code": "DE", "market_score": 77.0}, ] @@ -100,7 +100,7 @@ def _make_fetch_analytics_mock(schema=True): return [r for r in _MOCK_SCHEMA_ROWS if r["table_name"] == params[0]] if "information_schema.columns" in sql: return _MOCK_SCHEMA_ROWS - if "city_market_profile" in sql: + if "location_profiles" in sql: return _MOCK_SAMPLE_ROWS return [] return _mock @@ -162,7 +162,7 @@ async def test_pipeline_overview(admin_client, state_db_dir, serving_meta_dir): resp = await admin_client.get("/admin/pipeline/overview") assert resp.status_code == 200 data = await resp.get_data(as_text=True) - assert "city_market_profile" in data + assert "location_profiles" in data assert "612" in data # row count from serving meta @@ -314,7 +314,7 @@ async def test_pipeline_catalog(admin_client, serving_meta_dir): resp = await admin_client.get("/admin/pipeline/catalog") assert resp.status_code == 200 data = await resp.get_data(as_text=True) - assert "city_market_profile" in data + assert "location_profiles" in data assert "612" in data # row count from serving meta @@ -322,7 +322,7 @@ async def test_pipeline_catalog(admin_client, serving_meta_dir): async def 
test_pipeline_table_detail(admin_client): """Table detail returns columns and sample rows.""" with patch("padelnomics.analytics.fetch_analytics", side_effect=_make_fetch_analytics_mock()): - resp = await admin_client.get("/admin/pipeline/catalog/city_market_profile") + resp = await admin_client.get("/admin/pipeline/catalog/location_profiles") assert resp.status_code == 200 data = await resp.get_data(as_text=True) assert "city_slug" in data @@ -362,7 +362,7 @@ async def test_pipeline_query_editor_loads(admin_client): data = await resp.get_data(as_text=True) assert "query-editor" in data assert "schema-panel" in data - assert "city_market_profile" in data + assert "location_profiles" in data @pytest.mark.asyncio @@ -380,7 +380,7 @@ async def test_pipeline_query_execute_valid(admin_client): with patch("padelnomics.analytics.execute_user_query", new_callable=AsyncMock, return_value=mock_result): resp = await admin_client.post( "/admin/pipeline/query/execute", - form={"csrf_token": "test", "sql": "SELECT city_slug, country_code FROM serving.city_market_profile"}, + form={"csrf_token": "test", "sql": "SELECT city_slug, country_code FROM serving.location_profiles"}, ) assert resp.status_code == 200 data = await resp.get_data(as_text=True) @@ -397,7 +397,7 @@ async def test_pipeline_query_execute_blocked_keyword(admin_client): with patch("padelnomics.analytics.execute_user_query", new_callable=AsyncMock) as mock_q: resp = await admin_client.post( "/admin/pipeline/query/execute", - form={"csrf_token": "test", "sql": "DROP TABLE serving.city_market_profile"}, + form={"csrf_token": "test", "sql": "DROP TABLE serving.location_profiles"}, ) assert resp.status_code == 200 data = await resp.get_data(as_text=True) @@ -532,8 +532,8 @@ def test_load_serving_meta(serving_meta_dir): with patch.object(pipeline_mod, "_SERVING_DUCKDB_PATH", str(Path(serving_meta_dir) / "analytics.duckdb")): meta = pipeline_mod._load_serving_meta() assert meta is not None - assert "city_market_profile" in 
meta["tables"] - assert meta["tables"]["city_market_profile"]["row_count"] == 612 + assert "location_profiles" in meta["tables"] + assert meta["tables"]["location_profiles"]["row_count"] == 612 def test_load_serving_meta_missing(): From 8b794d24a6eab6dea54364513e982f6224f94dc5 Mon Sep 17 00:00:00 2001 From: Deeman Date: Fri, 6 Mar 2026 11:42:36 +0100 Subject: [PATCH 4/5] feat(maps): show both scores in all map tooltips Country map: avg Market Score + avg Opportunity Score. City map: Market Score + Opportunity Score per city. Opportunity map: Opportunity Score + Market Score per location. Subtask 4/5: tooltip updates. Co-Authored-By: Claude Sonnet 4.6 --- web/src/padelnomics/content/templates/markets.html | 4 +++- web/src/padelnomics/public/templates/opportunity_map.html | 2 ++ web/src/padelnomics/static/js/article-maps.js | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/web/src/padelnomics/content/templates/markets.html b/web/src/padelnomics/content/templates/markets.html index 84b109a..e0901a2 100644 --- a/web/src/padelnomics/content/templates/markets.html +++ b/web/src/padelnomics/content/templates/markets.html @@ -102,9 +102,11 @@ if (!c.lat || !c.lon) return; var size = 12 + 44 * Math.sqrt(c.total_venues / maxV); var color = scoreColor(c.avg_market_score); + var oppColor = c.avg_opportunity_score >= 60 ? '#16A34A' : (c.avg_opportunity_score >= 30 ? '#D97706' : '#3B82F6'); var tip = '' + c.country_name_en + '
' + c.total_venues + ' venues · ' + c.city_count + ' cities
' - + 'Padelnomics Market Score: ' + c.avg_market_score + '/100'; + + 'Padelnomics Market Score: ' + c.avg_market_score + '/100
' + + 'Padelnomics Opportunity Score: ' + (c.avg_opportunity_score || 0) + '/100'; L.marker([c.lat, c.lon], { icon: makeIcon(size, color) }) .bindTooltip(tip, { className: 'map-tooltip', direction: 'top', offset: [0, -Math.round(size / 2)] }) .on('click', function() { window.location = '/' + lang + '/markets/' + c.country_slug; }) diff --git a/web/src/padelnomics/public/templates/opportunity_map.html b/web/src/padelnomics/public/templates/opportunity_map.html index 9eca701..0ddf190 100644 --- a/web/src/padelnomics/public/templates/opportunity_map.html +++ b/web/src/padelnomics/public/templates/opportunity_map.html @@ -104,8 +104,10 @@ var dist = loc.nearest_padel_court_km != null ? loc.nearest_padel_court_km.toFixed(1) + ' km to nearest court' : 'No nearby courts'; + var mktColor = loc.market_score >= 60 ? '#16A34A' : (loc.market_score >= 30 ? '#D97706' : '#DC2626'); var tip = '' + loc.location_name + '
' + 'Padelnomics Opportunity Score: ' + loc.opportunity_score + '/100
' + + 'Padelnomics Market Score: ' + (loc.market_score || 0) + '/100
' + dist + ' · Pop. ' + fmtPop(loc.population); L.marker([loc.lat, loc.lon], { icon: makeIcon(size, color) }) .bindTooltip(tip, { className: 'map-tooltip', direction: 'top', offset: [0, -Math.round(size / 2)] }) diff --git a/web/src/padelnomics/static/js/article-maps.js b/web/src/padelnomics/static/js/article-maps.js index 975663c..d90cf8f 100644 --- a/web/src/padelnomics/static/js/article-maps.js +++ b/web/src/padelnomics/static/js/article-maps.js @@ -49,10 +49,12 @@ var pop = c.population >= 1000000 ? (c.population / 1000000).toFixed(1) + 'M' : (c.population >= 1000 ? Math.round(c.population / 1000) + 'K' : (c.population || '')); + var oppColor = c.opportunity_score >= 60 ? '#16A34A' : (c.opportunity_score >= 30 ? '#D97706' : '#3B82F6'); var tip = '' + c.city_name + '
' + (c.padel_venue_count || 0) + ' venues' + (pop ? ' · ' + pop : '') - + '
Padelnomics Market Score: ' + Math.round(c.market_score) + '/100'; + + '
Padelnomics Market Score: ' + Math.round(c.market_score) + '/100' + + '
Padelnomics Opportunity Score: ' + Math.round(c.opportunity_score || 0) + '/100'; if (hasArticle) { tip += '
Click to explore →'; } else { From a3b4e1fab6e3f76ba80572c711cd6690f5e885b6 Mon Sep 17 00:00:00 2001 From: Deeman Date: Fri, 6 Mar 2026 11:45:08 +0100 Subject: [PATCH 5/5] docs: update CHANGELOG, CLAUDE.md, and comments for location_profiles Update transform CLAUDE.md source integration map and conformed dimensions table. Update CHANGELOG with unified model + tooltip changes. Fix stale comments in dim_cities.sql and serving README. Subtask 5/5: documentation. Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 5 +++++ transform/sqlmesh_padelnomics/CLAUDE.md | 20 +++++++++---------- .../models/foundation/dim_cities.sql | 4 ++-- .../models/serving/README.md | 2 +- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f368e0b..446c51b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +### Changed +- **Unified `location_profiles` serving model** — merged `city_market_profile` and `location_opportunity_profile` into a single `serving.location_profiles` table at `(country_code, geoname_id)` grain. Both Marktreife-Score (Market Score) and Marktpotenzial-Score (Opportunity Score) are now computed per location. City data enriched via LEFT JOIN `dim_cities` on `geoname_id`. Downstream models (`planner_defaults`, `pseo_city_costs_de`, `pseo_city_pricing`) updated to query `location_profiles` directly. `city_padel_venue_count` (exact from dim_cities) distinguished from `padel_venue_count` (spatial 5km from dim_locations). +- **Both scores on all map tooltips** — country map shows avg Market Score + avg Opportunity Score; city map shows Market Score + Opportunity Score per city; opportunity map shows Opportunity Score + Market Score per location. All score labels use the trademarked "Padelnomics Market Score" / "Padelnomics Opportunity Score" names. 
+- **API endpoints** — `/api/markets/countries.json` adds `avg_opportunity_score`; `/api/markets/{country}/cities.json` adds `opportunity_score`; `/api/opportunity/{country}.json` adds `market_score`. + ### Added - **Custom 404/500 error pages** — styled error pages extending `base.html` with i18n support (EN/DE). The 404 page is context-aware: when the URL matches `/markets/{country}/{city}`, it shows a city-specific message with a link back to the country overview instead of a generic "page not found". - **Map: city article indicators** — country overview map bubbles now differentiate cities with/without published articles. All cities retain score-based colors (green/amber/red); non-article cities are visually receded with lower opacity, dashed borders, desaturated color, and default cursor (no click). Tooltips show scores for all cities — article cities get "Click to explore →", non-article cities get "Coming soon". The `/api/markets/{country}/cities.json` endpoint includes a `has_article` boolean per city. diff --git a/transform/sqlmesh_padelnomics/CLAUDE.md b/transform/sqlmesh_padelnomics/CLAUDE.md index 2686693..296d0e5 100644 --- a/transform/sqlmesh_padelnomics/CLAUDE.md +++ b/transform/sqlmesh_padelnomics/CLAUDE.md @@ -56,27 +56,27 @@ Grain must match reality — use `QUALIFY ROW_NUMBER()` to enforce it. |-----------|-------|---------| | `foundation.dim_countries` | `country_code` | `dim_cities`, `dim_locations`, `pseo_city_costs_de`, `planner_defaults` — single source for country names, income, PLI/cost overrides | | `foundation.dim_venues` | `venue_id` | `dim_cities`, `dim_venue_capacity`, `fct_daily_availability` (via capacity join) | -| `foundation.dim_cities` | `(country_code, city_slug)` | `serving.city_market_profile` → all pSEO serving models | -| `foundation.dim_locations` | `(country_code, geoname_id)` | `serving.location_opportunity_profile` — all GeoNames locations (pop ≥1K), incl. 
zero-court locations | +| `foundation.dim_cities` | `(country_code, city_slug)` | `serving.location_profiles` (city_slug + city_padel_venue_count) → all pSEO serving models | +| `foundation.dim_locations` | `(country_code, geoname_id)` | `serving.location_profiles` — all GeoNames locations (pop ≥1K), incl. zero-court locations | | `foundation.dim_venue_capacity` | `tenant_id` | `foundation.fct_daily_availability` | ## Source integration map ``` stg_playtomic_venues ─┐ -stg_playtomic_resources─┤→ dim_venues ─┬→ dim_cities ──────────────→ city_market_profile -stg_padel_courts ─┘ └→ dim_venue_capacity (Marktreife-Score) - ↓ +stg_playtomic_resources─┤→ dim_venues ─┬→ dim_cities ──┐ +stg_padel_courts ─┘ └→ dim_venue_capacity + │ stg_playtomic_availability ──→ fct_availability_slot ──→ fct_daily_availability ↓ venue_pricing_benchmarks ↓ stg_population ──→ dim_cities ─────────────────────────────┘ -stg_income ──→ dim_cities - -stg_population_geonames ─┐ -stg_padel_courts ─┤→ dim_locations ──→ location_opportunity_profile -stg_tennis_courts ─┤ (Marktpotenzial-Score) +stg_income ──→ dim_cities │ + ↓ +stg_population_geonames ─┐ location_profiles +stg_padel_courts ─┤→ dim_locations ────────→ (both scores: +stg_tennis_courts ─┤ Marktreife + Marktpotenzial) stg_income ─┘ ``` diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql index f25aa60..3595aba 100644 --- a/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql +++ b/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql @@ -2,7 +2,7 @@ -- Built from venue locations (dim_venues) as the primary source — padelnomics -- tracks cities where padel venues actually exist, not an administrative city list. -- --- Conformed dimension: used by city_market_profile and all pSEO serving models. +-- Conformed dimension: used by location_profiles and all pSEO serving models. 
-- Integrates four sources: -- dim_venues → city list, venue count, coordinates (Playtomic + OSM) -- foundation.dim_countries → country_name_en, country_slug, median_income_pps @@ -128,7 +128,7 @@ SELECT vc.padel_venue_count, c.median_income_pps, c.income_year, - -- GeoNames ID: FK to dim_locations / location_opportunity_profile. + -- GeoNames ID: FK to dim_locations / location_profiles. -- String match preferred; spatial fallback used when name doesn't match (Milano→Milan, etc.) COALESCE(gn.geoname_id, gs.spatial_geoname_id) AS geoname_id FROM venue_cities vc diff --git a/transform/sqlmesh_padelnomics/models/serving/README.md b/transform/sqlmesh_padelnomics/models/serving/README.md index 73cc013..8192f8f 100644 --- a/transform/sqlmesh_padelnomics/models/serving/README.md +++ b/transform/sqlmesh_padelnomics/models/serving/README.md @@ -3,4 +3,4 @@ Analytics-ready views consumed by the web app and programmatic SEO. Query these from `analytics.py` via DuckDB read-only connection. -Naming convention: `serving.` (e.g. `serving.city_market_profile`) +Naming convention: `serving.` (e.g. `serving.location_profiles`)