diff --git a/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql b/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql deleted file mode 100644 index 963ea8a..0000000 --- a/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql +++ /dev/null @@ -1,117 +0,0 @@ --- One Big Table: per-city padel market intelligence. --- Consumed by: SEO article generation, planner city-select pre-fill, API endpoints. --- --- Padelnomics Marktreife-Score v3 (0–100): --- Answers "How mature/established is this padel market?" --- Only computed for cities with ≥1 padel venue (padel_venue_count > 0). --- For white-space opportunity scoring, see serving.location_opportunity_profile. --- --- 40 pts supply development — log-scaled density (LN ceiling 20/100k) × count gate --- (min(1, count/5) kills small-town inflation) --- 25 pts demand evidence — occupancy when available; 40% density proxy otherwise --- 15 pts addressable market — log-scaled population, ceiling 1M (context only) --- 10 pts economic context — income PPS normalised to 200 ceiling --- 10 pts data quality — completeness discount --- No saturation discount: high density = maturity, not a penalty - -MODEL ( - name serving.city_market_profile, - kind FULL, - cron '@daily', - grain (country_code, city_slug) -); - -WITH base AS ( - SELECT - c.country_code, - c.country_name_en, - c.country_slug, - c.city_name, - c.city_slug, - c.lat, - c.lon, - c.population, - c.population_year, - c.padel_venue_count, - c.median_income_pps, - c.income_year, - c.geoname_id, - -- Venue density: padel venues per 100K residents - CASE WHEN c.population > 0 - THEN ROUND(c.padel_venue_count::DOUBLE / c.population * 100000, 2) - ELSE NULL - END AS venues_per_100k, - -- Data confidence: 1.0 if both population and venues are present - CASE - WHEN c.population > 0 AND c.padel_venue_count > 0 THEN 1.0 - WHEN c.population > 0 OR c.padel_venue_count > 0 THEN 0.5 - ELSE 0.0 - END AS data_confidence, - -- Pricing / occupancy from Playtomic (NULL when no availability data) - vpb.median_hourly_rate, - vpb.median_peak_rate, - vpb.median_offpeak_rate, - vpb.median_occupancy_rate, - vpb.median_daily_revenue_per_venue, - vpb.price_currency - FROM foundation.dim_cities c - LEFT JOIN serving.venue_pricing_benchmarks vpb - ON c.country_code = vpb.country_code - AND c.city_slug = vpb.city_slug - WHERE c.padel_venue_count > 0 -), -scored AS ( - SELECT *, - ROUND( - -- Supply development (40 pts): THE maturity signal. - -- Log-scaled density: LN(density+1)/LN(21) → 20/100k ≈ full marks. - -- Count gate: min(1, count/5) — 1 venue=20%, 5+ venues=100%. - -- Kills small-town inflation (1 court / 5k pop = 20/100k) without hard cutoffs. - 40.0 * LEAST(1.0, LN(COALESCE(venues_per_100k, 0) + 1) / LN(21)) - * LEAST(1.0, padel_venue_count / 5.0) - -- Demand evidence (25 pts): occupancy when Playtomic data available. - -- Fallback: 40% of density score (avoids double-counting with supply component). - + 25.0 * CASE - WHEN median_occupancy_rate IS NOT NULL - THEN LEAST(1.0, median_occupancy_rate / 0.65) - ELSE 0.4 * LEAST(1.0, LN(COALESCE(venues_per_100k, 0) + 1) / LN(21)) - * LEAST(1.0, padel_venue_count / 5.0) - END - -- Addressable market (15 pts): population as context, not maturity signal. - -- LN(1) = 0 so zero-pop cities score 0 here. - + 15.0 * LEAST(1.0, LN(GREATEST(population, 1)) / LN(1000000)) - -- Economic context (10 pts): country-level income PPS. - -- Flat per country — kept as context modifier, not primary signal. - + 10.0 * LEAST(1.0, COALESCE(median_income_pps, 100) / 200.0) - -- Data quality (10 pts): completeness discount. - + 10.0 * data_confidence - , 1) - AS market_score - FROM base -) -SELECT - s.country_code, - s.country_name_en, - s.country_slug, - s.city_name, - s.city_slug, - s.lat, - s.lon, - s.population, - s.population_year, - s.padel_venue_count, - s.venues_per_100k, - s.data_confidence, - s.market_score, - s.median_income_pps, - s.income_year, - s.median_hourly_rate, - s.median_peak_rate, - s.median_offpeak_rate, - s.median_occupancy_rate, - s.median_daily_revenue_per_venue, - s.price_currency, - s.geoname_id, - CURRENT_DATE AS refreshed_date -FROM scored s -ORDER BY s.market_score DESC diff --git a/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql b/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql deleted file mode 100644 index b746cab..0000000 --- a/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql +++ /dev/null @@ -1,86 +0,0 @@ --- Per-location padel investment opportunity intelligence. --- Consumed by: Gemeinde-level pSEO pages, opportunity map, "top markets" lists. --- --- Padelnomics Marktpotenzial-Score v2 (0–100): --- Answers "Where should I build a padel court?" --- Covers ALL GeoNames locations (pop ≥ 1K) — NOT filtered to existing padel markets. --- Zero-court locations score highest on supply gap component (white space = opportunity). --- --- 25 pts addressable market — log-scaled population, ceiling 500K --- (opportunity peaks in mid-size cities; megacities already served) --- 20 pts economic power — country income PPS, normalised to 35,000 --- EU PPS values range 18k-37k; /35k gives real spread. --- DE ≈ 13.2pts, ES ≈ 10.7pts, SE ≈ 14.3pts. --- Previously /200 caused all countries to saturate at 20/20. --- 30 pts supply gap — INVERTED venue density; 0 courts/100K = full marks. --- Ceiling raised to 8/100K (was 4) for a gentler gradient --- and to account for ~87% data undercount vs FIP totals. --- Linear: GREATEST(0, 1 - density/8) --- 15 pts catchment gap — distance to nearest padel court. --- DuckDB LEAST ignores NULLs: LEAST(1.0, NULL/30) = 1.0, --- so NULL nearest_km = full marks (no court in bounding box --- = high opportunity). COALESCE fallback is dead code. --- 10 pts sports culture — tennis courts within 25km (≥10 = full marks). --- NOTE: dim_locations tennis data is empty (all 0 rows). --- Component contributes 0 pts everywhere until data lands. - -MODEL ( - name serving.location_opportunity_profile, - kind FULL, - cron '@daily', - grain (country_code, geoname_id) -); - -SELECT - l.geoname_id, - l.country_code, - l.country_name_en, - l.country_slug, - l.location_name, - l.location_slug, - l.lat, - l.lon, - l.admin1_code, - l.admin2_code, - l.population, - l.population_year, - l.median_income_pps, - l.income_year, - l.padel_venue_count, - l.padel_venues_per_100k, - l.nearest_padel_court_km, - l.tennis_courts_within_25km, - ROUND( - -- Addressable market (25 pts): log-scaled to 500K ceiling. - -- Lower ceiling than Marktreife (1M) — opportunity peaks in mid-size cities - -- that can support a court but aren't already saturated by large-city operators. - 25.0 * LEAST(1.0, LN(GREATEST(l.population, 1)) / LN(500000)) - - -- Economic power (20 pts): country-level income PPS normalised to 35,000. - -- Drives willingness-to-pay for court fees (€20-35/hr target range). - -- EU PPS values range 18k-37k; ceiling 35k gives meaningful spread. - -- v1 used /200 which caused LEAST(1.0, 115) = 1.0 for ALL countries (flat, no differentiation). - -- v2: /35000 → DE 0.66×20=13.2pts, ES 0.53×20=10.7pts, SE 0.71×20=14.3pts. - -- Default 15000 for missing data = reasonable developing-market assumption (~0.43). - + 20.0 * LEAST(1.0, COALESCE(l.median_income_pps, 15000) / 35000.0) - - -- Supply gap (30 pts): INVERTED venue density. - -- 0 courts/100K = full 30 pts (white space); ≥8/100K = 0 pts (served market). - -- Ceiling raised from 4→8/100K for a gentler gradient and to account for data - -- undercount (~87% of real courts not in our data). - -- This is the key signal that separates Marktpotenzial from Marktreife. - + 30.0 * GREATEST(0.0, 1.0 - COALESCE(l.padel_venues_per_100k, 0) / 8.0) - - -- Catchment gap (15 pts): distance to nearest existing padel court. - -- >30km = full 15 pts (underserved catchment area). - -- NULL = no courts found anywhere (rare edge case) → neutral 0.5. - + 15.0 * COALESCE(LEAST(1.0, l.nearest_padel_court_km / 30.0), 0.5) - - -- Sports culture proxy (10 pts): tennis courts within 25km. - -- ≥10 courts = full 10 pts (proven racket sport market = faster padel adoption). - -- 0 courts = 0 pts. Many new padel courts open inside existing tennis clubs. - + 10.0 * LEAST(1.0, l.tennis_courts_within_25km / 10.0) - , 1) AS opportunity_score, - CURRENT_DATE AS refreshed_date -FROM foundation.dim_locations l -ORDER BY opportunity_score DESC diff --git a/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql b/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql index eb0fcb3..29c27cd 100644 --- a/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql +++ b/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql @@ -76,11 +76,12 @@ city_profiles AS ( city_slug, country_code, city_name, - padel_venue_count, + city_padel_venue_count AS padel_venue_count, population, market_score, - venues_per_100k - FROM serving.city_market_profile + city_venues_per_100k AS venues_per_100k + FROM serving.location_profiles + WHERE city_slug IS NOT NULL ) SELECT cp.city_slug, diff --git a/transform/sqlmesh_padelnomics/models/serving/pseo_city_costs_de.sql b/transform/sqlmesh_padelnomics/models/serving/pseo_city_costs_de.sql index 69db995..c71cdf6 100644 --- a/transform/sqlmesh_padelnomics/models/serving/pseo_city_costs_de.sql +++ b/transform/sqlmesh_padelnomics/models/serving/pseo_city_costs_de.sql @@ -31,10 +31,10 @@ SELECT c.lon, -- Market metrics c.population, - c.padel_venue_count, - c.venues_per_100k, + c.city_padel_venue_count AS padel_venue_count, + c.city_venues_per_100k AS venues_per_100k, c.market_score, - lop.opportunity_score, + c.opportunity_score, c.data_confidence, -- Pricing (from Playtomic, NULL when no coverage) c.median_hourly_rate, @@ -85,15 +85,13 @@ SELECT cc.working_capital AS "workingCapital", cc.permits_compliance AS "permitsCompliance", CURRENT_DATE AS refreshed_date -FROM serving.city_market_profile c +FROM serving.location_profiles c LEFT JOIN serving.planner_defaults p ON c.country_code = p.country_code AND c.city_slug = p.city_slug -LEFT JOIN serving.location_opportunity_profile lop - ON c.country_code = lop.country_code - AND c.geoname_id = lop.geoname_id LEFT JOIN foundation.dim_countries cc ON c.country_code = cc.country_code -- Only cities with actual padel presence and at least some rate data -WHERE c.padel_venue_count > 0 +WHERE c.city_slug IS NOT NULL + AND c.city_padel_venue_count > 0 AND (p.rate_peak IS NOT NULL OR c.median_peak_rate IS NOT NULL) diff --git a/transform/sqlmesh_padelnomics/models/serving/pseo_city_pricing.sql b/transform/sqlmesh_padelnomics/models/serving/pseo_city_pricing.sql index ad306c1..aee7c2c 100644 --- a/transform/sqlmesh_padelnomics/models/serving/pseo_city_pricing.sql +++ b/transform/sqlmesh_padelnomics/models/serving/pseo_city_pricing.sql @@ -1,6 +1,6 @@ -- pSEO article data: per-city padel court pricing. -- One row per city — consumed by the city-pricing.md.jinja template. --- Joins venue_pricing_benchmarks (real Playtomic data) with city_market_profile +-- Joins venue_pricing_benchmarks (real Playtomic data) with location_profiles -- (population, venue count, country metadata). -- -- Stricter filter than pseo_city_costs_de: requires >= 2 venues with real @@ -16,7 +16,7 @@ MODEL ( SELECT -- Composite natural key: country_slug + city_slug ensures uniqueness across countries c.country_slug || '-' || c.city_slug AS city_key, - -- City identity (from city_market_profile, which has the canonical city_slug) + -- City identity (from location_profiles, which has the canonical city_slug) c.city_slug, c.city_name, c.country_code, @@ -24,8 +24,8 @@ SELECT c.country_slug, -- Market context c.population, - c.padel_venue_count, - c.venues_per_100k, + c.city_padel_venue_count AS padel_venue_count, + c.city_venues_per_100k AS venues_per_100k, c.market_score, -- Pricing benchmarks (from Playtomic availability data) vpb.median_hourly_rate, @@ -38,9 +38,10 @@ SELECT vpb.price_currency, CURRENT_DATE AS refreshed_date FROM serving.venue_pricing_benchmarks vpb --- Join city_market_profile to get the canonical city_slug and country metadata -INNER JOIN serving.city_market_profile c +-- Join location_profiles to get canonical city metadata +INNER JOIN serving.location_profiles c ON vpb.country_code = c.country_code AND vpb.city_slug = c.city_slug + AND c.city_slug IS NOT NULL -- Only cities with enough venues for meaningful pricing statistics WHERE vpb.venue_count >= 2