refactor(serving): replace old models with location_profiles

Delete city_market_profile.sql and location_opportunity_profile.sql.
Update downstream models (planner_defaults, pseo_city_costs_de,
pseo_city_pricing) to read from location_profiles instead.

Subtask 2/5: delete old models + update downstream SQL.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-03-06 11:39:52 +01:00
parent cda94c9ee4
commit 81b556b205
5 changed files with 17 additions and 220 deletions

View File

@@ -1,117 +0,0 @@
-- serving.city_market_profile: one wide row per city with padel market intelligence.
-- Consumers: SEO article generation, planner city-select pre-fill, API endpoints.
--
-- Padelnomics Marktreife-Score v3 (0–100): "How mature/established is this padel market?"
-- Scored only for cities with at least one padel venue (padel_venue_count > 0).
-- White-space opportunity scoring lives in serving.location_opportunity_profile.
--
-- Point budget:
--   40 pts  supply development  log-scaled venue density (≈ full marks at 20/100k)
--                               multiplied by a count gate, min(1, count/5), which
--                               keeps single-venue small towns from scoring high
--   25 pts  demand evidence     occupancy when available; otherwise 40% of the
--                               gated density ratio
--   15 pts  addressable market  log-scaled population, ceiling 1M (context only)
--   10 pts  economic context    income PPS divided by 200, capped at 1
--   10 pts  data quality        completeness discount (data_confidence)
-- No saturation discount: high density is read as maturity, not as a penalty.
MODEL (
    name serving.city_market_profile,
    kind FULL,
    cron '@daily',
    grain (country_code, city_slug)
);

WITH city_inputs AS (
    -- Cities with padel presence, plus Playtomic benchmarks where they exist.
    SELECT
        city.country_code,
        city.country_name_en,
        city.country_slug,
        city.city_name,
        city.city_slug,
        city.lat,
        city.lon,
        city.population,
        city.population_year,
        city.padel_venue_count,
        city.median_income_pps,
        city.income_year,
        city.geoname_id,
        -- Padel venues per 100K residents; NULL when population is not positive.
        CASE
            WHEN city.population > 0
                THEN ROUND(city.padel_venue_count::DOUBLE / city.population * 100000, 2)
            ELSE NULL
        END AS venues_per_100k,
        -- 1.0 when population and venue count are both positive, 0.5 when only one is.
        -- The WHERE clause below already requires padel_venue_count > 0, so the
        -- ELSE branch cannot be reached from this query.
        CASE
            WHEN city.population > 0 AND city.padel_venue_count > 0 THEN 1.0
            WHEN city.population > 0 OR city.padel_venue_count > 0 THEN 0.5
            ELSE 0.0
        END AS data_confidence,
        -- Pricing / occupancy benchmarks; NULL when the LEFT JOIN finds no match.
        bench.median_hourly_rate,
        bench.median_peak_rate,
        bench.median_offpeak_rate,
        bench.median_occupancy_rate,
        bench.median_daily_revenue_per_venue,
        bench.price_currency
    FROM foundation.dim_cities AS city
    LEFT JOIN serving.venue_pricing_benchmarks AS bench
        ON city.country_code = bench.country_code
        AND city.city_slug = bench.city_slug
    WHERE city.padel_venue_count > 0
),

score_factors AS (
    -- Name the two ratios that both the supply and the demand-fallback terms reuse.
    SELECT
        ci.country_code,
        ci.country_name_en,
        ci.country_slug,
        ci.city_name,
        ci.city_slug,
        ci.lat,
        ci.lon,
        ci.population,
        ci.population_year,
        ci.padel_venue_count,
        ci.venues_per_100k,
        ci.data_confidence,
        ci.median_income_pps,
        ci.income_year,
        ci.median_hourly_rate,
        ci.median_peak_rate,
        ci.median_offpeak_rate,
        ci.median_occupancy_rate,
        ci.median_daily_revenue_per_venue,
        ci.price_currency,
        ci.geoname_id,
        -- LN(density + 1) / LN(21): a density of 20/100k maps to 1.0; capped there.
        LEAST(1.0, LN(COALESCE(ci.venues_per_100k, 0) + 1) / LN(21)) AS density_ratio,
        -- 1 venue = 0.2, 5 or more venues = 1.0.
        LEAST(1.0, ci.padel_venue_count / 5.0) AS count_gate
    FROM city_inputs AS ci
)

SELECT
    sf.country_code,
    sf.country_name_en,
    sf.country_slug,
    sf.city_name,
    sf.city_slug,
    sf.lat,
    sf.lon,
    sf.population,
    sf.population_year,
    sf.padel_venue_count,
    sf.venues_per_100k,
    sf.data_confidence,
    ROUND(
        -- Supply development (40 pts): gated, log-scaled density.
        40.0 * sf.density_ratio * sf.count_gate
        -- Demand evidence (25 pts): occupancy relative to 0.65 when present;
        -- otherwise 40% of the gated density so supply is not double-counted.
        + 25.0 * CASE
            WHEN sf.median_occupancy_rate IS NOT NULL
                THEN LEAST(1.0, sf.median_occupancy_rate / 0.65)
            ELSE 0.4 * sf.density_ratio * sf.count_gate
        END
        -- Addressable market (15 pts): LN(1) = 0, so zero-population cities add 0.
        + 15.0 * LEAST(1.0, LN(GREATEST(sf.population, 1)) / LN(1000000))
        -- Economic context (10 pts): missing income defaults to 100 (half marks).
        -- NOTE(review): if median_income_pps is on the 18k-37k scale described in
        -- serving.location_opportunity_profile, this term is always 10 whenever
        -- income is present — confirm whether that is intended.
        + 10.0 * LEAST(1.0, COALESCE(sf.median_income_pps, 100) / 200.0)
        -- Data quality (10 pts).
        + 10.0 * sf.data_confidence
    , 1) AS market_score,
    sf.median_income_pps,
    sf.income_year,
    sf.median_hourly_rate,
    sf.median_peak_rate,
    sf.median_offpeak_rate,
    sf.median_occupancy_rate,
    sf.median_daily_revenue_per_venue,
    sf.price_currency,
    sf.geoname_id,
    CURRENT_DATE AS refreshed_date
FROM score_factors AS sf
ORDER BY market_score DESC

View File

@@ -1,86 +0,0 @@
-- Per-location padel investment opportunity intelligence.
-- Consumed by: Gemeinde-level pSEO pages, opportunity map, "top markets" lists.
--
-- Padelnomics Marktpotenzial-Score v2 (0–100):
--   Answers "Where should I build a padel court?"
--   Covers ALL GeoNames locations (pop ≥ 1K) — NOT filtered to existing padel markets.
--   Zero-court locations score highest on supply gap component (white space = opportunity).
--
--   25 pts addressable market — log-scaled population, ceiling 500K
--                               (opportunity peaks in mid-size cities; megacities already served)
--   20 pts economic power     — country income PPS, normalised to 35,000
--                               EU PPS values range 18k-37k; /35k gives real spread.
--                               DE ≈ 13.2pts, ES ≈ 10.7pts, SE ≈ 14.3pts.
--                               Previously /200 caused all countries to saturate at 20/20.
--   30 pts supply gap         — INVERTED venue density; 0 courts/100K = full marks.
--                               Ceiling raised to 8/100K (was 4) for a gentler gradient
--                               and to account for ~87% data undercount vs FIP totals.
--                               Linear: GREATEST(0, 1 - density/8)
--   15 pts catchment gap      — distance to nearest padel court, full marks at ≥30km.
--                               NULL nearest_km (no court in bounding box) = full marks
--                               (high opportunity). This is now spelled out with an
--                               explicit CASE instead of relying on DuckDB's LEAST
--                               ignoring NULL arguments.
--   10 pts sports culture     — tennis courts within 25km (≥10 = full marks).
--                               NULL counts are treated as 0 courts (0 pts).
--                               NOTE: dim_locations tennis data is empty (all 0 rows).
--                               Component contributes 0 pts everywhere until data lands.
MODEL (
    name serving.location_opportunity_profile,
    kind FULL,
    cron '@daily',
    grain (country_code, geoname_id)
);

SELECT
    l.geoname_id,
    l.country_code,
    l.country_name_en,
    l.country_slug,
    l.location_name,
    l.location_slug,
    l.lat,
    l.lon,
    l.admin1_code,
    l.admin2_code,
    l.population,
    l.population_year,
    l.median_income_pps,
    l.income_year,
    l.padel_venue_count,
    l.padel_venues_per_100k,
    l.nearest_padel_court_km,
    l.tennis_courts_within_25km,
    ROUND(
        -- Addressable market (25 pts): log-scaled to 500K ceiling.
        -- Lower ceiling than Marktreife (1M) — opportunity peaks in mid-size cities
        -- that can support a court but aren't already saturated by large-city operators.
        25.0 * LEAST(1.0, LN(GREATEST(l.population, 1)) / LN(500000))
        -- Economic power (20 pts): country-level income PPS normalised to 35,000.
        -- Drives willingness-to-pay for court fees (€20-35/hr target range).
        -- EU PPS values range 18k-37k; ceiling 35k gives meaningful spread.
        -- v1 used /200 which caused LEAST(1.0, 115) = 1.0 for ALL countries (flat, no differentiation).
        -- v2: /35000 → DE 0.66×20=13.2pts, ES 0.53×20=10.7pts, SE 0.71×20=14.3pts.
        -- Default 15000 for missing data = reasonable developing-market assumption (~0.43).
        + 20.0 * LEAST(1.0, COALESCE(l.median_income_pps, 15000) / 35000.0)
        -- Supply gap (30 pts): INVERTED venue density.
        -- 0 courts/100K = full 30 pts (white space); ≥8/100K = 0 pts (served market).
        -- Ceiling raised from 4→8/100K for a gentler gradient and to account for data
        -- undercount (~87% of real courts not in our data).
        -- This is the key signal that separates Marktpotenzial from Marktreife.
        + 30.0 * GREATEST(0.0, 1.0 - COALESCE(l.padel_venues_per_100k, 0) / 8.0)
        -- Catchment gap (15 pts): distance to nearest existing padel court.
        -- ≥30km = full 15 pts (underserved catchment area).
        -- NULL distance = no court found → full 15 pts. The previous
        -- COALESCE(LEAST(...), 0.5) never produced 0.5, because LEAST skips NULL
        -- arguments and returned 1.0; the CASE keeps that result and makes it visible.
        + 15.0 * CASE
            WHEN l.nearest_padel_court_km IS NULL THEN 1.0
            ELSE LEAST(1.0, l.nearest_padel_court_km / 30.0)
        END
        -- Sports culture proxy (10 pts): tennis courts within 25km.
        -- ≥10 courts = full 10 pts (proven racket sport market = faster padel adoption).
        -- 0 courts = 0 pts. Many new padel courts open inside existing tennis clubs.
        -- COALESCE to 0 so a NULL count scores 0 pts; without it LEAST(1.0, NULL)
        -- would return 1.0 and award full marks for missing data.
        + 10.0 * LEAST(1.0, COALESCE(l.tennis_courts_within_25km, 0) / 10.0)
    , 1) AS opportunity_score,
    CURRENT_DATE AS refreshed_date
FROM foundation.dim_locations AS l
ORDER BY opportunity_score DESC

View File

@@ -76,11 +76,12 @@ city_profiles AS (
city_slug, city_slug,
country_code, country_code,
city_name, city_name,
padel_venue_count, city_padel_venue_count AS padel_venue_count,
population, population,
market_score, market_score,
venues_per_100k city_venues_per_100k AS venues_per_100k
FROM serving.city_market_profile FROM serving.location_profiles
WHERE city_slug IS NOT NULL
) )
SELECT SELECT
cp.city_slug, cp.city_slug,

View File

@@ -31,10 +31,10 @@ SELECT
c.lon, c.lon,
-- Market metrics -- Market metrics
c.population, c.population,
c.padel_venue_count, c.city_padel_venue_count AS padel_venue_count,
c.venues_per_100k, c.city_venues_per_100k AS venues_per_100k,
c.market_score, c.market_score,
lop.opportunity_score, c.opportunity_score,
c.data_confidence, c.data_confidence,
-- Pricing (from Playtomic, NULL when no coverage) -- Pricing (from Playtomic, NULL when no coverage)
c.median_hourly_rate, c.median_hourly_rate,
@@ -85,15 +85,13 @@ SELECT
cc.working_capital AS "workingCapital", cc.working_capital AS "workingCapital",
cc.permits_compliance AS "permitsCompliance", cc.permits_compliance AS "permitsCompliance",
CURRENT_DATE AS refreshed_date CURRENT_DATE AS refreshed_date
FROM serving.city_market_profile c FROM serving.location_profiles c
LEFT JOIN serving.planner_defaults p LEFT JOIN serving.planner_defaults p
ON c.country_code = p.country_code ON c.country_code = p.country_code
AND c.city_slug = p.city_slug AND c.city_slug = p.city_slug
LEFT JOIN serving.location_opportunity_profile lop
ON c.country_code = lop.country_code
AND c.geoname_id = lop.geoname_id
LEFT JOIN foundation.dim_countries cc LEFT JOIN foundation.dim_countries cc
ON c.country_code = cc.country_code ON c.country_code = cc.country_code
-- Only cities with actual padel presence and at least some rate data -- Only cities with actual padel presence and at least some rate data
WHERE c.padel_venue_count > 0 WHERE c.city_slug IS NOT NULL
AND c.city_padel_venue_count > 0
AND (p.rate_peak IS NOT NULL OR c.median_peak_rate IS NOT NULL) AND (p.rate_peak IS NOT NULL OR c.median_peak_rate IS NOT NULL)

View File

@@ -1,6 +1,6 @@
-- pSEO article data: per-city padel court pricing. -- pSEO article data: per-city padel court pricing.
-- One row per city — consumed by the city-pricing.md.jinja template. -- One row per city — consumed by the city-pricing.md.jinja template.
-- Joins venue_pricing_benchmarks (real Playtomic data) with city_market_profile -- Joins venue_pricing_benchmarks (real Playtomic data) with location_profiles
-- (population, venue count, country metadata). -- (population, venue count, country metadata).
-- --
-- Stricter filter than pseo_city_costs_de: requires >= 2 venues with real -- Stricter filter than pseo_city_costs_de: requires >= 2 venues with real
@@ -16,7 +16,7 @@ MODEL (
SELECT SELECT
-- Composite natural key: country_slug + city_slug ensures uniqueness across countries -- Composite natural key: country_slug + city_slug ensures uniqueness across countries
c.country_slug || '-' || c.city_slug AS city_key, c.country_slug || '-' || c.city_slug AS city_key,
-- City identity (from city_market_profile, which has the canonical city_slug) -- City identity (from location_profiles, which has the canonical city_slug)
c.city_slug, c.city_slug,
c.city_name, c.city_name,
c.country_code, c.country_code,
@@ -24,8 +24,8 @@ SELECT
c.country_slug, c.country_slug,
-- Market context -- Market context
c.population, c.population,
c.padel_venue_count, c.city_padel_venue_count AS padel_venue_count,
c.venues_per_100k, c.city_venues_per_100k AS venues_per_100k,
c.market_score, c.market_score,
-- Pricing benchmarks (from Playtomic availability data) -- Pricing benchmarks (from Playtomic availability data)
vpb.median_hourly_rate, vpb.median_hourly_rate,
@@ -38,9 +38,10 @@ SELECT
vpb.price_currency, vpb.price_currency,
CURRENT_DATE AS refreshed_date CURRENT_DATE AS refreshed_date
FROM serving.venue_pricing_benchmarks vpb FROM serving.venue_pricing_benchmarks vpb
-- Join city_market_profile to get the canonical city_slug and country metadata -- Join location_profiles to get canonical city metadata
INNER JOIN serving.city_market_profile c INNER JOIN serving.location_profiles c
ON vpb.country_code = c.country_code ON vpb.country_code = c.country_code
AND vpb.city_slug = c.city_slug AND vpb.city_slug = c.city_slug
AND c.city_slug IS NOT NULL
-- Only cities with enough venues for meaningful pricing statistics -- Only cities with enough venues for meaningful pricing statistics
WHERE vpb.venue_count >= 2 WHERE vpb.venue_count >= 2