merge: unified location_profiles serving model + both scores on map tooltips
All checks were successful
CI / test (push) Successful in 55s
CI / tag (push) Successful in 3s

# Conflicts:
#	CHANGELOG.md
#	transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql
This commit is contained in:
Deeman
2026-03-06 14:03:55 +01:00
19 changed files with 320 additions and 315 deletions

View File

@@ -0,0 +1,243 @@
-- Unified location profile: both scores at (country_code, geoname_id) grain.
-- Base: dim_locations (ALL GeoNames locations, pop ≥ 1K, ~140K rows).
-- Enriched with dim_cities (city_slug, city_name, exact venue count) and
-- venue_pricing_benchmarks (Playtomic pricing/occupancy).
--
-- Two scores per location:
--
-- Padelnomics Market Score (Marktreife-Score v3, 0100):
-- "How mature/established is this padel market?"
-- Only meaningful for locations matched to a dim_cities row (city_slug IS NOT NULL)
-- with padel venues. 0 for all other locations.
--
-- 40 pts supply development — log-scaled density (LN ceiling 20/100k) × count gate
-- 25 pts demand evidence — occupancy when available; 40% density proxy otherwise
-- 15 pts addressable market — log-scaled population, ceiling 1M
-- 10 pts economic context — income PPS normalised to 200 ceiling
-- 10 pts data quality — completeness discount
--
-- Padelnomics Opportunity Score (Marktpotenzial-Score v3, 0100):
-- "Where should I build a padel court?"
-- Computed for ALL locations — zero-court locations score highest on supply gap.
-- H3 catchment methodology: addressable market and supply gap use a regional
-- H3 catchment (res-4 cell + 6 neighbours, ~462km², ~15-18km radius).
--
-- 25 pts addressable market — log-scaled catchment population, ceiling 500K
-- 20 pts economic power — income PPS, normalised to 35,000
-- 30 pts supply gap — inverted catchment venue density; 0 courts = full marks
-- 15 pts catchment gap — distance to nearest padel court
-- 10 pts sports culture — tennis courts within 25km
--
-- Consumers query directly with WHERE filters:
-- cities API: WHERE country_slug = ? AND city_slug IS NOT NULL
-- opportunity API: WHERE country_slug = ? AND opportunity_score > 0
-- planner_defaults: WHERE city_slug IS NOT NULL
-- pseo_*: WHERE city_slug IS NOT NULL AND city_padel_venue_count > 0
MODEL (
name serving.location_profiles,
kind FULL,
cron '@daily',
grain (country_code, geoname_id)
);
WITH
-- All locations from dim_locations (superset)
base AS (
SELECT
l.geoname_id,
l.country_code,
l.country_name_en,
l.country_slug,
l.location_name,
l.location_slug,
l.lat,
l.lon,
l.admin1_code,
l.admin2_code,
l.population,
l.population_year,
l.median_income_pps,
l.income_year,
l.padel_venue_count,
l.padel_venues_per_100k,
l.nearest_padel_court_km,
l.tennis_courts_within_25km,
l.h3_cell_res4
FROM foundation.dim_locations l
),
-- Aggregate population and court counts per H3 cell (res 4, ~10km edge).
-- Grouping by cell first (~30-50K distinct cells vs 140K locations) keeps the
-- subsequent lateral join small.
hex_stats AS (
SELECT
h3_cell_res4,
SUM(population) AS hex_population,
SUM(padel_venue_count) AS hex_padel_courts
FROM foundation.dim_locations
GROUP BY h3_cell_res4
),
-- For each location, sum hex_stats across the cell + 6 neighbours (k_ring=1).
-- Effective catchment: ~462km², ~15-18km radius — realistic driving distance.
catchment AS (
SELECT
l.geoname_id,
SUM(hs.hex_population) AS catchment_population,
SUM(hs.hex_padel_courts) AS catchment_padel_courts
FROM base l,
LATERAL (SELECT UNNEST(h3_grid_disk(l.h3_cell_res4, 1)) AS cell) ring
JOIN hex_stats hs ON hs.h3_cell_res4 = ring.cell
GROUP BY l.geoname_id
),
-- Match dim_cities via (country_code, geoname_id) to get city_slug + exact venue count.
-- QUALIFY handles rare multi-city-per-geoname collisions (keep highest venue count).
city_match AS (
SELECT
c.country_code,
c.geoname_id,
c.city_slug,
c.city_name,
c.padel_venue_count AS city_padel_venue_count
FROM foundation.dim_cities c
WHERE c.geoname_id IS NOT NULL
QUALIFY ROW_NUMBER() OVER (
PARTITION BY c.country_code, c.geoname_id
ORDER BY c.padel_venue_count DESC
) = 1
),
-- Pricing / occupancy from Playtomic (via city_slug) + H3 catchment
with_pricing AS (
SELECT
b.*,
cm.city_slug,
cm.city_name,
cm.city_padel_venue_count,
vpb.median_hourly_rate,
vpb.median_peak_rate,
vpb.median_offpeak_rate,
vpb.median_occupancy_rate,
vpb.median_daily_revenue_per_venue,
vpb.price_currency,
COALESCE(ct.catchment_population, b.population)::BIGINT AS catchment_population,
COALESCE(ct.catchment_padel_courts, b.padel_venue_count)::INTEGER AS catchment_padel_courts
FROM base b
LEFT JOIN city_match cm
ON b.country_code = cm.country_code
AND b.geoname_id = cm.geoname_id
LEFT JOIN serving.venue_pricing_benchmarks vpb
ON cm.country_code = vpb.country_code
AND cm.city_slug = vpb.city_slug
LEFT JOIN catchment ct
ON b.geoname_id = ct.geoname_id
),
-- Both scores computed from the enriched base
scored AS (
SELECT *,
-- City-level venue density (from dim_cities exact count, not dim_locations spatial 5km)
CASE WHEN population > 0
THEN ROUND(COALESCE(city_padel_venue_count, 0)::DOUBLE / population * 100000, 2)
ELSE NULL
END AS city_venues_per_100k,
-- Data confidence (for market_score)
CASE
WHEN population > 0 AND COALESCE(city_padel_venue_count, 0) > 0 THEN 1.0
WHEN population > 0 OR COALESCE(city_padel_venue_count, 0) > 0 THEN 0.5
ELSE 0.0
END AS data_confidence,
-- ── Market Score (Marktreife-Score v3) ──────────────────────────────────
-- 0 when no city match or no venues (city_padel_venue_count NULL or 0)
CASE WHEN COALESCE(city_padel_venue_count, 0) > 0 THEN
ROUND(
-- Supply development (40 pts)
40.0 * LEAST(1.0, LN(
COALESCE(
CASE WHEN population > 0
THEN COALESCE(city_padel_venue_count, 0)::DOUBLE / population * 100000
ELSE 0 END
, 0) + 1) / LN(21))
* LEAST(1.0, COALESCE(city_padel_venue_count, 0) / 5.0)
-- Demand evidence (25 pts)
+ 25.0 * CASE
WHEN median_occupancy_rate IS NOT NULL
THEN LEAST(1.0, median_occupancy_rate / 0.65)
ELSE 0.4 * LEAST(1.0, LN(
COALESCE(
CASE WHEN population > 0
THEN COALESCE(city_padel_venue_count, 0)::DOUBLE / population * 100000
ELSE 0 END
, 0) + 1) / LN(21))
* LEAST(1.0, COALESCE(city_padel_venue_count, 0) / 5.0)
END
-- Addressable market (15 pts)
+ 15.0 * LEAST(1.0, LN(GREATEST(population, 1)) / LN(1000000))
-- Economic context (10 pts)
+ 10.0 * LEAST(1.0, COALESCE(median_income_pps, 100) / 200.0)
-- Data quality (10 pts)
+ 10.0 * CASE
WHEN population > 0 AND COALESCE(city_padel_venue_count, 0) > 0 THEN 1.0
WHEN population > 0 OR COALESCE(city_padel_venue_count, 0) > 0 THEN 0.5
ELSE 0.0
END
, 1)
ELSE 0
END AS market_score,
-- ── Opportunity Score (Marktpotenzial-Score v3, H3 catchment) ──────────
ROUND(
-- Addressable market (25 pts): log-scaled catchment population, ceiling 500K
25.0 * LEAST(1.0, LN(GREATEST(catchment_population, 1)) / LN(500000))
-- Economic power (20 pts): income PPS normalised to 35,000
+ 20.0 * LEAST(1.0, COALESCE(median_income_pps, 15000) / 35000.0)
-- Supply gap (30 pts): inverted catchment venue density
+ 30.0 * GREATEST(0.0, 1.0 - COALESCE(
CASE WHEN catchment_population > 0
THEN catchment_padel_courts::DOUBLE / catchment_population * 100000
ELSE 0.0
END, 0.0) / 8.0)
-- Catchment gap (15 pts): distance to nearest court
+ 15.0 * COALESCE(LEAST(1.0, nearest_padel_court_km / 30.0), 0.5)
-- Sports culture (10 pts): tennis courts within 25km
+ 10.0 * LEAST(1.0, tennis_courts_within_25km / 10.0)
, 1) AS opportunity_score
FROM with_pricing
)
SELECT
s.geoname_id,
s.country_code,
s.country_name_en,
s.country_slug,
s.location_name,
s.location_slug,
s.city_slug,
s.city_name,
s.lat,
s.lon,
s.admin1_code,
s.admin2_code,
s.population,
s.population_year,
s.median_income_pps,
s.income_year,
s.padel_venue_count,
s.padel_venues_per_100k,
s.nearest_padel_court_km,
s.tennis_courts_within_25km,
s.city_padel_venue_count,
s.city_venues_per_100k,
s.data_confidence,
s.catchment_population,
s.catchment_padel_courts,
CASE WHEN s.catchment_population > 0
THEN ROUND(s.catchment_padel_courts::DOUBLE / s.catchment_population * 100000, 2)
ELSE NULL
END AS catchment_venues_per_100k,
s.market_score,
s.opportunity_score,
s.median_hourly_rate,
s.median_peak_rate,
s.median_offpeak_rate,
s.median_occupancy_rate,
s.median_daily_revenue_per_venue,
s.price_currency,
CURRENT_DATE AS refreshed_date
FROM scored s
ORDER BY s.market_score DESC, s.opportunity_score DESC