Files
padelnomics/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql
Deeman 4d29ecf1d6
All checks were successful
CI / test (push) Successful in 55s
CI / tag (push) Successful in 3s
merge: unified location_profiles serving model + both scores on map tooltips
# Conflicts:
#	CHANGELOG.md
#	transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql
2026-03-06 14:03:55 +01:00

244 lines
8.9 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- Unified location profile: both scores at (country_code, geoname_id) grain.
-- Base: dim_locations (ALL GeoNames locations, pop ≥ 1K, ~140K rows).
-- Enriched with dim_cities (city_slug, city_name, exact venue count) and
-- venue_pricing_benchmarks (Playtomic pricing/occupancy).
--
-- Two scores per location:
--
-- Padelnomics Market Score (Marktreife-Score v3, 0100):
-- "How mature/established is this padel market?"
-- Only meaningful for locations matched to a dim_cities row (city_slug IS NOT NULL)
-- with padel venues. 0 for all other locations.
--
-- 40 pts supply development — log-scaled density (LN ceiling 20/100k) × count gate
-- 25 pts demand evidence — occupancy when available; 40% density proxy otherwise
-- 15 pts addressable market — log-scaled population, ceiling 1M
-- 10 pts economic context — income PPS normalised to 200 ceiling
-- 10 pts data quality — completeness discount
--
-- Padelnomics Opportunity Score (Marktpotenzial-Score v3, 0100):
-- "Where should I build a padel court?"
-- Computed for ALL locations — zero-court locations score highest on supply gap.
-- H3 catchment methodology: addressable market and supply gap use a regional
-- H3 catchment (res-4 cell + 6 neighbours, ~462km², ~15-18km radius).
--
-- 25 pts addressable market — log-scaled catchment population, ceiling 500K
-- 20 pts economic power — income PPS, normalised to 35,000
-- 30 pts supply gap — inverted catchment venue density; 0 courts = full marks
-- 15 pts catchment gap — distance to nearest padel court
-- 10 pts sports culture — tennis courts within 25km
--
-- Consumers query directly with WHERE filters:
-- cities API: WHERE country_slug = ? AND city_slug IS NOT NULL
-- opportunity API: WHERE country_slug = ? AND opportunity_score > 0
-- planner_defaults: WHERE city_slug IS NOT NULL
-- pseo_*: WHERE city_slug IS NOT NULL AND city_padel_venue_count > 0
MODEL (
name serving.location_profiles,
kind FULL,
cron '@daily',
grain (country_code, geoname_id)
);
WITH
-- All locations from dim_locations (superset)
base AS (
SELECT
l.geoname_id,
l.country_code,
l.country_name_en,
l.country_slug,
l.location_name,
l.location_slug,
l.lat,
l.lon,
l.admin1_code,
l.admin2_code,
l.population,
l.population_year,
l.median_income_pps,
l.income_year,
l.padel_venue_count,
l.padel_venues_per_100k,
l.nearest_padel_court_km,
l.tennis_courts_within_25km,
l.h3_cell_res4
FROM foundation.dim_locations l
),
-- Aggregate population and court counts per H3 cell (res 4, ~10km edge).
-- Grouping by cell first (~30-50K distinct cells vs 140K locations) keeps the
-- subsequent lateral join small.
hex_stats AS (
SELECT
h3_cell_res4,
SUM(population) AS hex_population,
SUM(padel_venue_count) AS hex_padel_courts
FROM foundation.dim_locations
GROUP BY h3_cell_res4
),
-- For each location, sum hex_stats across the cell + 6 neighbours (k_ring=1).
-- Effective catchment: ~462km², ~15-18km radius — realistic driving distance.
catchment AS (
SELECT
l.geoname_id,
SUM(hs.hex_population) AS catchment_population,
SUM(hs.hex_padel_courts) AS catchment_padel_courts
FROM base l,
LATERAL (SELECT UNNEST(h3_grid_disk(l.h3_cell_res4, 1)) AS cell) ring
JOIN hex_stats hs ON hs.h3_cell_res4 = ring.cell
GROUP BY l.geoname_id
),
-- Match dim_cities via (country_code, geoname_id) to get city_slug + exact venue count.
-- QUALIFY handles rare multi-city-per-geoname collisions (keep highest venue count).
city_match AS (
SELECT
c.country_code,
c.geoname_id,
c.city_slug,
c.city_name,
c.padel_venue_count AS city_padel_venue_count
FROM foundation.dim_cities c
WHERE c.geoname_id IS NOT NULL
QUALIFY ROW_NUMBER() OVER (
PARTITION BY c.country_code, c.geoname_id
ORDER BY c.padel_venue_count DESC
) = 1
),
-- Pricing / occupancy from Playtomic (via city_slug) + H3 catchment
with_pricing AS (
SELECT
b.*,
cm.city_slug,
cm.city_name,
cm.city_padel_venue_count,
vpb.median_hourly_rate,
vpb.median_peak_rate,
vpb.median_offpeak_rate,
vpb.median_occupancy_rate,
vpb.median_daily_revenue_per_venue,
vpb.price_currency,
COALESCE(ct.catchment_population, b.population)::BIGINT AS catchment_population,
COALESCE(ct.catchment_padel_courts, b.padel_venue_count)::INTEGER AS catchment_padel_courts
FROM base b
LEFT JOIN city_match cm
ON b.country_code = cm.country_code
AND b.geoname_id = cm.geoname_id
LEFT JOIN serving.venue_pricing_benchmarks vpb
ON cm.country_code = vpb.country_code
AND cm.city_slug = vpb.city_slug
LEFT JOIN catchment ct
ON b.geoname_id = ct.geoname_id
),
-- Both scores computed from the enriched base
scored AS (
SELECT *,
-- City-level venue density (from dim_cities exact count, not dim_locations spatial 5km)
CASE WHEN population > 0
THEN ROUND(COALESCE(city_padel_venue_count, 0)::DOUBLE / population * 100000, 2)
ELSE NULL
END AS city_venues_per_100k,
-- Data confidence (for market_score)
CASE
WHEN population > 0 AND COALESCE(city_padel_venue_count, 0) > 0 THEN 1.0
WHEN population > 0 OR COALESCE(city_padel_venue_count, 0) > 0 THEN 0.5
ELSE 0.0
END AS data_confidence,
-- ── Market Score (Marktreife-Score v3) ──────────────────────────────────
-- 0 when no city match or no venues (city_padel_venue_count NULL or 0)
CASE WHEN COALESCE(city_padel_venue_count, 0) > 0 THEN
ROUND(
-- Supply development (40 pts)
40.0 * LEAST(1.0, LN(
COALESCE(
CASE WHEN population > 0
THEN COALESCE(city_padel_venue_count, 0)::DOUBLE / population * 100000
ELSE 0 END
, 0) + 1) / LN(21))
* LEAST(1.0, COALESCE(city_padel_venue_count, 0) / 5.0)
-- Demand evidence (25 pts)
+ 25.0 * CASE
WHEN median_occupancy_rate IS NOT NULL
THEN LEAST(1.0, median_occupancy_rate / 0.65)
ELSE 0.4 * LEAST(1.0, LN(
COALESCE(
CASE WHEN population > 0
THEN COALESCE(city_padel_venue_count, 0)::DOUBLE / population * 100000
ELSE 0 END
, 0) + 1) / LN(21))
* LEAST(1.0, COALESCE(city_padel_venue_count, 0) / 5.0)
END
-- Addressable market (15 pts)
+ 15.0 * LEAST(1.0, LN(GREATEST(population, 1)) / LN(1000000))
-- Economic context (10 pts)
+ 10.0 * LEAST(1.0, COALESCE(median_income_pps, 100) / 200.0)
-- Data quality (10 pts)
+ 10.0 * CASE
WHEN population > 0 AND COALESCE(city_padel_venue_count, 0) > 0 THEN 1.0
WHEN population > 0 OR COALESCE(city_padel_venue_count, 0) > 0 THEN 0.5
ELSE 0.0
END
, 1)
ELSE 0
END AS market_score,
-- ── Opportunity Score (Marktpotenzial-Score v3, H3 catchment) ──────────
ROUND(
-- Addressable market (25 pts): log-scaled catchment population, ceiling 500K
25.0 * LEAST(1.0, LN(GREATEST(catchment_population, 1)) / LN(500000))
-- Economic power (20 pts): income PPS normalised to 35,000
+ 20.0 * LEAST(1.0, COALESCE(median_income_pps, 15000) / 35000.0)
-- Supply gap (30 pts): inverted catchment venue density
+ 30.0 * GREATEST(0.0, 1.0 - COALESCE(
CASE WHEN catchment_population > 0
THEN catchment_padel_courts::DOUBLE / catchment_population * 100000
ELSE 0.0
END, 0.0) / 8.0)
-- Catchment gap (15 pts): distance to nearest court
+ 15.0 * COALESCE(LEAST(1.0, nearest_padel_court_km / 30.0), 0.5)
-- Sports culture (10 pts): tennis courts within 25km
+ 10.0 * LEAST(1.0, tennis_courts_within_25km / 10.0)
, 1) AS opportunity_score
FROM with_pricing
)
SELECT
s.geoname_id,
s.country_code,
s.country_name_en,
s.country_slug,
s.location_name,
s.location_slug,
s.city_slug,
s.city_name,
s.lat,
s.lon,
s.admin1_code,
s.admin2_code,
s.population,
s.population_year,
s.median_income_pps,
s.income_year,
s.padel_venue_count,
s.padel_venues_per_100k,
s.nearest_padel_court_km,
s.tennis_courts_within_25km,
s.city_padel_venue_count,
s.city_venues_per_100k,
s.data_confidence,
s.catchment_population,
s.catchment_padel_courts,
CASE WHEN s.catchment_population > 0
THEN ROUND(s.catchment_padel_courts::DOUBLE / s.catchment_population * 100000, 2)
ELSE NULL
END AS catchment_venues_per_100k,
s.market_score,
s.opportunity_score,
s.median_hourly_rate,
s.median_peak_rate,
s.median_offpeak_rate,
s.median_occupancy_rate,
s.median_daily_revenue_per_venue,
s.price_currency,
CURRENT_DATE AS refreshed_date
FROM scored s
ORDER BY s.market_score DESC, s.opportunity_score DESC