- Merge supply gap (30pts) + catchment gap (15pts) → supply deficit (35pts, GREATEST) Eliminates ~80% correlated double-count on a single signal. - Add sports culture signal (10pts): tennis court density as racquet-sport adoption proxy. Ceiling 50 courts/25km. Harmless when tennis data is zero (contributes 0). - Add construction affordability (5pts): income relative to PLI construction costs. Joins dim_countries.pli_construction. High income + low build cost = high score. - Reduce economic power from 20 → 15pts to make room. New weights: addressable market 25, economic power 15, supply deficit 35, sports culture 10, construction affordability 5, market validation 10. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
291 lines
12 KiB
SQL
291 lines
12 KiB
SQL
-- Unified location profile: both scores at (country_code, geoname_id) grain.
-- Base: dim_locations (ALL GeoNames locations, pop ≥ 1K, ~140K rows).
-- Enriched with dim_cities (city_slug, city_name, exact venue count) and
-- venue_pricing_benchmarks (Playtomic pricing/occupancy).
--
-- Two scores per location:
--
-- Padelnomics Market Score (Marktreife-Score v4, 0–100):
--   "How mature/established is this padel market?"
--   Only meaningful for locations matched to a dim_cities row (city_slug IS NOT NULL)
--   with padel venues. 0 for all other locations.
--
--   v4 changes: lower count gate (5→3), lower density ceiling (LN(21)→LN(11)),
--   better demand fallback (0.4→0.65 with 0.3 floor), economic context discrimination (200→25K).
--
--   40 pts supply development — log-scaled density (LN ceiling 10/100k) × count gate (3)
--   25 pts demand evidence    — occupancy when available; otherwise 65% of the density proxy, floored at 0.3
--   15 pts addressable market — log-scaled population, ceiling 1M
--   10 pts economic context   — income PPS normalised to 25,000 ceiling
--   10 pts data quality       — completeness discount
--
-- Padelnomics Opportunity Score (Marktpotenzial-Score v5, 0–100):
--   "Where should I build a padel court?"
--   Computed for ALL locations — zero-court locations score highest on supply deficit.
--   H3 catchment methodology: addressable market and supply deficit use a regional
--   H3 catchment (res-5 cell + 6 neighbours, ~24km radius).
--
--   v5 changes: merge supply gap + catchment gap → single supply deficit (35 pts),
--   add sports culture proxy (10 pts, tennis density), add construction affordability (5 pts),
--   reduce economic power from 20 → 15 pts.
--
--   25 pts addressable market         — log-scaled catchment population, ceiling 500K
--   15 pts economic power             — income PPS, normalised to 35,000
--   35 pts supply deficit             — max(density gap, distance gap); eliminates double-count
--   10 pts sports culture             — tennis court density as racquet-sport adoption proxy
--    5 pts construction affordability — income relative to construction costs (PLI)
--   10 pts market validation          — country-level avg market maturity (from market_scored CTE)
--
-- Consumers query directly with WHERE filters:
--   cities API:       WHERE country_slug = ? AND city_slug IS NOT NULL
--   opportunity API:  WHERE country_slug = ? AND opportunity_score > 0
--   planner_defaults: WHERE city_slug IS NOT NULL
--   pseo_*:           WHERE city_slug IS NOT NULL AND city_padel_venue_count > 0
|
||
|
||
MODEL (
    name serving.location_profiles,
    kind FULL,
    cron '@daily',
    grain (country_code, geoname_id)
);
|
||
|
||
WITH
-- Location spine: every row of foundation.dim_locations, restricted to the
-- identifier, geography, demographic and padel/tennis supply columns that the
-- later CTEs read. No filtering or deduplication happens here.
base AS (
    SELECT
        loc.geoname_id,
        loc.country_code,
        loc.country_name_en,
        loc.country_slug,
        loc.location_name,
        loc.location_slug,
        loc.lat,
        loc.lon,
        loc.admin1_code,
        loc.admin2_code,
        loc.population,
        loc.population_year,
        loc.median_income_pps,
        loc.income_year,
        loc.padel_venue_count,
        loc.padel_venues_per_100k,
        loc.nearest_padel_court_km,
        loc.tennis_courts_within_25km,
        loc.h3_cell_res5
    FROM foundation.dim_locations AS loc
),
|
||
-- Per-H3-cell totals (resolution-5 cell id from dim_locations): summed
-- population and summed padel_venue_count of all locations in the cell.
-- Collapsing to one row per cell first keeps the catchment join working on
-- cells instead of individual locations (author's estimate: ~50-80K cells
-- vs ~140K locations).
-- NOTE(review): hex_padel_courts is a sum of per-location venue counts, not a
-- distinct count of venues — confirm that this is the intended measure.
hex_stats AS (
    SELECT
        loc.h3_cell_res5,
        SUM(loc.population)        AS hex_population,
        SUM(loc.padel_venue_count) AS hex_padel_courts
    FROM foundation.dim_locations AS loc
    GROUP BY loc.h3_cell_res5
),
|
||
-- Regional catchment per location: expand the location's H3 cell to its
-- distance-1 grid disk (the cell itself plus its immediate neighbours), then
-- add up the hex_stats totals of every cell in that disk.
-- The original author sizes this catchment at roughly a 24km radius.
-- Locations whose disk matches no hex_stats row produce no row here.
catchment AS (
    SELECT
        loc.geoname_id,
        SUM(cell_totals.hex_population)   AS catchment_population,
        SUM(cell_totals.hex_padel_courts) AS catchment_padel_courts
    FROM base AS loc
    CROSS JOIN LATERAL (
        SELECT UNNEST(h3_grid_disk(loc.h3_cell_res5, 1)) AS cell
    ) AS ring
    INNER JOIN hex_stats AS cell_totals
        ON cell_totals.h3_cell_res5 = ring.cell
    GROUP BY loc.geoname_id
),
|
||
-- One dim_cities row per (country_code, geoname_id), supplying city_slug,
-- city_name and the exact venue count used by the market score.
-- Rows without a geoname_id cannot be matched to a location and are dropped.
-- When several cities share a geoname, keep the one with the highest venue
-- count. The ordering is made fully deterministic so the chosen city_slug
-- cannot flip between daily rebuilds:
--   * NULLS LAST — a NULL venue count never outranks a real count, whatever
--     the engine's default null ordering is;
--   * city_slug  — stable tie-breaker when venue counts are equal.
city_match AS (
    SELECT
        c.country_code,
        c.geoname_id,
        c.city_slug,
        c.city_name,
        c.padel_venue_count AS city_padel_venue_count
    FROM foundation.dim_cities AS c
    WHERE c.geoname_id IS NOT NULL
    QUALIFY ROW_NUMBER() OVER (
        PARTITION BY c.country_code, c.geoname_id
        ORDER BY c.padel_venue_count DESC NULLS LAST, c.city_slug
    ) = 1
),
|
||
-- Enrichment step: every base location, left-joined to
--   * its matched city (slug, name, exact venue count),
--   * the pricing/occupancy benchmarks of that city (keyed by country + city_slug),
--   * its H3 catchment totals,
--   * the country's construction price-level index (pli_construction).
-- All joins are LEFT joins, so unmatched locations are kept with NULLs.
-- When no catchment row exists, the location's own population and venue count
-- stand in for the catchment figures.
with_pricing AS (
    SELECT
        -- columns carried over from base, in base order
        loc.geoname_id,
        loc.country_code,
        loc.country_name_en,
        loc.country_slug,
        loc.location_name,
        loc.location_slug,
        loc.lat,
        loc.lon,
        loc.admin1_code,
        loc.admin2_code,
        loc.population,
        loc.population_year,
        loc.median_income_pps,
        loc.income_year,
        loc.padel_venue_count,
        loc.padel_venues_per_100k,
        loc.nearest_padel_court_km,
        loc.tennis_courts_within_25km,
        loc.h3_cell_res5,
        -- matched city
        city.city_slug,
        city.city_name,
        city.city_padel_venue_count,
        -- pricing / occupancy benchmarks
        price.median_hourly_rate,
        price.median_peak_rate,
        price.median_offpeak_rate,
        price.median_occupancy_rate,
        price.median_daily_revenue_per_venue,
        price.price_currency,
        -- country-level construction cost index
        ctry.pli_construction,
        -- catchment totals with single-location fallback
        CAST(COALESCE(area.catchment_population, loc.population) AS BIGINT)
            AS catchment_population,
        CAST(COALESCE(area.catchment_padel_courts, loc.padel_venue_count) AS INTEGER)
            AS catchment_padel_courts
    FROM base AS loc
    LEFT JOIN city_match AS city
        ON  city.country_code = loc.country_code
        AND city.geoname_id   = loc.geoname_id
    LEFT JOIN serving.venue_pricing_benchmarks AS price
        ON  price.country_code = city.country_code
        AND price.city_slug    = city.city_slug
    LEFT JOIN catchment AS area
        ON area.geoname_id = loc.geoname_id
    LEFT JOIN foundation.dim_countries AS ctry
        ON ctry.country_code = loc.country_code
),
|
||
-- Step 1: attach the market score (plus two helper metrics) to every location.
-- It is computed before the opportunity score because the country averages of
-- this score feed the opportunity score's market-validation component.
market_scored AS (
    SELECT
        wp.*,

        -- Venues per 100k residents from the exact dim_cities venue count
        -- (not the dim_locations spatial count). NULL when population is
        -- missing or not positive.
        CASE
            WHEN wp.population > 0
                THEN ROUND(COALESCE(wp.city_padel_venue_count, 0)::DOUBLE / wp.population * 100000, 2)
        END AS city_venues_per_100k,

        -- Data confidence: 1.0 with both population and venues,
        -- 0.5 with exactly one of them, 0.0 with neither.
        CASE
            WHEN wp.population > 0 AND COALESCE(wp.city_padel_venue_count, 0) > 0 THEN 1.0
            WHEN wp.population > 0 OR  COALESCE(wp.city_padel_venue_count, 0) > 0 THEN 0.5
            ELSE 0.0
        END AS data_confidence,

        -- Market Score (Marktreife-Score v4), 0–100, rounded to 1 decimal.
        -- Locations without a matched city or without venues score 0.
        -- Inside the scoring branch city_padel_venue_count is known to be
        -- non-NULL and > 0, so it is used directly.
        CASE
            WHEN COALESCE(wp.city_padel_venue_count, 0) > 0 THEN
                ROUND(
                    -- Supply development (40 pts): log-scaled venues/100k
                    -- (full marks at 10 per 100k, i.e. LN(11)) multiplied by a
                    -- count gate that reaches 1.0 at 3 venues.
                    40.0
                        * LEAST(1.0, LN(
                            CASE
                                WHEN wp.population > 0
                                    THEN wp.city_padel_venue_count::DOUBLE / wp.population * 100000
                                ELSE 0
                            END + 1) / LN(11))
                        * LEAST(1.0, wp.city_padel_venue_count / 3.0)

                    -- Demand evidence (25 pts): observed occupancy scaled to a
                    -- 65% target when available; otherwise 65% of the supply
                    -- proxy above, never below 0.3 (venues existing at all is
                    -- taken as evidence of demand).
                    + 25.0 * CASE
                        WHEN wp.median_occupancy_rate IS NOT NULL
                            THEN LEAST(1.0, wp.median_occupancy_rate / 0.65)
                        ELSE GREATEST(
                            0.3,
                            0.65
                                * LEAST(1.0, LN(
                                    CASE
                                        WHEN wp.population > 0
                                            THEN wp.city_padel_venue_count::DOUBLE / wp.population * 100000
                                        ELSE 0
                                    END + 1) / LN(11))
                                * LEAST(1.0, wp.city_padel_venue_count / 3.0)
                        )
                    END

                    -- Addressable market (15 pts): log-scaled population,
                    -- full marks at 1,000,000.
                    + 15.0 * LEAST(1.0, LN(GREATEST(wp.population, 1)) / LN(1000000))

                    -- Economic context (10 pts): income PPS against a 25,000
                    -- ceiling; 15,000 is assumed when income is missing.
                    + 10.0 * LEAST(1.0, COALESCE(wp.median_income_pps, 15000) / 25000.0)

                    -- Data quality (10 pts): venues are present in this
                    -- branch, so only population decides between 1.0 and 0.5.
                    + 10.0 * CASE WHEN wp.population > 0 THEN 1.0 ELSE 0.5 END
                , 1)
            ELSE 0
        END AS market_score
    FROM with_pricing AS wp
),
|
||
-- Step 2: per-country mean market score, rounded to 1 decimal. This is the
-- market-validation input of the opportunity score.
-- Only rows with a positive market score take part, so locations that score 0
-- (no matched city or no venues) do not pull the country average down.
-- Countries with no positively scored location produce no row here.
country_market AS (
    SELECT
        scored_city.country_code,
        ROUND(AVG(scored_city.market_score), 1) AS country_avg_market_score
    FROM market_scored AS scored_city
    WHERE scored_city.market_score > 0
    GROUP BY scored_city.country_code
),
|
||
-- Step 3: add the Opportunity Score (Marktpotenzial-Score v5), 0–100, rounded
-- to 1 decimal. Six components sum to 100 points: 25 + 15 + 35 + 10 + 5 + 10.
scored AS (
    SELECT
        ms.*,
        ROUND(
            -- Addressable market (25 pts): log-scaled catchment population,
            -- full marks at 500,000.
            25.0 * LEAST(1.0, LN(GREATEST(ms.catchment_population, 1)) / LN(500000))

            -- Economic power (15 pts): income PPS against 35,000;
            -- 15,000 is assumed when income is missing.
            + 15.0 * LEAST(1.0, COALESCE(ms.median_income_pps, 15000) / 35000.0)

            -- Supply deficit (35 pts): the larger of two gap measures, so one
            -- underlying signal is not counted twice.
            + 35.0 * GREATEST(
                -- Density gap over the catchment: 1.0 at zero courts, falling
                -- linearly to 0.0 at 8 per 100k. The court count is the larger
                -- of the catchment total and the matched city's exact count.
                -- A missing or non-positive catchment population counts as
                -- zero density, i.e. the full gap.
                GREATEST(
                    0.0,
                    1.0 - COALESCE(
                        CASE
                            WHEN ms.catchment_population > 0
                                THEN GREATEST(
                                         ms.catchment_padel_courts,
                                         COALESCE(ms.city_padel_venue_count, 0)
                                     )::DOUBLE / ms.catchment_population * 100000
                            ELSE 0.0
                        END,
                        0.0
                    ) / 8.0
                ),
                -- Distance gap: 0.0 at 0km rising to 1.0 at 30km or more;
                -- an unknown distance is scored as a neutral 0.5.
                COALESCE(LEAST(1.0, ms.nearest_padel_court_km / 30.0), 0.5)
            )

            -- Sports culture (10 pts): tennis courts within 25km, full marks
            -- at 50. Missing tennis data contributes 0.
            + 10.0 * LEAST(1.0, COALESCE(ms.tennis_courts_within_25km, 0) / 50.0)

            -- Construction affordability (5 pts): the income ratio from above
            -- divided by the construction price level (pli_construction / 100,
            -- floored at 0.5; 100 assumed when missing). Higher income and
            -- cheaper construction both raise the score.
            + 5.0 * LEAST(
                1.0,
                COALESCE(ms.median_income_pps, 15000) / 35000.0
                    / GREATEST(0.5, COALESCE(ms.pli_construction, 100.0) / 100.0)
            )

            -- Market validation (10 pts): the country's average market score
            -- as a fraction of 100. Countries without any scored market get a
            -- neutral 0.5 (5 pts) rather than a penalty.
            -- Original author's illustration: a proven market such as ES lands
            -- around 7 pts, an emerging one such as SE around 3.5 pts.
            + 10.0 * COALESCE(ctry.country_avg_market_score / 100.0, 0.5)
        , 1) AS opportunity_score
    FROM market_scored AS ms
    LEFT JOIN country_market AS ctry
        ON ctry.country_code = ms.country_code
)
|
||
-- Final projection: the published column set of serving.location_profiles.
-- Helper columns that are only needed for scoring (h3_cell_res5,
-- pli_construction) are intentionally left out.
SELECT
    -- identity and geography
    s.geoname_id,
    s.country_code,
    s.country_name_en,
    s.country_slug,
    s.location_name,
    s.location_slug,
    s.city_slug,
    s.city_name,
    s.lat,
    s.lon,
    s.admin1_code,
    s.admin2_code,
    -- demographics
    s.population,
    s.population_year,
    s.median_income_pps,
    s.income_year,
    -- supply signals
    s.padel_venue_count,
    s.padel_venues_per_100k,
    s.nearest_padel_court_km,
    s.tennis_courts_within_25km,
    s.city_padel_venue_count,
    s.city_venues_per_100k,
    s.data_confidence,
    -- H3 catchment
    s.catchment_population,
    s.catchment_padel_courts,
    -- Catchment courts per 100k residents; NULL when the catchment population
    -- is missing or not positive.
    CASE
        WHEN s.catchment_population > 0
            THEN ROUND(s.catchment_padel_courts::DOUBLE / s.catchment_population * 100000, 2)
        ELSE NULL
    END AS catchment_venues_per_100k,
    -- scores
    s.market_score,
    s.opportunity_score,
    -- pricing / occupancy benchmarks
    s.median_hourly_rate,
    s.median_peak_rate,
    s.median_offpeak_rate,
    s.median_occupancy_rate,
    s.median_daily_revenue_per_venue,
    s.price_currency,
    -- build date of this row
    CURRENT_DATE AS refreshed_date
FROM scored AS s
-- Best markets first, then best opportunities. Scores are rounded and most
-- locations share market_score = 0, so ties are common; the grain columns are
-- appended to keep the row order identical from one rebuild to the next.
ORDER BY
    s.market_score DESC,
    s.opportunity_score DESC,
    s.country_code,
    s.geoname_id
|