Files
padelnomics/transform/sqlmesh_padelnomics/models/serving/location_profiles.sql
Deeman f215ea8e3a fix: supply gap inflation + inline map data + guard API endpoints
A. location_profiles.sql: supply gap now uses GREATEST(catchment_padel_courts,
   COALESCE(city_padel_venue_count, 0)) so Playtomic venues prevent cities like
   Murcia/Cordoba/Gijon from receiving a full 30-pt supply gap bonus when their
   OSM catchment count is zero. Expected ~10-15 pt drop for affected ES cities.

B. pseo_country_overview.sql: add population-weighted lat/lon centroid columns
   so the markets map can use accurate country positions from this table.

C/D. content/routes.py + markets.html: query pseo_country_overview in the route
   and pass as map_countries to the template, replacing the fetch('/api/...') call
   with inline JSON. Map scores now match pseo_country_overview (pop-weighted),
   and the page loads without an extra round-trip.

E. api.py: add @login_required to all 4 endpoints. Unauthenticated callers get
   a 302 redirect to login instead of data.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-07 20:33:31 +01:00

266 lines
10 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- Unified location profile: both scores at (country_code, geoname_id) grain.
-- Base: dim_locations (ALL GeoNames locations, pop ≥ 1K, ~140K rows).
-- Enriched with dim_cities (city_slug, city_name, exact venue count) and
-- venue_pricing_benchmarks (Playtomic pricing/occupancy).
--
-- Two scores per location:
--
-- Padelnomics Market Score (Marktreife-Score v3, 0100):
-- "How mature/established is this padel market?"
-- Only meaningful for locations matched to a dim_cities row (city_slug IS NOT NULL)
-- with padel venues. 0 for all other locations.
--
-- 40 pts supply development — log-scaled density (LN ceiling 20/100k) × count gate
-- 25 pts demand evidence — occupancy when available; 40% density proxy otherwise
-- 15 pts addressable market — log-scaled population, ceiling 1M
-- 10 pts economic context — income PPS normalised to 200 ceiling
-- 10 pts data quality — completeness discount
--
-- Padelnomics Opportunity Score (Marktpotenzial-Score v4, 0100):
-- "Where should I build a padel court?"
-- Computed for ALL locations — zero-court locations score highest on supply gap.
-- H3 catchment methodology: addressable market and supply gap use a regional
-- H3 catchment (res-5 cell + 6 neighbours, ~24km radius).
--
-- 25 pts addressable market — log-scaled catchment population, ceiling 500K
-- 20 pts economic power — income PPS, normalised to 35,000
-- 30 pts supply gap — inverted catchment venue density; 0 courts = full marks
-- 15 pts catchment gap — distance to nearest padel court
-- 10 pts market validation — country-level avg market maturity (from market_scored CTE).
-- Replaces sports culture proxy (v3: tennis data was all zeros).
-- ES (~60/100) → ~6 pts, SE (~35/100) → ~3.5 pts, unknown → 5 pts.
--
-- Consumers query directly with WHERE filters:
-- cities API: WHERE country_slug = ? AND city_slug IS NOT NULL
-- opportunity API: WHERE country_slug = ? AND opportunity_score > 0
-- planner_defaults: WHERE city_slug IS NOT NULL
-- pseo_*: WHERE city_slug IS NOT NULL AND city_padel_venue_count > 0
MODEL (
name serving.location_profiles,
kind FULL,
cron '@daily',
grain (country_code, geoname_id)
);
WITH
-- All locations from dim_locations (superset)
base AS (
SELECT
l.geoname_id,
l.country_code,
l.country_name_en,
l.country_slug,
l.location_name,
l.location_slug,
l.lat,
l.lon,
l.admin1_code,
l.admin2_code,
l.population,
l.population_year,
l.median_income_pps,
l.income_year,
l.padel_venue_count,
l.padel_venues_per_100k,
l.nearest_padel_court_km,
l.tennis_courts_within_25km,
l.h3_cell_res5
FROM foundation.dim_locations l
),
-- Aggregate population and court counts per H3 cell (res 5, ~8.5km edge).
-- Grouping by cell first (~50-80K distinct cells vs 140K locations) keeps the
-- subsequent lateral join small.
hex_stats AS (
SELECT
h3_cell_res5,
SUM(population) AS hex_population,
SUM(padel_venue_count) AS hex_padel_courts
FROM foundation.dim_locations
GROUP BY h3_cell_res5
),
-- For each location, sum hex_stats across the cell + 6 neighbours (k_ring=1).
-- Effective catchment: ~24km radius — realistic driving distance.
catchment AS (
SELECT
l.geoname_id,
SUM(hs.hex_population) AS catchment_population,
SUM(hs.hex_padel_courts) AS catchment_padel_courts
FROM base l,
LATERAL (SELECT UNNEST(h3_grid_disk(l.h3_cell_res5, 1)) AS cell) ring
JOIN hex_stats hs ON hs.h3_cell_res5 = ring.cell
GROUP BY l.geoname_id
),
-- Match dim_cities via (country_code, geoname_id) to get city_slug + exact venue count.
-- QUALIFY handles rare multi-city-per-geoname collisions (keep highest venue count).
city_match AS (
SELECT
c.country_code,
c.geoname_id,
c.city_slug,
c.city_name,
c.padel_venue_count AS city_padel_venue_count
FROM foundation.dim_cities c
WHERE c.geoname_id IS NOT NULL
QUALIFY ROW_NUMBER() OVER (
PARTITION BY c.country_code, c.geoname_id
ORDER BY c.padel_venue_count DESC
) = 1
),
-- Pricing / occupancy from Playtomic (via city_slug) + H3 catchment
with_pricing AS (
SELECT
b.*,
cm.city_slug,
cm.city_name,
cm.city_padel_venue_count,
vpb.median_hourly_rate,
vpb.median_peak_rate,
vpb.median_offpeak_rate,
vpb.median_occupancy_rate,
vpb.median_daily_revenue_per_venue,
vpb.price_currency,
COALESCE(ct.catchment_population, b.population)::BIGINT AS catchment_population,
COALESCE(ct.catchment_padel_courts, b.padel_venue_count)::INTEGER AS catchment_padel_courts
FROM base b
LEFT JOIN city_match cm
ON b.country_code = cm.country_code
AND b.geoname_id = cm.geoname_id
LEFT JOIN serving.venue_pricing_benchmarks vpb
ON cm.country_code = vpb.country_code
AND cm.city_slug = vpb.city_slug
LEFT JOIN catchment ct
ON b.geoname_id = ct.geoname_id
),
-- Step 1: market score only — needed first so we can aggregate country averages.
market_scored AS (
SELECT *,
-- City-level venue density (from dim_cities exact count, not dim_locations spatial 5km)
CASE WHEN population > 0
THEN ROUND(COALESCE(city_padel_venue_count, 0)::DOUBLE / population * 100000, 2)
ELSE NULL
END AS city_venues_per_100k,
-- Data confidence (for market_score)
CASE
WHEN population > 0 AND COALESCE(city_padel_venue_count, 0) > 0 THEN 1.0
WHEN population > 0 OR COALESCE(city_padel_venue_count, 0) > 0 THEN 0.5
ELSE 0.0
END AS data_confidence,
-- ── Market Score (Marktreife-Score v3) ──────────────────────────────────
-- 0 when no city match or no venues (city_padel_venue_count NULL or 0)
CASE WHEN COALESCE(city_padel_venue_count, 0) > 0 THEN
ROUND(
-- Supply development (40 pts)
40.0 * LEAST(1.0, LN(
COALESCE(
CASE WHEN population > 0
THEN COALESCE(city_padel_venue_count, 0)::DOUBLE / population * 100000
ELSE 0 END
, 0) + 1) / LN(21))
* LEAST(1.0, COALESCE(city_padel_venue_count, 0) / 5.0)
-- Demand evidence (25 pts)
+ 25.0 * CASE
WHEN median_occupancy_rate IS NOT NULL
THEN LEAST(1.0, median_occupancy_rate / 0.65)
ELSE 0.4 * LEAST(1.0, LN(
COALESCE(
CASE WHEN population > 0
THEN COALESCE(city_padel_venue_count, 0)::DOUBLE / population * 100000
ELSE 0 END
, 0) + 1) / LN(21))
* LEAST(1.0, COALESCE(city_padel_venue_count, 0) / 5.0)
END
-- Addressable market (15 pts)
+ 15.0 * LEAST(1.0, LN(GREATEST(population, 1)) / LN(1000000))
-- Economic context (10 pts)
+ 10.0 * LEAST(1.0, COALESCE(median_income_pps, 100) / 200.0)
-- Data quality (10 pts)
+ 10.0 * CASE
WHEN population > 0 AND COALESCE(city_padel_venue_count, 0) > 0 THEN 1.0
WHEN population > 0 OR COALESCE(city_padel_venue_count, 0) > 0 THEN 0.5
ELSE 0.0
END
, 1)
ELSE 0
END AS market_score
FROM with_pricing
),
-- Step 2: country-level avg market maturity — used as market validation signal (10 pts).
-- Filter to market_score > 0 (cities with padel courts only) so zero-court locations
-- don't dilute the country signal. ES proven demand → ~60, SE struggling → ~35.
country_market AS (
SELECT
country_code,
ROUND(AVG(market_score), 1) AS country_avg_market_score
FROM market_scored
WHERE market_score > 0
GROUP BY country_code
),
-- Step 3: add opportunity_score using country market validation signal.
scored AS (
SELECT ms.*,
-- ── Opportunity Score (Marktpotenzial-Score v4, H3 catchment) ──────────
ROUND(
-- Addressable market (25 pts): log-scaled catchment population, ceiling 500K
25.0 * LEAST(1.0, LN(GREATEST(catchment_population, 1)) / LN(500000))
-- Economic power (20 pts): income PPS normalised to 35,000
+ 20.0 * LEAST(1.0, COALESCE(median_income_pps, 15000) / 35000.0)
-- Supply gap (30 pts): inverted catchment venue density
+ 30.0 * GREATEST(0.0, 1.0 - COALESCE(
CASE WHEN catchment_population > 0
THEN GREATEST(catchment_padel_courts, COALESCE(city_padel_venue_count, 0))::DOUBLE / catchment_population * 100000
ELSE 0.0
END, 0.0) / 8.0)
-- Catchment gap (15 pts): distance to nearest court
+ 15.0 * COALESCE(LEAST(1.0, nearest_padel_court_km / 30.0), 0.5)
-- Market validation (10 pts): country-level avg market maturity.
-- Replaces sports culture (v3 tennis data was all zeros = dead code).
-- ES (~60/100): proven demand → ~6 pts. SE (~35/100): struggling → ~3.5 pts.
-- NULL (no courts in country yet): 0.5 neutral → 5 pts (untested, not penalised).
+ 10.0 * COALESCE(cm.country_avg_market_score / 100.0, 0.5)
, 1) AS opportunity_score
FROM market_scored ms
LEFT JOIN country_market cm ON ms.country_code = cm.country_code
)
SELECT
s.geoname_id,
s.country_code,
s.country_name_en,
s.country_slug,
s.location_name,
s.location_slug,
s.city_slug,
s.city_name,
s.lat,
s.lon,
s.admin1_code,
s.admin2_code,
s.population,
s.population_year,
s.median_income_pps,
s.income_year,
s.padel_venue_count,
s.padel_venues_per_100k,
s.nearest_padel_court_km,
s.tennis_courts_within_25km,
s.city_padel_venue_count,
s.city_venues_per_100k,
s.data_confidence,
s.catchment_population,
s.catchment_padel_courts,
CASE WHEN s.catchment_population > 0
THEN ROUND(s.catchment_padel_courts::DOUBLE / s.catchment_population * 100000, 2)
ELSE NULL
END AS catchment_venues_per_100k,
s.market_score,
s.opportunity_score,
s.median_hourly_rate,
s.median_peak_rate,
s.median_offpeak_rate,
s.median_occupancy_rate,
s.median_daily_revenue_per_venue,
s.price_currency,
CURRENT_DATE AS refreshed_date
FROM scored s
ORDER BY s.market_score DESC, s.opportunity_score DESC