feat(serving): add unified location_profiles model
Combines city_market_profile and location_opportunity_profile into a single serving model at (country_code, geoname_id) grain. Both Market Score and Opportunity Score computed per location. City data enriched via LEFT JOIN dim_cities on geoname_id. Subtask 1/5: create new model (old models not yet removed). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,203 @@
|
||||
-- Unified location profile: both scores at (country_code, geoname_id) grain.
|
||||
-- Base: dim_locations (ALL GeoNames locations, pop ≥ 1K, ~140K rows).
|
||||
-- Enriched with dim_cities (city_slug, city_name, exact venue count) and
|
||||
-- venue_pricing_benchmarks (Playtomic pricing/occupancy).
|
||||
--
|
||||
-- Two scores per location:
|
||||
--
|
||||
-- Padelnomics Market Score (Marktreife-Score v3, 0–100):
|
||||
-- "How mature/established is this padel market?"
|
||||
-- Only meaningful for locations matched to a dim_cities row (city_slug IS NOT NULL)
|
||||
-- with padel venues. 0 for all other locations.
|
||||
--
|
||||
-- 40 pts supply development — log-scaled density (LN ceiling 20/100k) × count gate
|
||||
-- 25 pts demand evidence — occupancy when available; 40% density proxy otherwise
|
||||
-- 15 pts addressable market — log-scaled population, ceiling 1M
|
||||
-- 10 pts economic context — income PPS normalised to 200 ceiling
|
||||
-- 10 pts data quality — completeness discount
|
||||
--
|
||||
-- Padelnomics Opportunity Score (Marktpotenzial-Score v2, 0–100):
|
||||
-- "Where should I build a padel court?"
|
||||
-- Computed for ALL locations — zero-court locations score highest on supply gap.
|
||||
--
|
||||
-- 25 pts addressable market — log-scaled population, ceiling 500K
|
||||
-- 20 pts economic power — income PPS, normalised to 35,000
|
||||
-- 30 pts supply gap — inverted venue density; 0 courts = full marks
|
||||
-- 15 pts catchment gap — distance to nearest padel court
|
||||
-- 10 pts sports culture — tennis courts within 25km
|
||||
--
|
||||
-- Consumers query directly with WHERE filters:
|
||||
-- cities API: WHERE country_slug = ? AND city_slug IS NOT NULL
|
||||
-- opportunity API: WHERE country_slug = ? AND opportunity_score > 0
|
||||
-- planner_defaults: WHERE city_slug IS NOT NULL
|
||||
-- pseo_*: WHERE city_slug IS NOT NULL AND city_padel_venue_count > 0
|
||||
|
||||
-- SQLMesh model declaration for the unified location profile table.
-- FULL kind on a daily cron; one output row per (country_code, geoname_id).
MODEL (
    name  serving.location_profiles,
    kind  FULL,
    cron  '@daily',
    grain (country_code, geoname_id)
);
|
||||
|
||||
WITH

-- Location spine: every row of foundation.dim_locations, unfiltered.
-- Carries identity, geography, demographics and the location-level padel /
-- tennis supply metrics that the scoring steps read.
base AS (
    SELECT
        -- identity
        loc.geoname_id,
        loc.country_code,
        loc.country_name_en,
        loc.country_slug,
        loc.location_name,
        loc.location_slug,
        -- geography
        loc.lat,
        loc.lon,
        loc.admin1_code,
        loc.admin2_code,
        -- demographics / economics
        loc.population,
        loc.population_year,
        loc.median_income_pps,
        loc.income_year,
        -- location-level supply metrics
        loc.padel_venue_count,
        loc.padel_venues_per_100k,
        loc.nearest_padel_court_km,
        loc.tennis_courts_within_25km
    FROM foundation.dim_locations AS loc
),
|
||||
-- City lookup keyed on (country_code, geoname_id): supplies city_slug, city_name
-- and the city-level venue count for locations that have a dim_cities row.
--
-- Several cities can share one geoname_id; exactly one is kept per key:
--   1. the city with the highest padel_venue_count (NULL counts sort last), then
--   2. the alphabetically first city_slug as a tie-breaker.
-- The tie-breaker matters because this model is rebuilt daily: without it, two
-- cities with equal counts could swap places between runs and change the
-- published city_slug for the same location.
city_match AS (
    SELECT
        c.country_code,
        c.geoname_id,
        c.city_slug,
        c.city_name,
        c.padel_venue_count AS city_padel_venue_count
    FROM foundation.dim_cities AS c
    WHERE c.geoname_id IS NOT NULL
    QUALIFY ROW_NUMBER() OVER (
        PARTITION BY c.country_code, c.geoname_id
        ORDER BY
            c.padel_venue_count DESC NULLS LAST,
            c.city_slug
    ) = 1
),
|
||||
-- Enrichment step: attach the matched city (if any) and that city's pricing /
-- occupancy benchmarks to every base location. Both joins are LEFT joins, so
-- locations without a city match keep NULL city_* and pricing columns.
-- NOTE(review): assumes serving.venue_pricing_benchmarks has at most one row per
-- (country_code, city_slug); more than one would duplicate locations — confirm.
with_pricing AS (
    SELECT
        -- location attributes (all columns of base, in base order)
        loc.geoname_id,
        loc.country_code,
        loc.country_name_en,
        loc.country_slug,
        loc.location_name,
        loc.location_slug,
        loc.lat,
        loc.lon,
        loc.admin1_code,
        loc.admin2_code,
        loc.population,
        loc.population_year,
        loc.median_income_pps,
        loc.income_year,
        loc.padel_venue_count,
        loc.padel_venues_per_100k,
        loc.nearest_padel_court_km,
        loc.tennis_courts_within_25km,
        -- matched city
        city.city_slug,
        city.city_name,
        city.city_padel_venue_count,
        -- pricing / occupancy benchmarks for the matched city
        bench.median_hourly_rate,
        bench.median_peak_rate,
        bench.median_offpeak_rate,
        bench.median_occupancy_rate,
        bench.median_daily_revenue_per_venue,
        bench.price_currency
    FROM base AS loc
    LEFT JOIN city_match AS city
        ON  city.country_code = loc.country_code
        AND city.geoname_id   = loc.geoname_id
    LEFT JOIN serving.venue_pricing_benchmarks AS bench
        ON  bench.country_code = city.country_code
        AND bench.city_slug    = city.city_slug
),
|
||||
-- Scoring is staged over three CTEs so that every shared sub-expression is
-- defined exactly once:  score_inputs -> score_components -> scored.
-- Downstream code should read only the documented outputs of `scored`
-- (city_venues_per_100k, data_confidence, market_score, opportunity_score);
-- the other derived columns are internal helpers.

-- Stage 1: NULL-safe primitives derived from the enriched location row.
score_inputs AS (
    SELECT
        wp.*,
        -- City venue count with "no city match" treated as zero venues.
        COALESCE(wp.city_padel_venue_count, 0) AS city_venue_count_filled,
        -- Unrounded city venues per 100k residents (dim_cities exact count, not
        -- the dim_locations spatial metric). NULL when population is missing or
        -- not positive, which also guards the division.
        CASE
            WHEN wp.population > 0
                THEN COALESCE(wp.city_padel_venue_count, 0)::DOUBLE / wp.population * 100000
            ELSE NULL
        END AS city_density_unrounded,
        -- Data confidence: 1.0 with both population and city venues,
        -- 0.5 with only one of them, 0.0 with neither.
        CASE
            WHEN wp.population > 0 AND COALESCE(wp.city_padel_venue_count, 0) > 0 THEN 1.0
            WHEN wp.population > 0 OR COALESCE(wp.city_padel_venue_count, 0) > 0 THEN 0.5
            ELSE 0.0
        END AS data_confidence
    FROM with_pricing AS wp
),

-- Stage 2: reusable score components built from the stage-1 primitives.
score_components AS (
    SELECT
        si.*,
        -- Published density: the unrounded value at 2 decimals (NULL stays NULL).
        ROUND(si.city_density_unrounded, 2) AS city_venues_per_100k,
        -- Supply index in [0, 1]: log-scaled density (saturates at 20 venues per
        -- 100k, hence LN(21)) multiplied by a count gate that reaches 1.0 at
        -- 5 venues. Used for market-score supply and as the demand proxy.
        LEAST(1.0, LN(COALESCE(si.city_density_unrounded, 0) + 1) / LN(21))
            * LEAST(1.0, si.city_venue_count_filled / 5.0) AS market_supply_index
    FROM score_inputs AS si
),

-- Stage 3: the two published scores.
scored AS (
    SELECT
        sc.*,

        -- ── Market Score (Marktreife-Score v3) ──────────────────────────────
        -- 0 when there is no city match or the matched city has no venues.
        CASE
            WHEN sc.city_venue_count_filled > 0 THEN
                ROUND(
                    -- Supply development (40 pts)
                    40.0 * sc.market_supply_index
                    -- Demand evidence (25 pts): occupancy relative to 0.65 when
                    -- available; otherwise 40% of the supply index as a proxy.
                    + 25.0 * CASE
                        WHEN sc.median_occupancy_rate IS NOT NULL
                            THEN LEAST(1.0, sc.median_occupancy_rate / 0.65)
                        ELSE 0.4 * sc.market_supply_index
                    END
                    -- Addressable market (15 pts): log-scaled population, ceiling 1M.
                    -- COALESCE makes "missing population = 0 pts" explicit instead
                    -- of relying on how GREATEST treats NULL arguments.
                    + 15.0 * LEAST(1.0, LN(GREATEST(COALESCE(sc.population, 0), 1)) / LN(1000000))
                    -- Economic context (10 pts): income with default 100, ceiling 200.
                    -- NOTE(review): the opportunity score below normalises the same
                    -- median_income_pps column with default 15000 / ceiling 35000.
                    -- The two scales cannot both fit one unit — confirm which is
                    -- correct for dim_locations.median_income_pps.
                    + 10.0 * LEAST(1.0, COALESCE(sc.median_income_pps, 100) / 200.0)
                    -- Data quality (10 pts): completeness discount.
                    + 10.0 * sc.data_confidence
                , 1)
            ELSE 0
        END AS market_score,

        -- ── Opportunity Score (Marktpotenzial-Score v2) ─────────────────────
        -- Computed for every location, including those without any courts.
        ROUND(
            -- Addressable market (25 pts): log-scaled population, ceiling 500K.
            25.0 * LEAST(1.0, LN(GREATEST(COALESCE(sc.population, 0), 1)) / LN(500000))
            -- Economic power (20 pts): income with default 15000, ceiling 35000.
            + 20.0 * LEAST(1.0, COALESCE(sc.median_income_pps, 15000) / 35000.0)
            -- Supply gap (30 pts): inverted location-level venue density;
            -- zero or unknown density earns full marks, 8+ per 100k earns none.
            + 30.0 * GREATEST(0.0, 1.0 - COALESCE(sc.padel_venues_per_100k, 0) / 8.0)
            -- Catchment gap (15 pts): distance to nearest court, full marks at
            -- 30 km. Unknown distance earns the neutral 0.5. An explicit CASE is
            -- used because wrapping LEAST in COALESCE is engine-dependent: engines
            -- that skip NULL arguments in LEAST would return 1.0 and never reach
            -- the 0.5 fallback.
            + 15.0 * CASE
                WHEN sc.nearest_padel_court_km IS NULL THEN 0.5
                ELSE LEAST(1.0, sc.nearest_padel_court_km / 30.0)
            END
            -- Sports culture (10 pts): tennis courts within 25 km, full marks at
            -- 10. Unknown count is scored as 0 courts, so missing data can neither
            -- award full marks (NULL-skipping LEAST) nor null out the whole score
            -- (NULL-propagating LEAST).
            + 10.0 * LEAST(1.0, COALESCE(sc.tennis_courts_within_25km, 0) / 10.0)
        , 1) AS opportunity_score
    FROM score_components AS sc
)
|
||||
-- Published schema. The explicit column list pins the output shape and order
-- independently of whatever columns the upstream CTEs carry.
SELECT
    -- identity
    prof.geoname_id,
    prof.country_code,
    prof.country_name_en,
    prof.country_slug,
    prof.location_name,
    prof.location_slug,
    prof.city_slug,
    prof.city_name,
    -- geography
    prof.lat,
    prof.lon,
    prof.admin1_code,
    prof.admin2_code,
    -- demographics / economics
    prof.population,
    prof.population_year,
    prof.median_income_pps,
    prof.income_year,
    -- location-level supply metrics
    prof.padel_venue_count,
    prof.padel_venues_per_100k,
    prof.nearest_padel_court_km,
    prof.tennis_courts_within_25km,
    -- city-level supply metrics and scores
    prof.city_padel_venue_count,
    prof.city_venues_per_100k,
    prof.data_confidence,
    prof.market_score,
    prof.opportunity_score,
    -- pricing / occupancy benchmarks
    prof.median_hourly_rate,
    prof.median_peak_rate,
    prof.median_offpeak_rate,
    prof.median_occupancy_rate,
    prof.median_daily_revenue_per_venue,
    prof.price_currency,
    -- build date of this snapshot
    CURRENT_DATE AS refreshed_date
FROM scored AS prof
ORDER BY
    prof.market_score DESC,
    prof.opportunity_score DESC
|
||||
Reference in New Issue
Block a user