Files
Deeman 4e82907a70 refactor(transform): conform geographic dimension hierarchy via city_slug
Propagates the conformed city key (city_slug) from dim_venues through the
full pricing pipeline, eliminating 3 fragile LOWER(TRIM(...)) fuzzy string
joins with deterministic key joins.

Changes (cascading, task-by-task):
- dim_venues: add city_slug computed column (REGEXP_REPLACE slug derivation)
- dim_venue_capacity: join foundation.dim_venues instead of stg_playtomic_venues;
  carry city_slug alongside country_code/city
- fct_daily_availability: carry city_slug from dim_venue_capacity
- venue_pricing_benchmarks: carry city_slug from fct_daily_availability;
  add to venue_stats GROUP BY and final SELECT/GROUP BY
- city_market_profile: join vpb on city_slug = city_slug (was LOWER(TRIM))
- planner_defaults: add city_slug to city_benchmarks CTE; join on city_slug
- pseo_city_pricing: join city_market_profile on city_slug (was LOWER(TRIM))
- pipeline_routes._DAG: dim_venue_capacity now depends on dim_venues, not stg_playtomic_venues

Result: dim_venues.city_slug → dim_cities.(country_code, city_slug) forms a
fully conformed geographic hierarchy with no fuzzy string comparisons.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-27 13:23:03 +01:00

137 lines
4.5 KiB
SQL

-- Per-city planner defaults for the financial calculator.
-- When a user selects a city in the planner, these values pre-fill the inputs.
-- Consumed by: padelnomics.planner.routes — /api/market-data endpoint.
--
-- 3-tier data cascade:
-- 1. City-level: real pricing/occupancy from Playtomic availability snapshots
-- 2. Country-level: median across cities in same country
-- 3. Hardcoded fallback: market research estimates (only when no Playtomic data)
--
-- Units are explicit in column names. Monetary values in local currency.
MODEL (
name serving.planner_defaults,
kind FULL,
cron '@daily',
grain (country_code, city_slug)
);
WITH -- Real city-level benchmarks from Playtomic
city_benchmarks AS (
SELECT
country_code,
city,
city_slug,
median_peak_rate,
median_offpeak_rate,
median_occupancy_rate,
median_daily_revenue_per_venue,
median_court_count,
venue_count,
total_venue_days_observed,
price_currency
FROM serving.venue_pricing_benchmarks
),
-- Country-level medians (fallback when a city has no availability data)
country_benchmarks AS (
SELECT
country_code,
MEDIAN(median_peak_rate) AS median_peak_rate,
MEDIAN(median_offpeak_rate) AS median_offpeak_rate,
MEDIAN(median_occupancy_rate) AS median_occupancy_rate,
MEDIAN(median_court_count) AS median_court_count,
SUM(venue_count) AS total_venues,
MIN(price_currency) AS price_currency
FROM city_benchmarks
GROUP BY country_code
),
-- Hardcoded global fallbacks (only for countries with zero Playtomic coverage)
hardcoded_fallbacks AS (
SELECT * FROM (VALUES
('DE', 22.0, 16.5, 0.55, 4, 'EUR'),
('ES', 16.0, 12.0, 0.62, 6, 'EUR'),
('GB', 24.0, 18.0, 0.52, 4, 'GBP'),
('FR', 18.0, 13.5, 0.58, 5, 'EUR'),
('IT', 15.0, 11.0, 0.60, 6, 'EUR'),
('PT', 12.0, 9.0, 0.65, 6, 'EUR'),
('AT', 20.0, 15.0, 0.54, 4, 'EUR'),
('CH', 28.0, 21.0, 0.50, 4, 'CHF'),
('NL', 20.0, 15.0, 0.56, 4, 'EUR'),
('BE', 18.0, 13.5, 0.57, 4, 'EUR'),
('SE', 22.0, 16.5, 0.50, 4, 'SEK'),
('US', 20.0, 15.0, 0.58, 6, 'USD'),
('MX', 12.0, 9.0, 0.55, 4, 'MXN'),
('AR', 10.0, 7.5, 0.60, 4, 'ARS'),
('DK', 24.0, 18.0, 0.48, 4, 'DKK'),
('NO', 26.0, 19.5, 0.45, 4, 'NOK'),
('FI', 22.0, 16.5, 0.48, 4, 'EUR')
) AS t(country_code, peak_rate, offpeak_rate, occupancy, courts, currency)
),
city_profiles AS (
SELECT
city_slug,
country_code,
city_name,
padel_venue_count,
population,
market_score,
venues_per_100k
FROM serving.city_market_profile
)
SELECT
cp.city_slug,
cp.country_code,
cp.city_name,
cp.padel_venue_count,
cp.population,
cp.market_score,
-- Peak rate: city → country → hardcoded
ROUND(COALESCE(
cb.median_peak_rate,
ctb.median_peak_rate,
hf.peak_rate
), 2) AS rate_peak,
-- Off-peak rate
ROUND(COALESCE(
cb.median_offpeak_rate,
ctb.median_offpeak_rate,
hf.offpeak_rate
), 2) AS rate_off_peak,
-- Occupancy (utilisation)
ROUND(COALESCE(
cb.median_occupancy_rate,
ctb.median_occupancy_rate,
hf.occupancy
), 4) AS avg_utilisation_pct,
-- Typical court count
COALESCE(
cb.median_court_count,
ctb.median_court_count,
hf.courts
) AS courts_typical,
-- Revenue estimate (city-level only)
cb.median_daily_revenue_per_venue AS daily_revenue_per_venue,
-- Data provenance
CASE
WHEN cb.venue_count IS NOT NULL THEN 'city_data'
WHEN ctb.total_venues IS NOT NULL THEN 'country_data'
ELSE 'hardcoded'
END AS data_source,
CASE
WHEN cb.total_venue_days_observed >= 100 THEN 1.0
WHEN cb.total_venue_days_observed >= 30 THEN 0.8
WHEN cb.venue_count IS NOT NULL THEN 0.6
WHEN ctb.total_venues IS NOT NULL THEN 0.4
ELSE 0.2
END AS data_confidence,
COALESCE(cb.price_currency, ctb.price_currency, hf.currency, 'EUR') AS price_currency,
CURRENT_DATE AS refreshed_date
FROM city_profiles cp
LEFT JOIN city_benchmarks cb
ON cp.country_code = cb.country_code
AND cp.city_slug = cb.city_slug
LEFT JOIN country_benchmarks ctb
ON cp.country_code = ctb.country_code
LEFT JOIN hardcoded_fallbacks hf
ON cp.country_code = hf.country_code