Propagates the conformed city key (city_slug) from dim_venues through the full pricing pipeline, eliminating 3 fragile LOWER(TRIM(...)) fuzzy string joins with deterministic key joins. Changes (cascading, task-by-task): - dim_venues: add city_slug computed column (REGEXP_REPLACE slug derivation) - dim_venue_capacity: join foundation.dim_venues instead of stg_playtomic_venues; carry city_slug alongside country_code/city - fct_daily_availability: carry city_slug from dim_venue_capacity - venue_pricing_benchmarks: carry city_slug from fct_daily_availability; add to venue_stats GROUP BY and final SELECT/GROUP BY - city_market_profile: join vpb on city_slug = city_slug (was LOWER(TRIM)) - planner_defaults: add city_slug to city_benchmarks CTE; join on city_slug - pseo_city_pricing: join city_market_profile on city_slug (was LOWER(TRIM)) - pipeline_routes._DAG: dim_venue_capacity now depends on dim_venues, not stg_playtomic_venues Result: dim_venues.city_slug → dim_cities.(country_code, city_slug) forms a fully conformed geographic hierarchy with no fuzzy string comparisons. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
137 lines
4.5 KiB
SQL
137 lines
4.5 KiB
SQL
-- Per-city planner defaults for the financial calculator.
|
|
-- When a user selects a city in the planner, these values pre-fill the inputs.
|
|
-- Consumed by: padelnomics.planner.routes — /api/market-data endpoint.
|
|
--
|
|
-- 3-tier data cascade:
|
|
-- 1. City-level: real pricing/occupancy from Playtomic availability snapshots
|
|
-- 2. Country-level: median across cities in same country
|
|
-- 3. Hardcoded fallback: market research estimates (only when no Playtomic data)
|
|
--
|
|
-- Units are explicit in column names. Monetary values in local currency.
|
|
|
|
MODEL (
|
|
name serving.planner_defaults,
|
|
kind FULL,
|
|
cron '@daily',
|
|
grain (country_code, city_slug)
|
|
);
|
|
|
|
WITH -- Real city-level benchmarks from Playtomic
|
|
city_benchmarks AS (
|
|
SELECT
|
|
country_code,
|
|
city,
|
|
city_slug,
|
|
median_peak_rate,
|
|
median_offpeak_rate,
|
|
median_occupancy_rate,
|
|
median_daily_revenue_per_venue,
|
|
median_court_count,
|
|
venue_count,
|
|
total_venue_days_observed,
|
|
price_currency
|
|
FROM serving.venue_pricing_benchmarks
|
|
),
|
|
-- Country-level medians (fallback when a city has no availability data)
|
|
country_benchmarks AS (
|
|
SELECT
|
|
country_code,
|
|
MEDIAN(median_peak_rate) AS median_peak_rate,
|
|
MEDIAN(median_offpeak_rate) AS median_offpeak_rate,
|
|
MEDIAN(median_occupancy_rate) AS median_occupancy_rate,
|
|
MEDIAN(median_court_count) AS median_court_count,
|
|
SUM(venue_count) AS total_venues,
|
|
MIN(price_currency) AS price_currency
|
|
FROM city_benchmarks
|
|
GROUP BY country_code
|
|
),
|
|
-- Hardcoded global fallbacks (only for countries with zero Playtomic coverage)
|
|
hardcoded_fallbacks AS (
|
|
SELECT * FROM (VALUES
|
|
('DE', 22.0, 16.5, 0.55, 4, 'EUR'),
|
|
('ES', 16.0, 12.0, 0.62, 6, 'EUR'),
|
|
('GB', 24.0, 18.0, 0.52, 4, 'GBP'),
|
|
('FR', 18.0, 13.5, 0.58, 5, 'EUR'),
|
|
('IT', 15.0, 11.0, 0.60, 6, 'EUR'),
|
|
('PT', 12.0, 9.0, 0.65, 6, 'EUR'),
|
|
('AT', 20.0, 15.0, 0.54, 4, 'EUR'),
|
|
('CH', 28.0, 21.0, 0.50, 4, 'CHF'),
|
|
('NL', 20.0, 15.0, 0.56, 4, 'EUR'),
|
|
('BE', 18.0, 13.5, 0.57, 4, 'EUR'),
|
|
('SE', 22.0, 16.5, 0.50, 4, 'SEK'),
|
|
('US', 20.0, 15.0, 0.58, 6, 'USD'),
|
|
('MX', 12.0, 9.0, 0.55, 4, 'MXN'),
|
|
('AR', 10.0, 7.5, 0.60, 4, 'ARS'),
|
|
('DK', 24.0, 18.0, 0.48, 4, 'DKK'),
|
|
('NO', 26.0, 19.5, 0.45, 4, 'NOK'),
|
|
('FI', 22.0, 16.5, 0.48, 4, 'EUR')
|
|
) AS t(country_code, peak_rate, offpeak_rate, occupancy, courts, currency)
|
|
),
|
|
city_profiles AS (
|
|
SELECT
|
|
city_slug,
|
|
country_code,
|
|
city_name,
|
|
padel_venue_count,
|
|
population,
|
|
market_score,
|
|
venues_per_100k
|
|
FROM serving.city_market_profile
|
|
)
|
|
SELECT
|
|
cp.city_slug,
|
|
cp.country_code,
|
|
cp.city_name,
|
|
cp.padel_venue_count,
|
|
cp.population,
|
|
cp.market_score,
|
|
-- Peak rate: city → country → hardcoded
|
|
ROUND(COALESCE(
|
|
cb.median_peak_rate,
|
|
ctb.median_peak_rate,
|
|
hf.peak_rate
|
|
), 2) AS rate_peak,
|
|
-- Off-peak rate
|
|
ROUND(COALESCE(
|
|
cb.median_offpeak_rate,
|
|
ctb.median_offpeak_rate,
|
|
hf.offpeak_rate
|
|
), 2) AS rate_off_peak,
|
|
-- Occupancy (utilisation)
|
|
ROUND(COALESCE(
|
|
cb.median_occupancy_rate,
|
|
ctb.median_occupancy_rate,
|
|
hf.occupancy
|
|
), 4) AS avg_utilisation_pct,
|
|
-- Typical court count
|
|
COALESCE(
|
|
cb.median_court_count,
|
|
ctb.median_court_count,
|
|
hf.courts
|
|
) AS courts_typical,
|
|
-- Revenue estimate (city-level only)
|
|
cb.median_daily_revenue_per_venue AS daily_revenue_per_venue,
|
|
-- Data provenance
|
|
CASE
|
|
WHEN cb.venue_count IS NOT NULL THEN 'city_data'
|
|
WHEN ctb.total_venues IS NOT NULL THEN 'country_data'
|
|
ELSE 'hardcoded'
|
|
END AS data_source,
|
|
CASE
|
|
WHEN cb.total_venue_days_observed >= 100 THEN 1.0
|
|
WHEN cb.total_venue_days_observed >= 30 THEN 0.8
|
|
WHEN cb.venue_count IS NOT NULL THEN 0.6
|
|
WHEN ctb.total_venues IS NOT NULL THEN 0.4
|
|
ELSE 0.2
|
|
END AS data_confidence,
|
|
COALESCE(cb.price_currency, ctb.price_currency, hf.currency, 'EUR') AS price_currency,
|
|
CURRENT_DATE AS refreshed_date
|
|
FROM city_profiles cp
|
|
LEFT JOIN city_benchmarks cb
|
|
ON cp.country_code = cb.country_code
|
|
AND cp.city_slug = cb.city_slug
|
|
LEFT JOIN country_benchmarks ctb
|
|
ON cp.country_code = ctb.country_code
|
|
LEFT JOIN hardcoded_fallbacks hf
|
|
ON cp.country_code = hf.country_code
|