Three deviations from the quart_saas_boilerplate methodology corrected:
1. Fix dim_cities LIKE join (data quality bug)
- Old: FROM eurostat_cities LEFT JOIN venue_counts LIKE '%country_code%'
→ cartesian product (2.6M rows vs ~5500 expected)
- New: FROM venue_cities (dim_venues) as primary table, Eurostat for
enrichment only. grain (country_code, city_slug).
- Also fixes REGEXP_REPLACE to LOWER() before regex so uppercase city
names aren't stripped to '-'
2. Rename fct_venue_capacity → dim_venue_capacity
- Static venue attributes with no time key are a dimension, not a fact
- No SQL logic changes; update fct_daily_availability reference
3. Add fct_availability_slot at event grain
- New: grain (snapshot_date, tenant_id, resource_id, slot_start_time)
- Recheck dedup logic moves here from fct_daily_availability
- fct_daily_availability now reads fct_availability_slot (cleaner DAG)
Downstream fixes:
- city_market_profile, planner_defaults grain → (country_code, city_slug)
- pseo_city_costs_de, pseo_city_pricing add city_key composite natural key
(country_slug || '-' || city_slug) to avoid URL collisions across countries
- planner_defaults join in pseo_city_costs_de uses both country_code + city_slug
- Templates updated: natural_key city_slug → city_key
Added transform/sqlmesh_padelnomics/CLAUDE.md documenting data modeling rules,
conformed dimension map, and source integration architecture.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
136 lines
4.5 KiB
SQL
136 lines
4.5 KiB
SQL
-- Per-city planner defaults for the financial calculator.
|
|
-- When a user selects a city in the planner, these values pre-fill the inputs.
|
|
-- Consumed by: padelnomics.planner.routes — /api/market-data endpoint.
|
|
--
|
|
-- 3-tier data cascade:
|
|
-- 1. City-level: real pricing/occupancy from Playtomic availability snapshots
|
|
-- 2. Country-level: median across cities in same country
|
|
-- 3. Hardcoded fallback: market research estimates (only when no Playtomic data)
|
|
--
|
|
-- Units are explicit in column names. Monetary values in local currency.
|
|
|
|
MODEL (
|
|
name serving.planner_defaults,
|
|
kind FULL,
|
|
cron '@daily',
|
|
grain (country_code, city_slug)
|
|
);
|
|
|
|
WITH -- Real city-level benchmarks from Playtomic
|
|
city_benchmarks AS (
|
|
SELECT
|
|
country_code,
|
|
city,
|
|
median_peak_rate,
|
|
median_offpeak_rate,
|
|
median_occupancy_rate,
|
|
median_daily_revenue_per_venue,
|
|
median_court_count,
|
|
venue_count,
|
|
total_venue_days_observed,
|
|
price_currency
|
|
FROM serving.venue_pricing_benchmarks
|
|
),
|
|
-- Country-level medians (fallback when a city has no availability data)
|
|
country_benchmarks AS (
|
|
SELECT
|
|
country_code,
|
|
MEDIAN(median_peak_rate) AS median_peak_rate,
|
|
MEDIAN(median_offpeak_rate) AS median_offpeak_rate,
|
|
MEDIAN(median_occupancy_rate) AS median_occupancy_rate,
|
|
MEDIAN(median_court_count) AS median_court_count,
|
|
SUM(venue_count) AS total_venues,
|
|
MIN(price_currency) AS price_currency
|
|
FROM city_benchmarks
|
|
GROUP BY country_code
|
|
),
|
|
-- Hardcoded global fallbacks (only for countries with zero Playtomic coverage)
|
|
hardcoded_fallbacks AS (
|
|
SELECT * FROM (VALUES
|
|
('DE', 22.0, 16.5, 0.55, 4, 'EUR'),
|
|
('ES', 16.0, 12.0, 0.62, 6, 'EUR'),
|
|
('GB', 24.0, 18.0, 0.52, 4, 'GBP'),
|
|
('FR', 18.0, 13.5, 0.58, 5, 'EUR'),
|
|
('IT', 15.0, 11.0, 0.60, 6, 'EUR'),
|
|
('PT', 12.0, 9.0, 0.65, 6, 'EUR'),
|
|
('AT', 20.0, 15.0, 0.54, 4, 'EUR'),
|
|
('CH', 28.0, 21.0, 0.50, 4, 'CHF'),
|
|
('NL', 20.0, 15.0, 0.56, 4, 'EUR'),
|
|
('BE', 18.0, 13.5, 0.57, 4, 'EUR'),
|
|
('SE', 22.0, 16.5, 0.50, 4, 'SEK'),
|
|
('US', 20.0, 15.0, 0.58, 6, 'USD'),
|
|
('MX', 12.0, 9.0, 0.55, 4, 'MXN'),
|
|
('AR', 10.0, 7.5, 0.60, 4, 'ARS'),
|
|
('DK', 24.0, 18.0, 0.48, 4, 'DKK'),
|
|
('NO', 26.0, 19.5, 0.45, 4, 'NOK'),
|
|
('FI', 22.0, 16.5, 0.48, 4, 'EUR')
|
|
) AS t(country_code, peak_rate, offpeak_rate, occupancy, courts, currency)
|
|
),
|
|
city_profiles AS (
|
|
SELECT
|
|
city_slug,
|
|
country_code,
|
|
city_name,
|
|
padel_venue_count,
|
|
population,
|
|
market_score,
|
|
venues_per_100k
|
|
FROM serving.city_market_profile
|
|
)
|
|
SELECT
|
|
cp.city_slug,
|
|
cp.country_code,
|
|
cp.city_name,
|
|
cp.padel_venue_count,
|
|
cp.population,
|
|
cp.market_score,
|
|
-- Peak rate: city → country → hardcoded
|
|
ROUND(COALESCE(
|
|
cb.median_peak_rate,
|
|
ctb.median_peak_rate,
|
|
hf.peak_rate
|
|
), 2) AS rate_peak,
|
|
-- Off-peak rate
|
|
ROUND(COALESCE(
|
|
cb.median_offpeak_rate,
|
|
ctb.median_offpeak_rate,
|
|
hf.offpeak_rate
|
|
), 2) AS rate_off_peak,
|
|
-- Occupancy (utilisation)
|
|
ROUND(COALESCE(
|
|
cb.median_occupancy_rate,
|
|
ctb.median_occupancy_rate,
|
|
hf.occupancy
|
|
), 4) AS avg_utilisation_pct,
|
|
-- Typical court count
|
|
COALESCE(
|
|
cb.median_court_count,
|
|
ctb.median_court_count,
|
|
hf.courts
|
|
) AS courts_typical,
|
|
-- Revenue estimate (city-level only)
|
|
cb.median_daily_revenue_per_venue AS daily_revenue_per_venue,
|
|
-- Data provenance
|
|
CASE
|
|
WHEN cb.venue_count IS NOT NULL THEN 'city_data'
|
|
WHEN ctb.total_venues IS NOT NULL THEN 'country_data'
|
|
ELSE 'hardcoded'
|
|
END AS data_source,
|
|
CASE
|
|
WHEN cb.total_venue_days_observed >= 100 THEN 1.0
|
|
WHEN cb.total_venue_days_observed >= 30 THEN 0.8
|
|
WHEN cb.venue_count IS NOT NULL THEN 0.6
|
|
WHEN ctb.total_venues IS NOT NULL THEN 0.4
|
|
ELSE 0.2
|
|
END AS data_confidence,
|
|
COALESCE(cb.price_currency, ctb.price_currency, hf.currency, 'EUR') AS price_currency,
|
|
CURRENT_DATE AS refreshed_date
|
|
FROM city_profiles cp
|
|
LEFT JOIN city_benchmarks cb
|
|
ON cp.country_code = cb.country_code
|
|
AND LOWER(TRIM(cp.city_name)) = LOWER(TRIM(cb.city))
|
|
LEFT JOIN country_benchmarks ctb
|
|
ON cp.country_code = ctb.country_code
|
|
LEFT JOIN hardcoded_fallbacks hf
|
|
ON cp.country_code = hf.country_code
|