Merge branch 'worktree-extraction-overhaul'
# Conflicts: # transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql # transform/sqlmesh_padelnomics/models/staging/stg_playtomic_venues.sql
This commit is contained in:
@@ -31,6 +31,22 @@ venue_counts AS (
|
||||
FROM foundation.dim_venues
|
||||
WHERE city IS NOT NULL AND city != ''
|
||||
GROUP BY country_code, city
|
||||
),
|
||||
-- Eurostat city label mapping to canonical city names
|
||||
-- (Eurostat uses codes like DE001C → Berlin; we keep both)
|
||||
eurostat_labels AS (
|
||||
SELECT DISTINCT
|
||||
city_code,
|
||||
country_code,
|
||||
-- Derive a slug-friendly city name from the code as fallback
|
||||
LOWER(REPLACE(city_code, country_code, '')) AS city_slug_raw
|
||||
FROM eurostat_cities
|
||||
),
|
||||
-- Country-level median income (latest year per country)
|
||||
country_income AS (
|
||||
SELECT country_code, median_income_pps, ref_year AS income_year
|
||||
FROM staging.stg_income
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
|
||||
)
|
||||
SELECT
|
||||
ec.city_code,
|
||||
@@ -43,8 +59,12 @@ SELECT
|
||||
COALESCE(vc.centroid_lon, 0::DOUBLE) AS lon,
|
||||
ec.population,
|
||||
ec.ref_year AS population_year,
|
||||
COALESCE(vc.venue_count, 0) AS padel_venue_count
|
||||
COALESCE(vc.venue_count, 0) AS padel_venue_count,
|
||||
ci.median_income_pps,
|
||||
ci.income_year
|
||||
FROM eurostat_cities ec
|
||||
LEFT JOIN venue_counts vc
|
||||
ON ec.country_code = vc.country_code
|
||||
AND LOWER(TRIM(vc.city)) LIKE '%' || LOWER(LEFT(ec.city_code, 2)) || '%'
|
||||
LEFT JOIN country_income ci
|
||||
ON ec.country_code = ci.country_code
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
-- Venues from both sources are unioned; near-duplicates (within ~100m) are
|
||||
-- collapsed to a single record preferring Playtomic data (richer metadata).
|
||||
-- Proximity dedup uses haversine approximation: 1 degree lat ≈ 111 km.
|
||||
--
|
||||
-- Playtomic venues include court counts, indoor/outdoor split, currency, and timezone.
|
||||
|
||||
MODEL (
|
||||
name foundation.dim_venues,
|
||||
@@ -10,51 +12,74 @@ MODEL (
|
||||
grain venue_id
|
||||
);
|
||||
|
||||
WITH all_venues AS (
|
||||
WITH playtomic_venues AS (
|
||||
SELECT
|
||||
'osm:' || osm_id::TEXT AS venue_id,
|
||||
source,
|
||||
'pt:' || v.tenant_id AS venue_id,
|
||||
v.tenant_id,
|
||||
'playtomic' AS source,
|
||||
v.lat,
|
||||
v.lon,
|
||||
v.country_code,
|
||||
v.name,
|
||||
v.city,
|
||||
v.postcode,
|
||||
v.tenant_type,
|
||||
v.timezone,
|
||||
v.vat_rate,
|
||||
v.default_currency,
|
||||
-- Court counts from resources
|
||||
COUNT(r.resource_id) AS court_count,
|
||||
COUNT(r.resource_id) FILTER (WHERE r.resource_type = 'indoor') AS indoor_court_count,
|
||||
COUNT(r.resource_id) FILTER (WHERE r.resource_type = 'outdoor') AS outdoor_court_count,
|
||||
v.extracted_date
|
||||
FROM staging.stg_playtomic_venues v
|
||||
LEFT JOIN staging.stg_playtomic_resources r
|
||||
ON v.tenant_id = r.tenant_id AND r.is_active = TRUE
|
||||
WHERE v.country_code IS NOT NULL
|
||||
GROUP BY
|
||||
v.tenant_id, v.lat, v.lon, v.country_code, v.name, v.city,
|
||||
v.postcode, v.tenant_type, v.timezone, v.vat_rate,
|
||||
v.default_currency, v.extracted_date
|
||||
),
|
||||
osm_venues AS (
|
||||
SELECT
|
||||
'osm:' || osm_id::TEXT AS venue_id,
|
||||
NULL AS tenant_id,
|
||||
'osm' AS source,
|
||||
lat,
|
||||
lon,
|
||||
country_code,
|
||||
name,
|
||||
city,
|
||||
postcode,
|
||||
NULL AS tenant_type,
|
||||
NULL AS tenant_type,
|
||||
NULL AS timezone,
|
||||
NULL AS vat_rate,
|
||||
NULL AS default_currency,
|
||||
NULL AS court_count,
|
||||
NULL AS indoor_court_count,
|
||||
NULL AS outdoor_court_count,
|
||||
extracted_date
|
||||
FROM staging.stg_padel_courts
|
||||
WHERE country_code IS NOT NULL
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
'pt:' || tenant_id AS venue_id,
|
||||
source,
|
||||
lat,
|
||||
lon,
|
||||
country_code,
|
||||
name,
|
||||
city,
|
||||
postcode,
|
||||
tenant_type,
|
||||
extracted_date
|
||||
FROM staging.stg_playtomic_venues
|
||||
WHERE country_code IS NOT NULL
|
||||
),
|
||||
-- Rank venues so Playtomic records win ties in proximity dedup
|
||||
all_venues AS (
|
||||
SELECT * FROM playtomic_venues
|
||||
UNION ALL
|
||||
SELECT * FROM osm_venues
|
||||
),
|
||||
ranked AS (
|
||||
SELECT *,
|
||||
CASE source WHEN 'playtomic' THEN 1 ELSE 2 END AS source_rank
|
||||
FROM all_venues
|
||||
)
|
||||
-- Note: full proximity dedup (haversine clustering) is expensive in SQL.
|
||||
-- For now, deduplicate on exact (country_code, ROUND(lat,3), ROUND(lon,3))
|
||||
-- — ≈111m grid cells. Refine with spatial index if volumes grow.
|
||||
-- Deduplicate on ~111m grid cells, preferring Playtomic
|
||||
SELECT
|
||||
MIN(venue_id) OVER (
|
||||
PARTITION BY country_code, ROUND(lat, 3)::TEXT, ROUND(lon, 3)::TEXT
|
||||
ORDER BY source_rank
|
||||
) AS venue_id,
|
||||
) AS venue_id,
|
||||
tenant_id,
|
||||
country_code,
|
||||
lat,
|
||||
lon,
|
||||
@@ -62,11 +87,17 @@ SELECT
|
||||
MAX(CASE WHEN source = 'playtomic' THEN name END)
|
||||
OVER (PARTITION BY country_code, ROUND(lat,3)::TEXT, ROUND(lon,3)::TEXT),
|
||||
name
|
||||
) AS name,
|
||||
COALESCE(city, '') AS city,
|
||||
) AS name,
|
||||
COALESCE(city, '') AS city,
|
||||
postcode,
|
||||
source,
|
||||
tenant_type,
|
||||
timezone,
|
||||
vat_rate,
|
||||
default_currency,
|
||||
court_count,
|
||||
indoor_court_count,
|
||||
outdoor_court_count,
|
||||
extracted_date
|
||||
FROM ranked
|
||||
QUALIFY ROW_NUMBER() OVER (
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
-- Daily venue-level availability, pricing, occupancy, and revenue estimates.
|
||||
-- Aggregates slot-level data from stg_playtomic_availability into per-venue
|
||||
-- per-day statistics, then calculates occupancy by comparing available hours
|
||||
-- against total capacity from fct_venue_capacity.
|
||||
--
|
||||
-- Occupancy = 1 - (available_court_hours / capacity_court_hours_per_day)
|
||||
-- Revenue estimate = booked_court_hours × avg_price_of_available_slots
|
||||
--
|
||||
-- Peak hours defined as 17:00–21:00 (captures main evening rush across markets).
|
||||
|
||||
MODEL (
|
||||
name foundation.fct_daily_availability,
|
||||
kind FULL,
|
||||
cron '@daily',
|
||||
grain (snapshot_date, tenant_id)
|
||||
);
|
||||
|
||||
WITH slot_agg AS (
|
||||
SELECT
|
||||
a.snapshot_date,
|
||||
a.tenant_id,
|
||||
-- Slot counts: each row is one 60-min available slot on one court
|
||||
COUNT(*) AS available_slot_count,
|
||||
COUNT(DISTINCT a.resource_id) AS courts_with_availability,
|
||||
-- Available (unbooked) court-hours: slots are on 30-min increments for 60-min bookings
|
||||
-- Each available start_time represents a 60-min bookable window
|
||||
ROUND(COUNT(*) * 1.0, 2) AS available_court_hours,
|
||||
-- Pricing stats (60-min slots only)
|
||||
ROUND(MEDIAN(a.price_amount), 2) AS median_price,
|
||||
ROUND(AVG(a.price_amount), 2) AS avg_price,
|
||||
MIN(a.price_amount) AS min_price,
|
||||
MAX(a.price_amount) AS max_price,
|
||||
-- Peak: 17:00–21:00
|
||||
ROUND(MEDIAN(a.price_amount) FILTER (
|
||||
WHERE a.slot_start_time::TIME >= '17:00:00'
|
||||
AND a.slot_start_time::TIME < '21:00:00'
|
||||
), 2) AS median_price_peak,
|
||||
-- Off-peak: everything outside 17:00–21:00
|
||||
ROUND(MEDIAN(a.price_amount) FILTER (
|
||||
WHERE a.slot_start_time::TIME < '17:00:00'
|
||||
OR a.slot_start_time::TIME >= '21:00:00'
|
||||
), 2) AS median_price_offpeak,
|
||||
MAX(a.price_currency) AS price_currency,
|
||||
MAX(a.captured_at_utc) AS captured_at_utc
|
||||
FROM staging.stg_playtomic_availability a
|
||||
WHERE a.price_amount IS NOT NULL
|
||||
AND a.price_amount > 0
|
||||
GROUP BY a.snapshot_date, a.tenant_id
|
||||
)
|
||||
SELECT
|
||||
sa.snapshot_date,
|
||||
sa.tenant_id,
|
||||
cap.country_code,
|
||||
cap.city,
|
||||
cap.active_court_count,
|
||||
cap.capacity_court_hours_per_day,
|
||||
sa.available_slot_count,
|
||||
sa.courts_with_availability,
|
||||
sa.available_court_hours,
|
||||
-- Occupancy: (capacity - available) / capacity
|
||||
CASE
|
||||
WHEN cap.capacity_court_hours_per_day > 0
|
||||
THEN ROUND(
|
||||
1.0 - (sa.available_court_hours / cap.capacity_court_hours_per_day),
|
||||
4
|
||||
)
|
||||
ELSE NULL
|
||||
END AS occupancy_rate,
|
||||
-- Estimated booked court-hours
|
||||
ROUND(
|
||||
GREATEST(cap.capacity_court_hours_per_day - sa.available_court_hours, 0),
|
||||
2
|
||||
) AS booked_court_hours,
|
||||
-- Estimated daily revenue: booked hours × avg price
|
||||
ROUND(
|
||||
GREATEST(cap.capacity_court_hours_per_day - sa.available_court_hours, 0)
|
||||
* sa.avg_price,
|
||||
2
|
||||
) AS estimated_revenue_eur,
|
||||
-- Pricing
|
||||
sa.median_price,
|
||||
sa.avg_price,
|
||||
sa.min_price,
|
||||
sa.max_price,
|
||||
sa.median_price_peak,
|
||||
sa.median_price_offpeak,
|
||||
sa.price_currency,
|
||||
sa.captured_at_utc
|
||||
FROM slot_agg sa
|
||||
JOIN foundation.fct_venue_capacity cap ON sa.tenant_id = cap.tenant_id
|
||||
@@ -0,0 +1,45 @@
|
||||
-- Venue capacity: total bookable court-hours per day and week.
|
||||
-- Derived from active court count × opening hours.
|
||||
-- Used as the denominator for occupancy rate in fct_daily_availability.
|
||||
--
|
||||
-- One row per venue (Playtomic tenant).
|
||||
|
||||
MODEL (
|
||||
name foundation.fct_venue_capacity,
|
||||
kind FULL,
|
||||
cron '@daily',
|
||||
grain tenant_id
|
||||
);
|
||||
|
||||
WITH weekly_hours AS (
|
||||
SELECT
|
||||
tenant_id,
|
||||
SUM(hours_open) AS hours_open_per_week,
|
||||
AVG(hours_open) AS avg_hours_open_per_day,
|
||||
COUNT(*) AS days_open_per_week
|
||||
FROM staging.stg_playtomic_opening_hours
|
||||
GROUP BY tenant_id
|
||||
),
|
||||
court_counts AS (
|
||||
SELECT
|
||||
tenant_id,
|
||||
COUNT(*) AS active_court_count
|
||||
FROM staging.stg_playtomic_resources
|
||||
WHERE is_active = TRUE
|
||||
GROUP BY tenant_id
|
||||
)
|
||||
SELECT
|
||||
v.tenant_id,
|
||||
v.country_code,
|
||||
v.city,
|
||||
cc.active_court_count,
|
||||
ROUND(wh.hours_open_per_week, 1) AS hours_open_per_week,
|
||||
ROUND(wh.avg_hours_open_per_day, 1) AS avg_hours_open_per_day,
|
||||
wh.days_open_per_week,
|
||||
-- Total bookable court-hours per day (capacity denominator for occupancy)
|
||||
ROUND(cc.active_court_count * wh.avg_hours_open_per_day, 1) AS capacity_court_hours_per_day,
|
||||
-- Total bookable court-hours per week
|
||||
ROUND(cc.active_court_count * wh.hours_open_per_week, 1) AS capacity_court_hours_per_week
|
||||
FROM staging.stg_playtomic_venues v
|
||||
JOIN court_counts cc ON v.tenant_id = cc.tenant_id
|
||||
JOIN weekly_hours wh ON v.tenant_id = wh.tenant_id
|
||||
@@ -24,6 +24,8 @@ WITH base AS (
|
||||
c.population,
|
||||
c.population_year,
|
||||
c.padel_venue_count,
|
||||
c.median_income_pps,
|
||||
c.income_year,
|
||||
-- Venue density: padel venues per 100K residents
|
||||
CASE WHEN c.population > 0
|
||||
THEN ROUND(c.padel_venue_count::DOUBLE / c.population * 100000, 2)
|
||||
@@ -51,18 +53,30 @@ scored AS (
|
||||
FROM base
|
||||
)
|
||||
SELECT
|
||||
city_code,
|
||||
country_code,
|
||||
city_name,
|
||||
city_slug,
|
||||
lat,
|
||||
lon,
|
||||
population,
|
||||
population_year,
|
||||
padel_venue_count,
|
||||
venues_per_100k,
|
||||
data_confidence,
|
||||
market_score,
|
||||
s.city_code,
|
||||
s.country_code,
|
||||
s.city_name,
|
||||
s.city_slug,
|
||||
s.lat,
|
||||
s.lon,
|
||||
s.population,
|
||||
s.population_year,
|
||||
s.padel_venue_count,
|
||||
s.venues_per_100k,
|
||||
s.data_confidence,
|
||||
s.market_score,
|
||||
s.median_income_pps,
|
||||
s.income_year,
|
||||
-- Playtomic pricing/occupancy (NULL when no availability data)
|
||||
vpb.median_hourly_rate,
|
||||
vpb.median_peak_rate,
|
||||
vpb.median_offpeak_rate,
|
||||
vpb.median_occupancy_rate,
|
||||
vpb.median_daily_revenue_per_venue,
|
||||
vpb.price_currency,
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM scored
|
||||
ORDER BY market_score DESC
|
||||
FROM scored s
|
||||
LEFT JOIN serving.venue_pricing_benchmarks vpb
|
||||
ON s.country_code = vpb.country_code
|
||||
AND LOWER(TRIM(s.city_name)) = LOWER(TRIM(vpb.city))
|
||||
ORDER BY s.market_score DESC
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
-- Per-city planner defaults for the financial calculator.
|
||||
-- When a user selects a city in the planner, these values pre-fill the inputs.
|
||||
-- Consumed by: padelnomics.planner.routes — city_defaults(city_slug) lookup.
|
||||
-- Consumed by: padelnomics.planner.routes — /api/market-data endpoint.
|
||||
--
|
||||
-- Values are derived from market data where available, otherwise fall back to
|
||||
-- country-level medians, then to global fallbacks from market research report.
|
||||
-- 3-tier data cascade:
|
||||
-- 1. City-level: real pricing/occupancy from Playtomic availability snapshots
|
||||
-- 2. Country-level: median across cities in same country
|
||||
-- 3. Hardcoded fallback: market research estimates (only when no Playtomic data)
|
||||
--
|
||||
-- Units are explicit in column names (EUR, %, h). All monetary values in EUR.
|
||||
-- Units are explicit in column names. Monetary values in local currency.
|
||||
|
||||
MODEL (
|
||||
name serving.planner_defaults,
|
||||
@@ -14,59 +16,120 @@ MODEL (
|
||||
grain city_slug
|
||||
);
|
||||
|
||||
WITH country_medians AS (
|
||||
-- Country-level fallback values from market research (hardcoded until we
|
||||
-- have richer pricing data from Playtomic or direct scraping).
|
||||
SELECT * FROM (VALUES
|
||||
-- (country_code, hourly_rate_peak_eur, monthly_rent_eur_sqm, capex_court_eur,
|
||||
-- avg_utilisation_pct, courts_typical)
|
||||
('DE', 22.0, 14.0, 42000.0, 0.55, 4),
|
||||
('ES', 16.0, 9.0, 32000.0, 0.62, 6),
|
||||
('GB', 24.0, 18.0, 48000.0, 0.52, 4),
|
||||
('FR', 18.0, 12.0, 36000.0, 0.58, 5),
|
||||
('IT', 15.0, 10.0, 30000.0, 0.60, 6),
|
||||
('PT', 12.0, 8.0, 28000.0, 0.65, 6),
|
||||
('AT', 20.0, 13.0, 40000.0, 0.54, 4),
|
||||
('CH', 28.0, 22.0, 55000.0, 0.50, 4),
|
||||
('NL', 20.0, 15.0, 40000.0, 0.56, 4),
|
||||
('BE', 18.0, 13.0, 36000.0, 0.57, 4),
|
||||
('SE', 22.0, 14.0, 42000.0, 0.50, 4),
|
||||
('US', 20.0, 12.0, 38000.0, 0.58, 6)
|
||||
) AS t(country_code, hourly_rate_peak_eur, monthly_rent_eur_sqm, capex_court_eur,
|
||||
avg_utilisation_pct, courts_typical)
|
||||
WITH -- Real city-level benchmarks from Playtomic
|
||||
city_benchmarks AS (
|
||||
SELECT
|
||||
country_code,
|
||||
city,
|
||||
median_peak_rate,
|
||||
median_offpeak_rate,
|
||||
median_occupancy_rate,
|
||||
median_daily_revenue_per_venue,
|
||||
median_court_count,
|
||||
venue_count,
|
||||
total_venue_days_observed,
|
||||
price_currency
|
||||
FROM serving.venue_pricing_benchmarks
|
||||
),
|
||||
city_venue_density AS (
|
||||
-- Country-level medians (fallback when a city has no availability data)
|
||||
country_benchmarks AS (
|
||||
SELECT
|
||||
country_code,
|
||||
MEDIAN(median_peak_rate) AS median_peak_rate,
|
||||
MEDIAN(median_offpeak_rate) AS median_offpeak_rate,
|
||||
MEDIAN(median_occupancy_rate) AS median_occupancy_rate,
|
||||
MEDIAN(median_court_count) AS median_court_count,
|
||||
SUM(venue_count) AS total_venues,
|
||||
MIN(price_currency) AS price_currency
|
||||
FROM city_benchmarks
|
||||
GROUP BY country_code
|
||||
),
|
||||
-- Hardcoded global fallbacks (only for countries with zero Playtomic coverage)
|
||||
hardcoded_fallbacks AS (
|
||||
SELECT * FROM (VALUES
|
||||
('DE', 22.0, 16.5, 0.55, 4, 'EUR'),
|
||||
('ES', 16.0, 12.0, 0.62, 6, 'EUR'),
|
||||
('GB', 24.0, 18.0, 0.52, 4, 'GBP'),
|
||||
('FR', 18.0, 13.5, 0.58, 5, 'EUR'),
|
||||
('IT', 15.0, 11.0, 0.60, 6, 'EUR'),
|
||||
('PT', 12.0, 9.0, 0.65, 6, 'EUR'),
|
||||
('AT', 20.0, 15.0, 0.54, 4, 'EUR'),
|
||||
('CH', 28.0, 21.0, 0.50, 4, 'CHF'),
|
||||
('NL', 20.0, 15.0, 0.56, 4, 'EUR'),
|
||||
('BE', 18.0, 13.5, 0.57, 4, 'EUR'),
|
||||
('SE', 22.0, 16.5, 0.50, 4, 'SEK'),
|
||||
('US', 20.0, 15.0, 0.58, 6, 'USD'),
|
||||
('MX', 12.0, 9.0, 0.55, 4, 'MXN'),
|
||||
('AR', 10.0, 7.5, 0.60, 4, 'ARS'),
|
||||
('DK', 24.0, 18.0, 0.48, 4, 'DKK'),
|
||||
('NO', 26.0, 19.5, 0.45, 4, 'NOK'),
|
||||
('FI', 22.0, 16.5, 0.48, 4, 'EUR')
|
||||
) AS t(country_code, peak_rate, offpeak_rate, occupancy, courts, currency)
|
||||
),
|
||||
city_profiles AS (
|
||||
SELECT
|
||||
city_slug,
|
||||
country_code,
|
||||
city_name,
|
||||
padel_venue_count,
|
||||
population,
|
||||
venues_per_100k,
|
||||
market_score
|
||||
market_score,
|
||||
venues_per_100k
|
||||
FROM serving.city_market_profile
|
||||
)
|
||||
SELECT
|
||||
cvd.city_slug,
|
||||
cvd.country_code,
|
||||
cvd.padel_venue_count,
|
||||
cvd.population,
|
||||
cvd.market_score,
|
||||
-- Hourly rate: adjust country median by market maturity
|
||||
-- (high-density markets → slightly lower rates from competition)
|
||||
ROUND(
|
||||
cm.hourly_rate_peak_eur
|
||||
* CASE
|
||||
WHEN cvd.venues_per_100k > 4 THEN 0.90 -- very competitive
|
||||
WHEN cvd.venues_per_100k > 2 THEN 0.95 -- competitive
|
||||
WHEN cvd.venues_per_100k < 0.5 THEN 1.10 -- underserved premium
|
||||
ELSE 1.0
|
||||
END
|
||||
, 2) AS hourly_rate_peak_eur,
|
||||
ROUND(cm.hourly_rate_peak_eur * 0.75, 2) AS hourly_rate_offpeak_eur,
|
||||
cm.monthly_rent_eur_sqm,
|
||||
cm.capex_court_eur,
|
||||
cm.avg_utilisation_pct,
|
||||
cm.courts_typical,
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM city_venue_density cvd
|
||||
LEFT JOIN country_medians cm ON cvd.country_code = cm.country_code
|
||||
cp.city_slug,
|
||||
cp.country_code,
|
||||
cp.city_name,
|
||||
cp.padel_venue_count,
|
||||
cp.population,
|
||||
cp.market_score,
|
||||
-- Peak rate: city → country → hardcoded
|
||||
ROUND(COALESCE(
|
||||
cb.median_peak_rate,
|
||||
ctb.median_peak_rate,
|
||||
hf.peak_rate
|
||||
), 2) AS rate_peak,
|
||||
-- Off-peak rate
|
||||
ROUND(COALESCE(
|
||||
cb.median_offpeak_rate,
|
||||
ctb.median_offpeak_rate,
|
||||
hf.offpeak_rate
|
||||
), 2) AS rate_off_peak,
|
||||
-- Occupancy (utilisation)
|
||||
ROUND(COALESCE(
|
||||
cb.median_occupancy_rate,
|
||||
ctb.median_occupancy_rate,
|
||||
hf.occupancy
|
||||
), 4) AS avg_utilisation_pct,
|
||||
-- Typical court count
|
||||
COALESCE(
|
||||
cb.median_court_count,
|
||||
ctb.median_court_count,
|
||||
hf.courts
|
||||
) AS courts_typical,
|
||||
-- Revenue estimate (city-level only)
|
||||
cb.median_daily_revenue_per_venue AS daily_revenue_per_venue,
|
||||
-- Data provenance
|
||||
CASE
|
||||
WHEN cb.venue_count IS NOT NULL THEN 'city_data'
|
||||
WHEN ctb.total_venues IS NOT NULL THEN 'country_data'
|
||||
ELSE 'hardcoded'
|
||||
END AS data_source,
|
||||
CASE
|
||||
WHEN cb.total_venue_days_observed >= 100 THEN 1.0
|
||||
WHEN cb.total_venue_days_observed >= 30 THEN 0.8
|
||||
WHEN cb.venue_count IS NOT NULL THEN 0.6
|
||||
WHEN ctb.total_venues IS NOT NULL THEN 0.4
|
||||
ELSE 0.2
|
||||
END AS data_confidence,
|
||||
COALESCE(cb.price_currency, ctb.price_currency, hf.currency, 'EUR') AS price_currency,
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM city_profiles cp
|
||||
LEFT JOIN city_benchmarks cb
|
||||
ON cp.country_code = cb.country_code
|
||||
AND LOWER(TRIM(cp.city_name)) = LOWER(TRIM(cb.city))
|
||||
LEFT JOIN country_benchmarks ctb
|
||||
ON cp.country_code = ctb.country_code
|
||||
LEFT JOIN hardcoded_fallbacks hf
|
||||
ON cp.country_code = hf.country_code
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
-- Per-city pricing and occupancy benchmarks from Playtomic availability data.
|
||||
-- Aggregates venue-level daily metrics (last 30 days) into city-level benchmarks.
|
||||
-- Consumed by: planner defaults (pre-fill), city market profile, SEO articles.
|
||||
--
|
||||
-- Minimum data threshold: venues with >= 3 days of observations.
|
||||
|
||||
MODEL (
|
||||
name serving.venue_pricing_benchmarks,
|
||||
kind FULL,
|
||||
cron '@daily',
|
||||
grain (country_code, city)
|
||||
);
|
||||
|
||||
WITH venue_stats AS (
|
||||
-- Aggregate last 30 days per venue
|
||||
SELECT
|
||||
da.tenant_id,
|
||||
da.country_code,
|
||||
da.city,
|
||||
da.price_currency,
|
||||
AVG(da.occupancy_rate) AS avg_occupancy_rate,
|
||||
MEDIAN(da.median_price) AS median_hourly_rate,
|
||||
MEDIAN(da.median_price_peak) AS median_peak_rate,
|
||||
MEDIAN(da.median_price_offpeak) AS median_offpeak_rate,
|
||||
AVG(da.estimated_revenue_eur) AS avg_daily_revenue,
|
||||
MAX(da.active_court_count) AS court_count,
|
||||
COUNT(DISTINCT da.snapshot_date) AS days_observed
|
||||
FROM foundation.fct_daily_availability da
|
||||
WHERE da.snapshot_date >= CURRENT_DATE - INTERVAL '30 days'
|
||||
AND da.occupancy_rate IS NOT NULL
|
||||
AND da.occupancy_rate BETWEEN 0 AND 1.5
|
||||
GROUP BY da.tenant_id, da.country_code, da.city, da.price_currency
|
||||
HAVING COUNT(DISTINCT da.snapshot_date) >= 3
|
||||
)
|
||||
SELECT
|
||||
country_code,
|
||||
city,
|
||||
price_currency,
|
||||
COUNT(*) AS venue_count,
|
||||
-- Pricing benchmarks
|
||||
ROUND(MEDIAN(median_hourly_rate), 2) AS median_hourly_rate,
|
||||
ROUND(MEDIAN(median_peak_rate), 2) AS median_peak_rate,
|
||||
ROUND(MEDIAN(median_offpeak_rate), 2) AS median_offpeak_rate,
|
||||
ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY median_hourly_rate), 2) AS hourly_rate_p25,
|
||||
ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY median_hourly_rate), 2) AS hourly_rate_p75,
|
||||
-- Occupancy benchmarks
|
||||
ROUND(MEDIAN(avg_occupancy_rate), 4) AS median_occupancy_rate,
|
||||
ROUND(AVG(avg_occupancy_rate), 4) AS avg_occupancy_rate,
|
||||
-- Revenue benchmarks (per venue per day)
|
||||
ROUND(MEDIAN(avg_daily_revenue), 2) AS median_daily_revenue_per_venue,
|
||||
-- Court mix
|
||||
ROUND(MEDIAN(court_count), 0)::INTEGER AS median_court_count,
|
||||
-- Data quality
|
||||
SUM(days_observed) AS total_venue_days_observed,
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM venue_stats
|
||||
GROUP BY country_code, city, price_currency
|
||||
44
transform/sqlmesh_padelnomics/models/staging/stg_income.sql
Normal file
44
transform/sqlmesh_padelnomics/models/staging/stg_income.sql
Normal file
@@ -0,0 +1,44 @@
|
||||
-- Eurostat median equivalised net income in PPS (dataset: ilc_di03).
|
||||
-- Country-level income data for purchasing power adjustments.
|
||||
-- One row per (country_code, year) with median income values.
|
||||
--
|
||||
-- Source: data/landing/eurostat/{year}/{month}/ilc_di03.json.gz
|
||||
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2022", "value": 23127}, ...]}
|
||||
|
||||
MODEL (
|
||||
name staging.stg_income,
|
||||
kind FULL,
|
||||
cron '@daily',
|
||||
grain (country_code, ref_year)
|
||||
);
|
||||
|
||||
WITH source AS (
|
||||
SELECT unnest(rows) AS r
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/eurostat/*/*/ilc_di03.json.gz',
|
||||
auto_detect = true
|
||||
)
|
||||
),
|
||||
parsed AS (
|
||||
SELECT
|
||||
UPPER(TRIM(r.geo_code)) AS geo_code,
|
||||
CAST(r.ref_year AS INTEGER) AS ref_year,
|
||||
CAST(r.value AS DOUBLE) AS median_income_pps,
|
||||
CURRENT_DATE AS extracted_date
|
||||
FROM source
|
||||
WHERE r.value IS NOT NULL
|
||||
)
|
||||
SELECT
|
||||
-- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
|
||||
CASE geo_code
|
||||
WHEN 'EL' THEN 'GR'
|
||||
WHEN 'UK' THEN 'GB'
|
||||
ELSE geo_code
|
||||
END AS country_code,
|
||||
ref_year,
|
||||
median_income_pps,
|
||||
extracted_date
|
||||
FROM parsed
|
||||
WHERE LENGTH(geo_code) = 2
|
||||
AND geo_code NOT IN ('EU', 'EA')
|
||||
AND median_income_pps > 0
|
||||
@@ -0,0 +1,82 @@
|
||||
-- Daily availability snapshots from Playtomic — slot-level pricing data.
|
||||
-- One row per available 60-minute booking slot per court per venue per day.
|
||||
-- "Available" = the slot was NOT booked at capture time. Missing slots = booked.
|
||||
--
|
||||
-- Only 60-min duration slots are kept (canonical hourly rate + occupancy unit).
|
||||
-- The API returns 60/90/120-min variants per start_time — filtering to 60 avoids
|
||||
-- double-counting the same time window.
|
||||
--
|
||||
-- Price parsed from strings like "14.56 EUR" or "48 GBP".
|
||||
--
|
||||
-- Source: data/landing/playtomic/{year}/{month}/availability_{date}.json.gz
|
||||
-- Format: {date, captured_at_utc, venues: [{tenant_id, slots: [{resource_id, start_date, slots: [...]}]}]}
|
||||
|
||||
MODEL (
|
||||
name staging.stg_playtomic_availability,
|
||||
kind FULL,
|
||||
cron '@daily',
|
||||
grain (snapshot_date, tenant_id, resource_id, slot_start_time)
|
||||
);
|
||||
|
||||
WITH raw_files AS (
|
||||
SELECT *
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/availability_*.json.gz',
|
||||
format = 'auto',
|
||||
columns = {
|
||||
date: 'VARCHAR',
|
||||
captured_at_utc: 'VARCHAR',
|
||||
venues: 'JSON[]'
|
||||
}
|
||||
)
|
||||
),
|
||||
raw_venues AS (
|
||||
SELECT
|
||||
rf.date AS snapshot_date,
|
||||
rf.captured_at_utc,
|
||||
venue_json
|
||||
FROM raw_files rf,
|
||||
LATERAL UNNEST(rf.venues) AS t(venue_json)
|
||||
),
|
||||
-- Each venue has: {tenant_id, slots: [{resource_id, start_date, slots: [...]}]}
|
||||
raw_resources AS (
|
||||
SELECT
|
||||
rv.snapshot_date,
|
||||
rv.captured_at_utc,
|
||||
rv.venue_json ->> 'tenant_id' AS tenant_id,
|
||||
resource_json
|
||||
FROM raw_venues rv,
|
||||
LATERAL UNNEST(
|
||||
from_json(rv.venue_json -> 'slots', '["JSON"]')
|
||||
) AS t(resource_json)
|
||||
),
|
||||
-- Each resource has: {resource_id, start_date, slots: [{start_time, duration, price}]}
|
||||
raw_slots AS (
|
||||
SELECT
|
||||
rr.snapshot_date,
|
||||
rr.captured_at_utc,
|
||||
rr.tenant_id,
|
||||
rr.resource_json ->> 'resource_id' AS resource_id,
|
||||
slot_json
|
||||
FROM raw_resources rr,
|
||||
LATERAL UNNEST(
|
||||
from_json(rr.resource_json -> 'slots', '["JSON"]')
|
||||
) AS t(slot_json)
|
||||
)
|
||||
SELECT
|
||||
snapshot_date,
|
||||
tenant_id,
|
||||
resource_id,
|
||||
slot_json ->> 'start_time' AS slot_start_time,
|
||||
TRY_CAST(slot_json ->> 'duration' AS INTEGER) AS duration_minutes,
|
||||
-- Parse "14.56 EUR" → 14.56
|
||||
TRY_CAST(
|
||||
SPLIT_PART(slot_json ->> 'price', ' ', 1) AS DOUBLE
|
||||
) AS price_amount,
|
||||
-- Parse "14.56 EUR" → EUR
|
||||
SPLIT_PART(slot_json ->> 'price', ' ', 2) AS price_currency,
|
||||
captured_at_utc
|
||||
FROM raw_slots
|
||||
WHERE resource_id IS NOT NULL
|
||||
AND (slot_json ->> 'start_time') IS NOT NULL
|
||||
AND TRY_CAST(slot_json ->> 'duration' AS INTEGER) = 60
|
||||
@@ -0,0 +1,54 @@
|
||||
-- Venue opening hours by day of week from Playtomic.
|
||||
-- Unpivots the opening_hours JSON object into one row per (tenant_id, day_of_week).
|
||||
-- Used downstream to calculate total weekly/daily capacity hours.
|
||||
--
|
||||
-- Source: data/landing/playtomic/{year}/{month}/tenants.json.gz
|
||||
-- Each tenant has opening_hours: {MONDAY: {opening_time, closing_time}, ...}
|
||||
|
||||
MODEL (
|
||||
name staging.stg_playtomic_opening_hours,
|
||||
kind FULL,
|
||||
cron '@daily',
|
||||
grain (tenant_id, day_of_week)
|
||||
);
|
||||
|
||||
WITH venues AS (
|
||||
SELECT UNNEST(tenants) AS tenant
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
|
||||
format = 'auto'
|
||||
)
|
||||
),
|
||||
days AS (
|
||||
SELECT day_name, day_number FROM (VALUES
|
||||
('MONDAY', 1), ('TUESDAY', 2), ('WEDNESDAY', 3), ('THURSDAY', 4),
|
||||
('FRIDAY', 5), ('SATURDAY', 6), ('SUNDAY', 7)
|
||||
) AS t(day_name, day_number)
|
||||
),
|
||||
parsed AS (
|
||||
SELECT
|
||||
tenant ->> 'tenant_id' AS tenant_id,
|
||||
d.day_name AS day_of_week,
|
||||
d.day_number,
|
||||
tenant -> 'opening_hours' -> d.day_name ->> 'opening_time' AS opening_time,
|
||||
tenant -> 'opening_hours' -> d.day_name ->> 'closing_time' AS closing_time
|
||||
FROM venues
|
||||
CROSS JOIN days d
|
||||
WHERE (tenant ->> 'tenant_id') IS NOT NULL
|
||||
AND (tenant -> 'opening_hours') IS NOT NULL
|
||||
AND (tenant -> 'opening_hours' -> d.day_name) IS NOT NULL
|
||||
)
|
||||
SELECT
|
||||
tenant_id,
|
||||
day_of_week,
|
||||
day_number,
|
||||
opening_time,
|
||||
closing_time,
|
||||
-- Hours open this day (e.g., 09:00 to 23:00 = 14.0h)
|
||||
ROUND(
|
||||
(EXTRACT(HOUR FROM closing_time::TIME) + EXTRACT(MINUTE FROM closing_time::TIME) / 60.0)
|
||||
- (EXTRACT(HOUR FROM opening_time::TIME) + EXTRACT(MINUTE FROM opening_time::TIME) / 60.0)
|
||||
, 2) AS hours_open
|
||||
FROM parsed
|
||||
WHERE opening_time IS NOT NULL
|
||||
AND closing_time IS NOT NULL
|
||||
@@ -0,0 +1,46 @@
|
||||
-- Individual court (resource) records from Playtomic venues.
|
||||
-- Reads resources array from the landing zone JSON directly (double UNNEST:
|
||||
-- tenants → resources) to extract court type, size, surface, and booking config.
|
||||
--
|
||||
-- Source: data/landing/playtomic/{year}/{month}/tenants.json.gz
|
||||
-- Each tenant has a resources[] array of court objects.
|
||||
|
||||
MODEL (
|
||||
name staging.stg_playtomic_resources,
|
||||
kind FULL,
|
||||
cron '@daily',
|
||||
grain (tenant_id, resource_id)
|
||||
);
|
||||
|
||||
WITH raw AS (
|
||||
SELECT UNNEST(tenants) AS tenant
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
|
||||
format = 'auto'
|
||||
)
|
||||
),
|
||||
unnested AS (
|
||||
SELECT
|
||||
tenant ->> 'tenant_id' AS tenant_id,
|
||||
UPPER(tenant -> 'address' ->> 'country_code') AS country_code,
|
||||
UNNEST(from_json(tenant -> 'resources', '["JSON"]')) AS resource_json
|
||||
FROM raw
|
||||
WHERE (tenant ->> 'tenant_id') IS NOT NULL
|
||||
AND (tenant -> 'resources') IS NOT NULL
|
||||
)
|
||||
SELECT
|
||||
tenant_id,
|
||||
resource_json ->> 'resource_id' AS resource_id,
|
||||
country_code,
|
||||
NULLIF(TRIM(resource_json ->> 'name'), '') AS resource_name,
|
||||
resource_json ->> 'sport_id' AS sport_id,
|
||||
CASE WHEN LOWER(resource_json ->> 'is_active') IN ('true', '1')
|
||||
THEN TRUE ELSE FALSE END AS is_active,
|
||||
LOWER(resource_json -> 'properties' ->> 'resource_type') AS resource_type,
|
||||
LOWER(resource_json -> 'properties' ->> 'resource_size') AS resource_size,
|
||||
LOWER(resource_json -> 'properties' ->> 'resource_feature') AS resource_feature,
|
||||
CASE WHEN LOWER(resource_json -> 'booking_settings' ->> 'is_bookable_online') IN ('true', '1')
|
||||
THEN TRUE ELSE FALSE END AS is_bookable_online
|
||||
FROM unnested
|
||||
WHERE (resource_json ->> 'resource_id') IS NOT NULL
|
||||
AND (resource_json ->> 'sport_id') = 'PADEL'
|
||||
@@ -1,8 +1,10 @@
|
||||
-- Playtomic padel venue records from unauthenticated tenant search API.
|
||||
-- Reads landing zone JSON directly, unnests tenant array, deduplicates on
|
||||
-- tenant_id (keeps most recent), and normalizes address fields.
|
||||
-- Playtomic padel venue records — full metadata extraction.
|
||||
-- Reads landing zone JSON, unnests tenant array, extracts all venue metadata
|
||||
-- including address, opening hours, court resources, VAT rate, and facilities.
|
||||
-- Deduplicates on tenant_id (keeps most recent extraction).
|
||||
--
|
||||
-- Source: data/landing/playtomic/{year}/{month}/tenants.json.gz
|
||||
-- Format: {"tenants": [{tenant_id, tenant_name, address, resources, opening_hours, ...}]}
|
||||
|
||||
MODEL (
|
||||
name staging.stg_playtomic_venues,
|
||||
@@ -13,18 +15,42 @@ MODEL (
|
||||
|
||||
WITH parsed AS (
|
||||
SELECT
|
||||
tenant ->> 'tenant_id' AS tenant_id,
|
||||
tenant ->> 'tenant_name' AS tenant_name,
|
||||
tenant -> 'address' ->> 'street' AS street,
|
||||
tenant -> 'address' ->> 'city' AS city,
|
||||
tenant -> 'address' ->> 'postal_code' AS postal_code,
|
||||
tenant -> 'address' ->> 'country_code' AS country_code,
|
||||
-- Identity
|
||||
tenant ->> 'tenant_id' AS tenant_id,
|
||||
tenant ->> 'tenant_name' AS tenant_name,
|
||||
tenant ->> 'slug' AS slug,
|
||||
tenant ->> 'tenant_type' AS tenant_type,
|
||||
tenant ->> 'tenant_status' AS tenant_status,
|
||||
tenant ->> 'playtomic_status' AS playtomic_status,
|
||||
tenant ->> 'booking_type' AS booking_type,
|
||||
|
||||
-- Address
|
||||
tenant -> 'address' ->> 'street' AS street,
|
||||
tenant -> 'address' ->> 'city' AS city,
|
||||
tenant -> 'address' ->> 'postal_code' AS postal_code,
|
||||
UPPER(tenant -> 'address' ->> 'country_code') AS country_code,
|
||||
tenant -> 'address' ->> 'timezone' AS timezone,
|
||||
tenant -> 'address' ->> 'administrative_area' AS administrative_area,
|
||||
TRY_CAST(tenant -> 'address' -> 'coordinate' ->> 'lat' AS DOUBLE) AS lat,
|
||||
TRY_CAST(tenant -> 'address' -> 'coordinate' ->> 'lon' AS DOUBLE) AS lon,
|
||||
tenant ->> 'sport_ids' AS sport_ids_raw,
|
||||
tenant ->> 'tenant_type' AS tenant_type,
|
||||
filename AS source_file,
|
||||
CURRENT_DATE AS extracted_date
|
||||
|
||||
-- Commercial
|
||||
TRY_CAST(tenant ->> 'vat_rate' AS DOUBLE) AS vat_rate,
|
||||
tenant ->> 'default_currency' AS default_currency,
|
||||
|
||||
-- Booking settings (venue-level)
|
||||
TRY_CAST(tenant -> 'booking_settings' ->> 'booking_ahead_limit' AS INTEGER) AS booking_ahead_limit_minutes,
|
||||
|
||||
-- Opening hours and resources stored as JSON for downstream models
|
||||
tenant -> 'opening_hours' AS opening_hours_json,
|
||||
tenant -> 'resources' AS resources_json,
|
||||
|
||||
-- Metadata
|
||||
tenant ->> 'created_at' AS created_at,
|
||||
tenant ->> 'is_playtomic_partner' AS is_playtomic_partner_raw,
|
||||
|
||||
filename AS source_file,
|
||||
CURRENT_DATE AS extracted_date
|
||||
FROM (
|
||||
SELECT UNNEST(tenants) AS tenant, filename
|
||||
FROM read_json(
|
||||
@@ -37,22 +63,37 @@ WITH parsed AS (
|
||||
),
|
||||
deduped AS (
|
||||
SELECT *,
|
||||
ROW_NUMBER() OVER (PARTITION BY tenant_id ORDER BY extracted_date DESC) AS rn
|
||||
ROW_NUMBER() OVER (PARTITION BY tenant_id ORDER BY source_file DESC) AS rn
|
||||
FROM parsed
|
||||
WHERE tenant_id IS NOT NULL
|
||||
AND lat IS NOT NULL AND lon IS NOT NULL
|
||||
AND lat BETWEEN -90 AND 90
|
||||
AND lat BETWEEN -90 AND 90
|
||||
AND lon BETWEEN -180 AND 180
|
||||
)
|
||||
SELECT
|
||||
tenant_id,
|
||||
'playtomic' AS source,
|
||||
lat, lon,
|
||||
UPPER(country_code) AS country_code,
|
||||
NULLIF(TRIM(tenant_name), '') AS name,
|
||||
NULLIF(TRIM(city), '') AS city,
|
||||
postal_code AS postcode,
|
||||
'playtomic' AS source,
|
||||
NULLIF(TRIM(tenant_name), '') AS name,
|
||||
slug,
|
||||
tenant_type,
|
||||
tenant_status,
|
||||
playtomic_status,
|
||||
booking_type,
|
||||
street,
|
||||
NULLIF(TRIM(city), '') AS city,
|
||||
postal_code AS postcode,
|
||||
country_code,
|
||||
timezone,
|
||||
administrative_area,
|
||||
lat,
|
||||
lon,
|
||||
vat_rate,
|
||||
default_currency,
|
||||
booking_ahead_limit_minutes,
|
||||
opening_hours_json,
|
||||
resources_json,
|
||||
created_at,
|
||||
CASE WHEN LOWER(is_playtomic_partner_raw) IN ('true', '1') THEN TRUE ELSE FALSE END AS is_playtomic_partner,
|
||||
extracted_date
|
||||
FROM deduped
|
||||
WHERE rn = 1
|
||||
|
||||
Reference in New Issue
Block a user