feat: Playtomic pricing/occupancy pipeline + email i18n + audience restructure

Three workstreams:

1. Playtomic full data extraction & transform pipeline:
   - Expand venue bounding boxes from 4 to 23 regions (global coverage)
   - New staging models for court resources, opening hours, and slot-level
     availability with real prices from the Playtomic API
   - Foundation fact tables for venue capacity and daily occupancy/revenue
   - City-level pricing benchmarks replacing hardcoded country estimates
   - Planner defaults now use 3-tier cascade: city data → country → fallback

2. Transactional email i18n:
   - _t() helper in worker.py with ~70 translation keys (EN + DE)
   - All 8 email handlers translated, lang passed in task payloads

3. Resend audiences restructured to 3 named audiences (free plan limit)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-23 00:54:53 +01:00
parent c25e20f83a
commit 79f7fc6fad
24 changed files with 1318 additions and 324 deletions

View File

@@ -40,6 +40,12 @@ eurostat_labels AS (
-- Derive a slug-friendly city name from the code as fallback
LOWER(REPLACE(city_code, country_code, '')) AS city_slug_raw
FROM eurostat_cities
),
-- Country-level median income (latest year per country)
country_income AS (
SELECT country_code, median_income_pps, ref_year AS income_year
FROM staging.stg_income
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
)
SELECT
ec.city_code,
@@ -52,8 +58,12 @@ SELECT
COALESCE(vc.centroid_lon, 0::DOUBLE) AS lon,
ec.population,
ec.ref_year AS population_year,
COALESCE(vc.venue_count, 0) AS padel_venue_count
COALESCE(vc.venue_count, 0) AS padel_venue_count,
ci.median_income_pps,
ci.income_year
FROM eurostat_cities ec
LEFT JOIN venue_counts vc
ON ec.country_code = vc.country_code
AND LOWER(TRIM(vc.city)) LIKE '%' || LOWER(LEFT(ec.city_code, 2)) || '%'
LEFT JOIN country_income ci
ON ec.country_code = ci.country_code

View File

@@ -2,6 +2,8 @@
-- Venues from both sources are unioned; near-duplicates (within ~100m) are
-- collapsed to a single record preferring Playtomic data (richer metadata).
-- Proximity dedup uses haversine approximation: 1 degree lat ≈ 111 km.
--
-- Playtomic venues include court counts, indoor/outdoor split, currency, and timezone.
MODEL (
name foundation.dim_venues,
@@ -10,51 +12,74 @@ MODEL (
grain venue_id
);
WITH all_venues AS (
WITH playtomic_venues AS (
SELECT
'osm:' || osm_id::TEXT AS venue_id,
source,
'pt:' || v.tenant_id AS venue_id,
v.tenant_id,
'playtomic' AS source,
v.lat,
v.lon,
v.country_code,
v.name,
v.city,
v.postcode,
v.tenant_type,
v.timezone,
v.vat_rate,
v.default_currency,
-- Court counts from resources
COUNT(r.resource_id) AS court_count,
COUNT(r.resource_id) FILTER (WHERE r.resource_type = 'indoor') AS indoor_court_count,
COUNT(r.resource_id) FILTER (WHERE r.resource_type = 'outdoor') AS outdoor_court_count,
v.extracted_date
FROM staging.stg_playtomic_venues v
LEFT JOIN staging.stg_playtomic_resources r
ON v.tenant_id = r.tenant_id AND r.is_active = TRUE
WHERE v.country_code IS NOT NULL
GROUP BY
v.tenant_id, v.lat, v.lon, v.country_code, v.name, v.city,
v.postcode, v.tenant_type, v.timezone, v.vat_rate,
v.default_currency, v.extracted_date
),
osm_venues AS (
SELECT
'osm:' || osm_id::TEXT AS venue_id,
NULL AS tenant_id,
'osm' AS source,
lat,
lon,
country_code,
name,
city,
postcode,
NULL AS tenant_type,
NULL AS tenant_type,
NULL AS timezone,
NULL AS vat_rate,
NULL AS default_currency,
NULL AS court_count,
NULL AS indoor_court_count,
NULL AS outdoor_court_count,
extracted_date
FROM staging.stg_padel_courts
WHERE country_code IS NOT NULL
UNION ALL
SELECT
'pt:' || tenant_id AS venue_id,
source,
lat,
lon,
country_code,
name,
city,
postcode,
tenant_type,
extracted_date
FROM staging.stg_playtomic_venues
WHERE country_code IS NOT NULL
),
-- Rank venues so Playtomic records win ties in proximity dedup
all_venues AS (
SELECT * FROM playtomic_venues
UNION ALL
SELECT * FROM osm_venues
),
ranked AS (
SELECT *,
CASE source WHEN 'playtomic' THEN 1 ELSE 2 END AS source_rank
FROM all_venues
)
-- Note: full proximity dedup (haversine clustering) is expensive in SQL.
-- For now, deduplicate on exact (country_code, ROUND(lat,3), ROUND(lon,3))
-- — ≈111m grid cells. Refine with spatial index if volumes grow.
-- Deduplicate on ~111m grid cells, preferring Playtomic
SELECT
MIN(venue_id) OVER (
PARTITION BY country_code, ROUND(lat, 3)::TEXT, ROUND(lon, 3)::TEXT
ORDER BY source_rank
) AS venue_id,
) AS venue_id,
tenant_id,
country_code,
lat,
lon,
@@ -62,11 +87,17 @@ SELECT
MAX(CASE WHEN source = 'playtomic' THEN name END)
OVER (PARTITION BY country_code, ROUND(lat,3)::TEXT, ROUND(lon,3)::TEXT),
name
) AS name,
COALESCE(city, '') AS city,
) AS name,
COALESCE(city, '') AS city,
postcode,
source,
tenant_type,
timezone,
vat_rate,
default_currency,
court_count,
indoor_court_count,
outdoor_court_count,
extracted_date
FROM ranked
QUALIFY ROW_NUMBER() OVER (

View File

@@ -0,0 +1,90 @@
-- Daily venue-level availability, pricing, occupancy, and revenue estimates.
-- Aggregates slot-level data from stg_playtomic_availability into per-venue
-- per-day statistics, then calculates occupancy by comparing available hours
-- against total capacity from fct_venue_capacity.
--
-- Occupancy = 1 - (available_court_hours / capacity_court_hours_per_day)
-- Revenue estimate = booked_court_hours × avg_price_of_available_slots
--
-- Peak hours defined as 17:0021:00 (captures main evening rush across markets).
MODEL (
name foundation.fct_daily_availability,
kind FULL,
cron '@daily',
grain (snapshot_date, tenant_id)
);
WITH slot_agg AS (
SELECT
a.snapshot_date,
a.tenant_id,
-- Slot counts: each row is one 60-min available slot on one court
COUNT(*) AS available_slot_count,
COUNT(DISTINCT a.resource_id) AS courts_with_availability,
-- Available (unbooked) court-hours: slots are on 30-min increments for 60-min bookings
-- Each available start_time represents a 60-min bookable window
ROUND(COUNT(*) * 1.0, 2) AS available_court_hours,
-- Pricing stats (60-min slots only)
ROUND(MEDIAN(a.price_amount), 2) AS median_price,
ROUND(AVG(a.price_amount), 2) AS avg_price,
MIN(a.price_amount) AS min_price,
MAX(a.price_amount) AS max_price,
-- Peak: 17:0021:00
ROUND(MEDIAN(a.price_amount) FILTER (
WHERE a.slot_start_time::TIME >= '17:00:00'
AND a.slot_start_time::TIME < '21:00:00'
), 2) AS median_price_peak,
-- Off-peak: everything outside 17:0021:00
ROUND(MEDIAN(a.price_amount) FILTER (
WHERE a.slot_start_time::TIME < '17:00:00'
OR a.slot_start_time::TIME >= '21:00:00'
), 2) AS median_price_offpeak,
MAX(a.price_currency) AS price_currency,
MAX(a.captured_at_utc) AS captured_at_utc
FROM staging.stg_playtomic_availability a
WHERE a.price_amount IS NOT NULL
AND a.price_amount > 0
GROUP BY a.snapshot_date, a.tenant_id
)
SELECT
sa.snapshot_date,
sa.tenant_id,
cap.country_code,
cap.city,
cap.active_court_count,
cap.capacity_court_hours_per_day,
sa.available_slot_count,
sa.courts_with_availability,
sa.available_court_hours,
-- Occupancy: (capacity - available) / capacity
CASE
WHEN cap.capacity_court_hours_per_day > 0
THEN ROUND(
1.0 - (sa.available_court_hours / cap.capacity_court_hours_per_day),
4
)
ELSE NULL
END AS occupancy_rate,
-- Estimated booked court-hours
ROUND(
GREATEST(cap.capacity_court_hours_per_day - sa.available_court_hours, 0),
2
) AS booked_court_hours,
-- Estimated daily revenue: booked hours × avg price
ROUND(
GREATEST(cap.capacity_court_hours_per_day - sa.available_court_hours, 0)
* sa.avg_price,
2
) AS estimated_revenue_eur,
-- Pricing
sa.median_price,
sa.avg_price,
sa.min_price,
sa.max_price,
sa.median_price_peak,
sa.median_price_offpeak,
sa.price_currency,
sa.captured_at_utc
FROM slot_agg sa
JOIN foundation.fct_venue_capacity cap ON sa.tenant_id = cap.tenant_id

View File

@@ -0,0 +1,45 @@
-- Venue capacity: total bookable court-hours per day and week.
-- Derived from active court count × opening hours.
-- Used as the denominator for occupancy rate in fct_daily_availability.
--
-- One row per venue (Playtomic tenant).
MODEL (
name foundation.fct_venue_capacity,
kind FULL,
cron '@daily',
grain tenant_id
);
WITH weekly_hours AS (
SELECT
tenant_id,
SUM(hours_open) AS hours_open_per_week,
AVG(hours_open) AS avg_hours_open_per_day,
COUNT(*) AS days_open_per_week
FROM staging.stg_playtomic_opening_hours
GROUP BY tenant_id
),
court_counts AS (
SELECT
tenant_id,
COUNT(*) AS active_court_count
FROM staging.stg_playtomic_resources
WHERE is_active = TRUE
GROUP BY tenant_id
)
SELECT
v.tenant_id,
v.country_code,
v.city,
cc.active_court_count,
ROUND(wh.hours_open_per_week, 1) AS hours_open_per_week,
ROUND(wh.avg_hours_open_per_day, 1) AS avg_hours_open_per_day,
wh.days_open_per_week,
-- Total bookable court-hours per day (capacity denominator for occupancy)
ROUND(cc.active_court_count * wh.avg_hours_open_per_day, 1) AS capacity_court_hours_per_day,
-- Total bookable court-hours per week
ROUND(cc.active_court_count * wh.hours_open_per_week, 1) AS capacity_court_hours_per_week
FROM staging.stg_playtomic_venues v
JOIN court_counts cc ON v.tenant_id = cc.tenant_id
JOIN weekly_hours wh ON v.tenant_id = wh.tenant_id