Files
padelnomics/transform/sqlmesh_padelnomics/models/foundation/dim_venues.sql
Deeman 79f7fc6fad feat: Playtomic pricing/occupancy pipeline + email i18n + audience restructure
Three workstreams:

1. Playtomic full data extraction & transform pipeline:
   - Expand venue bounding boxes from 4 to 23 regions (global coverage)
   - New staging models for court resources, opening hours, and slot-level
     availability with real prices from the Playtomic API
   - Foundation fact tables for venue capacity and daily occupancy/revenue
   - City-level pricing benchmarks replacing hardcoded country estimates
   - Planner defaults now use 3-tier cascade: city data → country → fallback

2. Transactional email i18n:
   - _t() helper in worker.py with ~70 translation keys (EN + DE)
   - All 8 email handlers translated, lang passed in task payloads

3. Resend audiences restructured to 3 named audiences (free plan limit)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 00:54:53 +01:00

107 lines
3.3 KiB
SQL

-- Deduplicated venue dimension combining OSM courts and Playtomic tenants.
-- Venues from both sources are unioned; near-duplicates (within ~100m) are
-- collapsed to a single record preferring Playtomic data (richer metadata).
-- Proximity dedup uses haversine approximation: 1 degree lat ≈ 111 km.
--
-- Playtomic venues include court counts, indoor/outdoor split, currency, and timezone.
MODEL (
name foundation.dim_venues,
kind FULL,
cron '@daily',
grain venue_id
);
WITH playtomic_venues AS (
SELECT
'pt:' || v.tenant_id AS venue_id,
v.tenant_id,
'playtomic' AS source,
v.lat,
v.lon,
v.country_code,
v.name,
v.city,
v.postcode,
v.tenant_type,
v.timezone,
v.vat_rate,
v.default_currency,
-- Court counts from resources
COUNT(r.resource_id) AS court_count,
COUNT(r.resource_id) FILTER (WHERE r.resource_type = 'indoor') AS indoor_court_count,
COUNT(r.resource_id) FILTER (WHERE r.resource_type = 'outdoor') AS outdoor_court_count,
v.extracted_date
FROM staging.stg_playtomic_venues v
LEFT JOIN staging.stg_playtomic_resources r
ON v.tenant_id = r.tenant_id AND r.is_active = TRUE
WHERE v.country_code IS NOT NULL
GROUP BY
v.tenant_id, v.lat, v.lon, v.country_code, v.name, v.city,
v.postcode, v.tenant_type, v.timezone, v.vat_rate,
v.default_currency, v.extracted_date
),
osm_venues AS (
SELECT
'osm:' || osm_id::TEXT AS venue_id,
NULL AS tenant_id,
'osm' AS source,
lat,
lon,
country_code,
name,
city,
postcode,
NULL AS tenant_type,
NULL AS timezone,
NULL AS vat_rate,
NULL AS default_currency,
NULL AS court_count,
NULL AS indoor_court_count,
NULL AS outdoor_court_count,
extracted_date
FROM staging.stg_padel_courts
WHERE country_code IS NOT NULL
),
all_venues AS (
SELECT * FROM playtomic_venues
UNION ALL
SELECT * FROM osm_venues
),
ranked AS (
SELECT *,
CASE source WHEN 'playtomic' THEN 1 ELSE 2 END AS source_rank
FROM all_venues
)
-- Deduplicate on ~111m grid cells, preferring Playtomic
SELECT
MIN(venue_id) OVER (
PARTITION BY country_code, ROUND(lat, 3)::TEXT, ROUND(lon, 3)::TEXT
ORDER BY source_rank
) AS venue_id,
tenant_id,
country_code,
lat,
lon,
COALESCE(
MAX(CASE WHEN source = 'playtomic' THEN name END)
OVER (PARTITION BY country_code, ROUND(lat,3)::TEXT, ROUND(lon,3)::TEXT),
name
) AS name,
COALESCE(city, '') AS city,
postcode,
source,
tenant_type,
timezone,
vat_rate,
default_currency,
court_count,
indoor_court_count,
outdoor_court_count,
extracted_date
FROM ranked
QUALIFY ROW_NUMBER() OVER (
PARTITION BY country_code, ROUND(lat, 3)::TEXT, ROUND(lon, 3)::TEXT
ORDER BY source_rank
) = 1