fix: DuckDB compat issues in Playtomic pipeline + export_serving

- Add maximum_object_size=128MB to read_json for 14K-venue tenants file
- Rewrite opening_hours to use UNION ALL unpivot (DuckDB struct dynamic access)
- Add seed file guard for availability model (empty result on first run)
- Fix snapshot_date VARCHAR→DATE comparison in venue_pricing_benchmarks
- Fix export_serving to resolve SQLMesh physical tables from view definitions
  (SQLMesh views reference "local" catalog unavailable outside its context)
- Add pyarrow dependency for Arrow-based cross-connection data transfer

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-23 01:27:51 +01:00
parent a055660cd2
commit 7737b79230
8 changed files with 144 additions and 36 deletions

View File

@@ -26,7 +26,7 @@ WITH venue_stats AS (
MAX(da.active_court_count) AS court_count,
COUNT(DISTINCT da.snapshot_date) AS days_observed
FROM foundation.fct_daily_availability da
WHERE da.snapshot_date >= CURRENT_DATE - INTERVAL '30 days'
WHERE TRY_CAST(da.snapshot_date AS DATE) >= CURRENT_DATE - INTERVAL '30 days'
AND da.occupancy_rate IS NOT NULL
AND da.occupancy_rate BETWEEN 0 AND 1.5
GROUP BY da.tenant_id, da.country_code, da.city, da.price_currency

View File

@@ -8,6 +8,10 @@
--
-- Price parsed from strings like "14.56 EUR" or "48 GBP".
--
-- Requires: at least one availability file in the landing zone.
-- A seed file (data/landing/playtomic/1970/01/availability_1970-01-01.json.gz)
-- with empty venues[] ensures this model runs before real data arrives.
--
-- Source: data/landing/playtomic/{year}/{month}/availability_{date}.json.gz
-- Format: {date, captured_at_utc, venues: [{tenant_id, slots: [{resource_id, start_date, slots: [...]}]}]}
@@ -29,6 +33,8 @@ WITH raw_files AS (
venues: 'JSON[]'
}
)
WHERE venues IS NOT NULL
AND json_array_length(venues) > 0
),
raw_venues AS (
SELECT

View File

@@ -1,7 +1,10 @@
-- Venue opening hours by day of week from Playtomic.
-- Unpivots the opening_hours JSON object into one row per (tenant_id, day_of_week).
-- Unpivots the opening_hours struct into one row per (tenant_id, day_of_week).
-- Used downstream to calculate total weekly/daily capacity hours.
--
-- DuckDB auto-infers opening_hours as STRUCT, so we access each day by literal
-- key (no dynamic access) and UNION ALL to unpivot.
--
-- Source: data/landing/playtomic/{year}/{month}/tenants.json.gz
-- Each tenant has opening_hours: {MONDAY: {opening_time, closing_time}, ...}
@@ -13,30 +16,56 @@ MODEL (
);
WITH venues AS (
SELECT UNNEST(tenants) AS tenant
FROM read_json(
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
format = 'auto'
)
),
days AS (
SELECT day_name, day_number FROM (VALUES
('MONDAY', 1), ('TUESDAY', 2), ('WEDNESDAY', 3), ('THURSDAY', 4),
('FRIDAY', 5), ('SATURDAY', 6), ('SUNDAY', 7)
) AS t(day_name, day_number)
),
parsed AS (
SELECT
tenant ->> 'tenant_id' AS tenant_id,
d.day_name AS day_of_week,
d.day_number,
tenant -> 'opening_hours' -> d.day_name ->> 'opening_time' AS opening_time,
tenant -> 'opening_hours' -> d.day_name ->> 'closing_time' AS closing_time
FROM venues
CROSS JOIN days d
tenant ->> 'tenant_id' AS tenant_id,
tenant -> 'opening_hours' AS oh
FROM (
SELECT UNNEST(tenants) AS tenant
FROM read_json(
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
format = 'auto',
maximum_object_size = 134217728
)
)
WHERE (tenant ->> 'tenant_id') IS NOT NULL
AND (tenant -> 'opening_hours') IS NOT NULL
AND (tenant -> 'opening_hours' -> d.day_name) IS NOT NULL
),
-- Unpivot by UNION ALL — 7 literal key accesses
unpivoted AS (
SELECT tenant_id, 'MONDAY' AS day_of_week, 1 AS day_number,
oh -> 'MONDAY' ->> 'opening_time' AS opening_time,
oh -> 'MONDAY' ->> 'closing_time' AS closing_time
FROM venues
UNION ALL
SELECT tenant_id, 'TUESDAY' AS day_of_week, 2,
oh -> 'TUESDAY' ->> 'opening_time',
oh -> 'TUESDAY' ->> 'closing_time'
FROM venues
UNION ALL
SELECT tenant_id, 'WEDNESDAY' AS day_of_week, 3,
oh -> 'WEDNESDAY' ->> 'opening_time',
oh -> 'WEDNESDAY' ->> 'closing_time'
FROM venues
UNION ALL
SELECT tenant_id, 'THURSDAY' AS day_of_week, 4,
oh -> 'THURSDAY' ->> 'opening_time',
oh -> 'THURSDAY' ->> 'closing_time'
FROM venues
UNION ALL
SELECT tenant_id, 'FRIDAY' AS day_of_week, 5,
oh -> 'FRIDAY' ->> 'opening_time',
oh -> 'FRIDAY' ->> 'closing_time'
FROM venues
UNION ALL
SELECT tenant_id, 'SATURDAY' AS day_of_week, 6,
oh -> 'SATURDAY' ->> 'opening_time',
oh -> 'SATURDAY' ->> 'closing_time'
FROM venues
UNION ALL
SELECT tenant_id, 'SUNDAY' AS day_of_week, 7,
oh -> 'SUNDAY' ->> 'opening_time',
oh -> 'SUNDAY' ->> 'closing_time'
FROM venues
)
SELECT
tenant_id,
@@ -49,6 +78,6 @@ SELECT
(EXTRACT(HOUR FROM closing_time::TIME) + EXTRACT(MINUTE FROM closing_time::TIME) / 60.0)
- (EXTRACT(HOUR FROM opening_time::TIME) + EXTRACT(MINUTE FROM opening_time::TIME) / 60.0)
, 2) AS hours_open
FROM parsed
FROM unpivoted
WHERE opening_time IS NOT NULL
AND closing_time IS NOT NULL

View File

@@ -16,7 +16,8 @@ WITH raw AS (
SELECT UNNEST(tenants) AS tenant
FROM read_json(
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
format = 'auto'
format = 'auto',
maximum_object_size = 134217728
)
),
unnested AS (

View File

@@ -56,7 +56,8 @@ WITH parsed AS (
FROM read_json(
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
format = 'auto',
filename = true
filename = true,
maximum_object_size = 134217728
)
)
WHERE (tenant ->> 'tenant_id') IS NOT NULL