fix: DuckDB compat issues in Playtomic pipeline + export_serving
- Add maximum_object_size=128MB to read_json for 14K-venue tenants file - Rewrite opening_hours to use UNION ALL unpivot (DuckDB struct dynamic access) - Add seed file guard for availability model (empty result on first run) - Fix snapshot_date VARCHAR→DATE comparison in venue_pricing_benchmarks - Fix export_serving to resolve SQLMesh physical tables from view definitions (SQLMesh views reference "local" catalog unavailable outside its context) - Add pyarrow dependency for Arrow-based cross-connection data transfer Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -26,7 +26,7 @@ WITH venue_stats AS (
|
||||
MAX(da.active_court_count) AS court_count,
|
||||
COUNT(DISTINCT da.snapshot_date) AS days_observed
|
||||
FROM foundation.fct_daily_availability da
|
||||
WHERE da.snapshot_date >= CURRENT_DATE - INTERVAL '30 days'
|
||||
WHERE TRY_CAST(da.snapshot_date AS DATE) >= CURRENT_DATE - INTERVAL '30 days'
|
||||
AND da.occupancy_rate IS NOT NULL
|
||||
AND da.occupancy_rate BETWEEN 0 AND 1.5
|
||||
GROUP BY da.tenant_id, da.country_code, da.city, da.price_currency
|
||||
|
||||
@@ -8,6 +8,10 @@
|
||||
--
|
||||
-- Price parsed from strings like "14.56 EUR" or "48 GBP".
|
||||
--
|
||||
-- Requires: at least one availability file in the landing zone.
|
||||
-- A seed file (data/landing/playtomic/1970/01/availability_1970-01-01.json.gz)
|
||||
-- with empty venues[] ensures this model runs before real data arrives.
|
||||
--
|
||||
-- Source: data/landing/playtomic/{year}/{month}/availability_{date}.json.gz
|
||||
-- Format: {date, captured_at_utc, venues: [{tenant_id, slots: [{resource_id, start_date, slots: [...]}]}]}
|
||||
|
||||
@@ -29,6 +33,8 @@ WITH raw_files AS (
|
||||
venues: 'JSON[]'
|
||||
}
|
||||
)
|
||||
WHERE venues IS NOT NULL
|
||||
AND json_array_length(venues) > 0
|
||||
),
|
||||
raw_venues AS (
|
||||
SELECT
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
-- Venue opening hours by day of week from Playtomic.
|
||||
-- Unpivots the opening_hours JSON object into one row per (tenant_id, day_of_week).
|
||||
-- Unpivots the opening_hours struct into one row per (tenant_id, day_of_week).
|
||||
-- Used downstream to calculate total weekly/daily capacity hours.
|
||||
--
|
||||
-- DuckDB auto-infers opening_hours as STRUCT, so we access each day by literal
|
||||
-- key (no dynamic access) and UNION ALL to unpivot.
|
||||
--
|
||||
-- Source: data/landing/playtomic/{year}/{month}/tenants.json.gz
|
||||
-- Each tenant has opening_hours: {MONDAY: {opening_time, closing_time}, ...}
|
||||
|
||||
@@ -13,30 +16,56 @@ MODEL (
|
||||
);
|
||||
|
||||
WITH venues AS (
|
||||
SELECT UNNEST(tenants) AS tenant
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
|
||||
format = 'auto'
|
||||
)
|
||||
),
|
||||
days AS (
|
||||
SELECT day_name, day_number FROM (VALUES
|
||||
('MONDAY', 1), ('TUESDAY', 2), ('WEDNESDAY', 3), ('THURSDAY', 4),
|
||||
('FRIDAY', 5), ('SATURDAY', 6), ('SUNDAY', 7)
|
||||
) AS t(day_name, day_number)
|
||||
),
|
||||
parsed AS (
|
||||
SELECT
|
||||
tenant ->> 'tenant_id' AS tenant_id,
|
||||
d.day_name AS day_of_week,
|
||||
d.day_number,
|
||||
tenant -> 'opening_hours' -> d.day_name ->> 'opening_time' AS opening_time,
|
||||
tenant -> 'opening_hours' -> d.day_name ->> 'closing_time' AS closing_time
|
||||
FROM venues
|
||||
CROSS JOIN days d
|
||||
tenant ->> 'tenant_id' AS tenant_id,
|
||||
tenant -> 'opening_hours' AS oh
|
||||
FROM (
|
||||
SELECT UNNEST(tenants) AS tenant
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
|
||||
format = 'auto',
|
||||
maximum_object_size = 134217728
|
||||
)
|
||||
)
|
||||
WHERE (tenant ->> 'tenant_id') IS NOT NULL
|
||||
AND (tenant -> 'opening_hours') IS NOT NULL
|
||||
AND (tenant -> 'opening_hours' -> d.day_name) IS NOT NULL
|
||||
),
|
||||
-- Unpivot by UNION ALL — 7 literal key accesses
|
||||
unpivoted AS (
|
||||
SELECT tenant_id, 'MONDAY' AS day_of_week, 1 AS day_number,
|
||||
oh -> 'MONDAY' ->> 'opening_time' AS opening_time,
|
||||
oh -> 'MONDAY' ->> 'closing_time' AS closing_time
|
||||
FROM venues
|
||||
UNION ALL
|
||||
SELECT tenant_id, 'TUESDAY' AS day_of_week, 2,
|
||||
oh -> 'TUESDAY' ->> 'opening_time',
|
||||
oh -> 'TUESDAY' ->> 'closing_time'
|
||||
FROM venues
|
||||
UNION ALL
|
||||
SELECT tenant_id, 'WEDNESDAY' AS day_of_week, 3,
|
||||
oh -> 'WEDNESDAY' ->> 'opening_time',
|
||||
oh -> 'WEDNESDAY' ->> 'closing_time'
|
||||
FROM venues
|
||||
UNION ALL
|
||||
SELECT tenant_id, 'THURSDAY' AS day_of_week, 4,
|
||||
oh -> 'THURSDAY' ->> 'opening_time',
|
||||
oh -> 'THURSDAY' ->> 'closing_time'
|
||||
FROM venues
|
||||
UNION ALL
|
||||
SELECT tenant_id, 'FRIDAY' AS day_of_week, 5,
|
||||
oh -> 'FRIDAY' ->> 'opening_time',
|
||||
oh -> 'FRIDAY' ->> 'closing_time'
|
||||
FROM venues
|
||||
UNION ALL
|
||||
SELECT tenant_id, 'SATURDAY' AS day_of_week, 6,
|
||||
oh -> 'SATURDAY' ->> 'opening_time',
|
||||
oh -> 'SATURDAY' ->> 'closing_time'
|
||||
FROM venues
|
||||
UNION ALL
|
||||
SELECT tenant_id, 'SUNDAY' AS day_of_week, 7,
|
||||
oh -> 'SUNDAY' ->> 'opening_time',
|
||||
oh -> 'SUNDAY' ->> 'closing_time'
|
||||
FROM venues
|
||||
)
|
||||
SELECT
|
||||
tenant_id,
|
||||
@@ -49,6 +78,6 @@ SELECT
|
||||
(EXTRACT(HOUR FROM closing_time::TIME) + EXTRACT(MINUTE FROM closing_time::TIME) / 60.0)
|
||||
- (EXTRACT(HOUR FROM opening_time::TIME) + EXTRACT(MINUTE FROM opening_time::TIME) / 60.0)
|
||||
, 2) AS hours_open
|
||||
FROM parsed
|
||||
FROM unpivoted
|
||||
WHERE opening_time IS NOT NULL
|
||||
AND closing_time IS NOT NULL
|
||||
|
||||
@@ -16,7 +16,8 @@ WITH raw AS (
|
||||
SELECT UNNEST(tenants) AS tenant
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
|
||||
format = 'auto'
|
||||
format = 'auto',
|
||||
maximum_object_size = 134217728
|
||||
)
|
||||
),
|
||||
unnested AS (
|
||||
|
||||
@@ -56,7 +56,8 @@ WITH parsed AS (
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
|
||||
format = 'auto',
|
||||
filename = true
|
||||
filename = true,
|
||||
maximum_object_size = 134217728
|
||||
)
|
||||
)
|
||||
WHERE (tenant ->> 'tenant_id') IS NOT NULL
|
||||
|
||||
Reference in New Issue
Block a user