feat: Python supervisor + feature flags
Supervisor (replaces supervisor.sh): - supervisor.py — cron-based pipeline orchestration, reads workflows.toml on every tick, runs due extractors in topological waves with parallel execution, then SQLMesh transform + serving export - workflows.toml — workflow registry: overpass (monthly), eurostat (monthly), playtomic_tenants (weekly), playtomic_availability (daily), playtomic_recheck (hourly 6–23) - padelnomics-supervisor.service — updated ExecStart to Python supervisor Extraction enhancements: - proxy.py — optional round-robin/sticky proxy rotation via PROXY_URLS env - playtomic_availability.py — parallel fetch (EXTRACT_WORKERS), recheck mode (main_recheck) re-queries imminent slots for accurate occupancy measurement - _shared.py — realistic browser User-Agent on all extractor sessions - stg_playtomic_availability.sql — reads morning + recheck snapshots, tags each - fct_daily_availability.sql — prefers recheck over morning for same slot Feature flags (replaces WAITLIST_MODE env var): - migration 0019 — feature_flags table, 5 initial flags: markets (on), payments/planner_export/supplier_signup/lead_unlock (off) - core.py — is_flag_enabled() + feature_gate() decorator - routes — payments, markets, planner_export, supplier_signup, lead_unlock gated - admin flags UI — /admin/flags toggle page + nav link - app.py — flag() injected as Jinja2 global Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,6 +3,10 @@
|
||||
-- per-day statistics, then calculates occupancy by comparing available hours
|
||||
-- against total capacity from fct_venue_capacity.
|
||||
--
|
||||
-- Recheck-aware occupancy: for each (tenant, resource, slot_start_time),
|
||||
-- prefer the latest snapshot (recheck > morning). A slot present in morning
|
||||
-- but absent in the recheck = booked between snapshots → more accurate occupancy.
|
||||
--
|
||||
-- Occupancy = 1 - (available_court_hours / capacity_court_hours_per_day)
|
||||
-- Revenue estimate = booked_court_hours × avg_price_of_available_slots
|
||||
--
|
||||
@@ -15,14 +19,31 @@ MODEL (
|
||||
grain (snapshot_date, tenant_id)
|
||||
);
|
||||
|
||||
WITH slot_agg AS (
|
||||
-- Prefer the latest snapshot for each slot:
|
||||
-- If a recheck exists for a (date, tenant, resource, start_time), use it.
|
||||
-- Otherwise fall back to the morning snapshot.
|
||||
WITH ranked_slots AS (
|
||||
SELECT
|
||||
a.*,
|
||||
ROW_NUMBER() OVER (
|
||||
PARTITION BY a.snapshot_date, a.tenant_id, a.resource_id, a.slot_start_time
|
||||
ORDER BY
|
||||
CASE WHEN a.snapshot_type = 'recheck' THEN 1 ELSE 2 END,
|
||||
a.captured_at_utc DESC
|
||||
) AS rn
|
||||
FROM staging.stg_playtomic_availability a
|
||||
WHERE a.price_amount IS NOT NULL
|
||||
AND a.price_amount > 0
|
||||
),
|
||||
latest_slots AS (
|
||||
SELECT * FROM ranked_slots WHERE rn = 1
|
||||
),
|
||||
slot_agg AS (
|
||||
SELECT
|
||||
a.snapshot_date,
|
||||
a.tenant_id,
|
||||
-- Slot counts: each row is one 60-min available slot on one court
|
||||
COUNT(*) AS available_slot_count,
|
||||
COUNT(DISTINCT a.resource_id) AS courts_with_availability,
|
||||
-- Available (unbooked) court-hours: slots are on 30-min increments for 60-min bookings
|
||||
-- Each available start_time represents a 60-min bookable window
|
||||
ROUND(COUNT(*) * 1.0, 2) AS available_court_hours,
|
||||
-- Pricing stats (60-min slots only)
|
||||
@@ -42,9 +63,7 @@ WITH slot_agg AS (
|
||||
), 2) AS median_price_offpeak,
|
||||
MAX(a.price_currency) AS price_currency,
|
||||
MAX(a.captured_at_utc) AS captured_at_utc
|
||||
FROM staging.stg_playtomic_availability a
|
||||
WHERE a.price_amount IS NOT NULL
|
||||
AND a.price_amount > 0
|
||||
FROM latest_slots a
|
||||
GROUP BY a.snapshot_date, a.tenant_id
|
||||
)
|
||||
SELECT
|
||||
|
||||
@@ -2,28 +2,30 @@
|
||||
-- One row per available 60-minute booking slot per court per venue per day.
|
||||
-- "Available" = the slot was NOT booked at capture time. Missing slots = booked.
|
||||
--
|
||||
-- Only 60-min duration slots are kept (canonical hourly rate + occupancy unit).
|
||||
-- The API returns 60/90/120-min variants per start_time — filtering to 60 avoids
|
||||
-- double-counting the same time window.
|
||||
-- Reads BOTH morning snapshots and recheck files:
|
||||
-- Morning: availability_{date}.json.gz → snapshot_type = 'morning'
|
||||
-- Recheck: availability_{date}_recheck_{HH}.json.gz → snapshot_type = 'recheck'
|
||||
--
|
||||
-- Only 60-min duration slots are kept (canonical hourly rate + occupancy unit).
|
||||
-- Price parsed from strings like "14.56 EUR" or "48 GBP".
|
||||
--
|
||||
-- Requires: at least one availability file in the landing zone.
|
||||
-- A seed file (data/landing/playtomic/1970/01/availability_1970-01-01.json.gz)
|
||||
-- with empty venues[] ensures this model runs before real data arrives.
|
||||
--
|
||||
-- Source: data/landing/playtomic/{year}/{month}/availability_{date}.json.gz
|
||||
-- Format: {date, captured_at_utc, venues: [{tenant_id, slots: [{resource_id, start_date, slots: [...]}]}]}
|
||||
|
||||
MODEL (
|
||||
name staging.stg_playtomic_availability,
|
||||
kind FULL,
|
||||
cron '@daily',
|
||||
grain (snapshot_date, tenant_id, resource_id, slot_start_time)
|
||||
grain (snapshot_date, tenant_id, resource_id, slot_start_time, snapshot_type, captured_at_utc)
|
||||
);
|
||||
|
||||
WITH raw_files AS (
|
||||
SELECT *
|
||||
-- Morning snapshots (filename does NOT contain '_recheck_')
|
||||
WITH morning_files AS (
|
||||
SELECT
|
||||
*,
|
||||
'morning' AS snapshot_type,
|
||||
NULL::INTEGER AS recheck_hour
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/availability_*.json.gz',
|
||||
format = 'auto',
|
||||
@@ -31,24 +33,57 @@ WITH raw_files AS (
|
||||
date: 'VARCHAR',
|
||||
captured_at_utc: 'VARCHAR',
|
||||
venues: 'JSON[]'
|
||||
}
|
||||
},
|
||||
filename = true
|
||||
)
|
||||
WHERE filename NOT LIKE '%_recheck_%'
|
||||
AND venues IS NOT NULL
|
||||
AND json_array_length(venues) > 0
|
||||
),
|
||||
-- Recheck snapshots (filename contains '_recheck_')
|
||||
-- Use TRY_CAST on a regex-extracted hour to get the recheck_hour.
|
||||
-- If no recheck files exist yet, this CTE produces zero rows (safe).
|
||||
recheck_files AS (
|
||||
SELECT
|
||||
*,
|
||||
'recheck' AS snapshot_type,
|
||||
TRY_CAST(
|
||||
regexp_extract(filename, '_recheck_(\d+)', 1) AS INTEGER
|
||||
) AS recheck_hour
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/playtomic/*/*/availability_*_recheck_*.json.gz',
|
||||
format = 'auto',
|
||||
columns = {
|
||||
date: 'VARCHAR',
|
||||
captured_at_utc: 'VARCHAR',
|
||||
venues: 'JSON[]'
|
||||
},
|
||||
filename = true
|
||||
)
|
||||
WHERE venues IS NOT NULL
|
||||
AND json_array_length(venues) > 0
|
||||
),
|
||||
all_files AS (
|
||||
SELECT date, captured_at_utc, venues, snapshot_type, recheck_hour FROM morning_files
|
||||
UNION ALL
|
||||
SELECT date, captured_at_utc, venues, snapshot_type, recheck_hour FROM recheck_files
|
||||
),
|
||||
raw_venues AS (
|
||||
SELECT
|
||||
rf.date AS snapshot_date,
|
||||
rf.captured_at_utc,
|
||||
af.date AS snapshot_date,
|
||||
af.captured_at_utc,
|
||||
af.snapshot_type,
|
||||
af.recheck_hour,
|
||||
venue_json
|
||||
FROM raw_files rf,
|
||||
LATERAL UNNEST(rf.venues) AS t(venue_json)
|
||||
FROM all_files af,
|
||||
LATERAL UNNEST(af.venues) AS t(venue_json)
|
||||
),
|
||||
-- Each venue has: {tenant_id, slots: [{resource_id, start_date, slots: [...]}]}
|
||||
raw_resources AS (
|
||||
SELECT
|
||||
rv.snapshot_date,
|
||||
rv.captured_at_utc,
|
||||
rv.snapshot_type,
|
||||
rv.recheck_hour,
|
||||
rv.venue_json ->> 'tenant_id' AS tenant_id,
|
||||
resource_json
|
||||
FROM raw_venues rv,
|
||||
@@ -56,11 +91,12 @@ raw_resources AS (
|
||||
from_json(rv.venue_json -> 'slots', '["JSON"]')
|
||||
) AS t(resource_json)
|
||||
),
|
||||
-- Each resource has: {resource_id, start_date, slots: [{start_time, duration, price}]}
|
||||
raw_slots AS (
|
||||
SELECT
|
||||
rr.snapshot_date,
|
||||
rr.captured_at_utc,
|
||||
rr.snapshot_type,
|
||||
rr.recheck_hour,
|
||||
rr.tenant_id,
|
||||
rr.resource_json ->> 'resource_id' AS resource_id,
|
||||
slot_json
|
||||
@@ -75,12 +111,12 @@ SELECT
|
||||
resource_id,
|
||||
slot_json ->> 'start_time' AS slot_start_time,
|
||||
TRY_CAST(slot_json ->> 'duration' AS INTEGER) AS duration_minutes,
|
||||
-- Parse "14.56 EUR" → 14.56
|
||||
TRY_CAST(
|
||||
SPLIT_PART(slot_json ->> 'price', ' ', 1) AS DOUBLE
|
||||
) AS price_amount,
|
||||
-- Parse "14.56 EUR" → EUR
|
||||
SPLIT_PART(slot_json ->> 'price', ' ', 2) AS price_currency,
|
||||
snapshot_type,
|
||||
recheck_hour,
|
||||
captured_at_utc
|
||||
FROM raw_slots
|
||||
WHERE resource_id IS NOT NULL
|
||||
|
||||
Reference in New Issue
Block a user