feat: Python supervisor + feature flags

Supervisor (replaces supervisor.sh):
- supervisor.py — cron-based pipeline orchestration, reads workflows.toml
  on every tick, runs due extractors in topological waves with parallel
  execution, then SQLMesh transform + serving export
- workflows.toml — workflow registry: overpass (monthly), eurostat (monthly),
  playtomic_tenants (weekly), playtomic_availability (daily),
  playtomic_recheck (hourly 6–23)
- padelnomics-supervisor.service — updated ExecStart to Python supervisor

Extraction enhancements:
- proxy.py — optional round-robin/sticky proxy rotation via PROXY_URLS env
- playtomic_availability.py — parallel fetch (EXTRACT_WORKERS), recheck mode
  (main_recheck) re-queries imminent slots for accurate occupancy measurement
- _shared.py — realistic browser User-Agent on all extractor sessions
- stg_playtomic_availability.sql — reads morning + recheck snapshots, tags each
- fct_daily_availability.sql — prefers recheck over morning for same slot

Feature flags (replaces WAITLIST_MODE env var):
- migration 0019 — feature_flags table, 5 initial flags:
  markets (on), payments/planner_export/supplier_signup/lead_unlock (off)
- core.py — is_flag_enabled() + feature_gate() decorator
- routes — payments, markets, planner_export, supplier_signup, lead_unlock gated
- admin flags UI — /admin/flags toggle page + nav link
- app.py — flag() injected as Jinja2 global

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-23 13:53:45 +01:00
parent 488e47b4b4
commit a1faddbed6
22 changed files with 1133 additions and 134 deletions

View File

@@ -3,6 +3,10 @@
-- per-day statistics, then calculates occupancy by comparing available hours
-- against total capacity from fct_venue_capacity.
--
-- Recheck-aware occupancy: for each (tenant, resource, slot_start_time),
-- prefer a recheck snapshot over the morning one (ties broken by the most
-- recent captured_at_utc). A slot present in the morning snapshot but absent
-- from the recheck was booked between snapshots → more accurate occupancy.
--
-- Occupancy = 1 - (available_court_hours / capacity_court_hours_per_day)
-- Revenue estimate = booked_court_hours × avg_price_of_available_slots
--
@@ -15,14 +19,31 @@ MODEL (
grain (snapshot_date, tenant_id)
);
WITH slot_agg AS (
-- Prefer the latest snapshot for each slot:
-- If a recheck exists for a (date, tenant, resource, start_time), use it.
-- Otherwise fall back to the morning snapshot.
WITH ranked_slots AS (
SELECT
a.*,
ROW_NUMBER() OVER (
PARTITION BY a.snapshot_date, a.tenant_id, a.resource_id, a.slot_start_time
ORDER BY
CASE WHEN a.snapshot_type = 'recheck' THEN 1 ELSE 2 END,
a.captured_at_utc DESC
) AS rn
FROM staging.stg_playtomic_availability a
WHERE a.price_amount IS NOT NULL
AND a.price_amount > 0
),
latest_slots AS (
SELECT * FROM ranked_slots WHERE rn = 1
),
slot_agg AS (
SELECT
a.snapshot_date,
a.tenant_id,
-- Slot counts: each row is one 60-min available slot on one court
COUNT(*) AS available_slot_count,
COUNT(DISTINCT a.resource_id) AS courts_with_availability,
-- Available (unbooked) court-hours: each available start_time is one 60-min
-- bookable window, so COUNT(*) equals available court-hours.
-- NOTE(review): start_times arrive on 30-min increments, so adjacent 60-min
-- windows overlap — confirm fct_venue_capacity counts capacity with the same
-- convention, otherwise occupancy is biased.
ROUND(COUNT(*) * 1.0, 2) AS available_court_hours,
-- Pricing stats (60-min slots only)
@@ -42,9 +63,7 @@ WITH slot_agg AS (
), 2) AS median_price_offpeak,
MAX(a.price_currency) AS price_currency,
MAX(a.captured_at_utc) AS captured_at_utc
FROM staging.stg_playtomic_availability a
WHERE a.price_amount IS NOT NULL
AND a.price_amount > 0
FROM latest_slots a
GROUP BY a.snapshot_date, a.tenant_id
)
SELECT

View File

@@ -2,28 +2,30 @@
-- One row per available 60-minute booking slot per court per venue per day.
-- "Available" = the slot was NOT booked at capture time. Missing slots = booked.
--
-- Only 60-min duration slots are kept (canonical hourly rate + occupancy unit).
-- The API returns 60/90/120-min variants per start_time — filtering to 60 avoids
-- double-counting the same time window.
-- Reads BOTH morning snapshots and recheck files:
-- Morning: availability_{date}.json.gz → snapshot_type = 'morning'
-- Recheck: availability_{date}_recheck_{HH}.json.gz → snapshot_type = 'recheck'
--
-- Only 60-min duration slots are kept (canonical hourly rate + occupancy unit).
-- Price parsed from strings like "14.56 EUR" or "48 GBP".
--
-- Requires: at least one availability file in the landing zone.
-- A seed file (data/landing/playtomic/1970/01/availability_1970-01-01.json.gz)
-- with empty venues[] ensures this model runs before real data arrives.
--
-- Source: data/landing/playtomic/{year}/{month}/availability_{date}.json.gz
-- Format: {date, captured_at_utc, venues: [{tenant_id, slots: [{resource_id, start_date, slots: [...]}]}]}
MODEL (
name staging.stg_playtomic_availability,
kind FULL,
cron '@daily',
grain (snapshot_date, tenant_id, resource_id, slot_start_time)
grain (snapshot_date, tenant_id, resource_id, slot_start_time, snapshot_type, captured_at_utc)
);
WITH raw_files AS (
SELECT *
-- Morning snapshots (filename does NOT contain '_recheck_')
WITH morning_files AS (
SELECT
*,
'morning' AS snapshot_type,
NULL::INTEGER AS recheck_hour
FROM read_json(
@LANDING_DIR || '/playtomic/*/*/availability_*.json.gz',
format = 'auto',
@@ -31,24 +33,57 @@ WITH raw_files AS (
date: 'VARCHAR',
captured_at_utc: 'VARCHAR',
venues: 'JSON[]'
}
},
filename = true
)
WHERE filename NOT LIKE '%_recheck_%'
AND venues IS NOT NULL
AND json_array_length(venues) > 0
),
-- Recheck snapshots (filename contains '_recheck_').
-- recheck_hour is TRY_CAST from the hour digits regex-extracted from the filename.
-- NOTE(review): DuckDB's read_json raises an error when a glob matches zero
-- files — the morning path relies on a seed file for exactly this reason.
-- Verify a placeholder recheck file exists before real recheck data arrives;
-- otherwise the "produces zero rows (safe)" claim does not hold.
recheck_files AS (
SELECT
*,
'recheck' AS snapshot_type,
TRY_CAST(
regexp_extract(filename, '_recheck_(\d+)', 1) AS INTEGER
) AS recheck_hour
FROM read_json(
@LANDING_DIR || '/playtomic/*/*/availability_*_recheck_*.json.gz',
format = 'auto',
columns = {
date: 'VARCHAR',
captured_at_utc: 'VARCHAR',
venues: 'JSON[]'
},
filename = true
)
WHERE venues IS NOT NULL
AND json_array_length(venues) > 0
),
all_files AS (
SELECT date, captured_at_utc, venues, snapshot_type, recheck_hour FROM morning_files
UNION ALL
SELECT date, captured_at_utc, venues, snapshot_type, recheck_hour FROM recheck_files
),
raw_venues AS (
SELECT
rf.date AS snapshot_date,
rf.captured_at_utc,
af.date AS snapshot_date,
af.captured_at_utc,
af.snapshot_type,
af.recheck_hour,
venue_json
FROM raw_files rf,
LATERAL UNNEST(rf.venues) AS t(venue_json)
FROM all_files af,
LATERAL UNNEST(af.venues) AS t(venue_json)
),
-- Each venue has: {tenant_id, slots: [{resource_id, start_date, slots: [...]}]}
raw_resources AS (
SELECT
rv.snapshot_date,
rv.captured_at_utc,
rv.snapshot_type,
rv.recheck_hour,
rv.venue_json ->> 'tenant_id' AS tenant_id,
resource_json
FROM raw_venues rv,
@@ -56,11 +91,12 @@ raw_resources AS (
from_json(rv.venue_json -> 'slots', '["JSON"]')
) AS t(resource_json)
),
-- Each resource has: {resource_id, start_date, slots: [{start_time, duration, price}]}
raw_slots AS (
SELECT
rr.snapshot_date,
rr.captured_at_utc,
rr.snapshot_type,
rr.recheck_hour,
rr.tenant_id,
rr.resource_json ->> 'resource_id' AS resource_id,
slot_json
@@ -75,12 +111,12 @@ SELECT
resource_id,
slot_json ->> 'start_time' AS slot_start_time,
TRY_CAST(slot_json ->> 'duration' AS INTEGER) AS duration_minutes,
-- Parse "14.56 EUR" → 14.56
TRY_CAST(
SPLIT_PART(slot_json ->> 'price', ' ', 1) AS DOUBLE
) AS price_amount,
-- Parse "14.56 EUR" → EUR
SPLIT_PART(slot_json ->> 'price', ' ', 2) AS price_currency,
snapshot_type,
recheck_hour,
captured_at_utc
FROM raw_slots
WHERE resource_id IS NOT NULL