Supervisor (replaces supervisor.sh): - supervisor.py — cron-based pipeline orchestration, reads workflows.toml on every tick, runs due extractors in topological waves with parallel execution, then SQLMesh transform + serving export - workflows.toml — workflow registry: overpass (monthly), eurostat (monthly), playtomic_tenants (weekly), playtomic_availability (daily), playtomic_recheck (hourly 6–23) - padelnomics-supervisor.service — updated ExecStart to Python supervisor Extraction enhancements: - proxy.py — optional round-robin/sticky proxy rotation via PROXY_URLS env - playtomic_availability.py — parallel fetch (EXTRACT_WORKERS), recheck mode (main_recheck) re-queries imminent slots for accurate occupancy measurement - _shared.py — realistic browser User-Agent on all extractor sessions - stg_playtomic_availability.sql — reads morning + recheck snapshots, tags each - fct_daily_availability.sql — prefers recheck over morning for same slot Feature flags (replaces WAITLIST_MODE env var): - migration 0019 — feature_flags table, 5 initial flags: markets (on), payments/planner_export/supplier_signup/lead_unlock (off) - core.py — is_flag_enabled() + feature_gate() decorator - routes — payments, markets, planner_export, supplier_signup, lead_unlock gated - admin flags UI — /admin/flags toggle page + nav link - app.py — flag() injected as Jinja2 global Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
110 lines
4.1 KiB
SQL
110 lines
4.1 KiB
SQL
-- Daily venue-level availability, pricing, occupancy, and revenue estimates.
|
||
-- Aggregates slot-level data from stg_playtomic_availability into per-venue
|
||
-- per-day statistics, then calculates occupancy by comparing available hours
|
||
-- against total capacity from fct_venue_capacity.
|
||
--
|
||
-- Recheck-aware occupancy: for each (tenant, resource, slot_start_time),
|
||
-- prefer the latest snapshot (recheck > morning). A slot present in morning
|
||
-- but absent in the recheck = booked between snapshots → more accurate occupancy.
|
||
--
|
||
-- Occupancy = 1 - (available_court_hours / capacity_court_hours_per_day)
|
||
-- Revenue estimate = booked_court_hours × avg_price_of_available_slots
|
||
--
|
||
-- Peak hours defined as 17:00–21:00 (captures main evening rush across markets).
|
||
|
||
MODEL (
|
||
name foundation.fct_daily_availability,
|
||
kind FULL,
|
||
cron '@daily',
|
||
grain (snapshot_date, tenant_id)
|
||
);
|
||
|
||
-- Prefer the latest snapshot for each slot:
|
||
-- If a recheck exists for a (date, tenant, resource, start_time), use it.
|
||
-- Otherwise fall back to the morning snapshot.
|
||
WITH ranked_slots AS (
|
||
SELECT
|
||
a.*,
|
||
ROW_NUMBER() OVER (
|
||
PARTITION BY a.snapshot_date, a.tenant_id, a.resource_id, a.slot_start_time
|
||
ORDER BY
|
||
CASE WHEN a.snapshot_type = 'recheck' THEN 1 ELSE 2 END,
|
||
a.captured_at_utc DESC
|
||
) AS rn
|
||
FROM staging.stg_playtomic_availability a
|
||
WHERE a.price_amount IS NOT NULL
|
||
AND a.price_amount > 0
|
||
),
|
||
latest_slots AS (
|
||
SELECT * FROM ranked_slots WHERE rn = 1
|
||
),
|
||
slot_agg AS (
|
||
SELECT
|
||
a.snapshot_date,
|
||
a.tenant_id,
|
||
COUNT(*) AS available_slot_count,
|
||
COUNT(DISTINCT a.resource_id) AS courts_with_availability,
|
||
-- Each available start_time represents a 60-min bookable window
|
||
ROUND(COUNT(*) * 1.0, 2) AS available_court_hours,
|
||
-- Pricing stats (60-min slots only)
|
||
ROUND(MEDIAN(a.price_amount), 2) AS median_price,
|
||
ROUND(AVG(a.price_amount), 2) AS avg_price,
|
||
MIN(a.price_amount) AS min_price,
|
||
MAX(a.price_amount) AS max_price,
|
||
-- Peak: 17:00–21:00
|
||
ROUND(MEDIAN(a.price_amount) FILTER (
|
||
WHERE a.slot_start_time::TIME >= '17:00:00'
|
||
AND a.slot_start_time::TIME < '21:00:00'
|
||
), 2) AS median_price_peak,
|
||
-- Off-peak: everything outside 17:00–21:00
|
||
ROUND(MEDIAN(a.price_amount) FILTER (
|
||
WHERE a.slot_start_time::TIME < '17:00:00'
|
||
OR a.slot_start_time::TIME >= '21:00:00'
|
||
), 2) AS median_price_offpeak,
|
||
MAX(a.price_currency) AS price_currency,
|
||
MAX(a.captured_at_utc) AS captured_at_utc
|
||
FROM latest_slots a
|
||
GROUP BY a.snapshot_date, a.tenant_id
|
||
)
|
||
SELECT
|
||
sa.snapshot_date,
|
||
sa.tenant_id,
|
||
cap.country_code,
|
||
cap.city,
|
||
cap.active_court_count,
|
||
cap.capacity_court_hours_per_day,
|
||
sa.available_slot_count,
|
||
sa.courts_with_availability,
|
||
sa.available_court_hours,
|
||
-- Occupancy: (capacity - available) / capacity
|
||
CASE
|
||
WHEN cap.capacity_court_hours_per_day > 0
|
||
THEN ROUND(
|
||
1.0 - (sa.available_court_hours / cap.capacity_court_hours_per_day),
|
||
4
|
||
)
|
||
ELSE NULL
|
||
END AS occupancy_rate,
|
||
-- Estimated booked court-hours
|
||
ROUND(
|
||
GREATEST(cap.capacity_court_hours_per_day - sa.available_court_hours, 0),
|
||
2
|
||
) AS booked_court_hours,
|
||
-- Estimated daily revenue: booked hours × avg price
|
||
ROUND(
|
||
GREATEST(cap.capacity_court_hours_per_day - sa.available_court_hours, 0)
|
||
* sa.avg_price,
|
||
2
|
||
) AS estimated_revenue_eur,
|
||
-- Pricing
|
||
sa.median_price,
|
||
sa.avg_price,
|
||
sa.min_price,
|
||
sa.max_price,
|
||
sa.median_price_peak,
|
||
sa.median_price_offpeak,
|
||
sa.price_currency,
|
||
sa.captured_at_utc
|
||
FROM slot_agg sa
|
||
JOIN foundation.fct_venue_capacity cap ON sa.tenant_id = cap.tenant_id
|