Compare commits
7 Commits
v202603051
...
v202603051
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8205744444 | ||
|
|
1cbefe349c | ||
|
|
003f19e071 | ||
|
|
c3f15535b8 | ||
|
|
fcb8ec4227 | ||
|
|
6b7fa45bce | ||
|
|
0d8687859d |
@@ -3,6 +3,7 @@ APP_NAME=ENC[AES256_GCM,data:ldJf4P0iD9ziMVg=,iv:hiVl2whhd02yZCafzBfbxX5/EU/suvz
|
|||||||
SECRET_KEY=ENC[AES256_GCM,data:hmlXm7NKVVFmeea4DnlrH/oSnsoaMAkUz42oWwFXOXL1XwAh3iemIKHUQOV2G4SPlmjfmEVQD64xbxaJW0OcPQ/8KqhrRYDsy0F/u0h7nmNQdwJrcvzcmbvjgcwU5IITPIr23d/W5PeSJzxhB93uaJ0+zFN2CyHfeewrJKafPfw=,iv:e+ZSLUO+dlt+ET8r/0/pf74UtGIBMkaVoJMWlJn1W5U=,tag:LdDCCrHcJnKLkKL/cY/R/Q==,type:str]
|
SECRET_KEY=ENC[AES256_GCM,data:hmlXm7NKVVFmeea4DnlrH/oSnsoaMAkUz42oWwFXOXL1XwAh3iemIKHUQOV2G4SPlmjfmEVQD64xbxaJW0OcPQ/8KqhrRYDsy0F/u0h7nmNQdwJrcvzcmbvjgcwU5IITPIr23d/W5PeSJzxhB93uaJ0+zFN2CyHfeewrJKafPfw=,iv:e+ZSLUO+dlt+ET8r/0/pf74UtGIBMkaVoJMWlJn1W5U=,tag:LdDCCrHcJnKLkKL/cY/R/Q==,type:str]
|
||||||
BASE_URL=ENC[AES256_GCM,data:50k/RqlZ1EHqGM4UkSmTaCsuJgyU4w==,iv:f8zKr2jkts4RsawA97hzICHwj9Quzgp+Dw8AhQ7GSWA=,tag:9KhNvwmoOtDyuIql7okeew==,type:str]
|
BASE_URL=ENC[AES256_GCM,data:50k/RqlZ1EHqGM4UkSmTaCsuJgyU4w==,iv:f8zKr2jkts4RsawA97hzICHwj9Quzgp+Dw8AhQ7GSWA=,tag:9KhNvwmoOtDyuIql7okeew==,type:str]
|
||||||
DEBUG=ENC[AES256_GCM,data:O0/uRF4=,iv:cZ+vyUuXjQOYYRf4l8lWS3JIWqL/w3pnlCTDPAZpB1E=,tag:OmJE9oJpzYzth0xwaMqADQ==,type:str]
|
DEBUG=ENC[AES256_GCM,data:O0/uRF4=,iv:cZ+vyUuXjQOYYRf4l8lWS3JIWqL/w3pnlCTDPAZpB1E=,tag:OmJE9oJpzYzth0xwaMqADQ==,type:str]
|
||||||
|
LANDING_DIR=ENC[AES256_GCM,data:rn8u+tGob0vU7kSAtxmrpYQlneesvyO10A==,iv:PuGtdcQBdRbnybulzd6L7JVQClcK3/QjMeYFXZSxGW0=,tag:K2PJPMCWXdqTlQpwP9+DOQ==,type:str]
|
||||||
#ENC[AES256_GCM,data:xmJc6WTb3yumHzvLeA==,iv:9jKuYaDgm4zR/DTswIMwsajV0s5UTe+AOX4Sue0GPCs=,tag:b/7H9js1HmFYjuQE4zJz8w==,type:comment]
|
#ENC[AES256_GCM,data:xmJc6WTb3yumHzvLeA==,iv:9jKuYaDgm4zR/DTswIMwsajV0s5UTe+AOX4Sue0GPCs=,tag:b/7H9js1HmFYjuQE4zJz8w==,type:comment]
|
||||||
ADMIN_EMAILS=ENC[AES256_GCM,data:R/2YTk8KDEpNQ71RN8Fm6miLZvXNJQ==,iv:kzmiaBK7KvnSjR5gx6lp7zEMzs5xRul6LBhmLf48bCU=,tag:csVZ0W1TxBAoJacQurW9VQ==,type:str]
|
ADMIN_EMAILS=ENC[AES256_GCM,data:R/2YTk8KDEpNQ71RN8Fm6miLZvXNJQ==,iv:kzmiaBK7KvnSjR5gx6lp7zEMzs5xRul6LBhmLf48bCU=,tag:csVZ0W1TxBAoJacQurW9VQ==,type:str]
|
||||||
#ENC[AES256_GCM,data:S7Pdg9tcom3N,iv:OjmYk3pqbZHKPS1Y06w1y8BE7CU0y6Vx2wnio9tEhus=,tag:YAOGbrHQ+UOcdSQFWdiCDA==,type:comment]
|
#ENC[AES256_GCM,data:S7Pdg9tcom3N,iv:OjmYk3pqbZHKPS1Y06w1y8BE7CU0y6Vx2wnio9tEhus=,tag:YAOGbrHQ+UOcdSQFWdiCDA==,type:comment]
|
||||||
@@ -63,7 +64,7 @@ sops_age__list_1__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb2
|
|||||||
sops_age__list_1__map_recipient=age1wjepykv3glvsrtegu25tevg7vyn3ngpl607u3yjc9ucay04s045s796msw
|
sops_age__list_1__map_recipient=age1wjepykv3glvsrtegu25tevg7vyn3ngpl607u3yjc9ucay04s045s796msw
|
||||||
sops_age__list_2__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBFeHhaOURNZnRVMEwxNThu\nUjF4Q0kwUXhTUE1QSzZJbmpubnh3RnpQTmdvCjRmWWxpNkxFUmVGb3NRbnlydW5O\nWEg3ZXJQTU4vcndzS2pUQXY3Q0ttYjAKLS0tIE9IRFJ1c2ZxbGVHa2xTL0swbGN1\nTzgwMThPUDRFTWhuZHJjZUYxOTZrU00KY62qrNBCUQYxwcLMXFEnLkwncxq3BPJB\nKm4NzeHBU87XmPWVrgrKuf+PH1mxJlBsl7Hev8xBTy7l6feiZjLIvQ==\n-----END AGE ENCRYPTED FILE-----\n
|
sops_age__list_2__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBFeHhaOURNZnRVMEwxNThu\nUjF4Q0kwUXhTUE1QSzZJbmpubnh3RnpQTmdvCjRmWWxpNkxFUmVGb3NRbnlydW5O\nWEg3ZXJQTU4vcndzS2pUQXY3Q0ttYjAKLS0tIE9IRFJ1c2ZxbGVHa2xTL0swbGN1\nTzgwMThPUDRFTWhuZHJjZUYxOTZrU00KY62qrNBCUQYxwcLMXFEnLkwncxq3BPJB\nKm4NzeHBU87XmPWVrgrKuf+PH1mxJlBsl7Hev8xBTy7l6feiZjLIvQ==\n-----END AGE ENCRYPTED FILE-----\n
|
||||||
sops_age__list_2__map_recipient=age1c783ym2q5x9tv7py5d28uc4k44aguudjn03g97l9nzs00dd9tsrqum8h4d
|
sops_age__list_2__map_recipient=age1c783ym2q5x9tv7py5d28uc4k44aguudjn03g97l9nzs00dd9tsrqum8h4d
|
||||||
sops_lastmodified=2026-03-01T20:26:09Z
|
sops_lastmodified=2026-03-05T15:55:19Z
|
||||||
sops_mac=ENC[AES256_GCM,data:IxzU6VehA0iHgpIEqDSoMywKyKONI6jSr/6Amo+g3JI72awJtk6ft0ppfDWZjeHhL0ixfnvgqMNwai+1e0V/U8hSP8/FqYKEVpAO0UGJfBPKP3pbw+tx3WJQMF5dIh2/UVNrKvoACZq0IDJfXlVqalCnRMQEHGtKVTIT3fn8m6c=,iv:0w0ohOBsqTzuoQdtt6AI5ZdHEKw9+hI73tycBjDSS0o=,tag:Guw7LweA4m4Nw+3kSuZKWA==,type:str]
|
sops_mac=ENC[AES256_GCM,data:orLypjurBTYmk3um0bDQV3wFxj1pjCsjOf2D+AZyoIYY88MeY8BjK8mg8BWhmJYlGWqHH1FCpoJS+2SECv2Bvgejqvx/C/HSysA8et5CArM/p/MBbcupLAKOD8bTXorKMRDYPkWpK/snkPToxIZZd7dNj/zSU+OhRp5qLGCHkvM=,iv:eBn93z4DSk8UPHgP/Jf/Kz+3KwoKIQ9Et72pbLFcLP8=,tag:79kzPIKp0rtHGhH1CkXqwg==,type:str]
|
||||||
sops_unencrypted_suffix=_unencrypted
|
sops_unencrypted_suffix=_unencrypted
|
||||||
sops_version=3.12.1
|
sops_version=3.12.1
|
||||||
|
|||||||
@@ -6,7 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- **Pipeline diagnostic script** (`scripts/check_pipeline.py`) — handle DuckDB catalog naming quirk where `lakehouse.duckdb` uses catalog `lakehouse` instead of `local`, causing SQLMesh logical views to break. Script now auto-detects the catalog via `USE`, and falls back to querying physical tables (`sqlmesh__<schema>.<table>__<hash>`) when views fail.
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
- **Pipeline diagnostic script** (`scripts/check_pipeline.py`) — read-only script that reports row counts at every layer of the pricing pipeline (staging → foundation → serving), date range analysis, HAVING filter impact, and join coverage. Run on prod to diagnose empty serving tables.
|
||||||
|
- **Extraction card descriptions** — each workflow card on the admin pipeline page now shows a one-line description explaining what the data source is (e.g. "EU geographic boundaries (NUTS2 polygons) from Eurostat GISCO"). Descriptions defined in `workflows.toml`.
|
||||||
|
- **Running state indicator** — extraction cards show a spinner + "Running" label with a blue-tinted border when an extraction is actively running, replacing the plain Run button. Cards also display the start time with "running..." text.
|
||||||
|
|
||||||
- **Interactive Leaflet maps** — geographic visualization across 4 key placements using self-hosted Leaflet 1.9.4 (GDPR-safe, no CDN):
|
- **Interactive Leaflet maps** — geographic visualization across 4 key placements using self-hosted Leaflet 1.9.4 (GDPR-safe, no CDN):
|
||||||
- **Markets hub** (`/markets`): country bubble map with circles sized by total venues, colored by avg market score (green ≥ 60, amber 30-60, red < 30). Click navigates to country overview.
|
- **Markets hub** (`/markets`): country bubble map with circles sized by total venues, colored by avg market score (green ≥ 60, amber 30-60, red < 30). Click navigates to country overview.
|
||||||
- **Country overview articles**: city bubble map loads after article render, auto-fits bounds, click navigates to city page. Bubbles colored by market score.
|
- **Country overview articles**: city bubble map loads after article render, auto-fits bounds, click navigates to city page. Bubbles colored by market score.
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ WORKDIR /app
|
|||||||
RUN mkdir -p /app/data && chown -R appuser:appuser /app
|
RUN mkdir -p /app/data && chown -R appuser:appuser /app
|
||||||
COPY --from=build --chown=appuser:appuser /app .
|
COPY --from=build --chown=appuser:appuser /app .
|
||||||
COPY --from=css-build /app/web/src/padelnomics/static/css/output.css ./web/src/padelnomics/static/css/output.css
|
COPY --from=css-build /app/web/src/padelnomics/static/css/output.css ./web/src/padelnomics/static/css/output.css
|
||||||
|
COPY --chown=appuser:appuser infra/supervisor/workflows.toml ./infra/supervisor/workflows.toml
|
||||||
USER appuser
|
USER appuser
|
||||||
ENV PYTHONUNBUFFERED=1
|
ENV PYTHONUNBUFFERED=1
|
||||||
ENV DATABASE_PATH=/app/data/app.db
|
ENV DATABASE_PATH=/app/data/app.db
|
||||||
|
|||||||
@@ -8,54 +8,67 @@
|
|||||||
# entry — optional: function name if not "main" (default: "main")
|
# entry — optional: function name if not "main" (default: "main")
|
||||||
# depends_on — optional: list of workflow names that must run first
|
# depends_on — optional: list of workflow names that must run first
|
||||||
# proxy_mode — optional: "round-robin" (default) or "sticky"
|
# proxy_mode — optional: "round-robin" (default) or "sticky"
|
||||||
|
# description — optional: human-readable one-liner shown in the admin UI
|
||||||
|
|
||||||
[overpass]
|
[overpass]
|
||||||
module = "padelnomics_extract.overpass"
|
module = "padelnomics_extract.overpass"
|
||||||
schedule = "monthly"
|
schedule = "monthly"
|
||||||
|
description = "Padel court locations from OpenStreetMap via Overpass API"
|
||||||
|
|
||||||
[overpass_tennis]
|
[overpass_tennis]
|
||||||
module = "padelnomics_extract.overpass_tennis"
|
module = "padelnomics_extract.overpass_tennis"
|
||||||
schedule = "monthly"
|
schedule = "monthly"
|
||||||
|
description = "Tennis court locations from OpenStreetMap via Overpass API"
|
||||||
|
|
||||||
[eurostat]
|
[eurostat]
|
||||||
module = "padelnomics_extract.eurostat"
|
module = "padelnomics_extract.eurostat"
|
||||||
schedule = "monthly"
|
schedule = "monthly"
|
||||||
|
description = "City population data from Eurostat Urban Audit"
|
||||||
|
|
||||||
[geonames]
|
[geonames]
|
||||||
module = "padelnomics_extract.geonames"
|
module = "padelnomics_extract.geonames"
|
||||||
schedule = "monthly"
|
schedule = "monthly"
|
||||||
|
description = "Global city/town gazetteer from GeoNames (pop >= 1K)"
|
||||||
|
|
||||||
[playtomic_tenants]
|
[playtomic_tenants]
|
||||||
module = "padelnomics_extract.playtomic_tenants"
|
module = "padelnomics_extract.playtomic_tenants"
|
||||||
schedule = "daily"
|
schedule = "daily"
|
||||||
|
description = "Padel venue directory from Playtomic (names, locations, courts)"
|
||||||
|
|
||||||
[playtomic_availability]
|
[playtomic_availability]
|
||||||
module = "padelnomics_extract.playtomic_availability"
|
module = "padelnomics_extract.playtomic_availability"
|
||||||
schedule = "daily"
|
schedule = "daily"
|
||||||
depends_on = ["playtomic_tenants"]
|
depends_on = ["playtomic_tenants"]
|
||||||
|
description = "Morning availability snapshots — slot-level pricing per venue"
|
||||||
|
|
||||||
[playtomic_recheck]
|
[playtomic_recheck]
|
||||||
module = "padelnomics_extract.playtomic_availability"
|
module = "padelnomics_extract.playtomic_availability"
|
||||||
entry = "main_recheck"
|
entry = "main_recheck"
|
||||||
schedule = "0,30 6-23 * * *"
|
schedule = "0,30 6-23 * * *"
|
||||||
depends_on = ["playtomic_availability"]
|
depends_on = ["playtomic_availability"]
|
||||||
|
description = "Intraday availability rechecks for occupancy tracking"
|
||||||
|
|
||||||
[census_usa]
|
[census_usa]
|
||||||
module = "padelnomics_extract.census_usa"
|
module = "padelnomics_extract.census_usa"
|
||||||
schedule = "monthly"
|
schedule = "monthly"
|
||||||
|
description = "US city/place population from Census Bureau ACS"
|
||||||
|
|
||||||
[census_usa_income]
|
[census_usa_income]
|
||||||
module = "padelnomics_extract.census_usa_income"
|
module = "padelnomics_extract.census_usa_income"
|
||||||
schedule = "monthly"
|
schedule = "monthly"
|
||||||
|
description = "US county median household income from Census Bureau ACS"
|
||||||
|
|
||||||
[eurostat_city_labels]
|
[eurostat_city_labels]
|
||||||
module = "padelnomics_extract.eurostat_city_labels"
|
module = "padelnomics_extract.eurostat_city_labels"
|
||||||
schedule = "monthly"
|
schedule = "monthly"
|
||||||
|
description = "City code-to-name mapping for Eurostat Urban Audit cities"
|
||||||
|
|
||||||
[ons_uk]
|
[ons_uk]
|
||||||
module = "padelnomics_extract.ons_uk"
|
module = "padelnomics_extract.ons_uk"
|
||||||
schedule = "monthly"
|
schedule = "monthly"
|
||||||
|
description = "UK local authority population estimates from ONS"
|
||||||
|
|
||||||
[gisco]
|
[gisco]
|
||||||
module = "padelnomics_extract.gisco"
|
module = "padelnomics_extract.gisco"
|
||||||
schedule = "monthly"
|
schedule = "monthly"
|
||||||
|
description = "EU geographic boundaries (NUTS2 polygons) from Eurostat GISCO"
|
||||||
|
|||||||
290
scripts/check_pipeline.py
Normal file
290
scripts/check_pipeline.py
Normal file
@@ -0,0 +1,290 @@
|
|||||||
|
"""
|
||||||
|
Diagnostic script: check row counts at every layer of the pricing pipeline.
|
||||||
|
|
||||||
|
Run on prod via SSH:
|
||||||
|
DUCKDB_PATH=/opt/padelnomics/data/lakehouse.duckdb uv run python scripts/check_pipeline.py
|
||||||
|
|
||||||
|
Or locally:
|
||||||
|
DUCKDB_PATH=data/lakehouse.duckdb uv run python scripts/check_pipeline.py
|
||||||
|
|
||||||
|
Read-only — never writes to the database.
|
||||||
|
|
||||||
|
Handles the DuckDB catalog naming quirk: when the file is named lakehouse.duckdb,
|
||||||
|
the catalog is "lakehouse" not "local". SQLMesh views may reference the wrong catalog,
|
||||||
|
so we fall back to querying physical tables (sqlmesh__<schema>.<table>__<hash>).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import duckdb
|
||||||
|
|
||||||
|
DUCKDB_PATH = os.environ.get("DUCKDB_PATH", "data/lakehouse.duckdb")
|
||||||
|
|
||||||
|
PIPELINE_TABLES = [
|
||||||
|
("staging", "stg_playtomic_availability"),
|
||||||
|
("foundation", "fct_availability_slot"),
|
||||||
|
("foundation", "dim_venue_capacity"),
|
||||||
|
("foundation", "fct_daily_availability"),
|
||||||
|
("serving", "venue_pricing_benchmarks"),
|
||||||
|
("serving", "pseo_city_pricing"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _use_catalog(con):
|
||||||
|
"""Detect and USE the database catalog so schema-qualified queries work."""
|
||||||
|
catalogs = [
|
||||||
|
row[0]
|
||||||
|
for row in con.execute(
|
||||||
|
"SELECT catalog_name FROM information_schema.schemata"
|
||||||
|
).fetchall()
|
||||||
|
]
|
||||||
|
# Pick the non-system catalog (not 'system', 'temp', 'memory')
|
||||||
|
user_catalogs = [c for c in set(catalogs) if c not in ("system", "temp", "memory")]
|
||||||
|
if user_catalogs:
|
||||||
|
catalog = user_catalogs[0]
|
||||||
|
con.execute(f"USE {catalog}")
|
||||||
|
return catalog
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _find_physical_table(con, schema, table):
|
||||||
|
"""Find the SQLMesh physical table name for a logical table.
|
||||||
|
|
||||||
|
SQLMesh stores physical tables as:
|
||||||
|
sqlmesh__<schema>.<schema>__<table>__<hash>
|
||||||
|
"""
|
||||||
|
sqlmesh_schema = f"sqlmesh__{schema}"
|
||||||
|
try:
|
||||||
|
rows = con.execute(
|
||||||
|
"SELECT table_schema, table_name "
|
||||||
|
"FROM information_schema.tables "
|
||||||
|
f"WHERE table_schema = '{sqlmesh_schema}' "
|
||||||
|
f"AND table_name LIKE '{schema}__{table}%' "
|
||||||
|
"ORDER BY table_name "
|
||||||
|
"LIMIT 1"
|
||||||
|
).fetchall()
|
||||||
|
if rows:
|
||||||
|
return f"{rows[0][0]}.{rows[0][1]}"
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _query_table(con, schema, table):
|
||||||
|
"""Try logical view first, fall back to physical table. Returns (fqn, count) or (fqn, error_str)."""
|
||||||
|
logical = f"{schema}.{table}"
|
||||||
|
try:
|
||||||
|
(count,) = con.execute(f"SELECT COUNT(*) FROM {logical}").fetchone()
|
||||||
|
return logical, count
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
physical = _find_physical_table(con, schema, table)
|
||||||
|
if physical:
|
||||||
|
try:
|
||||||
|
(count,) = con.execute(f"SELECT COUNT(*) FROM {physical}").fetchone()
|
||||||
|
return f"{physical} (physical)", count
|
||||||
|
except Exception as e:
|
||||||
|
return f"{physical} (physical)", f"ERROR: {e}"
|
||||||
|
|
||||||
|
return logical, "ERROR: view broken, no physical table found"
|
||||||
|
|
||||||
|
|
||||||
|
def _query_sql(con, sql, schema_tables):
|
||||||
|
"""Execute SQL, falling back to rewritten SQL using physical table names if views fail.
|
||||||
|
|
||||||
|
schema_tables: list of (schema, table) tuples used in the SQL, in order of appearance.
|
||||||
|
The SQL must use {schema}.{table} format for these references.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return con.execute(sql)
|
||||||
|
except Exception:
|
||||||
|
# Rewrite SQL to use physical table names
|
||||||
|
rewritten = sql
|
||||||
|
for schema, table in schema_tables:
|
||||||
|
physical = _find_physical_table(con, schema, table)
|
||||||
|
if physical:
|
||||||
|
rewritten = rewritten.replace(f"{schema}.{table}", physical)
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
return con.execute(rewritten)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
if not os.path.exists(DUCKDB_PATH):
|
||||||
|
print(f"ERROR: {DUCKDB_PATH} not found")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
con = duckdb.connect(DUCKDB_PATH, read_only=True)
|
||||||
|
|
||||||
|
print(f"Database: {DUCKDB_PATH}")
|
||||||
|
print(f"DuckDB version: {con.execute('SELECT version()').fetchone()[0]}")
|
||||||
|
|
||||||
|
catalog = _use_catalog(con)
|
||||||
|
if catalog:
|
||||||
|
print(f"Catalog: {catalog}")
|
||||||
|
print()
|
||||||
|
|
||||||
|
# ── Row counts at each layer ──────────────────────────────────────────
|
||||||
|
print("=" * 60)
|
||||||
|
print("PIPELINE ROW COUNTS")
|
||||||
|
print("=" * 60)
|
||||||
|
|
||||||
|
for schema, table in PIPELINE_TABLES:
|
||||||
|
fqn, result = _query_table(con, schema, table)
|
||||||
|
if isinstance(result, int):
|
||||||
|
print(f" {fqn:55s} {result:>10,} rows")
|
||||||
|
else:
|
||||||
|
print(f" {fqn:55s} {result}")
|
||||||
|
|
||||||
|
# ── Date range in fct_daily_availability ──────────────────────────────
|
||||||
|
print()
|
||||||
|
print("=" * 60)
|
||||||
|
print("DATE RANGE: fct_daily_availability")
|
||||||
|
print("=" * 60)
|
||||||
|
|
||||||
|
try:
|
||||||
|
row = _query_sql(
|
||||||
|
con,
|
||||||
|
"""
|
||||||
|
SELECT
|
||||||
|
MIN(snapshot_date) AS min_date,
|
||||||
|
MAX(snapshot_date) AS max_date,
|
||||||
|
COUNT(DISTINCT snapshot_date) AS distinct_days,
|
||||||
|
CURRENT_DATE AS today,
|
||||||
|
CURRENT_DATE - INTERVAL '30 days' AS window_start
|
||||||
|
FROM foundation.fct_daily_availability
|
||||||
|
""",
|
||||||
|
[("foundation", "fct_daily_availability")],
|
||||||
|
).fetchone()
|
||||||
|
if row:
|
||||||
|
min_date, max_date, days, today, window_start = row
|
||||||
|
print(f" Min snapshot_date: {min_date}")
|
||||||
|
print(f" Max snapshot_date: {max_date}")
|
||||||
|
print(f" Distinct days: {days}")
|
||||||
|
print(f" Today: {today}")
|
||||||
|
print(f" 30-day window start: {window_start}")
|
||||||
|
if max_date and str(max_date) < str(window_start):
|
||||||
|
print()
|
||||||
|
print(" *** ALL DATA IS OUTSIDE THE 30-DAY WINDOW ***")
|
||||||
|
print(" This is why venue_pricing_benchmarks is empty.")
|
||||||
|
except Exception as e:
|
||||||
|
print(f" ERROR: {e}")
|
||||||
|
|
||||||
|
# ── HAVING filter impact in venue_pricing_benchmarks ──────────────────
|
||||||
|
print()
|
||||||
|
print("=" * 60)
|
||||||
|
print("HAVING FILTER IMPACT (venue_pricing_benchmarks)")
|
||||||
|
print("=" * 60)
|
||||||
|
|
||||||
|
try:
|
||||||
|
row = _query_sql(
|
||||||
|
con,
|
||||||
|
"""
|
||||||
|
WITH venue_stats AS (
|
||||||
|
SELECT
|
||||||
|
da.tenant_id,
|
||||||
|
da.country_code,
|
||||||
|
da.city,
|
||||||
|
COUNT(DISTINCT da.snapshot_date) AS days_observed
|
||||||
|
FROM foundation.fct_daily_availability da
|
||||||
|
WHERE TRY_CAST(da.snapshot_date AS DATE) >= CURRENT_DATE - INTERVAL '30 days'
|
||||||
|
AND da.occupancy_rate IS NOT NULL
|
||||||
|
AND da.occupancy_rate BETWEEN 0 AND 1.5
|
||||||
|
GROUP BY da.tenant_id, da.country_code, da.city
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
COUNT(*) AS total_venues,
|
||||||
|
COUNT(*) FILTER (WHERE days_observed >= 3) AS venues_passing_having,
|
||||||
|
COUNT(*) FILTER (WHERE days_observed < 3) AS venues_failing_having,
|
||||||
|
MAX(days_observed) AS max_days,
|
||||||
|
MIN(days_observed) AS min_days
|
||||||
|
FROM venue_stats
|
||||||
|
""",
|
||||||
|
[("foundation", "fct_daily_availability")],
|
||||||
|
).fetchone()
|
||||||
|
if row:
|
||||||
|
total, passing, failing, max_d, min_d = row
|
||||||
|
print(f" Venues in 30-day window: {total}")
|
||||||
|
print(f" Venues with >= 3 days (PASSING): {passing}")
|
||||||
|
print(f" Venues with < 3 days (FILTERED): {failing}")
|
||||||
|
print(f" Max days observed: {max_d}")
|
||||||
|
print(f" Min days observed: {min_d}")
|
||||||
|
if total == 0:
|
||||||
|
print()
|
||||||
|
print(" *** NO VENUES IN 30-DAY WINDOW — check fct_daily_availability dates ***")
|
||||||
|
except Exception as e:
|
||||||
|
print(f" ERROR: {e}")
|
||||||
|
|
||||||
|
# ── Occupancy rate distribution ───────────────────────────────────────
|
||||||
|
print()
|
||||||
|
print("=" * 60)
|
||||||
|
print("OCCUPANCY RATE DISTRIBUTION (fct_daily_availability)")
|
||||||
|
print("=" * 60)
|
||||||
|
|
||||||
|
try:
|
||||||
|
rows = _query_sql(
|
||||||
|
con,
|
||||||
|
"""
|
||||||
|
SELECT
|
||||||
|
CASE
|
||||||
|
WHEN occupancy_rate IS NULL THEN 'NULL'
|
||||||
|
WHEN occupancy_rate < 0 THEN '< 0 (invalid)'
|
||||||
|
WHEN occupancy_rate > 1.5 THEN '> 1.5 (filtered)'
|
||||||
|
WHEN occupancy_rate <= 0.25 THEN '0 – 0.25'
|
||||||
|
WHEN occupancy_rate <= 0.50 THEN '0.25 – 0.50'
|
||||||
|
WHEN occupancy_rate <= 0.75 THEN '0.50 – 0.75'
|
||||||
|
ELSE '0.75 – 1.0+'
|
||||||
|
END AS bucket,
|
||||||
|
COUNT(*) AS cnt
|
||||||
|
FROM foundation.fct_daily_availability
|
||||||
|
GROUP BY 1
|
||||||
|
ORDER BY 1
|
||||||
|
""",
|
||||||
|
[("foundation", "fct_daily_availability")],
|
||||||
|
).fetchall()
|
||||||
|
for bucket, cnt in rows:
|
||||||
|
print(f" {bucket:25s} {cnt:>10,}")
|
||||||
|
except Exception as e:
|
||||||
|
print(f" ERROR: {e}")
|
||||||
|
|
||||||
|
# ── dim_venue_capacity join coverage ──────────────────────────────────
|
||||||
|
print()
|
||||||
|
print("=" * 60)
|
||||||
|
print("JOIN COVERAGE: fct_availability_slot → dim_venue_capacity")
|
||||||
|
print("=" * 60)
|
||||||
|
|
||||||
|
try:
|
||||||
|
row = _query_sql(
|
||||||
|
con,
|
||||||
|
"""
|
||||||
|
SELECT
|
||||||
|
COUNT(DISTINCT a.tenant_id) AS slot_tenants,
|
||||||
|
COUNT(DISTINCT c.tenant_id) AS capacity_tenants,
|
||||||
|
COUNT(DISTINCT a.tenant_id) - COUNT(DISTINCT c.tenant_id) AS missing_capacity
|
||||||
|
FROM foundation.fct_availability_slot a
|
||||||
|
LEFT JOIN foundation.dim_venue_capacity c ON a.tenant_id = c.tenant_id
|
||||||
|
""",
|
||||||
|
[
|
||||||
|
("foundation", "fct_availability_slot"),
|
||||||
|
("foundation", "dim_venue_capacity"),
|
||||||
|
],
|
||||||
|
).fetchone()
|
||||||
|
if row:
|
||||||
|
slot_t, cap_t, missing = row
|
||||||
|
print(f" Tenants in fct_availability_slot: {slot_t}")
|
||||||
|
print(f" Tenants with capacity match: {cap_t}")
|
||||||
|
print(f" Tenants missing capacity: {missing}")
|
||||||
|
if missing and missing > 0:
|
||||||
|
print(f" *** {missing} tenants dropped by INNER JOIN to dim_venue_capacity ***")
|
||||||
|
except Exception as e:
|
||||||
|
print(f" ERROR: {e}")
|
||||||
|
|
||||||
|
con.close()
|
||||||
|
print()
|
||||||
|
print("Done.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -540,6 +540,7 @@ def _load_workflows() -> list[dict]:
|
|||||||
"schedule": schedule,
|
"schedule": schedule,
|
||||||
"schedule_label": schedule_label,
|
"schedule_label": schedule_label,
|
||||||
"depends_on": config.get("depends_on", []),
|
"depends_on": config.get("depends_on", []),
|
||||||
|
"description": config.get("description", ""),
|
||||||
})
|
})
|
||||||
return workflows
|
return workflows
|
||||||
|
|
||||||
|
|||||||
@@ -16,8 +16,9 @@
|
|||||||
{% set wf = row.workflow %}
|
{% set wf = row.workflow %}
|
||||||
{% set run = row.run %}
|
{% set run = row.run %}
|
||||||
{% set stale = row.stale %}
|
{% set stale = row.stale %}
|
||||||
<div style="border:1px solid #E2E8F0;border-radius:10px;padding:0.875rem;background:#FAFAFA">
|
{% set is_running = run and run.status == 'running' and not stale %}
|
||||||
<div class="flex items-center gap-2 mb-2">
|
<div style="border:1px solid {% if is_running %}#93C5FD{% else %}#E2E8F0{% endif %};border-radius:10px;padding:0.875rem;background:{% if is_running %}#EFF6FF{% else %}#FAFAFA{% endif %}">
|
||||||
|
<div class="flex items-center gap-2 mb-1">
|
||||||
{% if not run %}
|
{% if not run %}
|
||||||
<span class="status-dot pending"></span>
|
<span class="status-dot pending"></span>
|
||||||
{% elif stale %}
|
{% elif stale %}
|
||||||
@@ -33,6 +34,15 @@
|
|||||||
{% if stale %}
|
{% if stale %}
|
||||||
<span class="badge-warning" style="font-size:10px;padding:1px 6px;margin-left:auto">stale</span>
|
<span class="badge-warning" style="font-size:10px;padding:1px 6px;margin-left:auto">stale</span>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
{% if is_running %}
|
||||||
|
<span class="btn btn-sm ml-auto"
|
||||||
|
style="padding:2px 8px;font-size:11px;opacity:0.6;cursor:default;pointer-events:none">
|
||||||
|
<svg class="spinner-icon" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3">
|
||||||
|
<path d="M12 2a10 10 0 0 1 10 10" stroke-linecap="round"/>
|
||||||
|
</svg>
|
||||||
|
Running
|
||||||
|
</span>
|
||||||
|
{% else %}
|
||||||
<button type="button"
|
<button type="button"
|
||||||
class="btn btn-sm ml-auto"
|
class="btn btn-sm ml-auto"
|
||||||
style="padding:2px 8px;font-size:11px"
|
style="padding:2px 8px;font-size:11px"
|
||||||
@@ -41,9 +51,17 @@
|
|||||||
hx-swap="outerHTML"
|
hx-swap="outerHTML"
|
||||||
hx-vals='{"extractor": "{{ wf.name }}", "csrf_token": "{{ csrf_token() }}"}'
|
hx-vals='{"extractor": "{{ wf.name }}", "csrf_token": "{{ csrf_token() }}"}'
|
||||||
hx-confirm="Run {{ wf.name }} extractor?">Run</button>
|
hx-confirm="Run {{ wf.name }} extractor?">Run</button>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
{% if wf.description %}
|
||||||
|
<p class="text-xs text-slate" style="margin-top:2px;margin-bottom:4px">{{ wf.description }}</p>
|
||||||
|
{% endif %}
|
||||||
<p class="text-xs text-slate">{{ wf.schedule_label }}</p>
|
<p class="text-xs text-slate">{{ wf.schedule_label }}</p>
|
||||||
{% if run %}
|
{% if is_running %}
|
||||||
|
<p class="text-xs mt-1" style="color:#2563EB">
|
||||||
|
Started {{ run.started_at[:16].replace('T', ' ') if run.started_at else '—' }} — running...
|
||||||
|
</p>
|
||||||
|
{% elif run %}
|
||||||
<p class="text-xs mono text-slate-dark mt-1">{{ run.started_at[:16].replace('T', ' ') if run.started_at else '—' }}</p>
|
<p class="text-xs mono text-slate-dark mt-1">{{ run.started_at[:16].replace('T', ' ') if run.started_at else '—' }}</p>
|
||||||
{% if run.status == 'failed' and run.error_message %}
|
{% if run.status == 'failed' and run.error_message %}
|
||||||
<p class="text-xs text-danger mt-1" style="font-family:monospace;word-break:break-all">
|
<p class="text-xs text-danger mt-1" style="font-family:monospace;word-break:break-all">
|
||||||
|
|||||||
Reference in New Issue
Block a user