Compare commits
7 Commits
v202603061...v202603062
| Author | SHA1 | Date | |
|---|---|---|---|
| | fee0d6913b | | |
| | 71e08a5fa6 | | |
| | 27e86db6a1 | | |
| | 90754b8d9f | | |
| | 277c92e507 | | |
| | 77ec3a289f | | |
| | f81d5f19da | | |
@@ -33,10 +33,10 @@ do
|
|||||||
DUCKDB_PATH="${DUCKDB_PATH:-/data/padelnomics/lakehouse.duckdb}" \
|
DUCKDB_PATH="${DUCKDB_PATH:-/data/padelnomics/lakehouse.duckdb}" \
|
||||||
uv run --package padelnomics_extract extract
|
uv run --package padelnomics_extract extract
|
||||||
|
|
||||||
# Transform — run evaluates missing daily intervals for incremental models.
|
# Transform — plan detects new/modified/deleted models and applies changes.
|
||||||
LANDING_DIR="${LANDING_DIR:-/data/padelnomics/landing}" \
|
LANDING_DIR="${LANDING_DIR:-/data/padelnomics/landing}" \
|
||||||
DUCKDB_PATH="${DUCKDB_PATH:-/data/padelnomics/lakehouse.duckdb}" \
|
DUCKDB_PATH="${DUCKDB_PATH:-/data/padelnomics/lakehouse.duckdb}" \
|
||||||
uv run sqlmesh -p transform/sqlmesh_padelnomics run prod
|
uv run sqlmesh -p transform/sqlmesh_padelnomics plan prod --auto-apply
|
||||||
|
|
||||||
# Export serving tables to analytics.duckdb (atomic swap).
|
# Export serving tables to analytics.duckdb (atomic swap).
|
||||||
# The web app detects the inode change on next query — no restart needed.
|
# The web app detects the inode change on next query — no restart needed.
|
||||||
|
|||||||
@@ -247,10 +247,10 @@ def run_shell(cmd: str, timeout_seconds: int = SUBPROCESS_TIMEOUT_SECONDS) -> tu
|
|||||||
|
|
||||||
|
|
||||||
def run_transform() -> None:
|
def run_transform() -> None:
|
||||||
"""Run SQLMesh — evaluates missing daily intervals."""
|
"""Run SQLMesh — detects new/modified/deleted models and applies changes."""
|
||||||
logger.info("Running SQLMesh transform")
|
logger.info("Running SQLMesh transform")
|
||||||
ok, err = run_shell(
|
ok, err = run_shell(
|
||||||
"uv run sqlmesh -p transform/sqlmesh_padelnomics run prod",
|
"uv run sqlmesh -p transform/sqlmesh_padelnomics plan prod --auto-apply",
|
||||||
)
|
)
|
||||||
if not ok:
|
if not ok:
|
||||||
send_alert(f"[transform] {err}")
|
send_alert(f"[transform] {err}")
|
||||||
|
|||||||
@@ -215,7 +215,7 @@ SELECT
|
|||||||
l.location_slug,
|
l.location_slug,
|
||||||
l.lat,
|
l.lat,
|
||||||
l.lon,
|
l.lon,
|
||||||
h3_latlng_to_cell(l.lat, l.lon, 4) AS h3_cell_res4,
|
h3_latlng_to_cell(l.lat, l.lon, 5) AS h3_cell_res5,
|
||||||
l.admin1_code,
|
l.admin1_code,
|
||||||
l.admin2_code,
|
l.admin2_code,
|
||||||
l.population,
|
l.population,
|
||||||
|
|||||||
@@ -20,7 +20,7 @@
|
|||||||
-- "Where should I build a padel court?"
|
-- "Where should I build a padel court?"
|
||||||
-- Computed for ALL locations — zero-court locations score highest on supply gap.
|
-- Computed for ALL locations — zero-court locations score highest on supply gap.
|
||||||
-- H3 catchment methodology: addressable market and supply gap use a regional
|
-- H3 catchment methodology: addressable market and supply gap use a regional
|
||||||
-- H3 catchment (res-4 cell + 6 neighbours, ~462km², ~15-18km radius).
|
-- H3 catchment (res-5 cell + 6 neighbours, ~24km radius).
|
||||||
--
|
--
|
||||||
-- 25 pts addressable market — log-scaled catchment population, ceiling 500K
|
-- 25 pts addressable market — log-scaled catchment population, ceiling 500K
|
||||||
-- 20 pts economic power — income PPS, normalised to 35,000
|
-- 20 pts economic power — income PPS, normalised to 35,000
|
||||||
@@ -63,30 +63,30 @@ base AS (
|
|||||||
l.padel_venues_per_100k,
|
l.padel_venues_per_100k,
|
||||||
l.nearest_padel_court_km,
|
l.nearest_padel_court_km,
|
||||||
l.tennis_courts_within_25km,
|
l.tennis_courts_within_25km,
|
||||||
l.h3_cell_res4
|
l.h3_cell_res5
|
||||||
FROM foundation.dim_locations l
|
FROM foundation.dim_locations l
|
||||||
),
|
),
|
||||||
-- Aggregate population and court counts per H3 cell (res 4, ~10km edge).
|
-- Aggregate population and court counts per H3 cell (res 5, ~8.5km edge).
|
||||||
-- Grouping by cell first (~30-50K distinct cells vs 140K locations) keeps the
|
-- Grouping by cell first (~50-80K distinct cells vs 140K locations) keeps the
|
||||||
-- subsequent lateral join small.
|
-- subsequent lateral join small.
|
||||||
hex_stats AS (
|
hex_stats AS (
|
||||||
SELECT
|
SELECT
|
||||||
h3_cell_res4,
|
h3_cell_res5,
|
||||||
SUM(population) AS hex_population,
|
SUM(population) AS hex_population,
|
||||||
SUM(padel_venue_count) AS hex_padel_courts
|
SUM(padel_venue_count) AS hex_padel_courts
|
||||||
FROM foundation.dim_locations
|
FROM foundation.dim_locations
|
||||||
GROUP BY h3_cell_res4
|
GROUP BY h3_cell_res5
|
||||||
),
|
),
|
||||||
-- For each location, sum hex_stats across the cell + 6 neighbours (k_ring=1).
|
-- For each location, sum hex_stats across the cell + 6 neighbours (k_ring=1).
|
||||||
-- Effective catchment: ~462km², ~15-18km radius — realistic driving distance.
|
-- Effective catchment: ~24km radius — realistic driving distance.
|
||||||
catchment AS (
|
catchment AS (
|
||||||
SELECT
|
SELECT
|
||||||
l.geoname_id,
|
l.geoname_id,
|
||||||
SUM(hs.hex_population) AS catchment_population,
|
SUM(hs.hex_population) AS catchment_population,
|
||||||
SUM(hs.hex_padel_courts) AS catchment_padel_courts
|
SUM(hs.hex_padel_courts) AS catchment_padel_courts
|
||||||
FROM base l,
|
FROM base l,
|
||||||
LATERAL (SELECT UNNEST(h3_grid_disk(l.h3_cell_res4, 1)) AS cell) ring
|
LATERAL (SELECT UNNEST(h3_grid_disk(l.h3_cell_res5, 1)) AS cell) ring
|
||||||
JOIN hex_stats hs ON hs.h3_cell_res4 = ring.cell
|
JOIN hex_stats hs ON hs.h3_cell_res5 = ring.cell
|
||||||
GROUP BY l.geoname_id
|
GROUP BY l.geoname_id
|
||||||
),
|
),
|
||||||
-- Match dim_cities via (country_code, geoname_id) to get city_slug + exact venue count.
|
-- Match dim_cities via (country_code, geoname_id) to get city_slug + exact venue count.
|
||||||
|
|||||||
5
uv.lock
generated
5
uv.lock
generated
@@ -150,6 +150,11 @@ dependencies = [
|
|||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/84/85/57c314a6b35336efbbdc13e5fc9ae13f6b60a0647cfa7c1221178ac6d8ae/brotlicffi-1.2.0.0.tar.gz", hash = "sha256:34345d8d1f9d534fcac2249e57a4c3c8801a33c9942ff9f8574f67a175e17adb", size = 476682, upload-time = "2025-11-21T18:17:57.334Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/84/85/57c314a6b35336efbbdc13e5fc9ae13f6b60a0647cfa7c1221178ac6d8ae/brotlicffi-1.2.0.0.tar.gz", hash = "sha256:34345d8d1f9d534fcac2249e57a4c3c8801a33c9942ff9f8574f67a175e17adb", size = 476682, upload-time = "2025-11-21T18:17:57.334Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/7c/87/ba6298c3d7f8d66ce80d7a487f2a487ebae74a79c6049c7c2990178ce529/brotlicffi-1.2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b13fb476a96f02e477a506423cb5e7bc21e0e3ac4c060c20ba31c44056e38c68", size = 433038, upload-time = "2026-03-05T17:57:37.96Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/00/49/16c7a77d1cae0519953ef0389a11a9c2e2e62e87d04f8e7afbae40124255/brotlicffi-1.2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:17db36fb581f7b951635cd6849553a95c6f2f53c1a707817d06eae5aeff5f6af", size = 1541124, upload-time = "2026-03-05T17:57:39.488Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e8/17/fab2c36ea820e2288f8c1bf562de1b6cd9f30e28d66f1ce2929a4baff6de/brotlicffi-1.2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:40190192790489a7b054312163d0ce82b07d1b6e706251036898ce1684ef12e9", size = 1541983, upload-time = "2026-03-05T17:57:41.061Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/78/c9/849a669b3b3bb8ac96005cdef04df4db658c33443a7fc704a6d4a2f07a56/brotlicffi-1.2.0.0-cp314-cp314t-win32.whl", hash = "sha256:a8079e8ecc32ecef728036a1d9b7105991ce6a5385cf51ee8c02297c90fb08c2", size = 349046, upload-time = "2026-03-05T17:57:42.76Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/a4/25/09c0fd21cfc451fa38ad538f4d18d8be566746531f7f27143f63f8c45a9f/brotlicffi-1.2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:ca90c4266704ca0a94de8f101b4ec029624273380574e4cf19301acfa46c61a0", size = 385653, upload-time = "2026-03-05T17:57:44.224Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/e4/df/a72b284d8c7bef0ed5756b41c2eb7d0219a1dd6ac6762f1c7bdbc31ef3af/brotlicffi-1.2.0.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:9458d08a7ccde8e3c0afedbf2c70a8263227a68dea5ab13590593f4c0a4fd5f4", size = 432340, upload-time = "2025-11-21T18:17:42.277Z" },
|
{ url = "https://files.pythonhosted.org/packages/e4/df/a72b284d8c7bef0ed5756b41c2eb7d0219a1dd6ac6762f1c7bdbc31ef3af/brotlicffi-1.2.0.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:9458d08a7ccde8e3c0afedbf2c70a8263227a68dea5ab13590593f4c0a4fd5f4", size = 432340, upload-time = "2025-11-21T18:17:42.277Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/74/2b/cc55a2d1d6fb4f5d458fba44a3d3f91fb4320aa14145799fd3a996af0686/brotlicffi-1.2.0.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:84e3d0020cf1bd8b8131f4a07819edee9f283721566fe044a20ec792ca8fd8b7", size = 1534002, upload-time = "2025-11-21T18:17:43.746Z" },
|
{ url = "https://files.pythonhosted.org/packages/74/2b/cc55a2d1d6fb4f5d458fba44a3d3f91fb4320aa14145799fd3a996af0686/brotlicffi-1.2.0.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:84e3d0020cf1bd8b8131f4a07819edee9f283721566fe044a20ec792ca8fd8b7", size = 1534002, upload-time = "2025-11-21T18:17:43.746Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/e4/9c/d51486bf366fc7d6735f0e46b5b96ca58dc005b250263525a1eea3cd5d21/brotlicffi-1.2.0.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:33cfb408d0cff64cd50bef268c0fed397c46fbb53944aa37264148614a62e990", size = 1536547, upload-time = "2025-11-21T18:17:45.729Z" },
|
{ url = "https://files.pythonhosted.org/packages/e4/9c/d51486bf366fc7d6735f0e46b5b96ca58dc005b250263525a1eea3cd5d21/brotlicffi-1.2.0.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:33cfb408d0cff64cd50bef268c0fed397c46fbb53944aa37264148614a62e990", size = 1536547, upload-time = "2025-11-21T18:17:45.729Z" },
|
||||||
|
|||||||
@@ -737,9 +737,9 @@ async def handle_run_extraction(payload: dict) -> None:
|
|||||||
|
|
||||||
@task("run_transform")
|
@task("run_transform")
|
||||||
async def handle_run_transform(payload: dict) -> None:
|
async def handle_run_transform(payload: dict) -> None:
|
||||||
"""Run SQLMesh transform (prod run) in the background.
|
"""Run SQLMesh transform (prod plan + apply) in the background.
|
||||||
|
|
||||||
Shells out to `uv run sqlmesh -p transform/sqlmesh_padelnomics run prod`.
|
Shells out to `uv run sqlmesh -p transform/sqlmesh_padelnomics plan prod --auto-apply`.
|
||||||
2-hour absolute timeout — same as extraction.
|
2-hour absolute timeout — same as extraction.
|
||||||
"""
|
"""
|
||||||
import subprocess
|
import subprocess
|
||||||
@@ -748,7 +748,7 @@ async def handle_run_transform(payload: dict) -> None:
|
|||||||
repo_root = Path(__file__).resolve().parents[4]
|
repo_root = Path(__file__).resolve().parents[4]
|
||||||
result = await asyncio.to_thread(
|
result = await asyncio.to_thread(
|
||||||
subprocess.run,
|
subprocess.run,
|
||||||
["uv", "run", "sqlmesh", "-p", "transform/sqlmesh_padelnomics", "run", "prod"],
|
["uv", "run", "sqlmesh", "-p", "transform/sqlmesh_padelnomics", "plan", "prod", "--auto-apply"],
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
text=True,
|
text=True,
|
||||||
timeout=7200,
|
timeout=7200,
|
||||||
@@ -803,7 +803,7 @@ async def handle_run_pipeline(payload: dict) -> None:
|
|||||||
),
|
),
|
||||||
(
|
(
|
||||||
"transform",
|
"transform",
|
||||||
["uv", "run", "sqlmesh", "-p", "transform/sqlmesh_padelnomics", "run", "prod"],
|
["uv", "run", "sqlmesh", "-p", "transform/sqlmesh_padelnomics", "plan", "prod", "--auto-apply"],
|
||||||
7200,
|
7200,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
|
|||||||
Reference in New Issue
Block a user