From ea86940b78b01389f7e7747e82b1354fb39be346 Mon Sep 17 00:00:00 2001 From: Deeman Date: Sun, 22 Feb 2026 17:50:36 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20copier=20update=20v0.9.0=20=E2=86=92=20?= =?UTF-8?q?v0.10.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pulls in template changes: export_serving.py for atomic DuckDB swap, supervisor export step, SQLMesh glob macro, server provisioning script, imprint template, and formatting improvements. Template scaffold SQL models excluded (padelnomics has real models). Web app routes/analytics unchanged (padelnomics-specific customizations). Co-Authored-By: Claude Opus 4.6 --- .claude/CLAUDE.md | 7 ++ .copier-answers.yml | 2 +- extract/padelnomics_extract/README.md | 2 +- infra/setup_server.sh | 51 ++++++++++++ .../supervisor/padelnomics-supervisor.service | 1 + infra/supervisor/supervisor.sh | 13 ++- src/padelnomics/export_serving.py | 79 +++++++++++++++++++ transform/sqlmesh_padelnomics/README.md | 22 +++--- .../sqlmesh_padelnomics/macros/__init__.py | 20 +++++ .../padelnomics/public/templates/imprint.html | 55 +++++++++++++ 10 files changed, 236 insertions(+), 16 deletions(-) create mode 100644 infra/setup_server.sh create mode 100644 src/padelnomics/export_serving.py create mode 100644 transform/sqlmesh_padelnomics/macros/__init__.py create mode 100644 web/src/padelnomics/public/templates/imprint.html diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index b3a614f..d936ee1 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -5,18 +5,22 @@ This file tells Claude Code how to work in this repository. ## Project Overview Padelnomics is a SaaS application built with Quart (async Python), HTMX, and SQLite. 
+ It includes a full data pipeline: ``` External APIs → extract → landing zone → SQLMesh transform → DuckDB → web app ``` + **Packages** (uv workspace): - `web/` — Quart + HTMX web application (auth, billing, dashboard) + - `extract/padelnomics_extract/` — data extraction to local landing zone - `transform/sqlmesh_padelnomics/` — 4-layer SQL transformation (raw → staging → foundation → serving) - `src/padelnomics/` — CLI utilities, export_serving helper + ## Skills: invoke these for domain tasks ### Working on extraction or transformation? @@ -32,6 +36,7 @@ Use the **`data-engineer`** skill for: /data-engineer (or ask Claude to invoke it) ``` + ### Working on the web app UI or frontend? Use the **`frontend-design`** skill for UI components, templates, or dashboard layouts. @@ -66,6 +71,7 @@ uv run sqlmesh -p transform/sqlmesh_padelnomics plan prod # Export serving tables (run after SQLMesh) DUCKDB_PATH=local.duckdb SERVING_DUCKDB_PATH=analytics.duckdb \ uv run python -m padelnomics.export_serving + ``` ## Architecture documentation @@ -96,6 +102,7 @@ analytics.duckdb ← serving tables only, web app read-only | `DUCKDB_PATH` | `local.duckdb` | SQLMesh pipeline DB (exclusive write) | | `SERVING_DUCKDB_PATH` | `analytics.duckdb` | Read-only DB for web app | + ## Coding philosophy - **Simple and procedural** — functions over classes, no "Manager" patterns diff --git a/.copier-answers.yml b/.copier-answers.yml index 9641c2e..4e8a874 100644 --- a/.copier-answers.yml +++ b/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier; NEVER EDIT MANUALLY -_commit: v0.9.0 +_commit: v0.10.0 _src_path: /home/Deeman/Projects/quart_saas_boilerplate author_email: '' author_name: '' diff --git a/extract/padelnomics_extract/README.md b/extract/padelnomics_extract/README.md index 5b36d06..b870176 100644 --- a/extract/padelnomics_extract/README.md +++ b/extract/padelnomics_extract/README.md @@ -83,7 +83,7 @@ State table schema: ``` data/landing/ ├── .state.sqlite 
# extraction run history -└── padelnomics/ # one subdirectory per source +└── padelnomics/ # one subdirectory per source └── {year}/ └── {month:02d}/ └── {etag}.csv.gz # immutable, content-addressed files diff --git a/infra/setup_server.sh b/infra/setup_server.sh new file mode 100644 index 0000000..cf680f7 --- /dev/null +++ b/infra/setup_server.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# One-time server setup: create app directory and GitLab deploy key. +# Run as root on a fresh server before deploying. +# +# Usage: +# bash infra/setup_server.sh + +set -euo pipefail + +APP_DIR="/opt/padelnomics" +KEY_PATH="$HOME/.ssh/padelnomics_deploy" + +# Create app directory +mkdir -p "$APP_DIR" +echo "Created $APP_DIR" + +# Generate deploy key if not already present +if [ ! -f "$KEY_PATH" ]; then + mkdir -p "$HOME/.ssh" + ssh-keygen -t ed25519 -f "$KEY_PATH" -N "" -C "padelnomics-server" + chmod 700 "$HOME/.ssh" + chmod 600 "$KEY_PATH" + chmod 644 "$KEY_PATH.pub" + + # Configure SSH to use this key for gitlab.com + if ! grep -q "# padelnomics" "$HOME/.ssh/config" 2>/dev/null; then + cat >> "$HOME/.ssh/config" < None: + """Copy all serving.* tables from the pipeline DB to the serving DB atomically.""" + pipeline_path = os.getenv("DUCKDB_PATH", "") + serving_path = os.getenv("SERVING_DUCKDB_PATH", "") + assert pipeline_path, "DUCKDB_PATH must be set" + assert serving_path, "SERVING_DUCKDB_PATH must be set" + assert os.path.exists(pipeline_path), f"Pipeline DB not found: {pipeline_path}" + + # Temp path in the same directory as the serving DB so rename() is atomic + # (rename across filesystems is not atomic on Linux). 
+ tmp_path = os.path.join(os.path.dirname(os.path.abspath(serving_path)), "_export.duckdb") + + src = duckdb.connect(pipeline_path, read_only=True) + try: + tables = src.sql( + "SELECT table_name FROM information_schema.tables" + " WHERE table_schema = 'serving' ORDER BY table_name" + ).fetchall() + assert tables, f"No tables found in serving schema of {pipeline_path}" + logger.info(f"Exporting {len(tables)} serving tables: {[t[0] for t in tables]}") + + dst = duckdb.connect(tmp_path) + try: + dst.execute("CREATE SCHEMA IF NOT EXISTS serving") + for (table,) in tables: + # Read via Arrow to avoid cross-connection catalog ambiguity. + arrow_data = src.sql(f"SELECT * FROM serving.{table}").arrow() + dst.register("_src", arrow_data) + dst.execute(f"CREATE OR REPLACE TABLE serving.{table} AS SELECT * FROM _src") + dst.unregister("_src") + row_count = dst.sql(f"SELECT count(*) FROM serving.{table}").fetchone()[0] + logger.info(f" serving.{table}: {row_count:,} rows") + finally: + dst.close() + finally: + src.close() + + # Atomic rename — on Linux, rename() is atomic when src and dst are on the same filesystem. 
+ os.rename(tmp_path, serving_path) + logger.info(f"Serving DB atomically updated: {serving_path}") + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s %(levelname)s %(message)s") + export_serving() diff --git a/transform/sqlmesh_padelnomics/README.md b/transform/sqlmesh_padelnomics/README.md index 3fb4d2d..9f83f58 100644 --- a/transform/sqlmesh_padelnomics/README.md +++ b/transform/sqlmesh_padelnomics/README.md @@ -21,21 +21,21 @@ uv run sqlmesh -p transform/sqlmesh_padelnomics format ## 4-layer architecture ``` -landing/ <- raw files (extraction output) - +-- padelnomics/ - +-- {year}/{etag}.csv.gz +landing/ ← raw files (extraction output) + └── padelnomics/ + └── {year}/{etag}.csv.gz -raw/ <- reads files verbatim - +-- raw.padelnomics +raw/ ← reads files verbatim + └── raw.padelnomics -staging/ <- type casting, deduplication - +-- staging.stg_padelnomics +staging/ ← type casting, deduplication + └── staging.stg_padelnomics -foundation/ <- business logic, dimensions, facts - +-- foundation.dim_category +foundation/ ← business logic, dimensions, facts + └── foundation.dim_category -serving/ <- pre-aggregated for web app - +-- serving.padelnomics_metrics +serving/ ← pre-aggregated for web app + └── serving.padelnomics_metrics ``` ### raw/ — verbatim source reads diff --git a/transform/sqlmesh_padelnomics/macros/__init__.py b/transform/sqlmesh_padelnomics/macros/__init__.py new file mode 100644 index 0000000..b4b675c --- /dev/null +++ b/transform/sqlmesh_padelnomics/macros/__init__.py @@ -0,0 +1,20 @@ +import os + +from sqlmesh import macro + + +@macro() +def padelnomics_glob(evaluator) -> str: + """Return a quoted glob path for all padelnomics CSV gz files under LANDING_DIR. + + Used in raw models: SELECT * FROM read_csv(@padelnomics_glob(), ...) + + The LANDING_DIR variable is read from the SQLMesh config variables block first, + then falls back to the LANDING_DIR environment variable, then to 'data/landing'. 
+ """ + landing_dir = evaluator.var("LANDING_DIR") or os.environ.get("LANDING_DIR", "data/landing") + return f"'{landing_dir}/padelnomics/**/*.csv.gz'" + + +# Add one macro per landing zone subdirectory you create. +# Pattern: def {source}_glob(evaluator) → f"'{landing_dir}/{source}/**/*.csv.gz'" diff --git a/web/src/padelnomics/public/templates/imprint.html b/web/src/padelnomics/public/templates/imprint.html new file mode 100644 index 0000000..e3578f4 --- /dev/null +++ b/web/src/padelnomics/public/templates/imprint.html @@ -0,0 +1,55 @@ +{% extends "base.html" %} + +{% block title %}Imprint — {{ config.APP_NAME }}{% endblock %} + +{% block head %} + + +{% endblock %} + +{% block content %} +
+
+

Imprint

+

Legal disclosure pursuant to §5 DDG (Digitale-Dienste-Gesetz)

+ +
+ +
+

Service Provider

+

+
+ +

+
+ +
+

Contact

+

Email: {{ config.EMAIL_FROM }}

+
+ +
+

VAT

+ +

Small business owner pursuant to §19 UStG (Umsatzsteuergesetz). VAT is not charged and no VAT identification number is issued.

+
+ +
+

Responsible for Content

+

+ +

+
+ +
+

Disclaimer

+

Despite careful content control, we assume no liability for the content of external links. The operators of linked pages are solely responsible for their content.

+
+ +
+
+
+{% endblock %}