refactor: rename materia → beanflows throughout codebase
Some checks failed
CI / test-cli (push) Failing after 5s
CI / test-sqlmesh (push) Failing after 4s
CI / test-web (push) Failing after 5s
CI / tag (push) Has been skipped

- Rename src/materia/ → src/beanflows/ (Python package)
- Rename transform/sqlmesh_materia/ → transform/sqlmesh_beanflows/
- Rename infra/supervisor/materia-supervisor.service → beanflows-supervisor.service
- Rename infra/backup/materia-backup.{service,timer} → beanflows-backup.{service,timer}
- Update all path strings: /opt/materia → /opt/beanflows, /data/materia → /data/beanflows
- Update pyproject.toml: project name, CLI entrypoint, workspace source key
- Update all internal imports from materia.* → beanflows.*
- Update infra scripts: REPO_DIR, service names, systemctl references
- Fix docker-compose.prod.yml: bind-mount the /data/beanflows directory (read-only) instead of the single /data/materia/analytics.duckdb file, so atomic-swap replacements of the DuckDB file are visible inside the container

Intentionally left unchanged: Pulumi stack name (materia-infrastructure) and
Hetzner resource names ("materia-key", "managed_by: materia") — these reference
live cloud infrastructure and require separate cloud-side renames.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-28 23:00:52 +01:00
parent 9ea4f09600
commit d14990bb01
58 changed files with 128 additions and 93 deletions

View File

@@ -33,10 +33,10 @@ services:
env_file: ./.env
environment:
- DATABASE_PATH=/app/data/app.db
- SERVING_DUCKDB_PATH=/data/materia/analytics.duckdb
- SERVING_DUCKDB_PATH=/data/beanflows/analytics.duckdb
volumes:
- app-data:/app/data
- /data/materia/analytics.duckdb:/data/materia/analytics.duckdb:ro
- /data/beanflows:/data/beanflows:ro
networks:
- net
healthcheck:
@@ -84,10 +84,10 @@ services:
env_file: ./.env
environment:
- DATABASE_PATH=/app/data/app.db
- SERVING_DUCKDB_PATH=/data/materia/analytics.duckdb
- SERVING_DUCKDB_PATH=/data/beanflows/analytics.duckdb
volumes:
- app-data:/app/data
- /data/materia/analytics.duckdb:/data/materia/analytics.duckdb:ro
- /data/beanflows:/data/beanflows:ro
networks:
- net
healthcheck:

View File

@@ -0,0 +1,9 @@
[Unit]
Description=Beanflows Landing Data Backup to R2
After=network-online.target
Wants=network-online.target
[Service]
Type=oneshot
ExecStart=/usr/bin/rclone sync /data/beanflows/landing/ r2:backup/beanflows/landing/ --log-level INFO
TimeoutStartSec=1800

View File

@@ -1,5 +1,5 @@
[Unit]
Description=Materia Landing Data Backup Timer
Description=Beanflows Landing Data Backup Timer
[Timer]
OnCalendar=*-*-* 00/6:00:00

View File

@@ -1,9 +0,0 @@
[Unit]
Description=Materia Landing Data Backup to R2
After=network-online.target
Wants=network-online.target
[Service]
Type=oneshot
ExecStart=/usr/bin/rclone sync /data/materia/landing/ r2:backup/materia/landing/ --log-level INFO
TimeoutStartSec=1800

View File

@@ -14,7 +14,7 @@
set -euo pipefail
SERVICE_USER="beanflows_service"
REPO_DIR="/opt/materia"
REPO_DIR="/opt/beanflows"
GITEA_REPO="ssh://git@git.padelnomics.io:2222/deemanone/beanflows.git"
UV="/home/${SERVICE_USER}/.local/bin/uv"
@@ -57,9 +57,9 @@ sudo -u "${SERVICE_USER}" bash -c "cd ${REPO_DIR} && ${UV} sync --all-packages"
# ── Systemd supervisor service ────────────────────────────────────────────────
cp "${REPO_DIR}/infra/supervisor/materia-supervisor.service" /etc/systemd/system/
cp "${REPO_DIR}/infra/supervisor/beanflows-supervisor.service" /etc/systemd/system/
systemctl daemon-reload
systemctl enable --now materia-supervisor
systemctl enable --now beanflows-supervisor
# ── R2 backup timer (optional) ────────────────────────────────────────────────
# Enabled only when R2_ACCESS_KEY_ID, R2_SECRET_ACCESS_KEY, and R2_ENDPOINT
@@ -96,7 +96,7 @@ EOF
chmod 600 "${RCLONE_CONF}"
UNITS_CHANGED=0
for unit in materia-backup.service materia-backup.timer; do
for unit in beanflows-backup.service beanflows-backup.timer; do
if ! diff -q "${REPO_DIR}/infra/backup/${unit}" "/etc/systemd/system/${unit}" >/dev/null 2>&1; then
cp "${REPO_DIR}/infra/backup/${unit}" /etc/systemd/system/
UNITS_CHANGED=1
@@ -104,7 +104,7 @@ EOF
done
[ "${UNITS_CHANGED}" = "1" ] && systemctl daemon-reload
systemctl enable --now materia-backup.timer
systemctl enable --now beanflows-backup.timer
echo "$(date '+%H:%M:%S') ==> R2 backup timer enabled."
else
echo "$(date '+%H:%M:%S') ==> R2_ACCESS_KEY_ID / R2_SECRET_ACCESS_KEY / R2_ENDPOINT not set — skipping backup timer."
@@ -113,8 +113,8 @@ fi
echo ""
echo "=== Bootstrap complete! ==="
echo ""
echo "Check status: systemctl status materia-supervisor"
echo "View logs: journalctl -u materia-supervisor -f"
echo "Workflow status: sudo -u ${SERVICE_USER} ${UV} run -p ${REPO_DIR} python src/materia/supervisor.py status"
echo "Backup timer: systemctl list-timers materia-backup.timer"
echo "Check status: systemctl status beanflows-supervisor"
echo "View logs: journalctl -u beanflows-supervisor -f"
echo "Workflow status: sudo -u ${SERVICE_USER} ${UV} run -p ${REPO_DIR} python src/beanflows/supervisor.py status"
echo "Backup timer: systemctl list-timers beanflows-backup.timer"
echo "Tag: $(sudo -u "${SERVICE_USER}" git -C "${REPO_DIR}" describe --tags --always)"

View File

@@ -8,7 +8,7 @@
#
# What it does:
# 1. Creates beanflows_service user (nologin) + adds to docker group
# 2. Creates /opt/materia + /data/materia/landing with correct ownership
# 2. Creates /opt/beanflows + /data/beanflows/landing with correct ownership
# 3. Installs git, curl, age, sops, rclone, uv
# 4. Generates ed25519 SSH deploy key for GitLab read access
# 5. Generates age keypair at ~/.config/sops/age/keys.txt (as service user)
@@ -17,10 +17,10 @@
set -euo pipefail
SERVICE_USER="beanflows_service"
APP_DIR="/opt/materia"
DATA_DIR="/data/materia"
APP_DIR="/opt/beanflows"
DATA_DIR="/data/beanflows"
SSH_DIR="/home/${SERVICE_USER}/.ssh"
DEPLOY_KEY="${SSH_DIR}/materia_deploy"
DEPLOY_KEY="${SSH_DIR}/beanflows_deploy"
SOPS_AGE_DIR="/home/${SERVICE_USER}/.config/sops/age"
ROTATE_KEYS="${ROTATE_KEYS:-}"
@@ -63,7 +63,7 @@ fi
if [ ! -f "${DEPLOY_KEY}" ]; then
sudo -u "${SERVICE_USER}" ssh-keygen -t ed25519 \
-f "${DEPLOY_KEY}" -N "" -C "materia-deploy"
-f "${DEPLOY_KEY}" -N "" -C "beanflows-deploy"
fi
if [ ! -f "${SSH_DIR}/config" ]; then

View File

@@ -0,0 +1,29 @@
[Unit]
Description=Beanflows Supervisor - Pipeline Orchestration
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=beanflows_service
WorkingDirectory=/opt/beanflows
ExecStart=/bin/sh -c 'exec uv run python src/beanflows/supervisor.py'
Restart=always
RestartSec=10
EnvironmentFile=/opt/beanflows/.env
Environment=PATH=/home/beanflows_service/.local/bin:/usr/local/bin:/usr/bin:/bin
Environment=LANDING_DIR=/data/beanflows/landing
Environment=DUCKDB_PATH=/data/beanflows/lakehouse.duckdb
Environment=SERVING_DUCKDB_PATH=/data/beanflows/analytics.duckdb
Environment=SUPERVISOR_GIT_PULL=1
# Resource limits
LimitNOFILE=65536
# Logging
StandardOutput=journal
StandardError=journal
SyslogIdentifier=beanflows-supervisor
[Install]
WantedBy=multi-user.target

View File

@@ -1,29 +0,0 @@
[Unit]
Description=Materia Supervisor - Pipeline Orchestration
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=beanflows_service
WorkingDirectory=/opt/materia
ExecStart=/bin/sh -c 'exec uv run python src/materia/supervisor.py'
Restart=always
RestartSec=10
EnvironmentFile=/opt/materia/.env
Environment=PATH=/home/beanflows_service/.local/bin:/usr/local/bin:/usr/bin:/bin
Environment=LANDING_DIR=/data/materia/landing
Environment=DUCKDB_PATH=/data/materia/lakehouse.duckdb
Environment=SERVING_DUCKDB_PATH=/data/materia/analytics.duckdb
Environment=SUPERVISOR_GIT_PULL=1
# Resource limits
LimitNOFILE=65536
# Logging
StandardOutput=journal
StandardError=journal
SyslogIdentifier=materia-supervisor
[Install]
WantedBy=multi-user.target

View File

@@ -1,5 +1,5 @@
[project]
name = "materia"
name = "beanflows"
version = "0.1.0"
description = "Add your description here"
readme = "readme.md"
@@ -20,7 +20,7 @@ dependencies = [
]
[project.scripts]
materia = "materia.cli:app"
beanflows = "beanflows.cli:app"
[dependency-groups]
@@ -43,7 +43,7 @@ dev = [
[tool.uv.sources]
extract_core = {workspace = true }
psdonline = {workspace = true }
sqlmesh_materia = {workspace = true }
sqlmesh_beanflows = {workspace = true }
cftc_cot = {workspace = true }
coffee_prices = {workspace = true }
ice_stocks = {workspace = true }

View File

@@ -0,0 +1,2 @@
def main() -> None:
print("Hello from beanflows!")

View File

@@ -5,7 +5,7 @@ from typing import Annotated
import typer
app = typer.Typer(
name="materia",
name="beanflows",
help="BeanFlows.coffee data platform management CLI",
no_args_is_help=True,
)
@@ -26,7 +26,7 @@ def worker_list(
provider: Annotated[str, typer.Option("--provider", "-p")] = "hetzner",
):
"""List all active worker instances."""
from materia.workers import list_workers
from beanflows.workers import list_workers
workers = list_workers(provider)
if not workers:
@@ -47,7 +47,7 @@ def worker_create(
location: Annotated[str | None, typer.Option("--location", "-l")] = None,
):
"""Create a new worker instance."""
from materia.workers import create_worker
from beanflows.workers import create_worker
typer.echo(f"Creating worker '{name}' ({server_type}) on {provider}...")
worker = create_worker(name, server_type, provider, location)
@@ -61,7 +61,7 @@ def worker_destroy(
force: Annotated[bool, typer.Option("--force", "-f")] = False,
):
"""Destroy a worker instance."""
from materia.workers import destroy_worker
from beanflows.workers import destroy_worker
if not force:
confirm = typer.confirm(f"Destroy worker '{name}'?")
@@ -82,7 +82,7 @@ def pipeline_run(
name: Annotated[str, typer.Argument(help="Pipeline name (extract, transform)")],
):
"""Run a pipeline locally."""
from materia.pipelines import run_pipeline
from beanflows.pipelines import run_pipeline
typer.echo(f"Running pipeline '{name}'...")
result = run_pipeline(name)
@@ -98,7 +98,7 @@ def pipeline_run(
@pipeline_app.command("list")
def pipeline_list():
"""List available pipelines."""
from materia.pipelines import PIPELINES
from beanflows.pipelines import PIPELINES
typer.echo("Available pipelines:")
for name, config in PIPELINES.items():
@@ -113,7 +113,7 @@ app.add_typer(secrets_app, name="secrets")
@secrets_app.command("list")
def secrets_list():
"""List available secrets (keys only)."""
from materia.secrets import list_secrets
from beanflows.secrets import list_secrets
secrets = list_secrets()
if not secrets:
@@ -130,7 +130,7 @@ def secrets_get(
key: Annotated[str, typer.Argument(help="Secret key")],
):
"""Get a secret value."""
from materia.secrets import get_secret
from beanflows.secrets import get_secret
value = get_secret(key)
if value is None:
@@ -143,7 +143,7 @@ def secrets_get(
@secrets_app.command("test")
def secrets_test():
"""Test sops decryption (verifies sops is installed and age key is present)."""
from materia.secrets import test_connection
from beanflows.secrets import test_connection
typer.echo("Testing SOPS decryption...")
if test_connection():

View File

@@ -11,7 +11,7 @@ reopens the connection automatically — no restart or signal required.
Usage:
DUCKDB_PATH=lakehouse.duckdb SERVING_DUCKDB_PATH=serving.duckdb \
uv run materia pipeline run export_serving
uv run beanflows pipeline run export_serving
"""
import logging
import os

View File

@@ -57,7 +57,7 @@ PIPELINES = {
"timeout_seconds": 6600,
},
"transform": {
"command": ["uv", "run", "--package", "sqlmesh_materia", "sqlmesh", "-p", "transform/sqlmesh_materia", "plan", "prod", "--no-prompts", "--auto-apply"],
"command": ["uv", "run", "--package", "sqlmesh_beanflows", "sqlmesh", "-p", "transform/sqlmesh_beanflows", "plan", "prod", "--no-prompts", "--auto-apply"],
"timeout_seconds": 3600,
},
# Copies serving.* tables from lakehouse.duckdb → serving.duckdb (atomic swap).
@@ -65,7 +65,7 @@ PIPELINES = {
"export_serving": {
"command": ["uv", "run", "python", "-c",
"import logging; logging.basicConfig(level=logging.INFO); "
"from materia.export_serving import export_serving; export_serving()"],
"from beanflows.export_serving import export_serving; export_serving()"],
"timeout_seconds": 300,
},
}

View File

@@ -15,7 +15,7 @@ class Instance:
def get_provider(provider_name: str):
if provider_name == "hetzner":
from materia.providers import hetzner
from beanflows.providers import hetzner
return hetzner
else:

View File

@@ -7,8 +7,8 @@ from hcloud import Client
from hcloud.images import Image
from hcloud.server_types import ServerType
from materia.providers import Instance
from materia.secrets import get_secret
from beanflows.providers import Instance
from beanflows.secrets import get_secret
def _get_client() -> Client:

View File

@@ -11,10 +11,10 @@ the supervisor is effectively unkillable.
Usage:
# Run the supervisor loop (production)
LANDING_DIR=data/landing uv run python src/materia/supervisor.py
LANDING_DIR=data/landing uv run python src/beanflows/supervisor.py
# Show workflow status
LANDING_DIR=data/landing uv run python src/materia/supervisor.py status
LANDING_DIR=data/landing uv run python src/beanflows/supervisor.py status
"""
import importlib
@@ -38,7 +38,7 @@ from croniter import croniter
TICK_INTERVAL_SECONDS = 60
BACKOFF_SECONDS = 600 # 10 min on tick failure
SUBPROCESS_TIMEOUT_SECONDS = 14400 # 4 hours max per subprocess
REPO_DIR = Path(os.getenv("REPO_DIR", "/opt/materia"))
REPO_DIR = Path(os.getenv("REPO_DIR", "/opt/beanflows"))
LANDING_DIR = Path(os.getenv("LANDING_DIR", "data/landing"))
DUCKDB_PATH = os.getenv("DUCKDB_PATH", "data/lakehouse.duckdb")
SERVING_DUCKDB_PATH = os.getenv("SERVING_DUCKDB_PATH", "analytics.duckdb")
@@ -58,7 +58,7 @@ logging.basicConfig(
datefmt="%Y-%m-%d %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger("materia.supervisor")
logger = logging.getLogger("beanflows.supervisor")
# ---------------------------------------------------------------------------
@@ -242,7 +242,7 @@ def run_shell(cmd: str, timeout_seconds: int = SUBPROCESS_TIMEOUT_SECONDS) -> tu
def run_transform() -> None:
"""Run SQLMesh — evaluates model staleness internally."""
logger.info("Running SQLMesh transform")
ok, err = run_shell("uv run sqlmesh -p transform/sqlmesh_materia plan prod --auto-apply")
ok, err = run_shell("uv run sqlmesh -p transform/sqlmesh_beanflows plan prod --auto-apply")
if not ok:
send_alert(f"[transform] {err}")
@@ -252,28 +252,63 @@ def run_export() -> None:
logger.info("Exporting serving tables")
ok, err = run_shell(
f"DUCKDB_PATH={DUCKDB_PATH} SERVING_DUCKDB_PATH={SERVING_DUCKDB_PATH} "
f"uv run materia pipeline run export_serving"
f"uv run beanflows pipeline run export_serving"
)
if not ok:
send_alert(f"[export] {err}")
_last_seen_head: str | None = None
def web_code_changed() -> bool:
"""Check if web app code changed since last deploy."""
"""True on the first tick after a commit that changed web app code.
Compares the current HEAD to the HEAD from the previous tick. On first call
after process start, falls back to HEAD~1 so the just-deployed commit is
evaluated exactly once. Records HEAD before returning so the same commit
never triggers twice.
"""
global _last_seen_head
result = subprocess.run(
["git", "diff", "--name-only", "HEAD~1", "HEAD", "--", "web/", "Dockerfile"],
["git", "rev-parse", "HEAD"], capture_output=True, text=True, timeout=10,
)
if result.returncode != 0:
return False
current_head = result.stdout.strip()
if _last_seen_head is None:
base_result = subprocess.run(
["git", "rev-parse", "HEAD~1"], capture_output=True, text=True, timeout=10,
)
base = base_result.stdout.strip() if base_result.returncode == 0 else current_head
else:
base = _last_seen_head
_last_seen_head = current_head # advance now — won't fire again for this HEAD
if base == current_head:
return False
diff = subprocess.run(
["git", "diff", "--name-only", base, current_head, "--", "web/", "Dockerfile"],
capture_output=True, text=True, timeout=30,
)
return bool(result.stdout.strip())
return bool(diff.stdout.strip())
def current_deployed_tag() -> str | None:
"""Return the tag currently checked out, or None if not on a tag."""
"""Return the highest-version tag pointing at HEAD, or None.
Uses --points-at HEAD so multiple tags on the same commit (e.g. a CI
integer tag and a date-based tag) are handled correctly.
"""
result = subprocess.run(
["git", "describe", "--tags", "--exact-match", "HEAD"],
["git", "tag", "--list", "--sort=-version:refname", "--points-at", "HEAD", "v*"],
capture_output=True, text=True, timeout=10,
)
return result.stdout.strip() or None
tags = result.stdout.strip().splitlines()
return tags[0] if tags else None
def latest_remote_tag() -> str | None:

View File

@@ -1,7 +1,7 @@
"""Worker instance management."""
from materia.providers import Instance, get_provider
from materia.secrets import get_secret
from beanflows.providers import Instance, get_provider
from beanflows.secrets import get_secret
DEFAULT_PROVIDER = "hetzner"

View File

@@ -1,2 +0,0 @@
def main() -> None:
print("Hello from materia!")

View File

@@ -1,5 +1,5 @@
[project]
name = "sqlmesh_materia"
name = "sqlmesh_beanflows"
version = "0.1.0"
description = "Add your description here"
authors = [
@@ -16,4 +16,4 @@ requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["sqlmesh_materia"]
packages = ["sqlmesh_beanflows"]