Refactor to local-first architecture on Hetzner NVMe

Remove the distributed R2/Iceberg/SSH pipeline architecture in favor of
local subprocess execution with NVMe storage. Landing data is backed up
to R2 via an rclone systemd timer.

- Strip Iceberg catalog, httpfs, boto3, paramiko, prefect, pyarrow
- Pipelines run via subprocess.run() with bounded timeouts
- Extract writes to {LANDING_DIR}/psd/{year}/{month}/{etag}.csv.gzip
- SQLMesh reads LANDING_DIR variable, writes to DUCKDB_PATH
- Delete unused provider stubs (ovh, scaleway, oracle)
- Add rclone systemd timer for R2 backup every 6h
- Update supervisor to run pipelines with env vars

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-18 18:05:41 +01:00
parent 910424c956
commit c1d00dcdc4
25 changed files with 231 additions and 1807 deletions

View File

@@ -1,5 +1,7 @@
"""End-to-end tests for the materia CLI."""
from unittest.mock import patch
from typer.testing import CliRunner
from materia.cli import app
@@ -33,7 +35,6 @@ def test_secrets_list_command(mock_secrets):
result = runner.invoke(app, ["secrets", "list"])
assert result.exit_code == 0
assert "HETZNER_API_TOKEN" in result.stdout
assert "R2_ACCESS_KEY_ID" in result.stdout
def test_worker_list_empty(mock_secrets, mock_hcloud_client):
@@ -98,46 +99,55 @@ def test_worker_destroy(mock_secrets, mock_hcloud_client):
assert "Worker destroyed" in result.stdout
def test_pipeline_list(mock_secrets):
def test_pipeline_list():
"""Test pipeline list command."""
result = runner.invoke(app, ["pipeline", "list"])
assert result.exit_code == 0
assert "extract" in result.stdout
assert "transform" in result.stdout
assert "ccx12" in result.stdout
assert "ccx22" in result.stdout
assert "1800" in result.stdout
assert "3600" in result.stdout
def test_pipeline_run_extract(
mock_secrets, mock_hcloud_client, mock_ssh_wait, mock_ssh_connection
):
def test_pipeline_run_extract():
"""Test running extract pipeline end-to-end."""
result = runner.invoke(app, ["pipeline", "run", "extract"])
with patch("materia.pipelines.subprocess.run") as mock_run:
mock_run.return_value.returncode = 0
mock_run.return_value.stdout = "Extracted successfully\n"
mock_run.return_value.stderr = ""
assert result.exit_code == 0
assert "Running pipeline" in result.stdout
assert "Pipeline completed successfully" in result.stdout
result = runner.invoke(app, ["pipeline", "run", "extract"])
mock_hcloud_client.servers.create.assert_called_once()
mock_ssh_connection.connect.assert_called()
mock_ssh_connection.exec_command.assert_called()
assert result.exit_code == 0
assert "Running pipeline" in result.stdout
assert "Pipeline completed successfully" in result.stdout
mock_run.assert_called_once()
call_args = mock_run.call_args
assert call_args[0][0] == ["uv", "run", "--package", "psdonline", "extract_psd"]
assert call_args[1]["timeout"] == 1800
def test_pipeline_run_transform(
mock_secrets, mock_hcloud_client, mock_ssh_wait, mock_ssh_connection
):
def test_pipeline_run_transform():
"""Test running transform pipeline end-to-end."""
result = runner.invoke(app, ["pipeline", "run", "transform"])
with patch("materia.pipelines.subprocess.run") as mock_run:
mock_run.return_value.returncode = 0
mock_run.return_value.stdout = "Transform complete\n"
mock_run.return_value.stderr = ""
assert result.exit_code == 0
assert "Running pipeline" in result.stdout
assert "Pipeline completed successfully" in result.stdout
result = runner.invoke(app, ["pipeline", "run", "transform"])
mock_hcloud_client.servers.create.assert_called_once()
mock_ssh_connection.connect.assert_called()
assert result.exit_code == 0
assert "Running pipeline" in result.stdout
assert "Pipeline completed successfully" in result.stdout
mock_run.assert_called_once()
call_args = mock_run.call_args
assert "sqlmesh" in call_args[0][0]
assert call_args[1]["timeout"] == 3600
def test_pipeline_run_invalid(mock_secrets):
def test_pipeline_run_invalid():
"""Test running an invalid pipeline."""
result = runner.invoke(app, ["pipeline", "run", "invalid-pipeline"])