Refactor to local-first architecture on Hetzner NVMe
Remove the distributed R2/Iceberg/SSH pipeline architecture in favor of
local subprocess execution with NVMe storage. Landing data is backed up
to R2 via an rclone timer.
- Strip Iceberg catalog, httpfs, boto3, paramiko, prefect, pyarrow
- Pipelines run via subprocess.run() with bounded timeouts
- Extract pipeline writes to {LANDING_DIR}/psd/{year}/{month}/{etag}.csv.gzip
- SQLMesh reads the LANDING_DIR variable and writes to DUCKDB_PATH
- Delete unused provider stubs (ovh, scaleway, oracle)
- Add rclone systemd timer for R2 backup every 6h
- Update supervisor to run pipelines with env vars
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -13,16 +13,9 @@ def mock_esc_env(tmp_path):
|
||||
|
||||
return {
|
||||
"HETZNER_API_TOKEN": "test-hetzner-token",
|
||||
"R2_ACCESS_KEY_ID": "test-r2-key",
|
||||
"R2_SECRET_ACCESS_KEY": "test-r2-secret",
|
||||
"R2_ENDPOINT": "test.r2.cloudflarestorage.com",
|
||||
"R2_ARTIFACTS_BUCKET": "test-artifacts",
|
||||
"SSH_PUBLIC_KEY": "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITest",
|
||||
"SSH_PRIVATE_KEY": "-----BEGIN OPENSSH PRIVATE KEY-----\ntest\n-----END OPENSSH PRIVATE KEY-----",
|
||||
"SSH_PRIVATE_KEY_PATH": str(ssh_key_path),
|
||||
"CLOUDFLARE_API_TOKEN": "test-cf-token",
|
||||
"ICEBERG_REST_URI": "https://api.cloudflare.com/test",
|
||||
"R2_WAREHOUSE_NAME": "test-warehouse",
|
||||
}
|
||||
|
||||
|
||||
@@ -67,33 +60,3 @@ def mock_ssh_wait():
|
||||
"""Mock SSH wait function to return immediately."""
|
||||
with patch("materia.providers.hetzner.wait_for_ssh", return_value=True):
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_ssh_connection():
|
||||
"""Mock paramiko SSH connection."""
|
||||
with patch("materia.pipelines.paramiko.SSHClient") as mock_ssh_class, \
|
||||
patch("materia.pipelines.paramiko.RSAKey.from_private_key_file") as mock_key:
|
||||
ssh_instance = Mock()
|
||||
mock_ssh_class.return_value = ssh_instance
|
||||
mock_key.return_value = Mock()
|
||||
|
||||
ssh_instance.connect = Mock()
|
||||
ssh_instance.set_missing_host_key_policy = Mock()
|
||||
|
||||
mock_channel = Mock()
|
||||
mock_channel.recv_exit_status.return_value = 0
|
||||
|
||||
mock_stdout = Mock()
|
||||
mock_stdout.read.return_value = b"Success\n"
|
||||
mock_stdout.channel = mock_channel
|
||||
|
||||
mock_stderr = Mock()
|
||||
mock_stderr.read.return_value = b""
|
||||
|
||||
ssh_instance.exec_command = Mock(
|
||||
return_value=(Mock(), mock_stdout, mock_stderr)
|
||||
)
|
||||
ssh_instance.close = Mock()
|
||||
|
||||
yield ssh_instance
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
"""End-to-end tests for the materia CLI."""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from typer.testing import CliRunner
|
||||
|
||||
from materia.cli import app
|
||||
@@ -33,7 +35,6 @@ def test_secrets_list_command(mock_secrets):
|
||||
result = runner.invoke(app, ["secrets", "list"])
|
||||
assert result.exit_code == 0
|
||||
assert "HETZNER_API_TOKEN" in result.stdout
|
||||
assert "R2_ACCESS_KEY_ID" in result.stdout
|
||||
|
||||
|
||||
def test_worker_list_empty(mock_secrets, mock_hcloud_client):
|
||||
@@ -98,46 +99,55 @@ def test_worker_destroy(mock_secrets, mock_hcloud_client):
|
||||
assert "Worker destroyed" in result.stdout
|
||||
|
||||
|
||||
def test_pipeline_list(mock_secrets):
|
||||
def test_pipeline_list():
|
||||
"""Test pipeline list command."""
|
||||
result = runner.invoke(app, ["pipeline", "list"])
|
||||
assert result.exit_code == 0
|
||||
assert "extract" in result.stdout
|
||||
assert "transform" in result.stdout
|
||||
assert "ccx12" in result.stdout
|
||||
assert "ccx22" in result.stdout
|
||||
assert "1800" in result.stdout
|
||||
assert "3600" in result.stdout
|
||||
|
||||
|
||||
def test_pipeline_run_extract(
|
||||
mock_secrets, mock_hcloud_client, mock_ssh_wait, mock_ssh_connection
|
||||
):
|
||||
def test_pipeline_run_extract():
|
||||
"""Test running extract pipeline end-to-end."""
|
||||
result = runner.invoke(app, ["pipeline", "run", "extract"])
|
||||
with patch("materia.pipelines.subprocess.run") as mock_run:
|
||||
mock_run.return_value.returncode = 0
|
||||
mock_run.return_value.stdout = "Extracted successfully\n"
|
||||
mock_run.return_value.stderr = ""
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert "Running pipeline" in result.stdout
|
||||
assert "Pipeline completed successfully" in result.stdout
|
||||
result = runner.invoke(app, ["pipeline", "run", "extract"])
|
||||
|
||||
mock_hcloud_client.servers.create.assert_called_once()
|
||||
mock_ssh_connection.connect.assert_called()
|
||||
mock_ssh_connection.exec_command.assert_called()
|
||||
assert result.exit_code == 0
|
||||
assert "Running pipeline" in result.stdout
|
||||
assert "Pipeline completed successfully" in result.stdout
|
||||
|
||||
mock_run.assert_called_once()
|
||||
call_args = mock_run.call_args
|
||||
assert call_args[0][0] == ["uv", "run", "--package", "psdonline", "extract_psd"]
|
||||
assert call_args[1]["timeout"] == 1800
|
||||
|
||||
|
||||
def test_pipeline_run_transform(
|
||||
mock_secrets, mock_hcloud_client, mock_ssh_wait, mock_ssh_connection
|
||||
):
|
||||
def test_pipeline_run_transform():
|
||||
"""Test running transform pipeline end-to-end."""
|
||||
result = runner.invoke(app, ["pipeline", "run", "transform"])
|
||||
with patch("materia.pipelines.subprocess.run") as mock_run:
|
||||
mock_run.return_value.returncode = 0
|
||||
mock_run.return_value.stdout = "Transform complete\n"
|
||||
mock_run.return_value.stderr = ""
|
||||
|
||||
assert result.exit_code == 0
|
||||
assert "Running pipeline" in result.stdout
|
||||
assert "Pipeline completed successfully" in result.stdout
|
||||
result = runner.invoke(app, ["pipeline", "run", "transform"])
|
||||
|
||||
mock_hcloud_client.servers.create.assert_called_once()
|
||||
mock_ssh_connection.connect.assert_called()
|
||||
assert result.exit_code == 0
|
||||
assert "Running pipeline" in result.stdout
|
||||
assert "Pipeline completed successfully" in result.stdout
|
||||
|
||||
mock_run.assert_called_once()
|
||||
call_args = mock_run.call_args
|
||||
assert "sqlmesh" in call_args[0][0]
|
||||
assert call_args[1]["timeout"] == 3600
|
||||
|
||||
|
||||
def test_pipeline_run_invalid(mock_secrets):
|
||||
def test_pipeline_run_invalid():
|
||||
"""Test running an invalid pipeline."""
|
||||
result = runner.invoke(app, ["pipeline", "run", "invalid-pipeline"])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user