Refactor to local-first architecture on Hetzner NVMe

Remove the distributed R2/Iceberg/SSH pipeline architecture in favor of
local subprocess execution with NVMe storage. Landing data is backed up
to R2 via an rclone timer.

- Strip Iceberg catalog, httpfs, boto3, paramiko, prefect, pyarrow
- Pipelines run via subprocess.run() with bounded timeouts
- Extract writes to {LANDING_DIR}/psd/{year}/{month}/{etag}.csv.gzip
- SQLMesh reads LANDING_DIR variable, writes to DUCKDB_PATH
- Delete unused provider stubs (ovh, scaleway, oracle)
- Add rclone systemd timer for R2 backup every 6h
- Update supervisor to run pipelines with env vars

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-18 18:05:41 +01:00
parent 910424c956
commit c1d00dcdc4
25 changed files with 231 additions and 1807 deletions

View File

@@ -13,16 +13,9 @@ def mock_esc_env(tmp_path):
return {
"HETZNER_API_TOKEN": "test-hetzner-token",
"R2_ACCESS_KEY_ID": "test-r2-key",
"R2_SECRET_ACCESS_KEY": "test-r2-secret",
"R2_ENDPOINT": "test.r2.cloudflarestorage.com",
"R2_ARTIFACTS_BUCKET": "test-artifacts",
"SSH_PUBLIC_KEY": "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAITest",
"SSH_PRIVATE_KEY": "-----BEGIN OPENSSH PRIVATE KEY-----\ntest\n-----END OPENSSH PRIVATE KEY-----",
"SSH_PRIVATE_KEY_PATH": str(ssh_key_path),
"CLOUDFLARE_API_TOKEN": "test-cf-token",
"ICEBERG_REST_URI": "https://api.cloudflare.com/test",
"R2_WAREHOUSE_NAME": "test-warehouse",
}
@@ -67,33 +60,3 @@ def mock_ssh_wait():
"""Mock SSH wait function to return immediately."""
with patch("materia.providers.hetzner.wait_for_ssh", return_value=True):
yield
@pytest.fixture
def mock_ssh_connection():
    """Yield a stand-in paramiko SSH client whose commands report success.

    For the lifetime of the fixture, ``materia.pipelines.paramiko.SSHClient``
    is patched so that instantiating it returns the yielded mock, and
    ``materia.pipelines.paramiko.RSAKey.from_private_key_file`` is patched to
    return a plain ``Mock``.

    The yielded client's ``exec_command`` returns a ``(stdin, stdout, stderr)``
    tuple in which ``stdout.read()`` gives ``b"Success\\n"``,
    ``stdout.channel.recv_exit_status()`` gives ``0`` and ``stderr.read()``
    gives ``b""``.
    """
    client_patch = patch("materia.pipelines.paramiko.SSHClient")
    key_patch = patch("materia.pipelines.paramiko.RSAKey.from_private_key_file")

    with client_patch as ssh_cls, key_patch as key_loader:
        client = Mock()
        ssh_cls.return_value = client
        key_loader.return_value = Mock()

        # Explicit mocks for the connection-setup calls.
        client.connect = Mock()
        client.set_missing_host_key_policy = Mock()

        # stdout stream: carries the payload and a channel with exit status 0.
        exit_channel = Mock()
        exit_channel.recv_exit_status.return_value = 0
        out_stream = Mock()
        out_stream.read.return_value = b"Success\n"
        out_stream.channel = exit_channel

        # stderr stream: empty.
        err_stream = Mock()
        err_stream.read.return_value = b""

        # exec_command hands back (stdin, stdout, stderr); stdin is a bare mock.
        command_result = (Mock(), out_stream, err_stream)
        client.exec_command = Mock(return_value=command_result)
        client.close = Mock()

        yield client