Refactor to local-first architecture on Hetzner NVMe

Remove distributed R2/Iceberg/SSH pipeline architecture in favor of
local subprocess execution with NVMe storage. Landing data backed up
to R2 via rclone timer.

- Strip Iceberg catalog, httpfs, boto3, paramiko, prefect, pyarrow
- Pipelines run via subprocess.run() with bounded timeouts
- Extract writes to {LANDING_DIR}/psd/{year}/{month}/{etag}.csv.gzip
- SQLMesh reads LANDING_DIR variable, writes to DUCKDB_PATH
- Delete unused provider stubs (ovh, scaleway, oracle)
- Add rclone systemd timer for R2 backup every 6h
- Update supervisor to run pipelines with env vars

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-18 18:05:41 +01:00
parent 910424c956
commit c1d00dcdc4
25 changed files with 231 additions and 1807 deletions

View File

@@ -1,7 +1,6 @@
"""Cloud provider abstraction for worker management."""
"""Cloud provider for worker management."""
from dataclasses import dataclass
from typing import Protocol
@dataclass
@@ -14,35 +13,10 @@ class Instance:
type: str
class ProviderModule(Protocol):
def create_instance(
self: str,
instance_type: str,
ssh_key: str,
location: str | None = None,
) -> Instance: ...
def destroy_instance(self: str) -> None: ...
def list_instances(self: str | None = None) -> list[Instance]: ...
def get_instance(self: str) -> Instance | None: ...
def wait_for_ssh(self: str, timeout: int = 300) -> bool: ...
def get_provider(provider_name: str) -> ProviderModule:
def get_provider(provider_name: str):
if provider_name == "hetzner":
from materia.providers import hetzner
return hetzner
elif provider_name == "ovh":
from materia.providers import ovh
return ovh
elif provider_name == "scaleway":
from materia.providers import scaleway
return scaleway
elif provider_name == "oracle":
from materia.providers import oracle
return oracle
else:
raise ValueError(f"Unknown provider: {provider_name}")

View File

@@ -1,28 +0,0 @@
"""Oracle Cloud provider implementation."""
from materia.providers import Instance
def create_instance(
name: str,
instance_type: str,
ssh_key: str,
location: str | None = None,
) -> Instance:
raise NotImplementedError("Oracle Cloud provider not yet implemented")
def destroy_instance(instance_id: str) -> None:
raise NotImplementedError("Oracle Cloud provider not yet implemented")
def list_instances(label: str | None = None) -> list[Instance]:
raise NotImplementedError("Oracle Cloud provider not yet implemented")
def get_instance(name: str) -> Instance | None:
raise NotImplementedError("Oracle Cloud provider not yet implemented")
def wait_for_ssh(ip: str, timeout: int = 300) -> bool:
raise NotImplementedError("Oracle Cloud provider not yet implemented")

View File

@@ -1,28 +0,0 @@
"""OVH Cloud provider implementation."""
from materia.providers import Instance
def create_instance(
name: str,
instance_type: str,
ssh_key: str,
location: str | None = None,
) -> Instance:
raise NotImplementedError("OVH provider not yet implemented")
def destroy_instance(instance_id: str) -> None:
raise NotImplementedError("OVH provider not yet implemented")
def list_instances(label: str | None = None) -> list[Instance]:
raise NotImplementedError("OVH provider not yet implemented")
def get_instance(name: str) -> Instance | None:
raise NotImplementedError("OVH provider not yet implemented")
def wait_for_ssh(ip: str, timeout: int = 300) -> bool:
raise NotImplementedError("OVH provider not yet implemented")

View File

@@ -1,28 +0,0 @@
"""Scaleway provider implementation."""
from materia.providers import Instance
def create_instance(
name: str,
instance_type: str,
ssh_key: str,
location: str | None = None,
) -> Instance:
raise NotImplementedError("Scaleway provider not yet implemented")
def destroy_instance(instance_id: str) -> None:
raise NotImplementedError("Scaleway provider not yet implemented")
def list_instances(label: str | None = None) -> list[Instance]:
raise NotImplementedError("Scaleway provider not yet implemented")
def get_instance(name: str) -> Instance | None:
raise NotImplementedError("Scaleway provider not yet implemented")
def wait_for_ssh(ip: str, timeout: int = 300) -> bool:
raise NotImplementedError("Scaleway provider not yet implemented")