implement cli/infra update cicd

This commit is contained in:
Deeman
2025-10-12 21:00:41 +02:00
parent 790e802edd
commit 55bb84f0fa
18 changed files with 2052 additions and 60 deletions

159
src/materia/cli.py Normal file
View File

@@ -0,0 +1,159 @@
"""Materia CLI - Management interface for BeanFlows.coffee infrastructure."""
import typer
from typing_extensions import Annotated
# Root Typer application for the `materia` command. The worker, pipeline and
# secrets command groups defined further down are attached to it via add_typer.
app = typer.Typer(
    name="materia",
    help="BeanFlows.coffee data platform management CLI",
    # Invoking the CLI with no arguments prints the help text.
    no_args_is_help=True,
)
@app.command()
def version():
    """Show Materia version."""
    # The docstring above doubles as the command's help text, so it is kept as is.
    banner = "Materia CLI v0.1.0"
    typer.echo(banner)
# Command group for worker management, mounted on the root app under "worker".
worker_app = typer.Typer(help="Manage worker instances")
app.add_typer(worker_app, name="worker")
@worker_app.command("list")
def worker_list(
    provider: Annotated[str, typer.Option("--provider", "-p")] = "hetzner",
):
    """List all active worker instances."""
    # Imported lazily so that loading the CLI does not import the worker module.
    from materia.workers import list_workers

    instances = list_workers(provider)
    if instances:
        # Fixed-width columns: name, IP, server type, status.
        header = f"{'NAME':<30} {'IP':<15} {'TYPE':<10} {'STATUS':<10}"
        typer.echo(header)
        typer.echo("-" * 70)
        for item in instances:
            row = f"{item.name:<30} {item.ip:<15} {item.type:<10} {item.status:<10}"
            typer.echo(row)
    else:
        typer.echo("No active workers")
@worker_app.command("create")
def worker_create(
    name: Annotated[str, typer.Argument(help="Worker name")],
    server_type: Annotated[str, typer.Option("--type", "-t")] = "ccx22",
    provider: Annotated[str, typer.Option("--provider", "-p")] = "hetzner",
    location: Annotated[str | None, typer.Option("--location", "-l")] = None,
):
    """Create a new worker instance."""
    # Imported lazily so that loading the CLI does not import the worker module.
    from materia.workers import create_worker

    announcement = f"Creating worker '{name}' ({server_type}) on {provider}..."
    typer.echo(announcement)

    created = create_worker(name, server_type, provider, location)
    # Report the address the new worker can be reached at.
    typer.echo(f"✓ Worker created: {created.ip}")
@worker_app.command("destroy")
def worker_destroy(
    name: Annotated[str, typer.Argument(help="Worker name")],
    provider: Annotated[str, typer.Option("--provider", "-p")] = "hetzner",
    force: Annotated[bool, typer.Option("--force", "-f")] = False,
):
    """Destroy a worker instance."""
    from materia.workers import destroy_worker

    # --force skips the prompt entirely; otherwise the user has to confirm.
    confirmed = force or typer.confirm(f"Destroy worker '{name}'?")
    if not confirmed:
        raise typer.Abort()

    typer.echo(f"Destroying worker '{name}'...")
    destroy_worker(name, provider)
    typer.echo("✓ Worker destroyed")
# Command group for pipeline execution, mounted on the root app under "pipeline".
pipeline_app = typer.Typer(help="Execute data pipelines")
app.add_typer(pipeline_app, name="pipeline")
@pipeline_app.command("run")
def pipeline_run(
    name: Annotated[str, typer.Argument(help="Pipeline name (extract, transform)")],
    worker_type: Annotated[str | None, typer.Option("--worker", "-w")] = None,
    provider: Annotated[str, typer.Option("--provider", "-p")] = "hetzner",
    keep: Annotated[bool, typer.Option("--keep", help="Keep worker after completion")] = False,
):
    """Run a pipeline on an ephemeral worker."""
    # Imported lazily so that loading the CLI does not import the pipeline module.
    from materia.pipelines import run_pipeline

    typer.echo(f"Running pipeline '{name}'...")
    # --keep inverts into auto_destroy: without it the worker is removed afterwards.
    result = run_pipeline(name, worker_type, auto_destroy=not keep, provider=provider)

    if result.success:
        typer.echo(result.output)
        typer.echo("\n✓ Pipeline completed successfully")
        return

    # Show whatever the pipeline printed before it failed; previously the
    # captured stdout was dropped on the failure path.
    if result.output:
        typer.echo(result.output)
    # The error text can be empty or None; fall back to a readable message
    # rather than printing a blank line.
    typer.echo(result.error or f"Pipeline '{name}' failed without error output", err=True)
    raise typer.Exit(1)
@pipeline_app.command("list")
def pipeline_list():
    """List available pipelines."""
    # Imported lazily so that loading the CLI does not import the pipeline module.
    from materia.pipelines import PIPELINES

    typer.echo("Available pipelines:")
    for pipeline_name in PIPELINES:
        spec = PIPELINES[pipeline_name]
        # One line per pipeline: name, default worker type and artifact file.
        line = f"{pipeline_name:<15} (worker: {spec.worker_type}, artifact: {spec.artifact})"
        typer.echo(line)
# Command group for secrets inspection, mounted on the root app under "secrets".
secrets_app = typer.Typer(help="Manage secrets via Pulumi ESC")
app.add_typer(secrets_app, name="secrets")
@secrets_app.command("list")
def secrets_list():
    """List available secrets (keys only)."""
    # Imported lazily so that loading the CLI does not import the secrets module.
    from materia.secrets import list_secrets

    keys = list_secrets()
    if keys:
        typer.echo("Available secrets:")
        # Only the key names are printed, never the values.
        for entry in keys:
            typer.echo(f"{entry}")
    else:
        typer.echo("No secrets configured")
@secrets_app.command("get")
def secrets_get(
    key: Annotated[str, typer.Argument(help="Secret key")],
):
    """Get a secret value."""
    # Imported lazily so that loading the CLI does not import the secrets module.
    from materia.secrets import get_secret

    secret = get_secret(key)
    if secret is not None:
        typer.echo(secret)
        return

    # Unknown key: report on stderr and exit with a non-zero status.
    typer.echo(f"Secret '{key}' not found", err=True)
    raise typer.Exit(1)
@secrets_app.command("test")
def secrets_test():
    """Test ESC connection and authentication."""
    # Imported lazily so that loading the CLI does not import the secrets module.
    from materia.secrets import test_connection

    typer.echo("Testing Pulumi ESC connection...")
    if not test_connection():
        # Failure goes to stderr, followed by a hint on stdout, then exit code 1.
        typer.echo("✗ ESC connection failed", err=True)
        typer.echo("\nMake sure you've run: esc login")
        raise typer.Exit(1)

    typer.echo("✓ ESC connection successful")
# Allow running this module directly as a script; invokes the root Typer app.
if __name__ == "__main__":
    app()

139
src/materia/pipelines.py Normal file
View File

@@ -0,0 +1,139 @@
"""Pipeline execution on ephemeral workers."""
import paramiko
from dataclasses import dataclass
from materia.workers import create_worker, destroy_worker
from materia.secrets import get_secret
@dataclass
class PipelineConfig:
    """Static description of one pipeline that run_pipeline can execute."""

    # Server type used for the worker when the caller does not override it.
    worker_type: str
    # File name of the artifact archive; appended to the R2 endpoint/bucket URL.
    artifact: str
    # Shell command executed on the worker after the artifact is unpacked.
    command: str
    # Names of secrets that are exported as environment variables for `command`.
    secrets: list[str]
@dataclass
class PipelineResult:
    """Outcome of a pipeline run as returned by run_pipeline."""

    # True when the pipeline command exited with status 0.
    success: bool
    # Captured stdout of the pipeline command ("" for early failures).
    output: str
    # Error text on failure; None when the run succeeded.
    error: str | None = None
# Registry of runnable pipelines, keyed by the name passed to run_pipeline.
PIPELINES = {
    # Runs the ./extract_psd binary from the extract artifact.
    "extract": PipelineConfig(
        worker_type="ccx12",
        artifact="materia-extract-latest.tar.gz",
        command="./extract_psd",
        secrets=["R2_ACCESS_KEY_ID", "R2_SECRET_ACCESS_KEY", "R2_ENDPOINT", "R2_ARTIFACTS_BUCKET"],
    ),
    # Runs `sqlmesh plan prod` from the sqlmesh_materia directory of the artifact.
    "transform": PipelineConfig(
        worker_type="ccx22",
        artifact="materia-transform-latest.tar.gz",
        command="cd sqlmesh_materia && ./sqlmesh plan prod",
        secrets=[
            "CLOUDFLARE_API_TOKEN",
            "ICEBERG_REST_URI",
            "R2_WAREHOUSE_NAME",
        ],
    ),
}
def _execute_ssh_command(ip: str, command: str, env_vars: dict[str, str]) -> tuple[str, str, int]:
    """Run a shell command on a remote host over SSH as root.

    Args:
        ip: Address of the host to connect to.
        command: Shell command line to execute remotely.
        env_vars: Variables exported in front of ``command``; may be empty.

    Returns:
        A ``(stdout, stderr, exit_code)`` tuple of the remote command.

    Raises:
        ValueError: If the ``SSH_PRIVATE_KEY_PATH`` secret is not configured.
    """
    import shlex

    ssh_key_path = get_secret("SSH_PRIVATE_KEY_PATH")
    if not ssh_key_path:
        raise ValueError("SSH_PRIVATE_KEY_PATH not found in secrets")

    # Shell-quote every value: secrets may contain quotes, spaces or shell
    # metacharacters, which naive '...' wrapping would break or let inject.
    exports = [f"export {key}={shlex.quote(str(value))} &&" for key, value in env_vars.items()]
    full_command = " ".join([*exports, command])

    client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts any host key without verification;
    # confirm this is acceptable for the hosts this is used against.
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # NOTE(review): only RSA private keys are loadable here; other key types
        # will fail in from_private_key_file.
        pkey = paramiko.RSAKey.from_private_key_file(ssh_key_path)
        client.connect(ip, username="root", pkey=pkey)
        _stdin, stdout, stderr = client.exec_command(full_command)
        # Drain the streams before asking for the exit status: waiting on the
        # status first can hang when the command produces a lot of output.
        output = stdout.read().decode()
        error = stderr.read().decode()
        exit_code = stdout.channel.recv_exit_status()
    finally:
        # Always release the connection, including when connect/exec raises.
        client.close()
    return output, error, exit_code
def run_pipeline(
    pipeline_name: str,
    worker_type: str | None = None,
    auto_destroy: bool = True,
    provider: str = "hetzner",
) -> PipelineResult:
    """Run a registered pipeline on a freshly created worker.

    The worker is created, the pipeline artifact is downloaded and unpacked on
    it, and the pipeline command is executed with the pipeline's secrets
    exported as environment variables.

    Args:
        pipeline_name: Key into ``PIPELINES``.
        worker_type: Server type override; defaults to the pipeline's own.
        auto_destroy: Destroy the worker afterwards, whether or not the run
            succeeded.
        provider: Cloud provider name handed to the worker functions.

    Returns:
        A ``PipelineResult``. Unknown pipelines, a missing ``R2_ENDPOINT``
        secret, bootstrap failures and a non-zero exit status are all reported
        as ``success=False`` rather than raised.
    """
    import logging
    import shlex

    if pipeline_name not in PIPELINES:
        return PipelineResult(
            success=False,
            output="",
            error=f"Unknown pipeline: {pipeline_name}. Available: {', '.join(PIPELINES.keys())}",
        )

    pipeline_config = PIPELINES[pipeline_name]
    worker_type = worker_type or pipeline_config.worker_type
    worker_name = f"materia-{pipeline_name}-worker"

    r2_bucket = get_secret("R2_ARTIFACTS_BUCKET") or "materia-artifacts"
    r2_endpoint = get_secret("R2_ENDPOINT")
    if not r2_endpoint:
        return PipelineResult(
            success=False,
            output="",
            error="R2_ENDPOINT not configured in secrets",
        )

    try:
        worker = create_worker(worker_name, worker_type, provider)

        artifact_url = f"https://{r2_endpoint}/{r2_bucket}/{pipeline_config.artifact}"
        bootstrap_commands = [
            # Quote the URL so characters in the endpoint/bucket cannot be
            # interpreted by the remote shell.
            f"curl -fsSL -o artifact.tar.gz {shlex.quote(artifact_url)}",
            "tar -xzf artifact.tar.gz",
            "chmod +x -R .",
        ]
        for cmd in bootstrap_commands:
            _, error, exit_code = _execute_ssh_command(worker.ip, cmd, {})
            if exit_code != 0:
                return PipelineResult(
                    success=False,
                    output="",
                    error=f"Bootstrap failed: {error}",
                )

        # Secrets that are not configured are skipped, not treated as an error.
        env_vars = {}
        for secret_key in pipeline_config.secrets:
            value = get_secret(secret_key)
            if value:
                env_vars[secret_key] = value

        output, error, exit_code = _execute_ssh_command(worker.ip, pipeline_config.command, env_vars)
        success = exit_code == 0
        return PipelineResult(
            success=success,
            output=output,
            error=error if not success else None,
        )
    finally:
        if auto_destroy:
            try:
                destroy_worker(worker_name, provider)
            except Exception:
                # Cleanup stays best-effort so it never masks the pipeline's own
                # outcome, but a worker that could not be destroyed may still be
                # running, so make the failure visible instead of dropping it.
                logging.getLogger(__name__).warning(
                    "Failed to destroy worker %r on provider %r",
                    worker_name,
                    provider,
                    exc_info=True,
                )

View File

@@ -0,0 +1,48 @@
"""Cloud provider abstraction for worker management."""
from dataclasses import dataclass
from typing import Protocol
@dataclass
class Instance:
    """Provider-independent description of a server instance."""

    # Provider-side identifier, kept as a string (Hetzner ids are converted with str()).
    id: str
    # Instance name; providers look instances up by it in get_instance.
    name: str
    # Address of the instance (the Hetzner provider fills in the public IPv4).
    ip: str
    # Status value as reported by the provider.
    status: str
    # Name of the provider that owns the instance, e.g. "hetzner".
    provider: str
    # Server/instance type name, e.g. the Hetzner server type.
    type: str
class ProviderModule(Protocol):
    """Structural type of a provider module as returned by get_provider.

    Provider modules implement these as plain module-level functions. For a
    module to satisfy a protocol, the protocol's methods are declared with
    ``self`` while the module's functions omit it; without ``self`` a type
    checker would treat the first real parameter as the receiver.
    """

    def create_instance(
        self,
        name: str,
        instance_type: str,
        ssh_key: str,
        location: str | None = None,
    ) -> Instance:
        """Create an instance and return its description."""
        ...

    def destroy_instance(self, instance_id: str) -> None:
        """Destroy the instance with the given provider-side id."""
        ...

    def list_instances(self, label: str | None = None) -> list[Instance]:
        """Return instances, optionally narrowed by ``label``."""
        ...

    def get_instance(self, name: str) -> Instance | None:
        """Return the instance called ``name``, or None if there is none."""
        ...

    def wait_for_ssh(self, ip: str, timeout: int = 300) -> bool:
        """Return whether SSH became reachable on ``ip`` within ``timeout``."""
        ...
def get_provider(provider_name: str) -> ProviderModule:
if provider_name == "hetzner":
from materia.providers import hetzner
return hetzner
elif provider_name == "ovh":
from materia.providers import ovh
return ovh
elif provider_name == "scaleway":
from materia.providers import scaleway
return scaleway
elif provider_name == "oracle":
from materia.providers import oracle
return oracle
else:
raise ValueError(f"Unknown provider: {provider_name}")

View File

@@ -0,0 +1,122 @@
"""Hetzner Cloud provider implementation."""
import time
import socket
from hcloud import Client
from hcloud.images import Image
from hcloud.server_types import ServerType
from materia.providers import Instance
from materia.secrets import get_secret
def _get_client() -> Client:
    """Build an hcloud client authenticated with the ``HETZNER_TOKEN`` secret.

    Raises:
        ValueError: If the secret is missing or empty.
    """
    api_token = get_secret("HETZNER_TOKEN")
    if api_token:
        return Client(token=api_token)
    raise ValueError("HETZNER_TOKEN not found in secrets")
def create_instance(
    name: str,
    instance_type: str,
    ssh_key: str,
    location: str | None = None,
) -> Instance:
    """Create an Ubuntu 24.04 server on Hetzner Cloud and wait until it is up.

    Args:
        name: Server name.
        instance_type: Hetzner server type name.
        ssh_key: Public key registered as "materia-key" if no key with that
            name exists yet.
        location: Hetzner location name; defaults to "nbg1".

    Returns:
        The created server described as an ``Instance``.
    """
    # Imported locally so this block does not depend on edits to the file header.
    from hcloud.locations import Location

    client = _get_client()

    # Get or create SSH key
    ssh_keys = client.ssh_keys.get_all(name="materia-key")
    if ssh_keys:
        hcloud_key = ssh_keys[0]
    else:
        hcloud_key = client.ssh_keys.create(name="materia-key", public_key=ssh_key)

    response = client.servers.create(
        name=name,
        server_type=ServerType(name=instance_type),
        image=Image(name="ubuntu-24.04"),
        ssh_keys=[hcloud_key],
        # The client expects a Location domain object here, not a bare string.
        location=Location(name=location or "nbg1"),
        # list_instances filters on this label.
        labels={"managed_by": "materia"},
    )

    # Creation is asynchronous: wait for the create action and any follow-up
    # actions the API reports before treating the server as ready.
    response.action.wait_until_finished()
    for action in response.next_actions or []:
        action.wait_until_finished()

    # Re-read the server so the returned status reflects the finished actions.
    server = client.servers.get_by_id(response.server.id)

    return Instance(
        id=str(server.id),
        name=server.name,
        ip=server.public_net.ipv4.ip,
        status=server.status,
        provider="hetzner",
        type=instance_type,
    )
def destroy_instance(instance_id: str) -> None:
    """Delete the Hetzner server whose numeric id is given as a string."""
    target = _get_client().servers.get_by_id(int(instance_id))
    if not target:
        # Nothing was returned for this id, so there is nothing to delete.
        return
    target.delete()
def list_instances(label: str | None = None) -> list[Instance]:
    """List Hetzner servers carrying the ``managed_by=materia`` label.

    Args:
        label: When given, additionally require the label ``pipeline=<label>``.

    Returns:
        One ``Instance`` per matching server.
    """
    client = _get_client()

    # The API takes a label selector *string* ("k=v,k2=v2"; comma means AND),
    # not a dict.
    selectors = ["managed_by=materia"]
    if label:
        selectors.append(f"pipeline={label}")
    servers = client.servers.get_all(label_selector=",".join(selectors))

    return [
        Instance(
            id=str(server.id),
            name=server.name,
            ip=server.public_net.ipv4.ip,
            status=server.status,
            provider="hetzner",
            type=server.server_type.name,
        )
        for server in servers
    ]
def get_instance(name: str) -> Instance | None:
    """Look up a Hetzner server by name.

    Returns:
        The first server with that name as an ``Instance``, or None when no
        server matches.
    """
    matches = _get_client().servers.get_all(name=name)
    if matches:
        found = matches[0]
        return Instance(
            id=str(found.id),
            name=found.name,
            ip=found.public_net.ipv4.ip,
            status=found.status,
            provider="hetzner",
            type=found.server_type.name,
        )
    return None
def wait_for_ssh(ip: str, timeout: int = 300) -> bool:
    """Poll TCP port 22 on ``ip`` until it accepts connections.

    Args:
        ip: Address to probe.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        True once a connection to port 22 succeeded (after an additional
        10-second pause), False if the timeout elapsed first.
    """
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            # The context manager closes the socket even when connecting raises.
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(5)
                reachable = sock.connect_ex((ip, 22)) == 0
        except OSError:
            # Socket-level failures (e.g. address resolution) count as "not yet".
            reachable = False

        if reachable:
            # Pause after the port opens before reporting success.
            time.sleep(10)
            return True
        time.sleep(5)
    return False

View File

@@ -0,0 +1,28 @@
"""Oracle Cloud provider implementation."""
from materia.providers import Instance
def create_instance(
    name: str,
    instance_type: str,
    ssh_key: str,
    location: str | None = None,
) -> Instance:
    """Placeholder for Oracle Cloud instance creation.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Oracle Cloud provider not yet implemented"
    raise NotImplementedError(reason)
def destroy_instance(instance_id: str) -> None:
    """Placeholder for Oracle Cloud instance removal.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Oracle Cloud provider not yet implemented"
    raise NotImplementedError(reason)
def list_instances(label: str | None = None) -> list[Instance]:
    """Placeholder for Oracle Cloud instance listing.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Oracle Cloud provider not yet implemented"
    raise NotImplementedError(reason)
def get_instance(name: str) -> Instance | None:
    """Placeholder for Oracle Cloud instance lookup.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Oracle Cloud provider not yet implemented"
    raise NotImplementedError(reason)
def wait_for_ssh(ip: str, timeout: int = 300) -> bool:
    """Placeholder for the Oracle Cloud SSH readiness check.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Oracle Cloud provider not yet implemented"
    raise NotImplementedError(reason)

View File

@@ -0,0 +1,28 @@
"""OVH Cloud provider implementation."""
from materia.providers import Instance
def create_instance(
    name: str,
    instance_type: str,
    ssh_key: str,
    location: str | None = None,
) -> Instance:
    """Placeholder for OVH instance creation.

    Raises:
        NotImplementedError: Always.
    """
    reason = "OVH provider not yet implemented"
    raise NotImplementedError(reason)
def destroy_instance(instance_id: str) -> None:
    """Placeholder for OVH instance removal.

    Raises:
        NotImplementedError: Always.
    """
    reason = "OVH provider not yet implemented"
    raise NotImplementedError(reason)
def list_instances(label: str | None = None) -> list[Instance]:
    """Placeholder for OVH instance listing.

    Raises:
        NotImplementedError: Always.
    """
    reason = "OVH provider not yet implemented"
    raise NotImplementedError(reason)
def get_instance(name: str) -> Instance | None:
    """Placeholder for OVH instance lookup.

    Raises:
        NotImplementedError: Always.
    """
    reason = "OVH provider not yet implemented"
    raise NotImplementedError(reason)
def wait_for_ssh(ip: str, timeout: int = 300) -> bool:
    """Placeholder for the OVH SSH readiness check.

    Raises:
        NotImplementedError: Always.
    """
    reason = "OVH provider not yet implemented"
    raise NotImplementedError(reason)

View File

@@ -0,0 +1,28 @@
"""Scaleway provider implementation."""
from materia.providers import Instance
def create_instance(
    name: str,
    instance_type: str,
    ssh_key: str,
    location: str | None = None,
) -> Instance:
    """Placeholder for Scaleway instance creation.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Scaleway provider not yet implemented"
    raise NotImplementedError(reason)
def destroy_instance(instance_id: str) -> None:
    """Placeholder for Scaleway instance removal.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Scaleway provider not yet implemented"
    raise NotImplementedError(reason)
def list_instances(label: str | None = None) -> list[Instance]:
    """Placeholder for Scaleway instance listing.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Scaleway provider not yet implemented"
    raise NotImplementedError(reason)
def get_instance(name: str) -> Instance | None:
    """Placeholder for Scaleway instance lookup.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Scaleway provider not yet implemented"
    raise NotImplementedError(reason)
def wait_for_ssh(ip: str, timeout: int = 300) -> bool:
    """Placeholder for the Scaleway SSH readiness check.

    Raises:
        NotImplementedError: Always.
    """
    reason = "Scaleway provider not yet implemented"
    raise NotImplementedError(reason)

44
src/materia/secrets.py Normal file
View File

@@ -0,0 +1,44 @@
"""Secrets management via Pulumi ESC."""
import json
import subprocess
from functools import lru_cache
@lru_cache(maxsize=1)
def _load_environment() -> dict[str, str]:
    """Load secrets from the Pulumi ESC ``prod`` environment.

    Runs ``esc env open prod --format json`` and returns the ``values`` object
    of its output (an empty dict when that key is absent). A successful result
    is cached for the lifetime of the process; failures are not cached.

    Raises:
        RuntimeError: If the ESC CLI is not installed, exits with an error, or
            prints something that is not a JSON object. The underlying
            exception is attached as ``__cause__`` where there is one.
    """
    command = ["esc", "env", "open", "prod", "--format", "json"]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Failed to load ESC environment: {e.stderr}") from e
    except FileNotFoundError as e:
        raise RuntimeError(
            "ESC CLI not found. Install with: curl -fsSL https://get.pulumi.com/esc/install.sh | sh"
        ) from e

    # Surface malformed output under the same exception type callers already
    # handle, instead of leaking JSONDecodeError / AttributeError.
    try:
        data = json.loads(result.stdout)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Failed to load ESC environment: invalid JSON output ({e})") from e
    if not isinstance(data, dict):
        raise RuntimeError("Failed to load ESC environment: expected a JSON object")
    return data.get("values", {})
def get_secret(key: str) -> str | None:
    """Return the secret stored under ``key``, or None if it is not defined."""
    return _load_environment().get(key)
def list_secrets() -> list[str]:
    """Return the names of all secrets in the loaded environment."""
    secrets_by_key = _load_environment()
    return [name for name in secrets_by_key]
def test_connection() -> bool:
    """Report whether the ESC environment can be loaded.

    Returns:
        True when loading succeeds, False when it raises any exception.
    """
    try:
        _load_environment()
    except Exception:
        # Any failure to load counts as "not connected".
        return False
    else:
        return True

44
src/materia/workers.py Normal file
View File

@@ -0,0 +1,44 @@
"""Worker instance management."""
from materia.providers import Instance, get_provider
from materia.secrets import get_secret
# Provider used by the worker functions below when the caller names none.
DEFAULT_PROVIDER = "hetzner"
def list_workers(provider: str = DEFAULT_PROVIDER) -> list[Instance]:
    """Return the instances reported by the given provider's ``list_instances``."""
    return get_provider(provider).list_instances()
def create_worker(
    name: str,
    server_type: str,
    provider: str = DEFAULT_PROVIDER,
    location: str | None = None,
) -> Instance:
    """Create a worker instance and wait until SSH is reachable on it.

    Args:
        name: Name for the new instance.
        server_type: Provider-specific server type.
        provider: Provider name resolved through ``get_provider``.
        location: Optional provider-specific location.

    Returns:
        The created instance.

    Raises:
        ValueError: If the ``SSH_PUBLIC_KEY`` secret is not configured.
        RuntimeError: If SSH never became available. The instance is destroyed
            before raising so that it does not keep running unused.
    """
    ssh_key = get_secret("SSH_PUBLIC_KEY")
    if not ssh_key:
        raise ValueError("SSH_PUBLIC_KEY not found in secrets")

    p = get_provider(provider)
    instance = p.create_instance(name, server_type, ssh_key, location)

    if p.wait_for_ssh(instance.ip):
        return instance

    # The instance exists but is unreachable; remove it rather than leaving a
    # server behind that nobody can use.
    try:
        p.destroy_instance(instance.id)
    except Exception as cleanup_error:
        raise RuntimeError(
            f"SSH never became available on {instance.ip}; "
            f"additionally failed to destroy instance {instance.id}"
        ) from cleanup_error
    raise RuntimeError(f"SSH never became available on {instance.ip}")
def destroy_worker(name: str, provider: str = DEFAULT_PROVIDER) -> None:
    """Destroy the worker called ``name`` on the given provider.

    Raises:
        ValueError: If the provider reports no instance with that name.
    """
    backend = get_provider(provider)
    target = backend.get_instance(name)
    if target:
        backend.destroy_instance(target.id)
        return
    raise ValueError(f"Worker '{name}' not found")
def get_worker(name: str, provider: str = DEFAULT_PROVIDER) -> Instance | None:
    """Return the worker called ``name`` from the given provider, or None."""
    return get_provider(provider).get_instance(name)