140 lines
4.4 KiB
Python
140 lines
4.4 KiB
Python
"""
BeanFlows.coffee Infrastructure
Cloudflare R2 + Iceberg + Hetzner compute stack
"""
|
|
|
|
import pulumi
|
|
import pulumi_cloudflare as cloudflare
|
|
import pulumi_hcloud as hcloud
|
|
|
|
# Stack configuration.
# The Cloudflare account id is mandatory; the Hetzner location is optional and
# falls back to "nbg1" whenever it is unset or empty.
config = pulumi.Config()
cloudflare_account_id = config.require("cloudflare_account_id")

_configured_location = config.get("hetzner_location")
hetzner_location = _configured_location if _configured_location else "nbg1"  # Nuremberg datacenter
|
|
|
|
# ============================================================
|
|
# Cloudflare R2 Storage + Data Catalog (Iceberg)
|
|
# ============================================================
|
|
|
|
# Artifacts bucket: holds the CLI plus the extract/transform packages.
# If the bucket already exists, adopt it into the stack first with:
#   pulumi import cloudflare:index/r2Bucket:R2Bucket beanflows-artifacts <account_id>/beanflows-artifacts
_ARTIFACTS_BUCKET = "beanflows-artifacts"  # used as both the Pulumi resource name and the bucket name

artifacts_bucket = cloudflare.R2Bucket(
    _ARTIFACTS_BUCKET,
    name=_ARTIFACTS_BUCKET,
    account_id=cloudflare_account_id,
    location="weur",  # Western Europe
)
|
|
|
|
# Lakehouse bucket: storage for the Iceberg tables.
# If the bucket already exists, adopt it into the stack first with:
#   pulumi import cloudflare:index/r2Bucket:R2Bucket beanflows-data-prod <account_id>/beanflows-data-prod
_LAKEHOUSE_BUCKET = "beanflows-data-prod"  # used as both the Pulumi resource name and the bucket name

lakehouse_bucket = cloudflare.R2Bucket(
    _LAKEHOUSE_BUCKET,
    name=_LAKEHOUSE_BUCKET,
    account_id=cloudflare_account_id,
    location="weur",
)
|
|
|
|
# ============================================================
|
|
# Hetzner Cloud Infrastructure
|
|
# ============================================================
|
|
|
|
# Deployment SSH key registered with Hetzner for server access.
# The public key is read from stack config via require_secret, so the stack
# fails fast when "ssh_public_key" is not configured.
_deploy_public_key = config.require_secret("ssh_public_key")

ssh_key = hcloud.SshKey(
    "materia-ssh-key",
    name="materia-deployment-key",
    public_key=_deploy_public_key,
)
|
|
|
|
# Supervisor: a small, always-on CCX instance that runs the materia CLI to
# orchestrate pipelines, creating/destroying ephemeral workers on demand.
#
# The first-boot script below installs base packages and the Pulumi ESC CLI,
# then prepares /opt/materia for the CI/CD-delivered materia CLI.
#
# NOTE(review): cloud-init may run this script without HOME set; if so, the
#   "$HOME/.pulumi/bin" PATH entry would not match the /root path later
#   resolved from /root/.bashrc -- confirm on a fresh instance.
# NOTE(review): python3.13 may not be installable from the stock Ubuntu 24.04
#   apt sources; with `set -e`, a failed install aborts every later step --
#   verify.
_SUPERVISOR_BOOTSTRAP = """#!/bin/bash
set -e

# Basic server setup
apt-get update
apt-get install -y python3.13 python3-pip curl unzip

# Install Pulumi ESC CLI
curl -fsSL https://get.pulumi.com/esc/install.sh | sh
export PATH="$HOME/.pulumi/bin:$PATH"
echo 'export PATH="$HOME/.pulumi/bin:$PATH"' >> /root/.bashrc

# Create deployment directory
mkdir -p /opt/materia

# Configure environment
echo 'Setup complete. Materia CLI will be deployed via CI/CD.' > /opt/materia/README.txt
"""

_SUPERVISOR_LABELS = {
    "role": "supervisor",
    "project": "materia",
}

supervisor_server = hcloud.Server(
    "materia-supervisor",
    name="materia-supervisor",
    # Author's sizing note: 2 vCPU, 4GB RAM, ~€4/mo (cheapest option).
    # NOTE(review): confirm these specs/price against the current ccx11 offering.
    server_type="ccx11",
    image="ubuntu-24.04",
    location=hetzner_location,
    ssh_keys=[ssh_key.id],
    labels=_SUPERVISOR_LABELS,
    user_data=_SUPERVISOR_BOOTSTRAP,
)
|
|
|
|
# Note: Workers are created on-demand by the materia CLI
|
|
# No always-on worker instances in this architecture
|
|
|
|
# Firewall for the servers: inbound SSH plus outbound TCP/UDP.
#
# NOTE(review): the outbound rules cover TCP and UDP only. If Hetzner drops
#   unmatched outbound traffic once any outbound rule exists, other protocols
#   (e.g. ICMP) are blocked despite the "allow all outbound" intent -- confirm.
_ANY_ADDRESS = ("0.0.0.0/0", "::/0")  # every IPv4 and IPv6 address

# SSH is reachable from anywhere (consider restricting to your IP).
_inbound_ssh_rule = hcloud.FirewallRuleArgs(
    direction="in",
    protocol="tcp",
    port="22",
    source_ips=list(_ANY_ADDRESS),
)

# One unrestricted outbound rule per transport protocol, TCP first.
_outbound_rules = [
    hcloud.FirewallRuleArgs(
        direction="out",
        protocol=transport,
        port="any",
        destination_ips=list(_ANY_ADDRESS),
    )
    for transport in ("tcp", "udp")
]

firewall = hcloud.Firewall(
    "materia-firewall",
    name="materia-firewall",
    rules=[_inbound_ssh_rule, *_outbound_rules],
)
|
|
|
|
# Bind the shared firewall to the supervisor server.
_protected_server_ids = [supervisor_server.id]

supervisor_firewall = hcloud.FirewallAttachment(
    "supervisor-firewall",
    server_ids=_protected_server_ids,
    firewall_id=firewall.id,
)
|
|
|
|
# ============================================================
|
|
# Outputs
|
|
# ============================================================
|
|
|
|
# Publish both bucket names and the supervisor's IPv4 address as stack outputs.
for _output_name, _output_value in (
    ("artifacts_bucket_name", artifacts_bucket.name),
    ("lakehouse_bucket_name", lakehouse_bucket.name),
    ("supervisor_ip", supervisor_server.ipv4_address),
):
    pulumi.export(_output_name, _output_value)
|
|
|
|
# Export connection info for DuckDB.
#
# The R2 Data Catalog endpoint is scoped per bucket:
#   https://catalog.cloudflarestorage.com/<account_id>/<bucket_name>
# The URI is therefore derived from the lakehouse bucket. The previous value
# ended in the fixed segment "r2-data-catalog", which is not a bucket managed
# by this stack.
pulumi.export(
    "duckdb_r2_config",
    # Output.all resolves both inputs before the dict is built:
    # args[0] is the Cloudflare account id, args[1] the lakehouse bucket name.
    pulumi.Output.all(cloudflare_account_id, lakehouse_bucket.name).apply(
        lambda args: {
            "account_id": args[0],
            "bucket": args[1],
            "catalog_uri": f"https://catalog.cloudflarestorage.com/{args[0]}/{args[1]}",
        }
    ),
)
|