"""BeanFlows.coffee infrastructure.

Cloudflare R2 + Iceberg + Hetzner compute stack, declared as a Pulumi program.

Resources declared here:
  * two Cloudflare R2 buckets (artifacts and the Iceberg lakehouse),
  * a Hetzner SSH key, one always-on supervisor server and its firewall,
  * stack outputs with the bucket names, the supervisor IP and DuckDB
    connection details.

Configuration keys:
  * ``cloudflare_account_id`` (required)
  * ``ssh_public_key``        (required, read as a secret)
  * ``hetzner_location``      (optional, defaults to ``nbg1``)
  * ``ssh_allowed_cidrs``     (optional list of CIDRs allowed to reach SSH;
                               defaults to any IPv4/IPv6 address)
"""

import pulumi
import pulumi_cloudflare as cloudflare
import pulumi_hcloud as hcloud

# Load configuration
config = pulumi.Config()
cloudflare_account_id = config.require("cloudflare_account_id")
hetzner_location = config.get("hetzner_location") or "nbg1"  # Nuremberg datacenter

# CIDR pair matching every IPv4 and IPv6 address.
ANY_ADDRESS = ["0.0.0.0/0", "::/0"]

# Sources allowed to open SSH connections. Unset keeps the previous behaviour
# (open to the world). To restrict it, set a list in the stack config, e.g.:
#   pulumi config set --path 'ssh_allowed_cidrs[0]' 203.0.113.4/32
ssh_allowed_cidrs = config.get_object("ssh_allowed_cidrs") or ANY_ADDRESS

# ============================================================
# Cloudflare R2 Storage + Data Catalog (Iceberg)
# ============================================================

# R2 bucket for artifacts (CLI + extract/transform packages)
# Note: Import existing bucket with:
#   pulumi import cloudflare:index/r2Bucket:R2Bucket beanflows-artifacts /beanflows-artifacts
# NOTE(review): the import ID starts with a bare "/", which suggests an
# account-ID prefix was dropped from this comment - confirm the ID format.
artifacts_bucket = cloudflare.R2Bucket(
    "beanflows-artifacts",
    account_id=cloudflare_account_id,
    name="beanflows-artifacts",
    location="weur",  # Western Europe
)

# R2 bucket for lakehouse (Iceberg tables)
# Note: Import existing bucket with:
#   pulumi import cloudflare:index/r2Bucket:R2Bucket beanflows-data-prod /beanflows-data-prod
# NOTE(review): same caveat about the import ID as for the artifacts bucket.
lakehouse_bucket = cloudflare.R2Bucket(
    "beanflows-data-prod",
    account_id=cloudflare_account_id,
    name="beanflows-data-prod",
    location="weur",
)

# ============================================================
# Hetzner Cloud Infrastructure
# ============================================================

# SSH key for server access
ssh_key = hcloud.SshKey(
    "materia-ssh-key",
    name="materia-deployment-key",
    public_key=config.require_secret("ssh_public_key"),
)

# First-boot provisioning script for the supervisor.
# NOTE(review): editing this script likely forces the server to be replaced
# (user data is normally immutable after creation) - confirm before changing.
# NOTE(review): python3.13 may not be packaged for the ubuntu-24.04 image, and
# $HOME may be unset while cloud-init runs; with `set -e` either would abort
# or misplace the install - verify on a fresh server.
SUPERVISOR_USER_DATA = """#!/bin/bash
set -e

# Basic server setup
apt-get update
apt-get install -y python3.13 python3-pip curl unzip

# Install Pulumi ESC CLI
curl -fsSL https://get.pulumi.com/esc/install.sh | sh
export PATH="$HOME/.pulumi/bin:$PATH"
echo 'export PATH="$HOME/.pulumi/bin:$PATH"' >> /root/.bashrc

# Create deployment directory
mkdir -p /opt/materia

# Configure environment
echo 'Setup complete. Materia CLI will be deployed via CI/CD.' > /opt/materia/README.txt
"""

# Small CCX instance for supervisor (runs materia CLI to orchestrate pipelines)
# This is an always-on instance that creates/destroys ephemeral workers on-demand
supervisor_server = hcloud.Server(
    "materia-supervisor",
    name="materia-supervisor",
    server_type="ccx11",  # 2 vCPU, 4GB RAM, ~€4/mo (cheapest option)
    image="ubuntu-24.04",
    location=hetzner_location,
    ssh_keys=[ssh_key.id],
    labels={
        "role": "supervisor",
        "project": "materia",
    },
    user_data=SUPERVISOR_USER_DATA,
)

# Note: Workers are created on-demand by the materia CLI
# No always-on worker instances in this architecture

# Firewall for servers (restrict to SSH + outbound only)
firewall = hcloud.Firewall(
    "materia-firewall",
    name="materia-firewall",
    rules=[
        # Allow SSH from the configured sources (anywhere unless
        # `ssh_allowed_cidrs` is set in the stack config).
        hcloud.FirewallRuleArgs(
            direction="in",
            protocol="tcp",
            port="22",
            source_ips=ssh_allowed_cidrs,
        ),
        # Allow all outbound traffic. Only protocols listed here are covered
        # by an allow rule, so TCP, UDP and ICMP each get their own entry.
        hcloud.FirewallRuleArgs(
            direction="out",
            protocol="tcp",
            port="any",
            destination_ips=ANY_ADDRESS,
        ),
        hcloud.FirewallRuleArgs(
            direction="out",
            protocol="udp",
            port="any",
            destination_ips=ANY_ADDRESS,
        ),
        # ICMP has no ports, so `port` is intentionally omitted.
        hcloud.FirewallRuleArgs(
            direction="out",
            protocol="icmp",
            destination_ips=ANY_ADDRESS,
        ),
    ],
)

# Apply firewall to supervisor
supervisor_firewall = hcloud.FirewallAttachment(
    "supervisor-firewall",
    firewall_id=firewall.id,
    server_ids=[supervisor_server.id],
)

# ============================================================
# Outputs
# ============================================================

pulumi.export("artifacts_bucket_name", artifacts_bucket.name)
pulumi.export("lakehouse_bucket_name", lakehouse_bucket.name)
pulumi.export("supervisor_ip", supervisor_server.ipv4_address)


def _duckdb_r2_config(resolved):
    """Build the DuckDB connection dict from [account_id, bucket_name]."""
    account_id, bucket_name = resolved
    return {
        "account_id": account_id,
        "bucket": bucket_name,
        # NOTE(review): the last path segment is the fixed string
        # "r2-data-catalog", not the bucket name - confirm this matches the
        # catalog endpoint the DuckDB client expects.
        "catalog_uri": f"https://catalog.cloudflarestorage.com/{account_id}/r2-data-catalog",
    }


# Export connection info for DuckDB
pulumi.export(
    "duckdb_r2_config",
    pulumi.Output.all(cloudflare_account_id, lakehouse_bucket.name).apply(
        _duckdb_r2_config
    ),
)