"""BeanFlows.coffee Infrastructure

Cloudflare R2 + Iceberg + Hetzner compute stack.
"""

import pulumi
import pulumi_cloudflare as cloudflare
import pulumi_hcloud as hcloud

# Load configuration
config = pulumi.Config()
cloudflare_account_id = config.require("cloudflare_account_id")
hetzner_location = config.get("hetzner_location") or "nbg1"  # Nuremberg datacenter

# ============================================================
# Cloudflare R2 Storage + Data Catalog (Iceberg)
# ============================================================

# R2 bucket for raw data (extraction outputs)
raw_bucket = cloudflare.R2Bucket(
    "materia-raw",
    account_id=cloudflare_account_id,
    name="materia-raw",
    location="weur",  # Western Europe
)

# R2 bucket for lakehouse (Iceberg tables)
lakehouse_bucket = cloudflare.R2Bucket(
    "materia-lakehouse",
    account_id=cloudflare_account_id,
    name="materia-lakehouse",
    location="weur",
)

# TODO: Enable R2 Data Catalog (Iceberg) on lakehouse bucket
# Note: As of Oct 2025, R2 Data Catalog is in public beta
# May need to enable via Cloudflare dashboard or API once SDK supports it
# For now, document manual step in README

# API token for R2 access (needs R2 + Data Catalog permissions)
# Note: Create this manually in Cloudflare dashboard and store in Pulumi config
# pulumi config set --secret cloudflare_r2_token

# ============================================================
# Hetzner Cloud Infrastructure
# ============================================================

# SSH key for server access
ssh_key = hcloud.SshKey(
    "materia-ssh-key",
    name="materia-deployment-key",
    public_key=config.require_secret("ssh_public_key"),
)

# Shared cloud-init script for every server (was duplicated per server).
# FIX: Ubuntu 24.04 ("noble") ships Python 3.12; there is no "python3.13"
# package in the default archive, so the previous install line would fail
# during cloud-init. Installing python3.12 instead.
# NOTE(review): recent uv installers place the binary in ~/.local/bin rather
# than ~/.cargo/bin -- confirm the PATH line against the uv version in use.
SERVER_USER_DATA = """#!/bin/bash
# Basic server setup
apt-get update
apt-get install -y python3.12 python3-pip git curl

# Install uv
curl -LsSf https://astral.sh/uv/install.sh | sh

# Configure environment
echo 'export PATH="$HOME/.cargo/bin:$PATH"' >> /root/.bashrc
"""

# Small CCX instance for scheduler/orchestrator
# This runs the cron scheduler + lightweight tasks
scheduler_server = hcloud.Server(
    "materia-scheduler",
    name="materia-scheduler",
    server_type="ccx12",  # 2 vCPU, 8GB RAM, ~€6/mo
    image="ubuntu-24.04",
    location=hetzner_location,
    ssh_keys=[ssh_key.id],
    labels={
        "role": "scheduler",
        "project": "materia",
    },
    user_data=SERVER_USER_DATA,
)

# Larger CCX instance for heavy SQLMesh workloads
# This gets spun up on-demand for big transformations
worker_server = hcloud.Server(
    "materia-worker-01",
    name="materia-worker-01",
    server_type="ccx22",  # 4 vCPU, 16GB RAM, ~€24/mo
    image="ubuntu-24.04",
    location=hetzner_location,
    ssh_keys=[ssh_key.id],
    labels={
        "role": "worker",
        "project": "materia",
    },
    user_data=SERVER_USER_DATA,
)

# Firewall for servers (restrict to SSH + outbound only)
# NOTE(review): Hetzner firewalls switch to deny-by-default for egress once
# any outbound rule is defined -- ICMP out is not listed here, so outbound
# ping/traceroute from these servers will be blocked. Add an icmp rule if
# that matters.
firewall = hcloud.Firewall(
    "materia-firewall",
    name="materia-firewall",
    rules=[
        # Allow SSH from anywhere (consider restricting to your IP)
        hcloud.FirewallRuleArgs(
            direction="in",
            protocol="tcp",
            port="22",
            source_ips=["0.0.0.0/0", "::/0"],
        ),
        # Allow all outbound TCP traffic
        hcloud.FirewallRuleArgs(
            direction="out",
            protocol="tcp",
            port="any",
            destination_ips=["0.0.0.0/0", "::/0"],
        ),
        # Allow all outbound UDP traffic (DNS, NTP, ...)
        hcloud.FirewallRuleArgs(
            direction="out",
            protocol="udp",
            port="any",
            destination_ips=["0.0.0.0/0", "::/0"],
        ),
    ],
)

# Apply firewall to all servers
scheduler_firewall = hcloud.FirewallAttachment(
    "scheduler-firewall",
    firewall_id=firewall.id,
    server_ids=[scheduler_server.id],
)

worker_firewall = hcloud.FirewallAttachment(
    "worker-firewall",
    firewall_id=firewall.id,
    server_ids=[worker_server.id],
)

# ============================================================
# Outputs
# ============================================================

pulumi.export("raw_bucket_name", raw_bucket.name)
pulumi.export("lakehouse_bucket_name", lakehouse_bucket.name)
# Public IPv4 addresses of the compute nodes.
pulumi.export("scheduler_ip", scheduler_server.ipv4_address)
pulumi.export("worker_ip", worker_server.ipv4_address)


def _duckdb_r2_config(args):
    """Build the DuckDB/R2 connection dict from resolved (account_id, bucket)."""
    account_id, bucket = args
    return {
        "account_id": account_id,
        "bucket": bucket,
        "catalog_uri": f"https://catalog.cloudflarestorage.com/{account_id}/r2-data-catalog",
    }


# Export connection info for DuckDB
pulumi.export(
    "duckdb_r2_config",
    pulumi.Output.all(cloudflare_account_id, lakehouse_bucket.name).apply(
        _duckdb_r2_config
    ),
)