Refactor to local-first architecture on Hetzner NVMe
Remove the distributed R2/Iceberg/SSH pipeline architecture in favor of
local subprocess execution with NVMe storage. Landing data is backed up
to R2 via an rclone timer.
- Strip Iceberg catalog, httpfs, boto3, paramiko, prefect, pyarrow
- Pipelines run via subprocess.run() with bounded timeouts
- Extract writes to {LANDING_DIR}/psd/{year}/{month}/{etag}.csv.gzip
- SQLMesh reads LANDING_DIR variable, writes to DUCKDB_PATH
- Delete unused provider stubs (ovh, scaleway, oracle)
- Add rclone systemd timer for R2 backup every 6h
- Update supervisor to run pipelines with env vars
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -9,14 +9,11 @@ authors = [
 ]
 requires-python = ">=3.13"
 dependencies = [
-"pyarrow>=20.0.0",
 "python-dotenv>=1.1.0",
 "typer>=0.15.0",
-"paramiko>=3.5.0",
 "pyyaml>=6.0.2",
 "niquests>=3.15.2",
 "hcloud>=2.8.0",
-"prefect>=3.6.15",
 ]
 
 [project.scripts]
@@ -130,4 +127,5 @@ force-single-line = false
# Allow print statements and other rules in scripts
"scripts/*" = ["T201"]


[tool.pytest.ini_options]
testpaths = ["tests"]
Reference in New Issue
Block a user