feat: add scripts/prod_query.py — SSH query tool for prod DuckDB
Runs read-only SQL against analytics.duckdb (default) or lakehouse.duckdb on the prod server over SSH. SQL is base64-encoded to avoid shell escaping. Supports TSV (default) and JSON output. Blocks mutation keywords. For lakehouse, works around the DuckDB catalog naming issue (SQLMesh views reference "local" but the file creates catalog "lakehouse") by attaching the file as the "local" catalog. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
124
scripts/prod_query.py
Normal file
124
scripts/prod_query.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""
|
||||
Run a read-only SQL query against the production DuckDB (analytics or lakehouse).
|
||||
|
||||
Usage:
|
||||
uv run python scripts/prod_query.py "SELECT COUNT(*) FROM serving.location_profiles"
|
||||
uv run python scripts/prod_query.py --db lakehouse "SELECT * FROM foundation.dim_countries LIMIT 5"
|
||||
echo "SELECT 1" | uv run python scripts/prod_query.py --stdin
|
||||
|
||||
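The script SSHes to the prod server, runs the query via Python/DuckDB, and prints
tab-separated results with a header row (or a JSON object with --json), capped at
--max-rows rows (default 500). Read-only: analytics is opened with read_only=True;
lakehouse is attached with READ_ONLY to an in-memory connection.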
|
||||
|
||||
For lakehouse queries, automatically aliases the catalog as "local" so SQLMesh views work.
|
||||
|
||||
Designed for Claude Code to call without nested shell escaping nightmares.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import base64
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
# SSH destination passed to `ssh`.
SSH_HOST = "hetzner_root"

# Account the remote command runs as (via `sudo -u`).
SSH_USER = "padelnomics_service"

# Database name (as accepted by --db) -> absolute path of the DuckDB file on the server.
DB_PATHS = dict(
    analytics="/opt/padelnomics/data/analytics.duckdb",
    lakehouse="/opt/padelnomics/data/lakehouse.duckdb",
)

# Default for --max-rows.
MAX_ROWS = 500

# Base budget, in seconds, for the SSH round trip.
TIMEOUT_SECONDS = 30

# Mutation keywords blocked (defense in depth — DB is read_only anyway)
BLOCKED_KEYWORDS = {
    "ALTER",
    "ATTACH",
    "COPY",
    "CREATE",
    "DELETE",
    "DROP",
    "EXPORT",
    "INSERT",
    "INSTALL",
    "LOAD",
    "UPDATE",
}
|
||||
|
||||
# Remote Python script template, rendered with str.format() and fed to the
# remote interpreter on stdin.
#
# Placeholders: db_path (DuckDB file on the server), b64_sql (the query,
# base64-encoded so no quoting/escaping is needed), max_rows (row cap) and
# output_json (True -> one JSON object, False -> TSV with a header row).
# Literal braces in the remote code are doubled so they survive .format().
#
# Uses ATTACH + USE to alias the lakehouse catalog as "local" for SQLMesh view compat.
#
# The remote script prints results on stdout, and a truncation notice or
# "ERROR: ..." on stderr; it exits 1 on any exception.
REMOTE_SCRIPT = """\
import duckdb, json, sys, base64
db_path = "{db_path}"
sql = base64.b64decode("{b64_sql}").decode()
max_rows = {max_rows}
output_json = {output_json}
try:
    if "lakehouse" in db_path:
        con = duckdb.connect(":memory:")
        con.execute(f"ATTACH '{db_path}' AS local (READ_ONLY)")
        con.execute("USE local")
    else:
        con = duckdb.connect(db_path, read_only=True)
    result = con.execute(sql)
    cols = [d[0] for d in result.description]
    # Fetch one row beyond the cap so truncation is reported only when rows
    # were really dropped, not when the result has exactly max_rows rows.
    rows = result.fetchmany(max_rows + 1)
    truncated = len(rows) > max_rows
    rows = rows[:max_rows]
    if output_json:
        print(json.dumps({{"columns": cols, "rows": [list(r) for r in rows], "count": len(rows)}}, default=str))
    else:
        print("\\t".join(cols))
        for row in rows:
            print("\\t".join(str(v) if v is not None else "NULL" for v in row))
    if truncated:
        print(f"... truncated at {{max_rows}} rows", file=sys.stderr)
except Exception as e:
    print(f"ERROR: {{e}}", file=sys.stderr)
    sys.exit(1)
"""
|
||||
|
||||
|
||||
def main():
    """Parse CLI arguments, vet the SQL, and run it on the prod server over SSH.

    The SQL is taken from the positional argument, or from stdin when --stdin
    is given or no positional argument is supplied. It is base64-encoded into
    REMOTE_SCRIPT, which is piped to a remote ``python3 -`` on stdin so the
    query never passes through a shell.

    Exit status:
        1 if no SQL was provided, a blocked keyword was found, or the SSH
        call timed out; 2 (argparse) for invalid arguments; otherwise the
        exit status of the ssh command.
    """
    # Function-scope import: `re` is not among the module-level imports.
    import re

    parser = argparse.ArgumentParser(description="Query prod DuckDB over SSH")
    parser.add_argument("sql", nargs="?", help="SQL query to run")
    parser.add_argument("--stdin", action="store_true", help="Read SQL from stdin")
    parser.add_argument(
        "--db",
        choices=list(DB_PATHS.keys()),
        default="analytics",
        help="Which database (default: analytics)",
    )
    parser.add_argument(
        "--max-rows", type=int, default=MAX_ROWS, help=f"Max rows (default: {MAX_ROWS})"
    )
    parser.add_argument("--json", action="store_true", help="Output JSON instead of TSV")
    args = parser.parse_args()

    # A negative cap has no sensible meaning for the remote fetch.
    if args.max_rows < 0:
        parser.error("--max-rows must be >= 0")

    sql = args.sql
    if args.stdin or sql is None:
        sql = sys.stdin.read()
    sql = sql.strip()
    if not sql:
        print("ERROR: No SQL provided", file=sys.stderr)
        sys.exit(1)

    # Compare whole identifier-like tokens rather than substrings, so column
    # names such as created_at / updated_at do not trip CREATE / UPDATE.
    # Keywords inside string literals or quoted identifiers are still
    # rejected; the filter deliberately errs on the strict side.
    tokens = set(re.findall(r"[A-Z_][A-Z0-9_]*", sql.upper()))
    blocked = sorted(tokens.intersection(BLOCKED_KEYWORDS))
    if blocked:
        # Sorted so the reported keyword is deterministic.
        print(f"ERROR: Blocked keyword '{blocked[0]}' in query", file=sys.stderr)
        sys.exit(1)

    b64_sql = base64.b64encode(sql.encode()).decode()
    remote_script = REMOTE_SCRIPT.format(
        db_path=DB_PATHS[args.db],
        b64_sql=b64_sql,
        max_rows=args.max_rows,
        output_json=args.json,
    )

    cmd = [
        "ssh", SSH_HOST,
        f"sudo -u {SSH_USER} bash -lc 'cd /opt/padelnomics && uv run python3 -'",
    ]

    # NOTE(review): the extra 10 seconds is presumably headroom for SSH
    # connection setup — confirm.
    timeout_seconds = TIMEOUT_SECONDS + 10
    try:
        result = subprocess.run(
            cmd,
            input=remote_script,
            capture_output=True,
            text=True,
            timeout=timeout_seconds,
        )
    except subprocess.TimeoutExpired:
        # Report a clean error instead of an unhandled traceback.
        print(f"ERROR: Query timed out after {timeout_seconds} seconds", file=sys.stderr)
        sys.exit(1)

    # Relay the remote output unchanged and propagate the remote exit status.
    if result.stdout:
        print(result.stdout, end="")
    if result.stderr:
        print(result.stderr, end="", file=sys.stderr)
    sys.exit(result.returncode)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user