From 8a6fd614321b116e2291f9e8e3e771e4939172b0 Mon Sep 17 00:00:00 2001
From: Deeman
Date: Tue, 24 Feb 2026 19:31:34 +0100
Subject: [PATCH] =?UTF-8?q?fix:=20bound=20unbounded=20operations=20?=
 =?UTF-8?q?=E2=80=94=20LIMIT=20on=20scenarios,=20timeouts=20on=20DuckDB=20?=
 =?UTF-8?q?and=20Resend?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- admin/routes.py: add LIMIT 500 to scenarios() — was unbounded, could return arbitrarily large result sets and exhaust memory
- analytics.py: wrap asyncio.to_thread(DuckDB) in asyncio.wait_for with _QUERY_TIMEOUT_SECONDS=30 so a slow scan cannot permanently starve the asyncio thread pool
- core.py: replace resend.default_http_client with RequestsClient(timeout=10) so all Resend API calls are capped at 10 s (default was 30 s)

Co-Authored-By: Claude Opus 4.6
---
 web/src/padelnomics/admin/routes.py |  2 +-
 web/src/padelnomics/analytics.py    | 12 +++++++++++-
 web/src/padelnomics/core.py         |  5 +++++
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/web/src/padelnomics/admin/routes.py b/web/src/padelnomics/admin/routes.py
index ad3f133..996133b 100644
--- a/web/src/padelnomics/admin/routes.py
+++ b/web/src/padelnomics/admin/routes.py
@@ -1438,7 +1438,7 @@ async def scenarios():
     where = " AND ".join(wheres)
 
     scenario_list = await fetch_all(
-        f"SELECT * FROM published_scenarios WHERE {where} ORDER BY created_at DESC",
+        f"SELECT * FROM published_scenarios WHERE {where} ORDER BY created_at DESC LIMIT 500",
         tuple(params),
     )
     countries = await fetch_all(
diff --git a/web/src/padelnomics/analytics.py b/web/src/padelnomics/analytics.py
index 4d7e432..5379a67 100644
--- a/web/src/padelnomics/analytics.py
+++ b/web/src/padelnomics/analytics.py
@@ -20,6 +20,10 @@ logger = logging.getLogger(__name__)
 _conn = None  # duckdb.DuckDBPyConnection | None — lazy import
 _DUCKDB_PATH = os.environ.get("SERVING_DUCKDB_PATH", "data/analytics.duckdb")
 
+# DuckDB queries run in the asyncio thread pool. Cap them so a slow scan
+# cannot starve the pool and leave all workers busy.
+_QUERY_TIMEOUT_SECONDS = 30
+
 
 def open_analytics_db() -> None:
     """Open the DuckDB connection. Call once at app startup."""
@@ -63,7 +67,13 @@ async def fetch_analytics(sql: str, params: list | None = None) -> list[dict[str
             cur.close()
 
     try:
-        return await asyncio.to_thread(_run)
+        return await asyncio.wait_for(
+            asyncio.to_thread(_run),
+            timeout=_QUERY_TIMEOUT_SECONDS,
+        )
+    except asyncio.TimeoutError:
+        logger.error("DuckDB analytics query timed out after %ds: %.200s", _QUERY_TIMEOUT_SECONDS, sql)
+        return []
     except Exception:
         logger.exception("DuckDB analytics query failed: %.200s", sql)
         return []
diff --git a/web/src/padelnomics/core.py b/web/src/padelnomics/core.py
index 76a5f08..f23ba24 100644
--- a/web/src/padelnomics/core.py
+++ b/web/src/padelnomics/core.py
@@ -18,6 +18,11 @@ from pathlib import Path
 import aiosqlite
 import resend
 from dotenv import load_dotenv
+
+# Cap all Resend API calls at 10 s — the default RequestsClient timeout is 30 s.
+# These calls run synchronously on the event loop thread; a shorter cap limits stalls.
+_RESEND_TIMEOUT_SECONDS = 10
+resend.default_http_client = resend.RequestsClient(timeout=_RESEND_TIMEOUT_SECONDS)
 from quart import g, make_response, render_template, request, session
 
 load_dotenv()