From a55501f2ea8f54fbc620ca7f0c8ac514aed94731 Mon Sep 17 00:00:00 2001 From: Deeman Date: Mon, 2 Mar 2026 07:35:33 +0100 Subject: [PATCH 1/6] feat(core): add count_where() helper, compress admin COUNT queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Task 2/6: Adds count_where(table_where, params) to core.py that compresses the fetch_one + null-check COUNT(*) pattern. Applied across admin/routes.py — dashboard stats shrinks from ~75 to ~25 lines, plus 10 more call sites compressed. Co-Authored-By: Claude Opus 4.6 --- web/src/padelnomics/admin/routes.py | 150 ++++++++-------------------- web/src/padelnomics/core.py | 9 ++ 2 files changed, 50 insertions(+), 109 deletions(-) diff --git a/web/src/padelnomics/admin/routes.py b/web/src/padelnomics/admin/routes.py index 71fd52b..c55d3c3 100644 --- a/web/src/padelnomics/admin/routes.py +++ b/web/src/padelnomics/admin/routes.py @@ -28,6 +28,7 @@ from ..auth.routes import role_required from ..core import ( EMAIL_ADDRESSES, config, + count_where, csrf_protect, execute, fetch_all, @@ -91,8 +92,7 @@ async def _inject_admin_sidebar_data(): """Load unread inbox count for sidebar badge on every admin page.""" from quart import g try: - row = await fetch_one("SELECT COUNT(*) as cnt FROM inbound_emails WHERE is_read = 0") - g.admin_unread_count = row["cnt"] if row else 0 + g.admin_unread_count = await count_where("inbound_emails WHERE is_read = 0") except Exception: logger.exception("Failed to load admin sidebar unread count") g.admin_unread_count = 0 @@ -114,76 +114,32 @@ async def get_dashboard_stats() -> dict: now = utcnow() today = now.date().isoformat() week_ago = (now - timedelta(days=7)).strftime("%Y-%m-%d %H:%M:%S") - users_total = await fetch_one("SELECT COUNT(*) as count FROM users WHERE deleted_at IS NULL") - users_today = await fetch_one( - "SELECT COUNT(*) as count FROM users WHERE created_at >= ? 
AND deleted_at IS NULL", - (today,) - ) - users_week = await fetch_one( - "SELECT COUNT(*) as count FROM users WHERE created_at >= ? AND deleted_at IS NULL", - (week_ago,) - ) - subs = await fetch_one( - "SELECT COUNT(*) as count FROM subscriptions WHERE status = 'active'" + # Two queries that aren't simple COUNT(*) — keep as fetch_one + planner_row = await fetch_one( + "SELECT COUNT(DISTINCT user_id) AS n FROM scenarios WHERE deleted_at IS NULL" ) - - tasks_pending = await fetch_one("SELECT COUNT(*) as count FROM tasks WHERE status = 'pending'") - tasks_failed = await fetch_one("SELECT COUNT(*) as count FROM tasks WHERE status = 'failed'") - - # Lead funnel stats - leads_total = await fetch_one( - "SELECT COUNT(*) as count FROM lead_requests WHERE lead_type = 'quote'" - ) - leads_new = await fetch_one( - "SELECT COUNT(*) as count FROM lead_requests WHERE status = 'new' AND lead_type = 'quote'" - ) - leads_verified = await fetch_one( - "SELECT COUNT(*) as count FROM lead_requests WHERE verified_at IS NOT NULL AND lead_type = 'quote'" - ) - leads_unlocked = await fetch_one( - "SELECT COUNT(*) as count FROM lead_requests WHERE unlock_count > 0 AND lead_type = 'quote'" - ) - - # Planner users - planner_users = await fetch_one( - "SELECT COUNT(DISTINCT user_id) as count FROM scenarios WHERE deleted_at IS NULL" - ) - - # Supplier stats - suppliers_claimed = await fetch_one( - "SELECT COUNT(*) as count FROM suppliers WHERE claimed_by IS NOT NULL" - ) - suppliers_growth = await fetch_one( - "SELECT COUNT(*) as count FROM suppliers WHERE tier = 'growth'" - ) - suppliers_pro = await fetch_one( - "SELECT COUNT(*) as count FROM suppliers WHERE tier = 'pro'" - ) - total_credits_spent = await fetch_one( - "SELECT COALESCE(SUM(ABS(delta)), 0) as total FROM credit_ledger WHERE delta < 0" - ) - leads_unlocked_by_suppliers = await fetch_one( - "SELECT COUNT(*) as count FROM lead_forwards" + credits_row = await fetch_one( + "SELECT COALESCE(SUM(ABS(delta)), 0) AS n FROM 
credit_ledger WHERE delta < 0" ) return { - "users_total": users_total["count"] if users_total else 0, - "users_today": users_today["count"] if users_today else 0, - "users_week": users_week["count"] if users_week else 0, - "active_subscriptions": subs["count"] if subs else 0, - "tasks_pending": tasks_pending["count"] if tasks_pending else 0, - "tasks_failed": tasks_failed["count"] if tasks_failed else 0, - "leads_total": leads_total["count"] if leads_total else 0, - "leads_new": leads_new["count"] if leads_new else 0, - "leads_verified": leads_verified["count"] if leads_verified else 0, - "leads_unlocked": leads_unlocked["count"] if leads_unlocked else 0, - "planner_users": planner_users["count"] if planner_users else 0, - "suppliers_claimed": suppliers_claimed["count"] if suppliers_claimed else 0, - "suppliers_growth": suppliers_growth["count"] if suppliers_growth else 0, - "suppliers_pro": suppliers_pro["count"] if suppliers_pro else 0, - "total_credits_spent": total_credits_spent["total"] if total_credits_spent else 0, - "leads_unlocked_by_suppliers": leads_unlocked_by_suppliers["count"] if leads_unlocked_by_suppliers else 0, + "users_total": await count_where("users WHERE deleted_at IS NULL"), + "users_today": await count_where("users WHERE created_at >= ? AND deleted_at IS NULL", (today,)), + "users_week": await count_where("users WHERE created_at >= ? 
AND deleted_at IS NULL", (week_ago,)), + "active_subscriptions": await count_where("subscriptions WHERE status = 'active'"), + "tasks_pending": await count_where("tasks WHERE status = 'pending'"), + "tasks_failed": await count_where("tasks WHERE status = 'failed'"), + "leads_total": await count_where("lead_requests WHERE lead_type = 'quote'"), + "leads_new": await count_where("lead_requests WHERE status = 'new' AND lead_type = 'quote'"), + "leads_verified": await count_where("lead_requests WHERE verified_at IS NOT NULL AND lead_type = 'quote'"), + "leads_unlocked": await count_where("lead_requests WHERE unlock_count > 0 AND lead_type = 'quote'"), + "planner_users": planner_row["n"] if planner_row else 0, + "suppliers_claimed": await count_where("suppliers WHERE claimed_by IS NOT NULL"), + "suppliers_growth": await count_where("suppliers WHERE tier = 'growth'"), + "suppliers_pro": await count_where("suppliers WHERE tier = 'pro'"), + "total_credits_spent": credits_row["n"] if credits_row else 0, + "leads_unlocked_by_suppliers": await count_where("lead_forwards WHERE 1=1"), } @@ -446,10 +402,7 @@ async def get_leads( params.append(f"-{days} days") where = " AND ".join(wheres) - count_row = await fetch_one( - f"SELECT COUNT(*) as cnt FROM lead_requests WHERE {where}", tuple(params) - ) - total = count_row["cnt"] if count_row else 0 + total = await count_where(f"lead_requests WHERE {where}", tuple(params)) offset = (page - 1) * per_page rows = await fetch_all( @@ -929,13 +882,10 @@ async def get_suppliers_list( async def get_supplier_stats() -> dict: """Get aggregate supplier stats for the admin list header.""" - claimed = await fetch_one("SELECT COUNT(*) as cnt FROM suppliers WHERE claimed_by IS NOT NULL") - growth = await fetch_one("SELECT COUNT(*) as cnt FROM suppliers WHERE tier = 'growth'") - pro = await fetch_one("SELECT COUNT(*) as cnt FROM suppliers WHERE tier = 'pro'") return { - "claimed": claimed["cnt"] if claimed else 0, - "growth": growth["cnt"] if growth 
else 0, - "pro": pro["cnt"] if pro else 0, + "claimed": await count_where("suppliers WHERE claimed_by IS NOT NULL"), + "growth": await count_where("suppliers WHERE tier = 'growth'"), + "pro": await count_where("suppliers WHERE tier = 'pro'"), } @@ -1017,11 +967,7 @@ async def supplier_detail(supplier_id: int): (supplier_id,), ) - enquiry_row = await fetch_one( - "SELECT COUNT(*) as cnt FROM supplier_enquiries WHERE supplier_id = ?", - (supplier_id,), - ) - enquiry_count = enquiry_row["cnt"] if enquiry_row else 0 + enquiry_count = await count_where("supplier_enquiries WHERE supplier_id = ?", (supplier_id,)) # Email activity timeline — correlate by contact_email (no FK) timeline = [] @@ -1239,7 +1185,6 @@ _PRODUCT_CATEGORIES = [ @role_required("admin") async def billing_products(): """Read-only overview of Paddle products, subscriptions, and revenue proxies.""" - active_subs_row = await fetch_one("SELECT COUNT(*) as cnt FROM subscriptions WHERE status = 'active'") mrr_row = await fetch_one( """SELECT COALESCE(SUM( CASE WHEN pp.key LIKE '%_yearly' THEN pp.price_cents / 12 @@ -1249,14 +1194,12 @@ async def billing_products(): JOIN paddle_products pp ON s.plan = pp.key WHERE s.status = 'active' AND pp.billing_type = 'subscription'""" ) - active_boosts_row = await fetch_one("SELECT COUNT(*) as cnt FROM supplier_boosts WHERE status = 'active'") - bp_exports_row = await fetch_one("SELECT COUNT(*) as cnt FROM business_plan_exports WHERE status = 'completed'") stats = { - "active_subs": (active_subs_row or {}).get("cnt", 0), + "active_subs": await count_where("subscriptions WHERE status = 'active'"), "mrr_cents": (mrr_row or {}).get("total_cents", 0), - "active_boosts": (active_boosts_row or {}).get("cnt", 0), - "bp_exports": (bp_exports_row or {}).get("cnt", 0), + "active_boosts": await count_where("supplier_boosts WHERE status = 'active'"), + "bp_exports": await count_where("business_plan_exports WHERE status = 'completed'"), } products_rows = await fetch_all("SELECT * 
FROM paddle_products ORDER BY key") @@ -1342,23 +1285,18 @@ async def get_email_log( async def get_email_stats() -> dict: """Aggregate email stats for the list header.""" - total = await fetch_one("SELECT COUNT(*) as cnt FROM email_log") - delivered = await fetch_one("SELECT COUNT(*) as cnt FROM email_log WHERE last_event = 'delivered'") - bounced = await fetch_one("SELECT COUNT(*) as cnt FROM email_log WHERE last_event = 'bounced'") today = utcnow().date().isoformat() - sent_today = await fetch_one("SELECT COUNT(*) as cnt FROM email_log WHERE created_at >= ?", (today,)) return { - "total": total["cnt"] if total else 0, - "delivered": delivered["cnt"] if delivered else 0, - "bounced": bounced["cnt"] if bounced else 0, - "sent_today": sent_today["cnt"] if sent_today else 0, + "total": await count_where("email_log WHERE 1=1"), + "delivered": await count_where("email_log WHERE last_event = 'delivered'"), + "bounced": await count_where("email_log WHERE last_event = 'bounced'"), + "sent_today": await count_where("email_log WHERE created_at >= ?", (today,)), } async def get_unread_count() -> int: """Count unread inbound emails.""" - row = await fetch_one("SELECT COUNT(*) as cnt FROM inbound_emails WHERE is_read = 0") - return row["cnt"] if row else 0 + return await count_where("inbound_emails WHERE is_read = 0") @bp.route("/emails") @@ -1824,11 +1762,7 @@ async def template_detail(slug: str): columns = await get_table_columns(config["data_table"]) sample_rows = await fetch_template_data(config["data_table"], limit=10) - # Count generated articles - row = await fetch_one( - "SELECT COUNT(*) as cnt FROM articles WHERE template_slug = ?", (slug,), - ) - generated_count = row["cnt"] if row else 0 + generated_count = await count_where("articles WHERE template_slug = ?", (slug,)) return await render_template( "admin/template_detail.html", @@ -1959,8 +1893,8 @@ async def _query_scenarios(search: str, country: str, venue_type: str) -> tuple[ f"SELECT * FROM published_scenarios 
WHERE {where} ORDER BY created_at DESC LIMIT 500", tuple(params), ) - total_row = await fetch_one("SELECT COUNT(*) as cnt FROM published_scenarios") - return rows, (total_row["cnt"] if total_row else 0) + total = await count_where("published_scenarios WHERE 1=1") + return rows, total @bp.route("/scenarios") @@ -2927,11 +2861,9 @@ _CSV_IMPORT_LIMIT = 500 # guard against huge uploads async def get_follow_up_due_count() -> int: """Count pipeline suppliers with follow_up_at <= today.""" - row = await fetch_one( - """SELECT COUNT(*) as cnt FROM suppliers - WHERE outreach_status IS NOT NULL AND follow_up_at <= date('now')""" + return await count_where( + "suppliers WHERE outreach_status IS NOT NULL AND follow_up_at <= date('now')" ) - return row["cnt"] if row else 0 async def get_outreach_pipeline() -> dict: diff --git a/web/src/padelnomics/core.py b/web/src/padelnomics/core.py index 19f9438..ed5c905 100644 --- a/web/src/padelnomics/core.py +++ b/web/src/padelnomics/core.py @@ -192,6 +192,15 @@ async def fetch_all(sql: str, params: tuple = ()) -> list[dict]: return [dict(row) for row in rows] +async def count_where(table_where: str, params: tuple = ()) -> int: + """Count rows matching a condition. Compresses the fetch_one + null-check pattern. + + Usage: await count_where("users WHERE deleted_at IS NULL") + """ + row = await fetch_one(f"SELECT COUNT(*) AS n FROM {table_where}", params) + return row["n"] if row else 0 + + async def execute(sql: str, params: tuple = ()) -> int: """Execute SQL and return lastrowid.""" db = await get_db() From 3d7a72ba26fde6fa035b02d1a2b1f9cc4dbcfd1d Mon Sep 17 00:00:00 2001 From: Deeman Date: Mon, 2 Mar 2026 07:40:24 +0100 Subject: [PATCH 2/6] refactor: apply count_where() across remaining web blueprints Task 2/6 continued: Compress 18 more COUNT(*) call sites across suppliers, directory, dashboard, public, planner, pseo, and pipeline routes. -24 lines net. 
Co-Authored-By: Claude Opus 4.6 --- web/src/padelnomics/admin/pipeline_routes.py | 7 ++---- web/src/padelnomics/admin/pseo_routes.py | 8 +++--- web/src/padelnomics/dashboard/routes.py | 18 ++++++-------- web/src/padelnomics/directory/routes.py | 22 ++++++----------- web/src/padelnomics/planner/routes.py | 7 +++--- web/src/padelnomics/public/routes.py | 26 +++++++++----------- web/src/padelnomics/suppliers/routes.py | 24 ++++++++---------- 7 files changed, 44 insertions(+), 68 deletions(-) diff --git a/web/src/padelnomics/admin/pipeline_routes.py b/web/src/padelnomics/admin/pipeline_routes.py index 3bc926f..9e1f354 100644 --- a/web/src/padelnomics/admin/pipeline_routes.py +++ b/web/src/padelnomics/admin/pipeline_routes.py @@ -35,7 +35,7 @@ from pathlib import Path from quart import Blueprint, flash, redirect, render_template, request, url_for from ..auth.routes import role_required -from ..core import csrf_protect +from ..core import count_where, csrf_protect logger = logging.getLogger(__name__) @@ -298,11 +298,8 @@ async def _inject_sidebar_data(): """Load unread inbox count for the admin sidebar badge.""" from quart import g - from ..core import fetch_one - try: - row = await fetch_one("SELECT COUNT(*) as cnt FROM inbound_emails WHERE is_read = 0") - g.admin_unread_count = row["cnt"] if row else 0 + g.admin_unread_count = await count_where("inbound_emails WHERE is_read = 0") except Exception: g.admin_unread_count = 0 diff --git a/web/src/padelnomics/admin/pseo_routes.py b/web/src/padelnomics/admin/pseo_routes.py index 183684d..f35ec26 100644 --- a/web/src/padelnomics/admin/pseo_routes.py +++ b/web/src/padelnomics/admin/pseo_routes.py @@ -25,7 +25,7 @@ from ..content.health import ( get_template_freshness, get_template_stats, ) -from ..core import csrf_protect, fetch_all, fetch_one +from ..core import count_where, csrf_protect, fetch_all, fetch_one bp = Blueprint( "pseo", @@ -41,8 +41,7 @@ async def _inject_sidebar_data(): from quart import g try: - row = await 
fetch_one("SELECT COUNT(*) as cnt FROM inbound_emails WHERE is_read = 0") - g.admin_unread_count = row["cnt"] if row else 0 + g.admin_unread_count = await count_where("inbound_emails WHERE is_read = 0") except Exception: g.admin_unread_count = 0 @@ -80,8 +79,7 @@ async def pseo_dashboard(): total_published = sum(r["stats"]["published"] for r in template_rows) stale_count = sum(1 for f in freshness if f["status"] == "stale") - noindex_row = await fetch_one("SELECT COUNT(*) as cnt FROM articles WHERE noindex = 1") - noindex_count = noindex_row["cnt"] if noindex_row else 0 + noindex_count = await count_where("articles WHERE noindex = 1") # Recent generation jobs — enough for the dashboard summary. jobs = await fetch_all( diff --git a/web/src/padelnomics/dashboard/routes.py b/web/src/padelnomics/dashboard/routes.py index 6afb375..e446a8f 100644 --- a/web/src/padelnomics/dashboard/routes.py +++ b/web/src/padelnomics/dashboard/routes.py @@ -6,7 +6,7 @@ from pathlib import Path from quart import Blueprint, flash, g, redirect, render_template, request, url_for from ..auth.routes import login_required, update_user -from ..core import csrf_protect, fetch_one, soft_delete, utcnow_iso +from ..core import count_where, csrf_protect, fetch_one, soft_delete, utcnow_iso from ..i18n import get_translations bp = Blueprint( @@ -18,17 +18,13 @@ bp = Blueprint( async def get_user_stats(user_id: int) -> dict: - scenarios = await fetch_one( - "SELECT COUNT(*) as count FROM scenarios WHERE user_id = ? AND deleted_at IS NULL", - (user_id,), - ) - leads = await fetch_one( - "SELECT COUNT(*) as count FROM lead_requests WHERE user_id = ?", - (user_id,), - ) return { - "scenarios": scenarios["count"] if scenarios else 0, - "leads": leads["count"] if leads else 0, + "scenarios": await count_where( + "scenarios WHERE user_id = ? 
AND deleted_at IS NULL", (user_id,) + ), + "leads": await count_where( + "lead_requests WHERE user_id = ?", (user_id,) + ), } diff --git a/web/src/padelnomics/directory/routes.py b/web/src/padelnomics/directory/routes.py index a7f46cd..c1b450f 100644 --- a/web/src/padelnomics/directory/routes.py +++ b/web/src/padelnomics/directory/routes.py @@ -6,7 +6,7 @@ from pathlib import Path from quart import Blueprint, g, make_response, redirect, render_template, request, url_for -from ..core import csrf_protect, execute, fetch_all, fetch_one, utcnow_iso +from ..core import count_where, csrf_protect, execute, fetch_all, fetch_one, utcnow_iso from ..i18n import COUNTRY_LABELS, get_translations bp = Blueprint( @@ -79,11 +79,7 @@ async def _build_directory_query(q, country, category, region, page, per_page=24 where = " AND ".join(wheres) if wheres else "1=1" - count_row = await fetch_one( - f"SELECT COUNT(*) as cnt FROM suppliers s WHERE {where}", - tuple(params), - ) - total = count_row["cnt"] if count_row else 0 + total = await count_where(f"suppliers s WHERE {where}", tuple(params)) offset = (page - 1) * per_page # Tier-based ordering: sticky first, then pro > growth > free, then name @@ -159,16 +155,16 @@ async def index(): "SELECT category, COUNT(*) as cnt FROM suppliers GROUP BY category ORDER BY cnt DESC" ) - total_suppliers = await fetch_one("SELECT COUNT(*) as cnt FROM suppliers") - total_countries = await fetch_one("SELECT COUNT(DISTINCT country_code) as cnt FROM suppliers") + total_suppliers = await count_where("suppliers") + total_countries = await count_where("(SELECT DISTINCT country_code FROM suppliers)") return await render_template( "directory.html", **ctx, country_counts=country_counts, category_counts=category_counts, - total_suppliers=total_suppliers["cnt"] if total_suppliers else 0, - total_countries=total_countries["cnt"] if total_countries else 0, + total_suppliers=total_suppliers, + total_countries=total_countries, ) @@ -204,11 +200,9 @@ async def 
supplier_detail(slug: str): # Enquiry count (Basic+) enquiry_count = 0 if supplier.get("tier") in ("basic", "growth", "pro"): - row = await fetch_one( - "SELECT COUNT(*) as cnt FROM supplier_enquiries WHERE supplier_id = ?", - (supplier["id"],), + enquiry_count = await count_where( + "supplier_enquiries WHERE supplier_id = ?", (supplier["id"],) ) - enquiry_count = row["cnt"] if row else 0 lang = g.get("lang", "en") cat_labels, country_labels, region_labels = get_directory_labels(lang) diff --git a/web/src/padelnomics/planner/routes.py b/web/src/padelnomics/planner/routes.py index 50ed5ef..a236205 100644 --- a/web/src/padelnomics/planner/routes.py +++ b/web/src/padelnomics/planner/routes.py @@ -12,6 +12,7 @@ from quart import Blueprint, Response, g, jsonify, render_template, request from ..auth.routes import login_required from ..core import ( config, + count_where, csrf_protect, execute, feature_gate, @@ -50,11 +51,9 @@ COUNTRY_PRESETS = { async def count_scenarios(user_id: int) -> int: - row = await fetch_one( - "SELECT COUNT(*) as cnt FROM scenarios WHERE user_id = ? AND deleted_at IS NULL", - (user_id,), + return await count_where( + "scenarios WHERE user_id = ? 
AND deleted_at IS NULL", (user_id,) ) - return row["cnt"] if row else 0 async def get_default_scenario(user_id: int) -> dict | None: diff --git a/web/src/padelnomics/public/routes.py b/web/src/padelnomics/public/routes.py index 94b9eb4..43d945f 100644 --- a/web/src/padelnomics/public/routes.py +++ b/web/src/padelnomics/public/routes.py @@ -5,7 +5,7 @@ from pathlib import Path from quart import Blueprint, g, render_template, request, session -from ..core import check_rate_limit, csrf_protect, execute, fetch_all, fetch_one +from ..core import check_rate_limit, count_where, csrf_protect, execute, fetch_all, fetch_one from ..i18n import get_translations bp = Blueprint( @@ -17,13 +17,9 @@ bp = Blueprint( async def _supplier_counts(): """Fetch aggregate supplier stats for landing/marketing pages.""" - total = await fetch_one("SELECT COUNT(*) as cnt FROM suppliers") - countries = await fetch_one( - "SELECT COUNT(DISTINCT country_code) as cnt FROM suppliers" - ) return ( - total["cnt"] if total else 0, - countries["cnt"] if countries else 0, + await count_where("suppliers"), + await count_where("(SELECT DISTINCT country_code FROM suppliers)"), ) @@ -75,15 +71,15 @@ async def suppliers(): total_suppliers, total_countries = await _supplier_counts() # Live stats - calc_requests = await fetch_one("SELECT COUNT(*) as cnt FROM scenarios WHERE deleted_at IS NULL") + calc_requests = await count_where("scenarios WHERE deleted_at IS NULL") avg_budget = await fetch_one( "SELECT AVG(budget_estimate) as avg FROM lead_requests WHERE budget_estimate > 0 AND lead_type = 'quote'" ) - active_suppliers = await fetch_one( - "SELECT COUNT(*) as cnt FROM suppliers WHERE tier IN ('growth', 'pro') AND claimed_by IS NOT NULL" + active_suppliers = await count_where( + "suppliers WHERE tier IN ('growth', 'pro') AND claimed_by IS NOT NULL" ) - monthly_leads = await fetch_one( - "SELECT COUNT(*) as cnt FROM lead_requests WHERE lead_type = 'quote' AND created_at >= date('now', '-30 days')" + 
monthly_leads = await count_where( + "lead_requests WHERE lead_type = 'quote' AND created_at >= date('now', '-30 days')" ) # Lead feed preview — 3 recent verified hot/warm leads, anonymized @@ -100,10 +96,10 @@ async def suppliers(): "suppliers.html", total_suppliers=total_suppliers, total_countries=total_countries, - calc_requests=calc_requests["cnt"] if calc_requests else 0, + calc_requests=calc_requests, avg_budget=int(avg_budget["avg"]) if avg_budget and avg_budget["avg"] else 0, - active_suppliers=active_suppliers["cnt"] if active_suppliers else 0, - monthly_leads=monthly_leads["cnt"] if monthly_leads else 0, + active_suppliers=active_suppliers, + monthly_leads=monthly_leads, preview_leads=preview_leads, ) diff --git a/web/src/padelnomics/suppliers/routes.py b/web/src/padelnomics/suppliers/routes.py index 2d51faf..afbd394 100644 --- a/web/src/padelnomics/suppliers/routes.py +++ b/web/src/padelnomics/suppliers/routes.py @@ -11,6 +11,7 @@ from werkzeug.utils import secure_filename from ..core import ( capture_waitlist_email, config, + count_where, csrf_protect, execute, feature_gate, @@ -776,9 +777,8 @@ async def dashboard_overview(): supplier = g.supplier # Leads unlocked count - unlocked = await fetch_one( - "SELECT COUNT(*) as cnt FROM lead_forwards WHERE supplier_id = ?", - (supplier["id"],), + leads_unlocked = await count_where( + "lead_forwards WHERE supplier_id = ?", (supplier["id"],) ) # New leads matching supplier's area since last login @@ -787,22 +787,20 @@ async def dashboard_overview(): new_leads_count = 0 if service_area: placeholders = ",".join("?" 
* len(service_area)) - row = await fetch_one( - f"""SELECT COUNT(*) as cnt FROM lead_requests + new_leads_count = await count_where( + f"""lead_requests WHERE lead_type = 'quote' AND status = 'new' AND verified_at IS NOT NULL AND country IN ({placeholders}) AND NOT EXISTS (SELECT 1 FROM lead_forwards WHERE lead_id = lead_requests.id AND supplier_id = ?)""", (*service_area, supplier["id"]), ) - new_leads_count = row["cnt"] if row else 0 else: - row = await fetch_one( - """SELECT COUNT(*) as cnt FROM lead_requests + new_leads_count = await count_where( + """lead_requests WHERE lead_type = 'quote' AND status = 'new' AND verified_at IS NOT NULL AND NOT EXISTS (SELECT 1 FROM lead_forwards WHERE lead_id = lead_requests.id AND supplier_id = ?)""", (supplier["id"],), ) - new_leads_count = row["cnt"] if row else 0 # Recent activity (last 10 events from credit_ledger + lead_forwards) recent_activity = await fetch_all( @@ -825,16 +823,14 @@ async def dashboard_overview(): # Enquiry count for Basic tier enquiry_count = 0 if supplier.get("tier") == "basic": - eq_row = await fetch_one( - "SELECT COUNT(*) as cnt FROM supplier_enquiries WHERE supplier_id = ?", - (supplier["id"],), + enquiry_count = await count_where( + "supplier_enquiries WHERE supplier_id = ?", (supplier["id"],) ) - enquiry_count = eq_row["cnt"] if eq_row else 0 return await render_template( "suppliers/partials/dashboard_overview.html", supplier=supplier, - leads_unlocked=unlocked["cnt"] if unlocked else 0, + leads_unlocked=leads_unlocked, new_leads_count=new_leads_count, recent_activity=recent_activity, active_boosts=active_boosts, From e87a7fc9d67ae838b96ead5e22aab22e169368b2 Mon Sep 17 00:00:00 2001 From: Deeman Date: Mon, 2 Mar 2026 07:43:50 +0100 Subject: [PATCH 3/6] refactor(admin): extract _forward_lead() from duplicate lead forward routes Task 3/6: lead_forward and lead_forward_htmx shared ~20 lines of identical DB logic. Extracted into _forward_lead() that returns an error string or None. 
Both routes now call the helper and differ only in response format (redirect vs HTMX partial). Co-Authored-By: Claude Opus 4.6 --- web/src/padelnomics/admin/routes.py | 66 +++++++++++------------------ 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/web/src/padelnomics/admin/routes.py b/web/src/padelnomics/admin/routes.py index c55d3c3..32c8974 100644 --- a/web/src/padelnomics/admin/routes.py +++ b/web/src/padelnomics/admin/routes.py @@ -632,26 +632,14 @@ async def lead_new(): return await render_template("admin/lead_form.html", data={}, statuses=LEAD_STATUSES) -@bp.route("/leads//forward", methods=["POST"]) -@role_required("admin") -@csrf_protect -async def lead_forward(lead_id: int): - """Manually forward a lead to a supplier (no credit cost).""" - form = await request.form - supplier_id = int(form.get("supplier_id", 0)) - - if not supplier_id: - await flash("Select a supplier.", "error") - return redirect(url_for("admin.lead_detail", lead_id=lead_id)) - - # Check if already forwarded +async def _forward_lead(lead_id: int, supplier_id: int) -> str | None: + """Forward a lead to a supplier. Returns error message or None on success.""" existing = await fetch_one( "SELECT 1 FROM lead_forwards WHERE lead_id = ? AND supplier_id = ?", (lead_id, supplier_id), ) if existing: - await flash("Already forwarded to this supplier.", "warning") - return redirect(url_for("admin.lead_detail", lead_id=lead_id)) + return "Already forwarded to this supplier." 
now = utcnow_iso() await execute( @@ -663,15 +651,27 @@ async def lead_forward(lead_id: int): "UPDATE lead_requests SET unlock_count = unlock_count + 1, status = 'forwarded' WHERE id = ?", (lead_id,), ) - - # Enqueue forward email from ..worker import enqueue - await enqueue("send_lead_forward_email", { - "lead_id": lead_id, - "supplier_id": supplier_id, - }) + await enqueue("send_lead_forward_email", {"lead_id": lead_id, "supplier_id": supplier_id}) + return None - await flash("Lead forwarded to supplier.", "success") + +@bp.route("/leads//forward", methods=["POST"]) +@role_required("admin") +@csrf_protect +async def lead_forward(lead_id: int): + """Manually forward a lead to a supplier (no credit cost).""" + form = await request.form + supplier_id = int(form.get("supplier_id", 0)) + if not supplier_id: + await flash("Select a supplier.", "error") + return redirect(url_for("admin.lead_detail", lead_id=lead_id)) + + error = await _forward_lead(lead_id, supplier_id) + if error: + await flash(error, "warning") + else: + await flash("Lead forwarded to supplier.", "success") return redirect(url_for("admin.lead_detail", lead_id=lead_id)) @@ -704,25 +704,9 @@ async def lead_forward_htmx(lead_id: int): return Response("Select a supplier.", status=422) supplier_id = int(supplier_id_str) - existing = await fetch_one( - "SELECT 1 FROM lead_forwards WHERE lead_id = ? 
AND supplier_id = ?", - (lead_id, supplier_id), - ) - if existing: - return Response("Already forwarded to this supplier.", status=422) - - now = utcnow_iso() - await execute( - """INSERT INTO lead_forwards (lead_id, supplier_id, credit_cost, status, created_at) - VALUES (?, ?, 0, 'sent', ?)""", - (lead_id, supplier_id, now), - ) - await execute( - "UPDATE lead_requests SET unlock_count = unlock_count + 1, status = 'forwarded' WHERE id = ?", - (lead_id,), - ) - from ..worker import enqueue - await enqueue("send_lead_forward_email", {"lead_id": lead_id, "supplier_id": supplier_id}) + error = await _forward_lead(lead_id, supplier_id) + if error: + return Response(error, status=422) lead = await get_lead_detail(lead_id) return await render_template( From 6774254cb064feae1b58c7b9db4e236465b15f48 Mon Sep 17 00:00:00 2001 From: Deeman Date: Mon, 2 Mar 2026 07:45:52 +0100 Subject: [PATCH 4/6] feat(sqlmesh): add country code macros, apply across models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Task 4/6: Add 5 macros to compress repeated country code patterns: - @country_name / @country_slug: 20-country CASE in dim_cities, dim_locations - @normalize_eurostat_country / @normalize_eurostat_nuts: EL→GR, UK→GB - @infer_country_from_coords: bounding box for 8 markets Net: +91 lines in macros, -135 lines in models = -44 lines total. 
Co-Authored-By: Claude Opus 4.6 --- .../sqlmesh_padelnomics/macros/__init__.py | 91 ++++++++++++++++++- .../models/foundation/dim_cities.sql | 50 +--------- .../models/foundation/dim_locations.sql | 50 +--------- .../models/staging/stg_income.sql | 6 +- .../models/staging/stg_nuts2_boundaries.sql | 6 +- .../models/staging/stg_padel_courts.sql | 13 +-- .../models/staging/stg_regional_income.sql | 6 +- .../models/staging/stg_tennis_courts.sql | 13 +-- 8 files changed, 100 insertions(+), 135 deletions(-) diff --git a/transform/sqlmesh_padelnomics/macros/__init__.py b/transform/sqlmesh_padelnomics/macros/__init__.py index b4b675c..860bef5 100644 --- a/transform/sqlmesh_padelnomics/macros/__init__.py +++ b/transform/sqlmesh_padelnomics/macros/__init__.py @@ -16,5 +16,92 @@ def padelnomics_glob(evaluator) -> str: return f"'{landing_dir}/padelnomics/**/*.csv.gz'" -# Add one macro per landing zone subdirectory you create. -# Pattern: def {source}_glob(evaluator) → f"'{landing_dir}/{source}/**/*.csv.gz'" +# ── Country code helpers ───────────────────────────────────────────────────── +# Shared lookup used by dim_cities and dim_locations. + +_COUNTRY_NAMES = { + "DE": "Germany", "ES": "Spain", "GB": "United Kingdom", + "FR": "France", "IT": "Italy", "PT": "Portugal", + "AT": "Austria", "CH": "Switzerland", "NL": "Netherlands", + "BE": "Belgium", "SE": "Sweden", "NO": "Norway", + "DK": "Denmark", "FI": "Finland", "US": "United States", + "AR": "Argentina", "MX": "Mexico", "AE": "UAE", + "AU": "Australia", "IE": "Ireland", +} + + +def _country_case(col: str) -> str: + """Build a CASE expression mapping ISO 3166-1 alpha-2 → English name.""" + whens = "\n ".join( + f"WHEN '{code}' THEN '{name}'" for code, name in _COUNTRY_NAMES.items() + ) + return f"CASE {col}\n {whens}\n ELSE {col}\n END" + + +@macro() +def country_name(evaluator, code_col) -> str: + """CASE expression: country code → English name. 
+ + Usage in SQL: @country_name(vc.country_code) AS country_name_en + """ + return _country_case(str(code_col)) + + +@macro() +def country_slug(evaluator, code_col) -> str: + """CASE expression: country code → URL-safe slug (lowercased, spaces → dashes). + + Usage in SQL: @country_slug(vc.country_code) AS country_slug + """ + return f"LOWER(REGEXP_REPLACE({_country_case(str(code_col))}, '[^a-zA-Z0-9]+', '-'))" + + +@macro() +def normalize_eurostat_country(evaluator, code_col) -> str: + """Normalize Eurostat country codes to ISO 3166-1 alpha-2: EL→GR, UK→GB. + + Usage in SQL: @normalize_eurostat_country(geo_code) AS country_code + """ + col = str(code_col) + return f"CASE {col} WHEN 'EL' THEN 'GR' WHEN 'UK' THEN 'GB' ELSE {col} END" + + +@macro() +def normalize_eurostat_nuts(evaluator, code_col) -> str: + """Normalize NUTS code prefix: EL→GR, UK→GB, preserving the suffix. + + Usage in SQL: @normalize_eurostat_nuts(geo_code) AS nuts_code + """ + col = str(code_col) + return ( + f"CASE" + f" WHEN {col} LIKE 'EL%' THEN 'GR' || SUBSTR({col}, 3)" + f" WHEN {col} LIKE 'UK%' THEN 'GB' || SUBSTR({col}, 3)" + f" ELSE {col}" + f" END" + ) + + +@macro() +def infer_country_from_coords(evaluator, lat_col, lon_col) -> str: + """Infer ISO country code from lat/lon using bounding boxes for 8 European markets. 
+ + Usage in SQL: + COALESCE(NULLIF(TRIM(UPPER(country_code)), ''), + @infer_country_from_coords(lat, lon)) AS country_code + """ + lat = str(lat_col) + lon = str(lon_col) + return ( + f"CASE" + f" WHEN {lat} BETWEEN 47.27 AND 55.06 AND {lon} BETWEEN 5.87 AND 15.04 THEN 'DE'" + f" WHEN {lat} BETWEEN 35.95 AND 43.79 AND {lon} BETWEEN -9.39 AND 4.33 THEN 'ES'" + f" WHEN {lat} BETWEEN 49.90 AND 60.85 AND {lon} BETWEEN -8.62 AND 1.77 THEN 'GB'" + f" WHEN {lat} BETWEEN 41.36 AND 51.09 AND {lon} BETWEEN -5.14 AND 9.56 THEN 'FR'" + f" WHEN {lat} BETWEEN 45.46 AND 47.80 AND {lon} BETWEEN 5.96 AND 10.49 THEN 'CH'" + f" WHEN {lat} BETWEEN 46.37 AND 49.02 AND {lon} BETWEEN 9.53 AND 17.16 THEN 'AT'" + f" WHEN {lat} BETWEEN 36.35 AND 47.09 AND {lon} BETWEEN 6.62 AND 18.51 THEN 'IT'" + f" WHEN {lat} BETWEEN 37.00 AND 42.15 AND {lon} BETWEEN -9.50 AND -6.19 THEN 'PT'" + f" ELSE NULL" + f" END" + ) diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql index b1b1067..ba9a51a 100644 --- a/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql +++ b/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql @@ -110,55 +110,9 @@ SELECT vc.city_slug, vc.city_name, -- Human-readable country name for pSEO templates and internal linking - CASE vc.country_code - WHEN 'DE' THEN 'Germany' - WHEN 'ES' THEN 'Spain' - WHEN 'GB' THEN 'United Kingdom' - WHEN 'FR' THEN 'France' - WHEN 'IT' THEN 'Italy' - WHEN 'PT' THEN 'Portugal' - WHEN 'AT' THEN 'Austria' - WHEN 'CH' THEN 'Switzerland' - WHEN 'NL' THEN 'Netherlands' - WHEN 'BE' THEN 'Belgium' - WHEN 'SE' THEN 'Sweden' - WHEN 'NO' THEN 'Norway' - WHEN 'DK' THEN 'Denmark' - WHEN 'FI' THEN 'Finland' - WHEN 'US' THEN 'United States' - WHEN 'AR' THEN 'Argentina' - WHEN 'MX' THEN 'Mexico' - WHEN 'AE' THEN 'UAE' - WHEN 'AU' THEN 'Australia' - WHEN 'IE' THEN 'Ireland' - ELSE vc.country_code - END AS country_name_en, + @country_name(vc.country_code) AS 
country_name_en, -- URL-safe country slug - LOWER(REGEXP_REPLACE( - CASE vc.country_code - WHEN 'DE' THEN 'Germany' - WHEN 'ES' THEN 'Spain' - WHEN 'GB' THEN 'United Kingdom' - WHEN 'FR' THEN 'France' - WHEN 'IT' THEN 'Italy' - WHEN 'PT' THEN 'Portugal' - WHEN 'AT' THEN 'Austria' - WHEN 'CH' THEN 'Switzerland' - WHEN 'NL' THEN 'Netherlands' - WHEN 'BE' THEN 'Belgium' - WHEN 'SE' THEN 'Sweden' - WHEN 'NO' THEN 'Norway' - WHEN 'DK' THEN 'Denmark' - WHEN 'FI' THEN 'Finland' - WHEN 'US' THEN 'United States' - WHEN 'AR' THEN 'Argentina' - WHEN 'MX' THEN 'Mexico' - WHEN 'AE' THEN 'UAE' - WHEN 'AU' THEN 'Australia' - WHEN 'IE' THEN 'Ireland' - ELSE vc.country_code - END, '[^a-zA-Z0-9]+', '-' - )) AS country_slug, + @country_slug(vc.country_code) AS country_slug, vc.centroid_lat AS lat, vc.centroid_lon AS lon, -- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames string > GeoNames spatial > 0. diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql index 2a77577..f86673a 100644 --- a/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql +++ b/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql @@ -215,55 +215,9 @@ SELECT l.geoname_id, l.country_code, -- Human-readable country name (consistent with dim_cities) - CASE l.country_code - WHEN 'DE' THEN 'Germany' - WHEN 'ES' THEN 'Spain' - WHEN 'GB' THEN 'United Kingdom' - WHEN 'FR' THEN 'France' - WHEN 'IT' THEN 'Italy' - WHEN 'PT' THEN 'Portugal' - WHEN 'AT' THEN 'Austria' - WHEN 'CH' THEN 'Switzerland' - WHEN 'NL' THEN 'Netherlands' - WHEN 'BE' THEN 'Belgium' - WHEN 'SE' THEN 'Sweden' - WHEN 'NO' THEN 'Norway' - WHEN 'DK' THEN 'Denmark' - WHEN 'FI' THEN 'Finland' - WHEN 'US' THEN 'United States' - WHEN 'AR' THEN 'Argentina' - WHEN 'MX' THEN 'Mexico' - WHEN 'AE' THEN 'UAE' - WHEN 'AU' THEN 'Australia' - WHEN 'IE' THEN 'Ireland' - ELSE l.country_code - END AS country_name_en, + 
@country_name(l.country_code) AS country_name_en, -- URL-safe country slug - LOWER(REGEXP_REPLACE( - CASE l.country_code - WHEN 'DE' THEN 'Germany' - WHEN 'ES' THEN 'Spain' - WHEN 'GB' THEN 'United Kingdom' - WHEN 'FR' THEN 'France' - WHEN 'IT' THEN 'Italy' - WHEN 'PT' THEN 'Portugal' - WHEN 'AT' THEN 'Austria' - WHEN 'CH' THEN 'Switzerland' - WHEN 'NL' THEN 'Netherlands' - WHEN 'BE' THEN 'Belgium' - WHEN 'SE' THEN 'Sweden' - WHEN 'NO' THEN 'Norway' - WHEN 'DK' THEN 'Denmark' - WHEN 'FI' THEN 'Finland' - WHEN 'US' THEN 'United States' - WHEN 'AR' THEN 'Argentina' - WHEN 'MX' THEN 'Mexico' - WHEN 'AE' THEN 'UAE' - WHEN 'AU' THEN 'Australia' - WHEN 'IE' THEN 'Ireland' - ELSE l.country_code - END, '[^a-zA-Z0-9]+', '-' - )) AS country_slug, + @country_slug(l.country_code) AS country_slug, l.location_name, l.location_slug, l.lat, diff --git a/transform/sqlmesh_padelnomics/models/staging/stg_income.sql b/transform/sqlmesh_padelnomics/models/staging/stg_income.sql index 5e660c4..f5a9df6 100644 --- a/transform/sqlmesh_padelnomics/models/staging/stg_income.sql +++ b/transform/sqlmesh_padelnomics/models/staging/stg_income.sql @@ -30,11 +30,7 @@ parsed AS ( ) SELECT -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB - CASE geo_code - WHEN 'EL' THEN 'GR' - WHEN 'UK' THEN 'GB' - ELSE geo_code - END AS country_code, + @normalize_eurostat_country(geo_code) AS country_code, ref_year, median_income_pps, extracted_date diff --git a/transform/sqlmesh_padelnomics/models/staging/stg_nuts2_boundaries.sql b/transform/sqlmesh_padelnomics/models/staging/stg_nuts2_boundaries.sql index 32481d5..b4bb789 100644 --- a/transform/sqlmesh_padelnomics/models/staging/stg_nuts2_boundaries.sql +++ b/transform/sqlmesh_padelnomics/models/staging/stg_nuts2_boundaries.sql @@ -28,11 +28,7 @@ WITH raw AS ( SELECT NUTS_ID AS nuts2_code, -- Normalise country prefix to ISO 3166-1 alpha-2: EL→GR, UK→GB - CASE CNTR_CODE - WHEN 'EL' THEN 'GR' - WHEN 'UK' THEN 'GB' - ELSE CNTR_CODE - END AS country_code, + 
@normalize_eurostat_country(CNTR_CODE) AS country_code, NAME_LATN AS region_name, geom AS geometry, -- Pre-compute bounding box for efficient spatial pre-filter in dim_locations. diff --git a/transform/sqlmesh_padelnomics/models/staging/stg_padel_courts.sql b/transform/sqlmesh_padelnomics/models/staging/stg_padel_courts.sql index 5a21831..d287cb8 100644 --- a/transform/sqlmesh_padelnomics/models/staging/stg_padel_courts.sql +++ b/transform/sqlmesh_padelnomics/models/staging/stg_padel_courts.sql @@ -48,17 +48,8 @@ deduped AS ( with_country AS ( SELECT osm_id, lat, lon, - COALESCE(NULLIF(TRIM(UPPER(country_code)), ''), CASE - WHEN lat BETWEEN 47.27 AND 55.06 AND lon BETWEEN 5.87 AND 15.04 THEN 'DE' - WHEN lat BETWEEN 35.95 AND 43.79 AND lon BETWEEN -9.39 AND 4.33 THEN 'ES' - WHEN lat BETWEEN 49.90 AND 60.85 AND lon BETWEEN -8.62 AND 1.77 THEN 'GB' - WHEN lat BETWEEN 41.36 AND 51.09 AND lon BETWEEN -5.14 AND 9.56 THEN 'FR' - WHEN lat BETWEEN 45.46 AND 47.80 AND lon BETWEEN 5.96 AND 10.49 THEN 'CH' - WHEN lat BETWEEN 46.37 AND 49.02 AND lon BETWEEN 9.53 AND 17.16 THEN 'AT' - WHEN lat BETWEEN 36.35 AND 47.09 AND lon BETWEEN 6.62 AND 18.51 THEN 'IT' - WHEN lat BETWEEN 37.00 AND 42.15 AND lon BETWEEN -9.50 AND -6.19 THEN 'PT' - ELSE NULL - END) AS country_code, + COALESCE(NULLIF(TRIM(UPPER(country_code)), ''), + @infer_country_from_coords(lat, lon)) AS country_code, NULLIF(TRIM(name), '') AS name, NULLIF(TRIM(city_tag), '') AS city, postcode, operator_name, opening_hours, fee, extracted_date diff --git a/transform/sqlmesh_padelnomics/models/staging/stg_regional_income.sql b/transform/sqlmesh_padelnomics/models/staging/stg_regional_income.sql index e5f7db5..592f958 100644 --- a/transform/sqlmesh_padelnomics/models/staging/stg_regional_income.sql +++ b/transform/sqlmesh_padelnomics/models/staging/stg_regional_income.sql @@ -30,11 +30,7 @@ parsed AS ( ) SELECT -- Normalise to ISO 3166-1 alpha-2 prefix: EL→GR, UK→GB - CASE - WHEN geo_code LIKE 'EL%' THEN 'GR' || 
SUBSTR(geo_code, 3) - WHEN geo_code LIKE 'UK%' THEN 'GB' || SUBSTR(geo_code, 3) - ELSE geo_code - END AS nuts_code, + @normalize_eurostat_nuts(geo_code) AS nuts_code, -- NUTS level: 3-char = NUTS-1, 4-char = NUTS-2 LENGTH(geo_code) - 2 AS nuts_level, ref_year, diff --git a/transform/sqlmesh_padelnomics/models/staging/stg_tennis_courts.sql b/transform/sqlmesh_padelnomics/models/staging/stg_tennis_courts.sql index 7d75851..342e410 100644 --- a/transform/sqlmesh_padelnomics/models/staging/stg_tennis_courts.sql +++ b/transform/sqlmesh_padelnomics/models/staging/stg_tennis_courts.sql @@ -54,17 +54,8 @@ deduped AS ( with_country AS ( SELECT osm_id, lat, lon, - COALESCE(NULLIF(TRIM(UPPER(country_code)), ''), CASE - WHEN lat BETWEEN 47.27 AND 55.06 AND lon BETWEEN 5.87 AND 15.04 THEN 'DE' - WHEN lat BETWEEN 35.95 AND 43.79 AND lon BETWEEN -9.39 AND 4.33 THEN 'ES' - WHEN lat BETWEEN 49.90 AND 60.85 AND lon BETWEEN -8.62 AND 1.77 THEN 'GB' - WHEN lat BETWEEN 41.36 AND 51.09 AND lon BETWEEN -5.14 AND 9.56 THEN 'FR' - WHEN lat BETWEEN 45.46 AND 47.80 AND lon BETWEEN 5.96 AND 10.49 THEN 'CH' - WHEN lat BETWEEN 46.37 AND 49.02 AND lon BETWEEN 9.53 AND 17.16 THEN 'AT' - WHEN lat BETWEEN 36.35 AND 47.09 AND lon BETWEEN 6.62 AND 18.51 THEN 'IT' - WHEN lat BETWEEN 37.00 AND 42.15 AND lon BETWEEN -9.50 AND -6.19 THEN 'PT' - ELSE NULL - END) AS country_code, + COALESCE(NULLIF(TRIM(UPPER(country_code)), ''), + @infer_country_from_coords(lat, lon)) AS country_code, NULLIF(TRIM(name), '') AS name, NULLIF(TRIM(city_tag), '') AS city, extracted_date From 567798ebe1914ff883cdb9031e4a0c3fecb1602a Mon Sep 17 00:00:00 2001 From: Deeman Date: Mon, 2 Mar 2026 07:49:18 +0100 Subject: [PATCH 5/6] feat(extract): add skip_if_current() and write_jsonl_atomic() helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Task 5/6: Compress repeated patterns in extractors: - skip_if_current(): cursor check + early-return dict (3 extractors) - write_jsonl_atomic(): 
working-file → JSONL → compress (2 extractors) Applied in gisco, geonames, census_usa, playtomic_tenants. Co-Authored-By: Claude Opus 4.6 --- .../src/padelnomics_extract/census_usa.py | 8 +++--- .../src/padelnomics_extract/geonames.py | 15 ++++------- .../src/padelnomics_extract/gisco.py | 8 +++--- .../padelnomics_extract/playtomic_tenants.py | 9 ++----- .../src/padelnomics_extract/utils.py | 27 +++++++++++++++++++ 5 files changed, 42 insertions(+), 25 deletions(-) diff --git a/extract/padelnomics_extract/src/padelnomics_extract/census_usa.py b/extract/padelnomics_extract/src/padelnomics_extract/census_usa.py index 91e35a8..3a5f49d 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/census_usa.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/census_usa.py @@ -19,7 +19,7 @@ from pathlib import Path import niquests from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging -from .utils import get_last_cursor, landing_path, write_gzip_atomic +from .utils import landing_path, skip_if_current, write_gzip_atomic logger = setup_logging("padelnomics.extract.census_usa") @@ -73,10 +73,10 @@ def extract( return {"files_written": 0, "files_skipped": 1, "bytes_written": 0} # Skip if we already have data for this month (annual data, monthly cursor) - last_cursor = get_last_cursor(conn, EXTRACTOR_NAME) - if last_cursor == year_month: + skip = skip_if_current(conn, EXTRACTOR_NAME, year_month) + if skip: logger.info("already have data for %s — skipping", year_month) - return {"files_written": 0, "files_skipped": 1, "bytes_written": 0} + return skip year, month = year_month.split("/") url = f"{ACS_URL}&key={api_key}" diff --git a/extract/padelnomics_extract/src/padelnomics_extract/geonames.py b/extract/padelnomics_extract/src/padelnomics_extract/geonames.py index 0e83498..600af86 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/geonames.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/geonames.py @@ -19,7 +19,6 @@ 
Output: one JSON object per line, e.g.: import gzip import io -import json import os import sqlite3 import zipfile @@ -28,7 +27,7 @@ from pathlib import Path import niquests from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging -from .utils import compress_jsonl_atomic, get_last_cursor, landing_path +from .utils import landing_path, skip_if_current, write_jsonl_atomic logger = setup_logging("padelnomics.extract.geonames") @@ -139,10 +138,10 @@ def extract( tmp.rename(dest) return {"files_written": 0, "files_skipped": 1, "bytes_written": 0} - last_cursor = get_last_cursor(conn, EXTRACTOR_NAME) - if last_cursor == year_month: + skip = skip_if_current(conn, EXTRACTOR_NAME, year_month) + if skip: logger.info("already have data for %s — skipping", year_month) - return {"files_written": 0, "files_skipped": 1, "bytes_written": 0} + return skip year, month = year_month.split("/") @@ -168,11 +167,7 @@ def extract( dest_dir = landing_path(landing_dir, "geonames", year, month) dest = dest_dir / "cities_global.jsonl.gz" - working_path = dest.with_suffix(".working.jsonl") - with open(working_path, "w") as f: - for row in rows: - f.write(json.dumps(row, separators=(",", ":")) + "\n") - bytes_written = compress_jsonl_atomic(working_path, dest) + bytes_written = write_jsonl_atomic(dest, rows) logger.info("written %s bytes compressed", f"{bytes_written:,}") return { diff --git a/extract/padelnomics_extract/src/padelnomics_extract/gisco.py b/extract/padelnomics_extract/src/padelnomics_extract/gisco.py index 8fd43fb..140d3a8 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/gisco.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/gisco.py @@ -17,7 +17,7 @@ from pathlib import Path import niquests from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging -from .utils import get_last_cursor +from .utils import skip_if_current logger = setup_logging("padelnomics.extract.gisco") @@ -45,10 +45,10 @@ def extract( session: 
niquests.Session, ) -> dict: """Download NUTS-2 GeoJSON. Skips if already run this month or file exists.""" - last_cursor = get_last_cursor(conn, EXTRACTOR_NAME) - if last_cursor == year_month: + skip = skip_if_current(conn, EXTRACTOR_NAME, year_month) + if skip: logger.info("already ran for %s — skipping", year_month) - return {"files_written": 0, "files_skipped": 1, "bytes_written": 0} + return skip dest = landing_dir / DEST_REL if dest.exists(): diff --git a/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py b/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py index 277bdec..23af26a 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py @@ -21,7 +21,6 @@ Rate: 1 req / 2 s per IP (see docs/data-sources-inventory.md §1.2). Landing: {LANDING_DIR}/playtomic/{year}/{month}/tenants.jsonl.gz """ -import json import os import sqlite3 import time @@ -33,7 +32,7 @@ import niquests from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging, ua_for_proxy from .proxy import load_proxy_tiers, make_tiered_cycler -from .utils import compress_jsonl_atomic, landing_path +from .utils import landing_path, write_jsonl_atomic logger = setup_logging("padelnomics.extract.playtomic_tenants") @@ -215,11 +214,7 @@ def extract( time.sleep(THROTTLE_SECONDS) # Write each tenant as a JSONL line, then compress atomically - working_path = dest.with_suffix(".working.jsonl") - with open(working_path, "w") as f: - for tenant in all_tenants: - f.write(json.dumps(tenant, separators=(",", ":")) + "\n") - bytes_written = compress_jsonl_atomic(working_path, dest) + bytes_written = write_jsonl_atomic(dest, all_tenants) logger.info("%d unique venues -> %s", len(all_tenants), dest) return { diff --git a/extract/padelnomics_extract/src/padelnomics_extract/utils.py b/extract/padelnomics_extract/src/padelnomics_extract/utils.py index 
451c365..25ce4b4 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/utils.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/utils.py @@ -101,6 +101,19 @@ def get_last_cursor(conn: sqlite3.Connection, extractor: str) -> str | None: return row["cursor_value"] if row else None +_SKIP_RESULT = {"files_written": 0, "files_skipped": 1, "bytes_written": 0} + + +def skip_if_current(conn: sqlite3.Connection, extractor: str, year_month: str) -> dict | None: + """Return an early-exit result dict if this extractor already ran for year_month. + + Returns None when the extractor should proceed with extraction. + """ + if get_last_cursor(conn, extractor) == year_month: + return _SKIP_RESULT + return None + + # --------------------------------------------------------------------------- # File I/O helpers # --------------------------------------------------------------------------- @@ -176,6 +189,20 @@ def write_gzip_atomic(path: Path, data: bytes) -> int: return len(compressed) +def write_jsonl_atomic(dest: Path, items: list[dict]) -> int: + """Write items as JSONL, then compress atomically to dest (.jsonl.gz). + + Compresses the working-file → JSONL → gzip pattern into one call. + Returns compressed bytes written. + """ + assert items, "items must not be empty" + working_path = dest.with_suffix(".working.jsonl") + with open(working_path, "w") as f: + for item in items: + f.write(json.dumps(item, separators=(",", ":")) + "\n") + return compress_jsonl_atomic(working_path, dest) + + def compress_jsonl_atomic(jsonl_path: Path, dest_path: Path) -> int: """Compress a JSONL working file to .jsonl.gz atomically, then delete the source. 
From f93e4fd0d1a5105e515d83426b7bcb49c08bd5ea Mon Sep 17 00:00:00 2001 From: Deeman Date: Mon, 2 Mar 2026 07:54:44 +0100 Subject: [PATCH 6/6] chore(changelog): document semantic compression pass Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf9a303..6602797 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +### Changed +- **Semantic compression pass** — applied Casey Muratori's compression workflow (write concrete → observe patterns → compress genuine repetitions) across all three packages. Net result: ~200 lines removed, codebase simpler. + - **`count_where()` helper** (`web/core.py`): compresses the `fetch_one("SELECT COUNT(*) ...") + null-check` pattern. Applied across 30+ call sites in admin, suppliers, directory, dashboard, public, and planner routes. Dashboard stats function shrinks from 75 to 25 lines. + - **`_forward_lead()` helper** (`web/admin/routes.py`): extracts shared DB logic from `lead_forward` and `lead_forward_htmx` — both routes now call the helper and differ only in response format. + - **SQLMesh macros** (`transform/macros/__init__.py`): 5 new macros compress repeated country code patterns across 7 SQL models: `@country_name`, `@country_slug`, `@normalize_eurostat_country`, `@normalize_eurostat_nuts`, `@infer_country_from_coords`. + - **Extract helpers** (`extract/utils.py`): `skip_if_current()` compresses cursor-check + early-return pattern (3 extractors); `write_jsonl_atomic()` compresses working-file → JSONL → compress pattern (2 extractors). +- **Coding philosophy updated** (`~/.claude/coding_philosophy.md`): added a `semantic compression` section documenting the workflow, the test ("Did this abstraction make the total codebase smaller?"), and distinction from premature DRY. 
+ ### Fixed - **Admin: empty confirm dialog on auto-poll** — `htmx:confirm` handler now guards with `if (!evt.detail.question) return` so auto-poll requests (`hx-trigger="every 5s"`, no `hx-confirm` attribute) no longer trigger an empty dialog every 5 seconds.