diff --git a/web/src/padelnomics/content/__init__.py b/web/src/padelnomics/content/__init__.py index b9be927..c7bfad2 100644 --- a/web/src/padelnomics/content/__init__.py +++ b/web/src/padelnomics/content/__init__.py @@ -275,6 +275,7 @@ async def generate_articles( *, limit: int = 500, base_url: str = "https://padelnomics.io", + task_id: int | None = None, ) -> int: """ Generate articles from a git template + DuckDB data. @@ -288,8 +289,14 @@ async def generate_articles( - write HTML to disk - upsert article row in SQLite - Returns count of articles generated. + If task_id is given, writes progress_current / progress_total / error_log + to the tasks table every _PROGRESS_BATCH articles so the pSEO dashboard + can show a live progress bar. Per-article errors are logged and collected + rather than aborting the run — the full task still completes. + + Returns count of articles generated (excluding per-article errors). """ + from ..core import execute as db_execute from ..planner.calculator import DEFAULTS, calc, validate_state from .routes import bake_scenario_cards, is_reserved_path @@ -321,6 +328,15 @@ async def generate_articles( t_calc = t_render = t_bake = 0.0 _BATCH_SIZE = 200 + _PROGRESS_BATCH = 50  # write task progress every N articles (avoid write amplification) + + # Write progress_total before the loop so the dashboard can show 0/N immediately. + if task_id is not None: + total = len(rows) * len(config["languages"]) + await db_execute( + "UPDATE tasks SET progress_total = ? WHERE id = ?", + (total, task_id), + ) async with transaction() as db: for row in rows: @@ -505,12 +521,27 @@ async def generate_articles( elif generated % 25 == 0: logger.info("%s: %d articles written…", slug, generated) + # Write progress every _PROGRESS_BATCH articles so the pSEO + # dashboard live-updates without excessive write amplification. + if task_id is not None and generated % _PROGRESS_BATCH == 0: + await db_execute( + "UPDATE tasks SET progress_current = ? WHERE id = ?", (generated, task_id), ) + # Stagger dates published_today += 1 if published_today >= articles_per_day: published_today = 0 publish_date += timedelta(days=1) + # Write final progress so the dashboard shows 100% on completion. + if task_id is not None: + await db_execute( + "UPDATE tasks SET progress_current = ? WHERE id = ?", + (generated, task_id), + ) + logger.info( "%s: done — %d total | calc=%.1fs render=%.1fs bake=%.1fs", slug, generated, t_calc, t_render, t_bake, diff --git a/web/src/padelnomics/migrations/versions/0021_tasks_progress_tracking.py b/web/src/padelnomics/migrations/versions/0021_tasks_progress_tracking.py new file mode 100644 index 0000000..e9da322 --- /dev/null +++ b/web/src/padelnomics/migrations/versions/0021_tasks_progress_tracking.py @@ -0,0 +1,18 @@ +"""Add progress tracking columns to the tasks table. + Enables the pSEO Engine dashboard to show live progress during article +generation jobs: a progress bar (current/total) and an error log for +per-article failures without aborting the whole run. +""" + + +def up(conn) -> None: + conn.execute( + "ALTER TABLE tasks ADD COLUMN progress_current INTEGER NOT NULL DEFAULT 0" + ) + conn.execute( + "ALTER TABLE tasks ADD COLUMN progress_total INTEGER NOT NULL DEFAULT 0" + ) + conn.execute( + "ALTER TABLE tasks ADD COLUMN error_log TEXT NOT NULL DEFAULT '[]'" + ) diff --git a/web/src/padelnomics/worker.py b/web/src/padelnomics/worker.py index 4c9762b..6a23b62 100644 --- a/web/src/padelnomics/worker.py +++ b/web/src/padelnomics/worker.py @@ -4,11 +4,10 @@ Background task worker - SQLite-based queue (no Redis needed). 
import asyncio import json +import logging import traceback from datetime import datetime, timedelta -import logging - from .core import ( EMAIL_ADDRESSES, config, @@ -730,8 +729,11 @@ async def handle_generate_articles(payload: dict) -> None: start_date = date_cls.fromisoformat(payload["start_date"]) articles_per_day = payload.get("articles_per_day", 3) limit = payload.get("limit", 500) + task_id = payload.get("_task_id") - count = await generate_articles(slug, start_date, articles_per_day, limit=limit) + count = await generate_articles( + slug, start_date, articles_per_day, limit=limit, task_id=task_id + ) logger.info("Generated %s articles for template '%s'", count, slug) @@ -753,6 +755,9 @@ async def process_task(task: dict) -> None: try: payload = json.loads(task["payload"]) if task["payload"] else {} + # Inject task_id so progress-aware handlers (e.g. generate_articles) can + # write progress_current to the tasks table without a separate lookup. + payload["_task_id"] = task_id await handler(payload) await mark_complete(task_id) logger.info("Completed: %s (id=%s)", task_name, task_id)