feat(pseo): add generation progress tracking to tasks table
- Migration 0021: add progress_current, progress_total columns to tasks.
- generate_articles(): accept a task_id param; write progress every 50 articles and once at completion via db_execute().
- worker.py handle_generate_articles: inject _task_id from process_task() and pass it to generate_articles() so the pSEO dashboard can poll live progress.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -275,6 +275,7 @@ async def generate_articles(
|
||||
*,
|
||||
limit: int = 500,
|
||||
base_url: str = "https://padelnomics.io",
|
||||
task_id: int | None = None,
|
||||
) -> int:
|
||||
"""
|
||||
Generate articles from a git template + DuckDB data.
|
||||
@@ -288,8 +289,14 @@ async def generate_articles(
|
||||
- write HTML to disk
|
||||
- upsert article row in SQLite
|
||||
|
||||
Returns count of articles generated.
|
||||
If task_id is given, writes progress_current / progress_total / error_log
|
||||
to the tasks table every _PROGRESS_BATCH articles so the pSEO dashboard
|
||||
can show a live progress bar. Per-article errors are logged and collected
|
||||
rather than aborting the run — the full task still completes.
|
||||
|
||||
Returns count of articles generated (excluding per-article errors).
|
||||
"""
|
||||
from ..core import execute as db_execute
|
||||
from ..planner.calculator import DEFAULTS, calc, validate_state
|
||||
from .routes import bake_scenario_cards, is_reserved_path
|
||||
|
||||
@@ -321,6 +328,15 @@ async def generate_articles(
|
||||
t_calc = t_render = t_bake = 0.0
|
||||
|
||||
_BATCH_SIZE = 200
|
||||
_PROGRESS_BATCH = 50 # write task progress every N articles (avoid write amplification)
|
||||
|
||||
# Write progress_total before the loop so the dashboard can show 0/N immediately.
|
||||
if task_id is not None:
|
||||
total = len(rows) * len(config["languages"])
|
||||
await db_execute(
|
||||
"UPDATE tasks SET progress_total = ? WHERE id = ?",
|
||||
(total, task_id),
|
||||
)
|
||||
|
||||
async with transaction() as db:
|
||||
for row in rows:
|
||||
@@ -505,12 +521,27 @@ async def generate_articles(
|
||||
elif generated % 25 == 0:
|
||||
logger.info("%s: %d articles written…", slug, generated)
|
||||
|
||||
# Write progress every _PROGRESS_BATCH articles so the pSEO
|
||||
# dashboard live-updates without excessive write amplification.
|
||||
if task_id is not None and generated % _PROGRESS_BATCH == 0:
|
||||
await db_execute(
|
||||
"UPDATE tasks SET progress_current = ? WHERE id = ?",
|
||||
(generated, task_id),
|
||||
)
|
||||
|
||||
# Stagger dates
|
||||
published_today += 1
|
||||
if published_today >= articles_per_day:
|
||||
published_today = 0
|
||||
publish_date += timedelta(days=1)
|
||||
|
||||
# Write final progress so the dashboard shows 100% on completion.
|
||||
if task_id is not None:
|
||||
await db_execute(
|
||||
"UPDATE tasks SET progress_current = ? WHERE id = ?",
|
||||
(generated, task_id),
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"%s: done — %d total | calc=%.1fs render=%.1fs bake=%.1fs",
|
||||
slug, generated, t_calc, t_render, t_bake,
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
"""Add progress tracking columns to the tasks table.
|
||||
|
||||
Enables the pSEO Engine dashboard to show live progress during article
|
||||
generation jobs: a progress bar (current/total) and an error log for
|
||||
per-article failures without aborting the whole run.
|
||||
"""
|
||||
|
||||
|
||||
def up(conn) -> None:
|
||||
conn.execute(
|
||||
"ALTER TABLE tasks ADD COLUMN progress_current INTEGER NOT NULL DEFAULT 0"
|
||||
)
|
||||
conn.execute(
|
||||
"ALTER TABLE tasks ADD COLUMN progress_total INTEGER NOT NULL DEFAULT 0"
|
||||
)
|
||||
conn.execute(
|
||||
"ALTER TABLE tasks ADD COLUMN error_log TEXT NOT NULL DEFAULT '[]'"
|
||||
)
|
||||
@@ -4,11 +4,10 @@ Background task worker - SQLite-based queue (no Redis needed).
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import traceback
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import logging
|
||||
|
||||
from .core import (
|
||||
EMAIL_ADDRESSES,
|
||||
config,
|
||||
@@ -730,8 +729,11 @@ async def handle_generate_articles(payload: dict) -> None:
|
||||
start_date = date_cls.fromisoformat(payload["start_date"])
|
||||
articles_per_day = payload.get("articles_per_day", 3)
|
||||
limit = payload.get("limit", 500)
|
||||
task_id = payload.get("_task_id")
|
||||
|
||||
count = await generate_articles(slug, start_date, articles_per_day, limit=limit)
|
||||
count = await generate_articles(
|
||||
slug, start_date, articles_per_day, limit=limit, task_id=task_id
|
||||
)
|
||||
logger.info("Generated %s articles for template '%s'", count, slug)
|
||||
|
||||
|
||||
@@ -753,6 +755,9 @@ async def process_task(task: dict) -> None:
|
||||
|
||||
try:
|
||||
payload = json.loads(task["payload"]) if task["payload"] else {}
|
||||
# Inject task_id so progress-aware handlers (e.g. generate_articles) can
|
||||
# write progress_current to the tasks table without a separate lookup.
|
||||
payload["_task_id"] = task_id
|
||||
await handler(payload)
|
||||
await mark_complete(task_id)
|
||||
logger.info("Completed: %s (id=%s)", task_name, task_id)
|
||||
|
||||
Reference in New Issue
Block a user