#!/usr/bin/env python3
"""
seed_cms_coffee.py — Seed coffee commodity CMS article templates.

Creates:
  1. Article templates (Jinja2 body_template + URL/title patterns)
  2. Template data rows (one per country/commodity/year combo)
     pulled from the DuckDB serving layer when available.

Usage (from web/ directory):
    uv run python scripts/seed_cms_coffee.py [--db data/app.db] [--dry-run]

After running this, go to /admin/cms to bulk-generate the articles.
"""

import argparse
import json
import os
import re
import sqlite3
import sys
import unicodedata
from contextlib import closing
from datetime import date, datetime, timezone
from decimal import Decimal
from pathlib import Path

# ── Config ────────────────────────────────────────────────────────────────────

DB_DEFAULT = "data/app.db"
SERVING_DB = os.getenv("SERVING_DUCKDB_PATH", "")

# ── Article templates ─────────────────────────────────────────────────────────

# Each body_template opens with a backslash-newline so the stored template
# text starts directly with its first sentence (no leading newline).
#
# NOTE(review): the global template labels `world_production_bags` and the
# `*_m` values as "million 60-kg bags" while the country template labels the
# same `production * 1000` scaling as plain "60-kg bags" — confirm the units
# the serving layer stores before publishing generated articles.
TEMPLATES = [
    {
        "name": "coffee-country-overview",
        "url_pattern": "/coffee/{{ country_slug }}",
        "title_pattern": "{{ country_name }} Coffee Production & Trade — BeanFlows",
        "meta_description_pattern": (
            "USDA PSD supply/demand data for {{ country_name }} coffee: "
            "production, exports, imports, ending stocks, and market trends."
        ),
        "body_template": """\
{{ country_name }} is {% if rank <= 5 %}one of the world's top coffee-producing nations {% else %}a notable player in the global coffee market{% endif %}. In {{ latest_year }}, total production reached {{ "{:,}".format(production_bags|int) }} 60-kg bags.
| Metric | Value (60-kg bags) |
|---|---|
| Production | {{ "{:,}".format(production_bags|int) }} |
| Exports | {{ "{:,}".format(exports_bags|int) }} |
| Domestic Consumption | {{ "{:,}".format(domestic_consumption_bags|int) }} |
| Ending Stocks | {{ "{:,}".format(ending_stocks_bags|int) }} |
Over the past decade, {{ country_name }}'s coffee output has shown {% if production_trend == 'up' %}a rising trend {% elif production_trend == 'down' %}a declining trend {% else %}relatively stable production {% endif %}. Year-on-year change in {{ latest_year }}: {{ production_yoy_pct }}%.
{{ country_name }} primarily exports to international commodity markets, with volumes settled against the ICE Coffee C futures contract (KC=F). Track live price data and warehouse stocks on the BeanFlows positioning dashboard.
Data source: USDA PSD Online, updated {{ data_vintage }}.
""",
    },
    {
        "name": "coffee-global-market-year",
        "url_pattern": "/coffee/market/{{ market_year }}",
        "title_pattern": "Global Coffee Market {{ market_year }} — Supply, Demand & Stocks",
        "meta_description_pattern": (
            "Global coffee supply and demand balance for {{ market_year }}: "
            "USDA PSD production, consumption, trade, and ending stocks data."
        ),
        "body_template": """\
The {{ market_year }} global coffee marketing year ran from October {{ market_year|int - 1 }} through September {{ market_year }}. World production totalled {{ "{:,}".format(world_production_bags|int) }} million 60-kg bags.
| Metric | Million 60-kg Bags |
|---|---|
| Opening Stocks | {{ "%.1f"|format(beginning_stocks_m|float) }} |
| Production | {{ "%.1f"|format(production_m|float) }} |
| Total Supply | {{ "%.1f"|format(total_supply_m|float) }} |
| Consumption | {{ "%.1f"|format(consumption_m|float) }} |
| Ending Stocks | {{ "%.1f"|format(ending_stocks_m|float) }} |
| Stock-to-Use Ratio | {{ "%.1f"|format(stu_pct|float) }}% |
The {{ market_year }} marketing year ended with a {% if balance >= 0 %}surplus of {{ "%.1f"|format(balance|float) }}M bags {% else %}deficit of {{ "%.1f"|format((balance|float)|abs) }}M bags {% endif %}. The stock-to-use ratio of {{ "%.1f"|format(stu_pct|float) }}% indicates {% if stu_pct|float > 25 %}comfortable{% elif stu_pct|float > 18 %}adequate{% else %}tight{% endif %} global supply conditions.
Explore live supply & demand charts and price data on BeanFlows Supply Dashboard.
Data source: USDA PSD Online, updated {{ data_vintage }}.
""",
    },
]

# ── Data generation ───────────────────────────────────────────────────────────

# Result-column names, in SELECT order, for the two serving-layer queries.
_COUNTRY_COLUMNS = (
    "country_name", "country_code", "market_year", "production_bags",
    "exports_bags", "domestic_consumption_bags", "ending_stocks_bags",
    "production_yoy_pct", "rank",
)
_GLOBAL_COLUMNS = (
    "market_year", "beginning_stocks_bags", "world_production_bags",
    "total_supply_bags", "consumption_bags", "ending_stocks_bags",
    "beginning_stocks_m", "production_m", "total_supply_m", "consumption_m",
    "ending_stocks_m", "balance", "stu_pct",
)

_COUNTRY_QUERY = """
    WITH latest AS (
        SELECT MAX(market_year) AS max_year
        FROM serving.commodity_metrics
        WHERE commodity_code = 711100 AND country_code IS NOT NULL
    ),
    ranked AS (
        SELECT country_name, country_code, market_year,
               production * 1000 AS production_bags,
               exports * 1000 AS exports_bags,
               domestic_consumption * 1000 AS domestic_consumption_bags,
               ending_stocks * 1000 AS ending_stocks_bags,
               production_yoy_pct,
               ROW_NUMBER() OVER (ORDER BY production DESC) AS rank
        FROM serving.commodity_metrics, latest
        WHERE commodity_code = 711100
          AND country_code IS NOT NULL
          AND market_year = latest.max_year
          AND production > 0
    )
    SELECT * FROM ranked
    ORDER BY rank
    LIMIT 30
"""

_GLOBAL_QUERY = """
    SELECT market_year,
           beginning_stocks * 1000 AS beginning_stocks_bags,
           production * 1000 AS world_production_bags,
           total_supply * 1000 AS total_supply_bags,
           domestic_consumption * 1000 AS consumption_bags,
           ending_stocks * 1000 AS ending_stocks_bags,
           beginning_stocks AS beginning_stocks_m,
           production AS production_m,
           total_supply AS total_supply_m,
           domestic_consumption AS consumption_m,
           ending_stocks AS ending_stocks_m,
           supply_demand_balance AS balance,
           stock_to_use_ratio_pct AS stu_pct
    FROM serving.commodity_metrics
    WHERE commodity_code = 711100 AND country_name = 'Global'
    ORDER BY market_year DESC
    LIMIT 10
"""


def _query_serving_db(query: str, columns: tuple) -> list[dict]:
    """Run *query* against the read-only serving DB and return rows as dicts.

    The connection is always closed, even when the query raises. Errors
    (including a missing ``duckdb`` package) propagate to the caller.
    """
    import duckdb  # optional dependency: only needed when a serving DB exists

    conn = duckdb.connect(SERVING_DB, read_only=True)
    try:
        rows = conn.execute(query).fetchall()
    finally:
        conn.close()
    return [dict(zip(columns, row)) for row in rows]


def fetch_country_data_from_duckdb() -> list[dict]:
    """Pull top coffee-producing countries from DuckDB serving layer.

    Returns up to 30 rows for the latest market year, ordered by production
    rank (the explicit ORDER BY makes the LIMIT deterministic). Returns an
    empty list when the serving DB is not configured, does not exist, or the
    query fails, so the caller can fall back to placeholder countries.
    """
    if not SERVING_DB or not Path(SERVING_DB).exists():
        print(f" Serving DB not found at {SERVING_DB!r} — using placeholder countries")
        return []
    try:
        return _query_serving_db(_COUNTRY_QUERY, _COUNTRY_COLUMNS)
    except Exception as e:  # best-effort: seeding must still work without DuckDB
        print(f" DuckDB error: {e} — using placeholder countries")
        return []


def fetch_global_year_data_from_duckdb() -> list[dict]:
    """Pull global supply/demand summary per market year.

    Returns the 10 most recent 'Global' rows, newest first, or an empty list
    when the serving DB is unavailable or the query fails.

    ``beginning_stocks_m`` is the plain ``beginning_stocks`` column, like the
    other pass-through ``*_m`` columns; the template prints it as
    "Opening Stocks".
    """
    if not SERVING_DB or not Path(SERVING_DB).exists():
        return []
    try:
        return _query_serving_db(_GLOBAL_QUERY, _GLOBAL_COLUMNS)
    except Exception as e:  # best-effort: caller falls back to placeholder years
        print(f" DuckDB error (global): {e}")
        return []


PLACEHOLDER_COUNTRIES = [
    {"country_name": "Brazil", "country_code": "BR", "rank": 1},
    {"country_name": "Vietnam", "country_code": "VN", "rank": 2},
    {"country_name": "Colombia", "country_code": "CO", "rank": 3},
    {"country_name": "Indonesia", "country_code": "ID", "rank": 4},
    {"country_name": "Ethiopia", "country_code": "ET", "rank": 5},
    {"country_name": "Honduras", "country_code": "HN", "rank": 6},
    {"country_name": "India", "country_code": "IN", "rank": 7},
    {"country_name": "Uganda", "country_code": "UG", "rank": 8},
    {"country_name": "Mexico", "country_code": "MX", "rank": 9},
    {"country_name": "Peru", "country_code": "PE", "rank": 10},
]


def slug(name: str) -> str:
    """Return a URL-safe, lowercase, hyphen-separated slug for *name*.

    Commas and apostrophes are dropped (so "Cote d'Ivoire" stays
    "cote-divoire"), accented letters are folded to ASCII, and every other
    run of non-alphanumeric characters becomes a single hyphen, e.g.
    "Congo (Kinshasa)" -> "congo-kinshasa".
    """
    folded = unicodedata.normalize("NFKD", name).encode("ascii", "ignore").decode("ascii")
    cleaned = folded.lower().replace(",", "").replace("'", "")
    return re.sub(r"[^a-z0-9]+", "-", cleaned).strip("-")


# ── Main ──────────────────────────────────────────────────────────────────────

def _json_default(value):
    """``json.dumps`` hook: serialise Decimal values (DuckDB DECIMAL columns) as floats."""
    if isinstance(value, Decimal):
        return float(value)
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def _ensure_template(conn, tmpl: dict, now: str, dry_run: bool) -> tuple[int, int]:
    """Look up or insert *tmpl*; return ``(template_id, number_inserted)``.

    In dry-run mode a missing template is not inserted and the id is -1.
    """
    existing = conn.execute(
        "SELECT id FROM article_templates WHERE name = ?", (tmpl["name"],)
    ).fetchone()
    if existing:
        tmpl_id = existing["id"]
        print(f" Template '{tmpl['name']}' already exists (id={tmpl_id})")
        return tmpl_id, 0
    if dry_run:
        print(f" [dry-run] Would insert template: {tmpl['name']}")
        return -1, 0
    cursor = conn.execute(
        """INSERT INTO article_templates
           (name, slug, url_pattern, title_pattern, meta_description_pattern,
            body_template, created_at)
           VALUES (?, ?, ?, ?, ?, ?, ?)""",
        (tmpl["name"], tmpl["name"], tmpl["url_pattern"], tmpl["title_pattern"],
         tmpl["meta_description_pattern"], tmpl["body_template"], now),
    )
    tmpl_id = cursor.lastrowid
    print(f" Inserted template: {tmpl['name']} (id={tmpl_id})")
    return tmpl_id, 1


def _country_row(c: dict, data_vintage: str) -> dict:
    """Build the template-data payload for one country record."""
    yoy = c.get("production_yoy_pct") or 0
    if yoy > 2:
        trend = "up"
    elif yoy < -2:
        trend = "down"
    else:
        trend = "stable"
    return {
        "country_name": c["country_name"],
        "country_code": c.get("country_code", ""),
        "country_slug": slug(c["country_name"]),
        "latest_year": c.get("market_year", 2024),
        "production_bags": c.get("production_bags", 0),
        "exports_bags": c.get("exports_bags", 0),
        "domestic_consumption_bags": c.get("domestic_consumption_bags", 0),
        "ending_stocks_bags": c.get("ending_stocks_bags", 0),
        "production_yoy_pct": round(yoy, 1),
        "production_trend": trend,
        "rank": c.get("rank", 99),
        "data_vintage": data_vintage,
    }


def _global_year_row(y: dict, data_vintage: str) -> dict:
    """Build the template-data payload for one global market-year record."""
    return {
        "market_year": y["market_year"],
        "world_production_bags": y.get("world_production_bags", 0),
        "beginning_stocks_m": round(y.get("beginning_stocks_m") or 0, 1),
        "production_m": round(y.get("production_m") or 0, 1),
        "total_supply_m": round(y.get("total_supply_m") or 0, 1),
        "consumption_m": round(y.get("consumption_m") or 0, 1),
        "ending_stocks_m": round(y.get("ending_stocks_m") or 0, 1),
        "balance": round(y.get("balance") or 0, 2),
        "stu_pct": round(y.get("stu_pct") or 0, 1),
        "data_vintage": data_vintage,
    }


def _seed_row(conn, tmpl_id: int, key_path: str, key_value, data: dict,
              label: str, now: str, dry_run: bool) -> int:
    """Insert *data* for *tmpl_id* unless a row with the same key exists.

    *key_path* is the JSON path (e.g. ``$.country_code``) used to detect an
    existing row. Returns the number of rows inserted (0 or 1). In dry-run
    mode nothing is written and only rows that are actually missing are
    reported.
    """
    exists = conn.execute(
        "SELECT id FROM template_data WHERE template_id = ? "
        "AND json_extract(data_json, ?) = ?",
        (tmpl_id, key_path, key_value),
    ).fetchone()
    if exists:
        return 0
    if dry_run:
        print(f" [dry-run] Would insert data row: {label}")
        return 0
    if tmpl_id <= 0:
        return 0
    conn.execute(
        "INSERT INTO template_data (template_id, data_json, created_at) VALUES (?, ?, ?)",
        (tmpl_id, json.dumps(data, default=_json_default), now),
    )
    return 1


def run(db_path: str, dry_run: bool = False):
    """Seed the article templates and their data rows into the SQLite DB.

    Args:
        db_path: Path to the SQLite database; the ``article_templates`` and
            ``template_data`` tables must already exist.
        dry_run: When true, only print what would be inserted; nothing is
            written or committed.

    Re-running is safe: templates are matched by name and data rows by
    country code / market year, so existing rows are left untouched. The
    connection is closed even if seeding fails part-way (uncommitted work is
    then discarded).
    """
    # Naive UTC ISO timestamp, same format datetime.utcnow().isoformat() gave.
    now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    data_vintage = date.today().strftime("%B %Y")
    inserted_templates = 0
    inserted_data_rows = 0

    with closing(sqlite3.connect(db_path)) as conn:
        conn.row_factory = sqlite3.Row

        for tmpl in TEMPLATES:
            tmpl_id, created = _ensure_template(conn, tmpl, now, dry_run)
            inserted_templates += created

            # Seed data rows per template
            if tmpl["name"] == "coffee-country-overview":
                # Placeholders carry only name/code/rank; _country_row fills
                # every metric with its zero default.
                countries = fetch_country_data_from_duckdb() or PLACEHOLDER_COUNTRIES
                for c in countries:
                    inserted_data_rows += _seed_row(
                        conn, tmpl_id, "$.country_code", c.get("country_code", ""),
                        _country_row(c, data_vintage), c["country_name"], now, dry_run,
                    )
            elif tmpl["name"] == "coffee-global-market-year":
                years_data = fetch_global_year_data_from_duckdb() or [
                    {"market_year": y} for y in range(2020, 2025)
                ]
                for y in years_data:
                    inserted_data_rows += _seed_row(
                        conn, tmpl_id, "$.market_year", y["market_year"],
                        _global_year_row(y, data_vintage),
                        f"market_year={y['market_year']}", now, dry_run,
                    )

        if not dry_run:
            conn.commit()

    print(f"\nDone — inserted {inserted_templates} templates, {inserted_data_rows} data rows.")
    if not dry_run:
        print("Next: go to /admin/cms → pSEO Templates → Bulk Generate")


def main() -> None:
    """Parse CLI arguments and seed the database, exiting with 1 if it is missing."""
    parser = argparse.ArgumentParser(description="Seed coffee CMS templates")
    parser.add_argument("--db", default=DB_DEFAULT, help=f"SQLite DB path (default: {DB_DEFAULT})")
    parser.add_argument("--dry-run", action="store_true", help="Print what would be inserted, don't write")
    args = parser.parse_args()

    db = Path(args.db)
    if not db.exists():
        print(f"DB not found at {db}. Run migrations first: uv run python -m beanflows.migrations.migrate")
        sys.exit(1)

    print(f"Seeding coffee CMS content into {db}...")
    run(str(db), dry_run=args.dry_run)


if __name__ == "__main__":
    main()