Files
beanflows/web/scripts/seed_cms_coffee.py
Deeman 52bd731fc3
Some checks failed
CI / test-cli (push) Successful in 11s
CI / test-sqlmesh (push) Successful in 13s
CI / test-web (push) Failing after 14s
CI / tag (push) Has been skipped
chore: fix all ruff lint warnings (unused imports, unsorted imports, unused vars)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-28 10:05:05 +01:00

376 lines
16 KiB
Python

#!/usr/bin/env python3
"""
seed_cms_coffee.py — Seed coffee commodity CMS article templates.
Creates:
1. Article templates (Jinja2 body_template + URL/title patterns)
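2. Template data rows (one per country for the country overview template,
one per market year for the global market template), pulled from the DuckDB
serving layer when available; placeholder rows are seeded otherwise.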
Usage (from web/ directory):
uv run python scripts/seed_cms_coffee.py [--db data/app.db] [--dry-run]
After running this, go to /admin/cms to bulk-generate the articles.
"""
import argparse
import json
import os
import sqlite3
import sys
from pathlib import Path
# ── Config ────────────────────────────────────────────────────────────────────
# Default SQLite database path used when --db is not given on the command line.
DB_DEFAULT = "data/app.db"
# Path to the DuckDB serving database, read from the environment.
# An empty string means "not configured".
SERVING_DB = os.environ.get("SERVING_DUCKDB_PATH", "")
# ── Article templates ─────────────────────────────────────────────────────────
# Article template definitions inserted into the article_templates table by run().
# Each entry carries:
#   name                      - unique template name (run() also stores it as the slug)
#   url_pattern               - Jinja2 pattern for the article URL
#   title_pattern             - Jinja2 pattern for the page title
#   meta_description_pattern  - Jinja2 pattern for the meta description
#   body_template             - Jinja2 HTML body
# The variables referenced in the patterns are the keys of the per-row data
# dicts that run() serialises into template_data.data_json.
TEMPLATES = [
# Per-country overview page; one data row per country.
{
"name": "coffee-country-overview",
"url_pattern": "/coffee/{{ country_slug }}",
"title_pattern": "{{ country_name }} Coffee Production & Trade — BeanFlows",
"meta_description_pattern": (
"USDA PSD supply/demand data for {{ country_name }} coffee: "
"production, exports, imports, ending stocks, and market trends."
),
# Uses: country_name, rank, latest_year, production_bags, exports_bags,
# domestic_consumption_bags, ending_stocks_bags, production_trend,
# production_yoy_pct, data_vintage.
"body_template": """\
<h2>{{ country_name }} Coffee Overview</h2>
<p>
{{ country_name }} is {% if rank <= 5 %}one of the world's top coffee-producing nations
{% else %}a notable player in the global coffee market{% endif %}.
In {{ latest_year }}, total production reached
<strong>{{ "{:,}".format(production_bags|int) }} 60-kg bags</strong>.
</p>
<h3>Supply & Demand Snapshot ({{ latest_year }})</h3>
<table>
<thead>
<tr><th>Metric</th><th>Value (60-kg bags)</th></tr>
</thead>
<tbody>
<tr><td>Production</td><td>{{ "{:,}".format(production_bags|int) }}</td></tr>
<tr><td>Exports</td><td>{{ "{:,}".format(exports_bags|int) }}</td></tr>
<tr><td>Domestic Consumption</td><td>{{ "{:,}".format(domestic_consumption_bags|int) }}</td></tr>
<tr><td>Ending Stocks</td><td>{{ "{:,}".format(ending_stocks_bags|int) }}</td></tr>
</tbody>
</table>
<h3>Production Trend</h3>
<p>
Over the past decade, {{ country_name }}'s coffee output has shown
{% if production_trend == 'up' %}a <strong>rising trend</strong>
{% elif production_trend == 'down' %}a <strong>declining trend</strong>
{% else %}relatively <strong>stable</strong> production
{% endif %}.
Year-on-year change in {{ latest_year }}: <strong>{{ production_yoy_pct }}%</strong>.
</p>
<h3>Key Export Markets</h3>
<p>
{{ country_name }} primarily exports to international commodity markets,
with volumes settled against the ICE Coffee C futures contract (KC=F).
Track live price data and warehouse stocks on the
<a href="/dashboard/positioning">BeanFlows positioning dashboard</a>.
</p>
<p><em>Data source: USDA PSD Online, updated {{ data_vintage }}.</em></p>
""",
},
# Global market summary page; one data row per market year.
{
"name": "coffee-global-market-year",
"url_pattern": "/coffee/market/{{ market_year }}",
"title_pattern": "Global Coffee Market {{ market_year }} — Supply, Demand & Stocks",
"meta_description_pattern": (
"Global coffee supply and demand balance for {{ market_year }}: "
"USDA PSD production, consumption, trade, and ending stocks data."
),
# Uses: market_year, world_production_bags, beginning_stocks_m, production_m,
# total_supply_m, consumption_m, ending_stocks_m, stu_pct, balance, data_vintage.
# NOTE(review): world_production_bags is fetched as production * 1000 but is
# rendered below with a "million 60-kg bags" label — units look inconsistent;
# confirm against the serving-layer schema.
# NOTE(review): the *_m values are the raw serving columns shown under a
# "Million 60-kg Bags" header — confirm the source unit.
"body_template": """\
<h2>Global Coffee Market {{ market_year }}</h2>
<p>
The <strong>{{ market_year }}</strong> global coffee marketing year ran from
October {{ market_year|int - 1 }} through September {{ market_year }}.
World production totalled
<strong>{{ "{:,}".format(world_production_bags|int) }} million 60-kg bags</strong>.
</p>
<h3>World Supply & Demand Balance</h3>
<table>
<thead>
<tr><th>Metric</th><th>Million 60-kg Bags</th></tr>
</thead>
<tbody>
<tr><td>Opening Stocks</td><td>{{ "%.1f"|format(beginning_stocks_m|float) }}</td></tr>
<tr><td>Production</td><td>{{ "%.1f"|format(production_m|float) }}</td></tr>
<tr><td>Total Supply</td><td>{{ "%.1f"|format(total_supply_m|float) }}</td></tr>
<tr><td>Consumption</td><td>{{ "%.1f"|format(consumption_m|float) }}</td></tr>
<tr><td>Ending Stocks</td><td>{{ "%.1f"|format(ending_stocks_m|float) }}</td></tr>
<tr><td>Stock-to-Use Ratio</td><td>{{ "%.1f"|format(stu_pct|float) }}%</td></tr>
</tbody>
</table>
<h3>Supply/Demand Balance</h3>
<p>
The {{ market_year }} marketing year ended with a
{% if balance >= 0 %}<strong>surplus</strong> of {{ "%.1f"|format(balance|float) }}M bags
{% else %}<strong>deficit</strong> of {{ "%.1f"|format((balance|float)|abs) }}M bags
{% endif %}.
The stock-to-use ratio of <strong>{{ "%.1f"|format(stu_pct|float) }}%</strong> indicates
{% if stu_pct|float > 25 %}comfortable{% elif stu_pct|float > 18 %}adequate{% else %}tight{% endif %}
global supply conditions.
</p>
<p>
Explore live supply & demand charts and price data on
<a href="/dashboard/supply">BeanFlows Supply Dashboard</a>.
</p>
<p><em>Data source: USDA PSD Online, updated {{ data_vintage }}.</em></p>
""",
},
]
# ── Data generation ───────────────────────────────────────────────────────────
def fetch_country_data_from_duckdb() -> list[dict]:
    """Pull top coffee-producing countries from DuckDB serving layer.

    Reads ``serving.commodity_metrics`` for the latest market year and returns
    the 30 highest-producing countries, ordered by production rank.

    Returns:
        A list of dicts with keys ``country_name``, ``country_code``,
        ``market_year``, ``production_bags``, ``exports_bags``,
        ``domestic_consumption_bags``, ``ending_stocks_bags``,
        ``production_yoy_pct`` and ``rank``. An empty list is returned when
        ``SERVING_DB`` is unset or missing, or when anything goes wrong while
        querying (best-effort: a message is printed instead of raising).
    """
    if not SERVING_DB or not Path(SERVING_DB).exists():
        print(f" Serving DB not found at {SERVING_DB!r} — using placeholder countries")
        return []

    # Must match the column order of the `ranked` CTE below.
    columns = [
        "country_name",
        "country_code",
        "market_year",
        "production_bags",
        "exports_bags",
        "domestic_consumption_bags",
        "ending_stocks_bags",
        "production_yoy_pct",
        "rank",
    ]
    try:
        # Imported lazily so the script still runs when duckdb is not installed.
        import duckdb

        conn = duckdb.connect(SERVING_DB, read_only=True)
        try:
            # NOTE(review): the * 1000 presumably converts thousand-bag units
            # to bags — confirm against the serving-layer schema.
            rows = conn.execute("""
                WITH latest AS (
                    SELECT MAX(market_year) AS max_year
                    FROM serving.commodity_metrics
                    WHERE commodity_code = 711100 AND country_code IS NOT NULL
                ),
                ranked AS (
                    SELECT country_name, country_code, market_year,
                           production * 1000 AS production_bags,
                           exports * 1000 AS exports_bags,
                           domestic_consumption * 1000 AS domestic_consumption_bags,
                           ending_stocks * 1000 AS ending_stocks_bags,
                           production_yoy_pct,
                           ROW_NUMBER() OVER (ORDER BY production DESC) AS rank
                    FROM serving.commodity_metrics, latest
                    WHERE commodity_code = 711100
                      AND country_code IS NOT NULL
                      AND market_year = latest.max_year
                      AND production > 0
                )
                SELECT * FROM ranked ORDER BY rank LIMIT 30
            """).fetchall()
        finally:
            # Always release the database handle, even if the query fails.
            conn.close()
    except Exception as e:
        print(f" DuckDB error: {e} — using placeholder countries")
        return []
    return [dict(zip(columns, row)) for row in rows]
def fetch_global_year_data_from_duckdb() -> list[dict]:
    """Pull global supply/demand summary per market year.

    Reads the rows of ``serving.commodity_metrics`` whose ``country_name`` is
    ``'Global'`` and returns the 10 most recent market years, newest first.

    Returns:
        A list of dicts keyed by ``market_year``, the ``*_bags`` columns
        (raw value times 1000), the ``*_m`` columns (raw values), ``balance``
        and ``stu_pct``. An empty list is returned when ``SERVING_DB`` is
        unset or missing, or when the query fails (best-effort: the error is
        printed instead of raised).
    """
    if not SERVING_DB or not Path(SERVING_DB).exists():
        return []

    # Must match the SELECT column order below.
    cols = [
        "market_year",
        "beginning_stocks_bags",
        "world_production_bags",
        "total_supply_bags",
        "consumption_bags",
        "ending_stocks_bags",
        "beginning_stocks_m",
        "production_m",
        "total_supply_m",
        "consumption_m",
        "ending_stocks_m",
        "balance",
        "stu_pct",
    ]
    try:
        # Imported lazily so the script still runs when duckdb is not installed.
        import duckdb

        conn = duckdb.connect(SERVING_DB, read_only=True)
        try:
            # beginning_stocks_m is the raw beginning_stocks column, like the
            # other *_m columns; it previously held production divided by
            # total_distribution, which is not an opening-stock figure.
            rows = conn.execute("""
                SELECT market_year,
                       beginning_stocks * 1000 AS beginning_stocks_bags,
                       production * 1000 AS world_production_bags,
                       total_supply * 1000 AS total_supply_bags,
                       domestic_consumption * 1000 AS consumption_bags,
                       ending_stocks * 1000 AS ending_stocks_bags,
                       beginning_stocks AS beginning_stocks_m,
                       production AS production_m,
                       total_supply AS total_supply_m,
                       domestic_consumption AS consumption_m,
                       ending_stocks AS ending_stocks_m,
                       supply_demand_balance AS balance,
                       stock_to_use_ratio_pct AS stu_pct
                FROM serving.commodity_metrics
                WHERE commodity_code = 711100 AND country_name = 'Global'
                ORDER BY market_year DESC
                LIMIT 10
            """).fetchall()
        finally:
            # Always release the database handle, even if the query fails.
            conn.close()
    except Exception as e:
        print(f" DuckDB error (global): {e}")
        return []
    return [dict(zip(cols, row)) for row in rows]
# Fallback country list used when the DuckDB serving layer yields no rows.
# The rank is the 1-based position in the list.
PLACEHOLDER_COUNTRIES = [
    {"country_name": name, "country_code": code, "rank": position}
    for position, (name, code) in enumerate(
        [
            ("Brazil", "BR"),
            ("Vietnam", "VN"),
            ("Colombia", "CO"),
            ("Indonesia", "ID"),
            ("Ethiopia", "ET"),
            ("Honduras", "HN"),
            ("India", "IN"),
            ("Uganda", "UG"),
            ("Mexico", "MX"),
            ("Peru", "PE"),
        ],
        start=1,
    )
]
def slug(name: str) -> str:
    """Return a URL slug for *name*.

    The name is lower-cased, each space becomes a hyphen, and commas and
    apostrophes are dropped. All other characters pass through unchanged.
    """
    substitutions = str.maketrans({" ": "-", ",": None, "'": None})
    lowered = name.lower()
    return lowered.translate(substitutions)
# ── Main ──────────────────────────────────────────────────────────────────────
def _ensure_template(conn, tmpl: dict, now: str, dry_run: bool) -> tuple[int, int]:
    """Look up or insert one article template; return (template_id, inserted_count)."""
    existing = conn.execute(
        "SELECT id FROM article_templates WHERE name = ?", (tmpl["name"],)
    ).fetchone()
    if existing:
        tmpl_id = existing["id"]
        print(f" Template '{tmpl['name']}' already exists (id={tmpl_id})")
        return tmpl_id, 0
    if dry_run:
        print(f" [dry-run] Would insert template: {tmpl['name']}")
        # -1 marks "no real template row"; data rows are never written for it.
        return -1, 0
    cursor = conn.execute(
        """INSERT INTO article_templates
        (name, slug, url_pattern, title_pattern, meta_description_pattern,
        body_template, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?)""",
        (tmpl["name"], tmpl["name"], tmpl["url_pattern"], tmpl["title_pattern"],
         tmpl["meta_description_pattern"], tmpl["body_template"], now),
    )
    tmpl_id = cursor.lastrowid
    print(f" Inserted template: {tmpl['name']} (id={tmpl_id})")
    return tmpl_id, 1


def _country_row(c: dict, data_vintage: str) -> dict:
    """Build the template_data payload for one country-overview row."""
    yoy = c.get("production_yoy_pct") or 0
    # A move of more than 2 percentage points either way counts as a trend.
    if yoy > 2:
        trend = "up"
    elif yoy < -2:
        trend = "down"
    else:
        trend = "stable"
    return {
        "country_name": c["country_name"],
        "country_code": c.get("country_code", ""),
        "country_slug": slug(c["country_name"]),
        "latest_year": c.get("market_year", 2024),
        "production_bags": c.get("production_bags", 0),
        "exports_bags": c.get("exports_bags", 0),
        "domestic_consumption_bags": c.get("domestic_consumption_bags", 0),
        "ending_stocks_bags": c.get("ending_stocks_bags", 0),
        "production_yoy_pct": round(yoy, 1),
        "production_trend": trend,
        "rank": c.get("rank", 99),
        "data_vintage": data_vintage,
    }


def _global_year_row(y: dict, data_vintage: str) -> dict:
    """Build the template_data payload for one global market-year row."""
    return {
        "market_year": y["market_year"],
        "world_production_bags": y.get("world_production_bags", 0),
        "beginning_stocks_m": round(y.get("beginning_stocks_m") or 0, 1),
        "production_m": round(y.get("production_m") or 0, 1),
        "total_supply_m": round(y.get("total_supply_m") or 0, 1),
        "consumption_m": round(y.get("consumption_m") or 0, 1),
        "ending_stocks_m": round(y.get("ending_stocks_m") or 0, 1),
        "balance": round(y.get("balance") or 0, 2),
        "stu_pct": round(y.get("stu_pct") or 0, 1),
        "data_vintage": data_vintage,
    }


def _seed_data_row(conn, tmpl_id: int, json_path: str, key_value, data: dict,
                   label: str, now: str, dry_run: bool) -> int:
    """Insert one template_data row unless it already exists; return rows inserted (0 or 1)."""
    exists = conn.execute(
        "SELECT id FROM template_data WHERE template_id = ? "
        "AND json_extract(data_json, ?) = ?",
        (tmpl_id, json_path, key_value),
    ).fetchone()
    if exists:
        # Already seeded: nothing to insert, and nothing to report in dry-run.
        return 0
    if dry_run:
        print(f" [dry-run] Would insert data row: {label}")
        return 0
    if tmpl_id <= 0:
        return 0
    conn.execute(
        "INSERT INTO template_data (template_id, data_json, created_at) VALUES (?, ?, ?)",
        (tmpl_id, json.dumps(data), now),
    )
    return 1


def run(db_path: str, dry_run: bool = False) -> None:
    """Seed the article templates and their data rows into the SQLite database.

    For every entry in ``TEMPLATES`` the template row is inserted unless one
    with the same name exists. Data rows are then added for it: one per
    country for ``coffee-country-overview`` and one per market year for
    ``coffee-global-market-year``. Rows come from the DuckDB serving layer
    when it returns data, otherwise from placeholders (``PLACEHOLDER_COUNTRIES``
    or the years 2020-2024). A data row is skipped when one with the same
    ``country_code`` / ``market_year`` already exists for the template.

    Args:
        db_path: Path to the SQLite database to seed.
        dry_run: When true, only print what would be inserted; nothing is
            written or committed.
    """
    from datetime import date, datetime, timezone

    # Naive UTC ISO timestamp: same stored format as the deprecated
    # datetime.utcnow().isoformat().
    now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    data_vintage = date.today().strftime("%B %Y")
    inserted_templates = 0
    inserted_data_rows = 0

    conn = sqlite3.connect(db_path)
    # Row factory is required so rows can be read by column name (existing["id"]).
    conn.row_factory = sqlite3.Row
    try:
        for tmpl in TEMPLATES:
            tmpl_id, created = _ensure_template(conn, tmpl, now, dry_run)
            inserted_templates += created

            # Each pending item: (json path of the dedupe key, key value, label, payload).
            pending = []
            if tmpl["name"] == "coffee-country-overview":
                countries = fetch_country_data_from_duckdb() or PLACEHOLDER_COUNTRIES
                for c in countries:
                    pending.append((
                        "$.country_code",
                        c.get("country_code", ""),
                        f"{c['country_name']}",
                        _country_row(c, data_vintage),
                    ))
            elif tmpl["name"] == "coffee-global-market-year":
                years_data = fetch_global_year_data_from_duckdb()
                if not years_data:
                    years_data = [{"market_year": y} for y in range(2020, 2025)]
                for y in years_data:
                    pending.append((
                        "$.market_year",
                        y["market_year"],
                        f"market_year={y['market_year']}",
                        _global_year_row(y, data_vintage),
                    ))

            for json_path, key_value, label, data in pending:
                inserted_data_rows += _seed_data_row(
                    conn, tmpl_id, json_path, key_value, data, label, now, dry_run
                )

        if not dry_run:
            conn.commit()
    finally:
        # Close even on failure; uncommitted work is discarded with the connection.
        conn.close()

    print(f"\nDone — inserted {inserted_templates} templates, {inserted_data_rows} data rows.")
    if not dry_run:
        print("Next: go to /admin/cms → pSEO Templates → Bulk Generate")
if __name__ == "__main__":
    # Command-line entry point: parse options, check that the target SQLite
    # file exists, then hand over to run().
    cli = argparse.ArgumentParser(description="Seed coffee CMS templates")
    cli.add_argument(
        "--db",
        default=DB_DEFAULT,
        help=f"SQLite DB path (default: {DB_DEFAULT})",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Print what would be inserted, don't write",
    )
    options = cli.parse_args()

    db_file = Path(options.db)
    if db_file.exists():
        print(f"Seeding coffee CMS content into {db_file}...")
        run(str(db_file), dry_run=options.dry_run)
    else:
        # Seeding requires the schema to exist already; exit with status 1.
        print(f"DB not found at {db_file}. Run migrations first: uv run python -m beanflows.migrations.migrate")
        sys.exit(1)