#!/usr/bin/env python3
|
|
"""
|
|
seed_cms_coffee.py — Seed coffee commodity CMS article templates.
|
|
|
|
Creates:
|
|
1. Article templates (Jinja2 body_template + URL/title patterns)
|
|
  2. Template data rows (one per country for the country overview template,
     one per market year for the global market template), pulled from the
     DuckDB serving layer when available; placeholder rows otherwise.
|
|
|
|
Usage (from web/ directory):
|
|
uv run python scripts/seed_cms_coffee.py [--db data/app.db] [--dry-run]
|
|
|
|
After running this, go to /admin/cms to bulk-generate the articles.
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sqlite3
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# ── Config ────────────────────────────────────────────────────────────────────
|
|
|
|
# Default location of the SQLite application database (used as the --db default).
DB_DEFAULT = "data/app.db"
# Filesystem path of the DuckDB serving database, taken from the environment.
# Falls back to an empty string when SERVING_DUCKDB_PATH is not set.
SERVING_DB = os.environ.get("SERVING_DUCKDB_PATH", "")
|
|
|
|
|
|
# ── Article templates ─────────────────────────────────────────────────────────
|
|
|
|
# Article template definitions seeded into the `article_templates` table.
#
# Every entry carries the same five keys: name, url_pattern, title_pattern,
# meta_description_pattern and body_template. The pattern/body values are
# Jinja2 source; the variables they reference are the keys of the per-row
# data dicts built in run() (e.g. country_name, market_year, data_vintage).
TEMPLATES = [
    {
        # One article per producing country.
        "name": "coffee-country-overview",
        "url_pattern": "/coffee/{{ country_slug }}",
        "title_pattern": "{{ country_name }} Coffee Production & Trade — BeanFlows",
        "meta_description_pattern": (
            "USDA PSD supply/demand data for {{ country_name }} coffee: "
            "production, exports, imports, ending stocks, and market trends."
        ),
        "body_template": """\
<h2>{{ country_name }} Coffee Overview</h2>

<p>
{{ country_name }} is {% if rank <= 5 %}one of the world's top coffee-producing nations
{% else %}a notable player in the global coffee market{% endif %}.
In {{ latest_year }}, total production reached
<strong>{{ "{:,}".format(production_bags|int) }} 60-kg bags</strong>.
</p>

<h3>Supply & Demand Snapshot ({{ latest_year }})</h3>
<table>
<thead>
<tr><th>Metric</th><th>Value (60-kg bags)</th></tr>
</thead>
<tbody>
<tr><td>Production</td><td>{{ "{:,}".format(production_bags|int) }}</td></tr>
<tr><td>Exports</td><td>{{ "{:,}".format(exports_bags|int) }}</td></tr>
<tr><td>Domestic Consumption</td><td>{{ "{:,}".format(domestic_consumption_bags|int) }}</td></tr>
<tr><td>Ending Stocks</td><td>{{ "{:,}".format(ending_stocks_bags|int) }}</td></tr>
</tbody>
</table>

<h3>Production Trend</h3>
<p>
Over the past decade, {{ country_name }}'s coffee output has shown
{% if production_trend == 'up' %}a <strong>rising trend</strong>
{% elif production_trend == 'down' %}a <strong>declining trend</strong>
{% else %}relatively <strong>stable</strong> production
{% endif %}.
Year-on-year change in {{ latest_year }}: <strong>{{ production_yoy_pct }}%</strong>.
</p>

<h3>Key Export Markets</h3>
<p>
{{ country_name }} primarily exports to international commodity markets,
with volumes settled against the ICE Coffee C futures contract (KC=F).
Track live price data and warehouse stocks on the
<a href="/dashboard/positioning">BeanFlows positioning dashboard</a>.
</p>

<p><em>Data source: USDA PSD Online, updated {{ data_vintage }}.</em></p>
""",
    },
    {
        # One article per global marketing year.
        #
        # world_production_bags is an absolute bag count (the serving query
        # scales production by 1000 into "*_bags"), so its headline label is
        # "60-kg bags" -- the same unit wording the country template uses for
        # production_bags -- and not "million 60-kg bags".
        "name": "coffee-global-market-year",
        "url_pattern": "/coffee/market/{{ market_year }}",
        "title_pattern": "Global Coffee Market {{ market_year }} — Supply, Demand & Stocks",
        "meta_description_pattern": (
            "Global coffee supply and demand balance for {{ market_year }}: "
            "USDA PSD production, consumption, trade, and ending stocks data."
        ),
        "body_template": """\
<h2>Global Coffee Market {{ market_year }}</h2>

<p>
The <strong>{{ market_year }}</strong> global coffee marketing year ran from
October {{ market_year|int - 1 }} through September {{ market_year }}.
World production totalled
<strong>{{ "{:,}".format(world_production_bags|int) }} 60-kg bags</strong>.
</p>

<h3>World Supply & Demand Balance</h3>
<table>
<thead>
<tr><th>Metric</th><th>Million 60-kg Bags</th></tr>
</thead>
<tbody>
<tr><td>Opening Stocks</td><td>{{ "%.1f"|format(beginning_stocks_m|float) }}</td></tr>
<tr><td>Production</td><td>{{ "%.1f"|format(production_m|float) }}</td></tr>
<tr><td>Total Supply</td><td>{{ "%.1f"|format(total_supply_m|float) }}</td></tr>
<tr><td>Consumption</td><td>{{ "%.1f"|format(consumption_m|float) }}</td></tr>
<tr><td>Ending Stocks</td><td>{{ "%.1f"|format(ending_stocks_m|float) }}</td></tr>
<tr><td>Stock-to-Use Ratio</td><td>{{ "%.1f"|format(stu_pct|float) }}%</td></tr>
</tbody>
</table>

<h3>Supply/Demand Balance</h3>
<p>
The {{ market_year }} marketing year ended with a
{% if balance >= 0 %}<strong>surplus</strong> of {{ "%.1f"|format(balance|float) }}M bags
{% else %}<strong>deficit</strong> of {{ "%.1f"|format((balance|float)|abs) }}M bags
{% endif %}.
The stock-to-use ratio of <strong>{{ "%.1f"|format(stu_pct|float) }}%</strong> indicates
{% if stu_pct|float > 25 %}comfortable{% elif stu_pct|float > 18 %}adequate{% else %}tight{% endif %}
global supply conditions.
</p>

<p>
Explore live supply & demand charts and price data on
<a href="/dashboard/supply">BeanFlows Supply Dashboard</a>.
</p>

<p><em>Data source: USDA PSD Online, updated {{ data_vintage }}.</em></p>
""",
    },
]
|
|
|
|
|
|
# ── Data generation ───────────────────────────────────────────────────────────
|
|
|
|
def fetch_country_data_from_duckdb() -> list[dict]:
    """Pull top coffee-producing countries from the DuckDB serving layer.

    Reads ``serving.commodity_metrics`` for commodity code 711100, restricted
    to the latest market year that has country-level rows, and ranks countries
    by production (descending).

    Returns:
        Up to 30 dicts, ordered by ``rank``, with keys ``country_name``,
        ``country_code``, ``market_year``, ``production_bags``,
        ``exports_bags``, ``domestic_consumption_bags``,
        ``ending_stocks_bags``, ``production_yoy_pct`` and ``rank``.
        An empty list when ``SERVING_DB`` is unset, the file does not exist,
        or the query fails for any reason (callers then fall back to
        placeholder data).
    """
    if not SERVING_DB or not Path(SERVING_DB).exists():
        print(f" Serving DB not found at {SERVING_DB!r} — using placeholder countries")
        return []

    # Must match the column order of the `ranked` CTE below.
    columns = [
        "country_name",
        "country_code",
        "market_year",
        "production_bags",
        "exports_bags",
        "domestic_consumption_bags",
        "ending_stocks_bags",
        "production_yoy_pct",
        "rank",
    ]

    try:
        # Imported lazily so the script still runs (with placeholders) when
        # duckdb is not installed; the ImportError is caught below.
        import duckdb

        conn = duckdb.connect(SERVING_DB, read_only=True)
        try:
            rows = conn.execute("""
                WITH latest AS (
                    SELECT MAX(market_year) AS max_year
                    FROM serving.commodity_metrics
                    WHERE commodity_code = 711100 AND country_code IS NOT NULL
                ),
                ranked AS (
                    SELECT country_name, country_code, market_year,
                           production * 1000 AS production_bags,
                           exports * 1000 AS exports_bags,
                           domestic_consumption * 1000 AS domestic_consumption_bags,
                           ending_stocks * 1000 AS ending_stocks_bags,
                           production_yoy_pct,
                           ROW_NUMBER() OVER (ORDER BY production DESC) AS rank
                    FROM serving.commodity_metrics, latest
                    WHERE commodity_code = 711100
                      AND country_code IS NOT NULL
                      AND market_year = latest.max_year
                      AND production > 0
                )
                -- Without an explicit ORDER BY the LIMIT may keep an arbitrary
                -- 30 rows rather than the 30 largest producers.
                SELECT * FROM ranked ORDER BY rank LIMIT 30
            """).fetchall()
        finally:
            # Always release the read-only handle, even if the query raised.
            conn.close()
    except Exception as e:
        # Deliberate best-effort: any failure degrades to placeholder data.
        print(f" DuckDB error: {e} — using placeholder countries")
        return []

    return [dict(zip(columns, row)) for row in rows]
|
|
|
|
|
|
def fetch_global_year_data_from_duckdb() -> list[dict]:
    """Pull the global supply/demand summary per market year.

    Reads the rows of ``serving.commodity_metrics`` with commodity code 711100
    and ``country_name = 'Global'``, newest market year first.

    Returns:
        Up to 10 dicts keyed by ``market_year``, the ``*_bags`` columns
        (source value multiplied by 1000), the ``*_m`` columns (source value
        passed through unchanged), ``balance`` and ``stu_pct``.
        An empty list when ``SERVING_DB`` is unset, the file does not exist,
        or the query fails.
    """
    if not SERVING_DB or not Path(SERVING_DB).exists():
        return []

    # Must match the SELECT list order below.
    cols = [
        "market_year",
        "beginning_stocks_bags",
        "world_production_bags",
        "total_supply_bags",
        "consumption_bags",
        "ending_stocks_bags",
        "beginning_stocks_m",
        "production_m",
        "total_supply_m",
        "consumption_m",
        "ending_stocks_m",
        "balance",
        "stu_pct",
    ]

    try:
        # Lazy import: a missing duckdb package is handled like any other failure.
        import duckdb

        conn = duckdb.connect(SERVING_DB, read_only=True)
        try:
            # beginning_stocks_m is the opening-stocks figure, passed through
            # like every other *_m column. It was previously computed as
            # production / total_distribution * 1000, which is not a stock level.
            #
            # NOTE(review): the *_bags columns scale by 1000 while the *_m
            # columns are unscaled, yet the article template labels *_m as
            # "Million 60-kg Bags" -- confirm the serving layer's unit.
            rows = conn.execute("""
                SELECT market_year,
                       beginning_stocks * 1000 AS beginning_stocks_bags,
                       production * 1000 AS world_production_bags,
                       total_supply * 1000 AS total_supply_bags,
                       domestic_consumption * 1000 AS consumption_bags,
                       ending_stocks * 1000 AS ending_stocks_bags,
                       beginning_stocks AS beginning_stocks_m,
                       production AS production_m,
                       total_supply AS total_supply_m,
                       domestic_consumption AS consumption_m,
                       ending_stocks AS ending_stocks_m,
                       supply_demand_balance AS balance,
                       stock_to_use_ratio_pct AS stu_pct
                FROM serving.commodity_metrics
                WHERE commodity_code = 711100 AND country_name = 'Global'
                ORDER BY market_year DESC
                LIMIT 10
            """).fetchall()
        finally:
            # Always release the read-only handle, even if the query raised.
            conn.close()
    except Exception as e:
        # Deliberate best-effort: the caller substitutes placeholder years.
        print(f" DuckDB error (global): {e}")
        return []

    return [dict(zip(cols, row)) for row in rows]
|
|
|
|
|
|
# Fallback country list (name, two-letter code), already in rank order.
# Each entry becomes {"country_name", "country_code", "rank"} with rank 1..10.
PLACEHOLDER_COUNTRIES = [
    {"country_name": name, "country_code": code, "rank": position}
    for position, (name, code) in enumerate(
        (
            ("Brazil", "BR"),
            ("Vietnam", "VN"),
            ("Colombia", "CO"),
            ("Indonesia", "ID"),
            ("Ethiopia", "ET"),
            ("Honduras", "HN"),
            ("India", "IN"),
            ("Uganda", "UG"),
            ("Mexico", "MX"),
            ("Peru", "PE"),
        ),
        start=1,
    )
]
|
|
|
|
|
|
def slug(name: str) -> str:
    """Return a URL slug for *name*.

    The name is lowercased, each space becomes a hyphen, and commas and
    apostrophes are removed. All other characters are kept as-is.
    """
    # One pass over the string: map space -> "-", delete "," and "'".
    substitutions = str.maketrans({" ": "-", ",": None, "'": None})
    lowered = name.lower()
    return lowered.translate(substitutions)
|
|
|
|
|
|
# ── Main ──────────────────────────────────────────────────────────────────────
|
|
|
|
def _ensure_template(conn, tmpl: dict, now: str, dry_run: bool) -> tuple[int, bool]:
    """Look up *tmpl* by name, inserting it if absent; return (id, inserted?).

    In dry-run mode a missing template is only reported and the id is -1.
    """
    existing = conn.execute(
        "SELECT id FROM article_templates WHERE name = ?", (tmpl["name"],)
    ).fetchone()
    if existing:
        tmpl_id = existing["id"]
        print(f" Template '{tmpl['name']}' already exists (id={tmpl_id})")
        return tmpl_id, False

    if dry_run:
        print(f" [dry-run] Would insert template: {tmpl['name']}")
        return -1, False

    cursor = conn.execute(
        """INSERT INTO article_templates
           (name, slug, url_pattern, title_pattern, meta_description_pattern,
            body_template, created_at)
           VALUES (?, ?, ?, ?, ?, ?, ?)""",
        # The template name doubles as its slug.
        (tmpl["name"], tmpl["name"], tmpl["url_pattern"], tmpl["title_pattern"],
         tmpl["meta_description_pattern"], tmpl["body_template"], now),
    )
    tmpl_id = cursor.lastrowid
    print(f" Inserted template: {tmpl['name']} (id={tmpl_id})")
    return tmpl_id, True


def _country_rows(data_vintage: str) -> list[tuple]:
    """Build (json_path, key, label, data) tuples for the country overview template."""
    # The .get defaults below supply every metric the placeholder entries lack.
    countries = fetch_country_data_from_duckdb() or PLACEHOLDER_COUNTRIES
    rows = []
    for c in countries:
        yoy = c.get("production_yoy_pct") or 0  # NULL/missing counts as 0
        # Changes within +/-2 % are reported as "stable".
        if yoy > 2:
            trend = "up"
        elif yoy < -2:
            trend = "down"
        else:
            trend = "stable"
        code = c.get("country_code", "")
        data = {
            "country_name": c["country_name"],
            "country_code": code,
            "country_slug": slug(c["country_name"]),
            "latest_year": c.get("market_year", 2024),
            "production_bags": c.get("production_bags", 0),
            "exports_bags": c.get("exports_bags", 0),
            "domestic_consumption_bags": c.get("domestic_consumption_bags", 0),
            "ending_stocks_bags": c.get("ending_stocks_bags", 0),
            "production_yoy_pct": round(yoy, 1),
            "production_trend": trend,
            "rank": c.get("rank", 99),
            "data_vintage": data_vintage,
        }
        rows.append(("$.country_code", code, c["country_name"], data))
    return rows


def _global_year_rows(data_vintage: str) -> list[tuple]:
    """Build (json_path, key, label, data) tuples for the global market-year template."""
    years_data = fetch_global_year_data_from_duckdb()
    if not years_data:
        years_data = [{"market_year": y} for y in range(2020, 2025)]

    rows = []
    for y in years_data:
        data = {
            "market_year": y["market_year"],
            "world_production_bags": y.get("world_production_bags", 0),
            "beginning_stocks_m": round(y.get("beginning_stocks_m") or 0, 1),
            "production_m": round(y.get("production_m") or 0, 1),
            "total_supply_m": round(y.get("total_supply_m") or 0, 1),
            "consumption_m": round(y.get("consumption_m") or 0, 1),
            "ending_stocks_m": round(y.get("ending_stocks_m") or 0, 1),
            "balance": round(y.get("balance") or 0, 2),
            "stu_pct": round(y.get("stu_pct") or 0, 1),
            "data_vintage": data_vintage,
        }
        rows.append(
            ("$.market_year", y["market_year"], f"market_year={y['market_year']}", data)
        )
    return rows


def _seed_row(conn, tmpl_id: int, json_path: str, key, label: str, data: dict,
              now: str, dry_run: bool) -> bool:
    """Insert one template_data row unless it already exists; return True if inserted.

    A row counts as existing when the same template already has a row whose
    JSON value at *json_path* equals *key*.
    """
    exists = conn.execute(
        "SELECT id FROM template_data WHERE template_id = ? "
        "AND json_extract(data_json, ?) = ?",
        (tmpl_id, json_path, key),
    ).fetchone()
    if exists:
        # Already seeded: nothing to insert, and nothing to announce in dry-run.
        return False
    if dry_run:
        print(f" [dry-run] Would insert data row: {label}")
        return False
    conn.execute(
        "INSERT INTO template_data (template_id, data_json, created_at) VALUES (?, ?, ?)",
        (tmpl_id, json.dumps(data), now),
    )
    return True


def run(db_path: str, dry_run: bool = False) -> None:
    """Seed the coffee article templates and their data rows into SQLite.

    For each entry in ``TEMPLATES`` the template is inserted if no template
    of that name exists, then one ``template_data`` row is inserted per
    country (country overview) or per market year (global market), skipping
    rows that are already present. All writes are committed together at the
    end; if an exception escapes, nothing is committed.

    Args:
        db_path: Path of the SQLite database to seed.
        dry_run: When true, only print what would be inserted.
    """
    from datetime import date, datetime, timezone

    # Naive UTC ISO timestamp, the same format utcnow().isoformat() produced,
    # without relying on the deprecated datetime.utcnow().
    now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    data_vintage = date.today().strftime("%B %Y")

    row_builders = {
        "coffee-country-overview": _country_rows,
        "coffee-global-market-year": _global_year_rows,
    }

    inserted_templates = 0
    inserted_data_rows = 0

    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        for tmpl in TEMPLATES:
            tmpl_id, created = _ensure_template(conn, tmpl, now, dry_run)
            if created:
                inserted_templates += 1

            build_rows = row_builders.get(tmpl["name"])
            if build_rows is None:
                continue  # template without seed data
            for json_path, key, label, data in build_rows(data_vintage):
                if _seed_row(conn, tmpl_id, json_path, key, label, data, now, dry_run):
                    inserted_data_rows += 1

        if not dry_run:
            conn.commit()
    finally:
        # Close on every path so the DB file handle is never leaked.
        conn.close()

    print(f"\nDone — inserted {inserted_templates} templates, {inserted_data_rows} data rows.")
    if not dry_run:
        print("Next: go to /admin/cms → pSEO Templates → Bulk Generate")
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse options, verify the SQLite file is
    # present, then hand over to run().
    cli = argparse.ArgumentParser(description="Seed coffee CMS templates")
    cli.add_argument(
        "--db",
        default=DB_DEFAULT,
        help=f"SQLite DB path (default: {DB_DEFAULT})",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Print what would be inserted, don't write",
    )
    options = cli.parse_args()

    db_file = Path(options.db)
    if db_file.exists():
        print(f"Seeding coffee CMS content into {db_file}...")
        run(str(db_file), dry_run=options.dry_run)
    else:
        # The seed script never creates the database itself.
        print(f"DB not found at {db_file}. Run migrations first: uv run python -m beanflows.migrations.migrate")
        sys.exit(1)
|