Add CFTC COT data integration with foundation data model layer

- New extraction package (cftc_cot): downloads yearly Disaggregated Futures ZIPs
  from CFTC, etag-based dedup, dynamic inner filename discovery, gzip normalization
- SQLMesh 3-layer architecture: raw (technical) → foundation (business model) → serving (mart)
- dim_commodity seed: conformed dimension mapping USDA ↔ CFTC codes — the commodity ontology
- fct_cot_positioning: typed, deduplicated weekly positioning facts for all commodities
- obt_cot_positioning: Coffee C mart with COT Index (26w/52w), WoW delta, OI ratios
- Analytics functions + REST API endpoints: /commodities/<code>/positioning[/latest]
- Dashboard widget: Managed Money net, COT Index card, dual-axis Chart.js chart
- 23 passing tests (10 unit + 2 SQLMesh model + existing regression suite)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-20 21:57:04 +01:00
parent d09ba91023
commit 0a83b2cb74
19 changed files with 1111 additions and 3 deletions

View File

@@ -12,6 +12,9 @@ import duckdb
# Coffee (Green) commodity code in USDA PSD
COFFEE_COMMODITY_CODE = 711100
# Coffee C futures commodity code in CFTC COT reports
COFFEE_CFTC_CODE = "083731"
# Metrics safe for user-facing queries (prevents SQL injection in dynamic column refs)
ALLOWED_METRICS = frozenset({
"production",
@@ -203,6 +206,116 @@ async def get_production_yoy_by_country(
)
# =============================================================================
# COT Positioning Queries
# =============================================================================
# Columns safe for user-facing COT queries
ALLOWED_COT_METRICS = frozenset({
"open_interest",
"managed_money_long",
"managed_money_short",
"managed_money_net",
"managed_money_spread",
"managed_money_net_pct_of_oi",
"managed_money_long_short_ratio",
"managed_money_net_wow",
"prod_merc_long",
"prod_merc_short",
"prod_merc_net",
"swap_long",
"swap_short",
"swap_net",
"other_reportable_net",
"nonreportable_net",
"change_open_interest",
"change_managed_money_net",
"cot_index_26w",
"cot_index_52w",
"concentration_top4_long_pct",
"concentration_top8_long_pct",
"traders_total",
"traders_managed_money_long",
"traders_managed_money_short",
})
def _validate_cot_metrics(metrics: list[str]) -> list[str]:
valid = [m for m in metrics if m in ALLOWED_COT_METRICS]
assert valid, f"No valid COT metrics in {metrics}. Allowed: {sorted(ALLOWED_COT_METRICS)}"
return valid
async def get_cot_positioning_time_series(
cftc_commodity_code: str,
metrics: list[str],
start_date: str | None = None,
end_date: str | None = None,
limit: int = 520,
) -> list[dict]:
"""Weekly COT positioning time series. limit defaults to ~10 years of weekly data."""
assert 1 <= limit <= 2000, "limit must be between 1 and 2000"
metrics = _validate_cot_metrics(metrics)
cols = ", ".join(metrics)
where_parts = ["cftc_commodity_code = ?"]
params: list = [cftc_commodity_code]
if start_date is not None:
where_parts.append("report_date >= ?")
params.append(start_date)
if end_date is not None:
where_parts.append("report_date <= ?")
params.append(end_date)
where_clause = " AND ".join(where_parts)
return await fetch_analytics(
f"""
SELECT report_date, {cols}
FROM serving.cot_positioning
WHERE {where_clause}
ORDER BY report_date ASC
LIMIT ?
""",
[*params, limit],
)
async def get_cot_positioning_latest(cftc_commodity_code: str) -> dict | None:
    """Fetch the most recent COT positioning row for one commodity.

    Selects every column of serving.cot_positioning for the given CFTC code,
    newest report_date first, and keeps a single row.

    Returns:
        The first row returned by fetch_analytics, or None when the query
        yields nothing.
    """
    query = """
        SELECT *
        FROM serving.cot_positioning
        WHERE cftc_commodity_code = ?
        ORDER BY report_date DESC
        LIMIT 1
        """
    snapshot = await fetch_analytics(query, [cftc_commodity_code])
    if not snapshot:
        return None
    return snapshot[0]
async def get_cot_index_trend(
    cftc_commodity_code: str,
    weeks: int = 104,
) -> list[dict]:
    """COT Index time series (26w and 52w) for the trailing N weeks.

    Args:
        cftc_commodity_code: value matched against the cftc_commodity_code column.
        weeks: maximum number of weekly rows to return, between 1 and 1040.

    Returns:
        The result of fetch_analytics for the query: report_date, both COT
        index columns, managed_money_net and managed_money_net_pct_of_oi,
        ordered newest first (report_date descending).

    Raises:
        AssertionError: if weeks is outside 1..1040.
    """
    # Explicit raise instead of `assert` so the bound is still enforced when
    # Python runs with -O; the exception type is unchanged for callers.
    if not 1 <= weeks <= 1040:
        raise AssertionError("weeks must be between 1 and 1040")

    return await fetch_analytics(
        """
        SELECT report_date, cot_index_26w, cot_index_52w,
               managed_money_net, managed_money_net_pct_of_oi
        FROM serving.cot_positioning
        WHERE cftc_commodity_code = ?
        ORDER BY report_date DESC
        LIMIT ?
        """,
        [cftc_commodity_code, weeks],
    )
async def get_country_comparison(
commodity_code: int,
country_codes: list[str],

View File

@@ -162,6 +162,42 @@ async def commodity_countries(code: int):
return jsonify({"commodity_code": code, "metric": metric, "data": data})
@bp.route("/commodities/<code>/positioning")
@api_key_required(scopes=["read"])
async def commodity_positioning(code: str):
    """COT trader positioning time series for a commodity.

    Query params:
        metrics    — repeated param, e.g. ?metrics=managed_money_net&metrics=cot_index_26w
        start_date — ISO date filter (YYYY-MM-DD)
        end_date   — ISO date filter (YYYY-MM-DD)
        limit      — max rows returned (default 260, max 2000)

    Responds with 400 when no requested metric is allowed, when a date filter
    is not a valid YYYY-MM-DD date, or when limit is not a positive integer.
    """
    # Local import: only this handler's parameter validation needs it.
    from datetime import datetime

    raw_metrics = request.args.getlist("metrics") or [
        "managed_money_net", "prod_merc_net", "open_interest", "cot_index_26w"
    ]
    # Unknown metric names are dropped; the request only fails if none remain.
    metrics = [m for m in raw_metrics if m in analytics.ALLOWED_COT_METRICS]
    if not metrics:
        return jsonify({"error": f"No valid metrics. Allowed: {sorted(analytics.ALLOWED_COT_METRICS)}"}), 400

    start_date = request.args.get("start_date")
    end_date = request.args.get("end_date")
    # Reject malformed dates here so they produce a client error instead of
    # being passed into the analytics query. The original strings are still
    # forwarded unchanged once validated.
    for param_name, value in (("start_date", start_date), ("end_date", end_date)):
        if value is None:
            continue
        try:
            datetime.strptime(value, "%Y-%m-%d")
        except ValueError:
            return jsonify({"error": f"{param_name} must be an ISO date (YYYY-MM-DD)"}), 400

    # A non-numeric limit used to raise out of int(), and a limit below 1
    # tripped the range check in the analytics layer; both are client errors.
    try:
        limit = int(request.args.get("limit", 260))
    except ValueError:
        return jsonify({"error": "limit must be an integer"}), 400
    if limit < 1:
        return jsonify({"error": "limit must be at least 1"}), 400
    limit = min(limit, 2000)  # silently cap oversized requests at the maximum

    data = await analytics.get_cot_positioning_time_series(code, metrics, start_date, end_date, limit)
    return jsonify({"cftc_commodity_code": code, "metrics": metrics, "data": data})
@bp.route("/commodities/<code>/positioning/latest")
@api_key_required(scopes=["read"])
async def commodity_positioning_latest(code: str):
    """Return the newest COT positioning snapshot for the commodity in the URL.

    Responds with the snapshot under "data", or with a 404 error payload when
    the analytics layer has nothing for this code.
    """
    snapshot = await analytics.get_cot_positioning_latest(code)
    if snapshot:
        return jsonify({"cftc_commodity_code": code, "data": snapshot})
    return jsonify({"error": "No positioning data found for this commodity"}), 404
@bp.route("/commodities/<int:code>/metrics.csv")
@api_key_required(scopes=["read"])
async def commodity_metrics_csv(code: int):

View File

@@ -11,7 +11,6 @@ from quart import Blueprint, flash, g, jsonify, redirect, render_template, reque
from .. import analytics
from ..auth.routes import login_required, update_user
from ..core import csrf_protect, execute, fetch_all, fetch_one, soft_delete
# Blueprint with its own template folder
@@ -99,9 +98,9 @@ async def index():
stats = await get_user_stats(g.user["id"])
plan = (g.get("subscription") or {}).get("plan", "free")
# Fetch all analytics data in parallel (empty lists if DB not available)
# Fetch all analytics data in parallel (empty lists/None if DB not available)
if analytics._conn is not None:
time_series, top_producers, stu_trend, balance, yoy = await asyncio.gather(
time_series, top_producers, stu_trend, balance, yoy, cot_latest, cot_trend = await asyncio.gather(
analytics.get_global_time_series(
analytics.COFFEE_COMMODITY_CODE,
["production", "exports", "imports", "ending_stocks", "total_distribution"],
@@ -110,9 +109,12 @@ async def index():
analytics.get_stock_to_use_trend(analytics.COFFEE_COMMODITY_CODE),
analytics.get_supply_demand_balance(analytics.COFFEE_COMMODITY_CODE),
analytics.get_production_yoy_by_country(analytics.COFFEE_COMMODITY_CODE, limit=15),
analytics.get_cot_positioning_latest(analytics.COFFEE_CFTC_CODE),
analytics.get_cot_index_trend(analytics.COFFEE_CFTC_CODE, weeks=104),
)
else:
time_series, top_producers, stu_trend, balance, yoy = [], [], [], [], []
cot_latest, cot_trend = None, []
# Latest global snapshot for key metric cards
latest = time_series[-1] if time_series else {}
@@ -136,6 +138,8 @@ async def index():
stu_trend=stu_trend,
balance=balance,
yoy=yoy,
cot_latest=cot_latest,
cot_trend=cot_trend,
)

View File

@@ -115,6 +115,39 @@
<div class="plan-gate mb-8">CSV export available on Trader and Analyst plans. <a href="{{ url_for('billing.pricing') }}">Upgrade</a></div>
{% endif %}
<!-- Speculative Positioning (CFTC COT) -->
{% if cot_latest %}
<div class="chart-container mb-8">
<h2 class="text-xl mb-1">Speculative Positioning — Coffee C Futures</h2>
<p class="text-muted mb-4">CFTC Commitment of Traders · Managed Money net position (hedge funds &amp; CTAs) · as of {{ cot_latest.report_date }}</p>
<div class="grid-4 mb-4">
<div class="metric-card">
<div class="metric-label">Managed Money Net</div>
<div class="metric-value {% if cot_latest.managed_money_net > 0 %}text-green{% else %}text-red{% endif %}">
{{ "{:+,d}".format(cot_latest.managed_money_net | int) }}
</div>
<div class="metric-sub">contracts (long &minus; short)</div>
</div>
<div class="metric-card">
<div class="metric-label">COT Index (26w)</div>
<div class="metric-value">{{ "{:.0f}".format(cot_latest.cot_index_26w) }}</div>
<div class="metric-sub">0 = most bearish · 100 = most bullish</div>
</div>
<div class="metric-card">
<div class="metric-label">Net % of Open Interest</div>
<div class="metric-value">{{ "{:+.1f}".format(cot_latest.managed_money_net_pct_of_oi) }}%</div>
<div class="metric-sub">managed money positioning</div>
</div>
<div class="metric-card">
<div class="metric-label">Open Interest</div>
<div class="metric-value">{{ "{:,d}".format(cot_latest.open_interest | int) }}</div>
<div class="metric-sub">total contracts outstanding</div>
</div>
</div>
<canvas id="cotPositioningChart"></canvas>
</div>
{% endif %}
<!-- Quick Actions -->
<div class="grid-3">
<a href="{{ url_for('dashboard.countries') }}" class="btn-outline text-center">Country Comparison</a>
@@ -202,6 +235,57 @@ if (stuData.length > 0) {
});
}
// -- COT Positioning Chart --
const cotRaw = {{ cot_trend | tojson }};
if (cotRaw && cotRaw.length > 0) {
const cotData = [...cotRaw].reverse(); // query returns DESC, chart needs ASC
new Chart(document.getElementById('cotPositioningChart'), {
type: 'line',
data: {
labels: cotData.map(r => r.report_date),
datasets: [
{
label: 'Managed Money Net (contracts)',
data: cotData.map(r => r.managed_money_net),
borderColor: CHART_PALETTE[0],
backgroundColor: CHART_PALETTE[0] + '22',
fill: true,
tension: 0.3,
yAxisID: 'y'
},
{
label: 'COT Index 26w (0100)',
data: cotData.map(r => r.cot_index_26w),
borderColor: CHART_PALETTE[2],
borderDash: [5, 4],
tension: 0.3,
pointRadius: 0,
yAxisID: 'y1'
}
]
},
options: {
responsive: true,
interaction: {mode: 'index', intersect: false},
plugins: {legend: {position: 'bottom'}},
scales: {
x: {ticks: {maxTicksLimit: 12}},
y: {
title: {display: true, text: 'Net Contracts'},
position: 'left'
},
y1: {
title: {display: true, text: 'COT Index'},
position: 'right',
min: 0,
max: 100,
grid: {drawOnChartArea: false}
}
}
}
});
}
// -- Top Producers Horizontal Bar --
const topData = {{ top_producers | tojson }};
if (topData.length > 0) {