Add CFTC COT data integration with foundation data model layer

- New extraction package (cftc_cot): downloads yearly Disaggregated Futures ZIPs
  from CFTC, etag-based dedup, dynamic inner filename discovery, gzip normalization
- SQLMesh 3-layer architecture: raw (technical) → foundation (business model) → serving (mart)
- dim_commodity seed: conformed dimension mapping USDA ↔ CFTC codes — the commodity ontology
- fct_cot_positioning: typed, deduplicated weekly positioning facts for all commodities
- obt_cot_positioning: Coffee C mart with COT Index (26w/52w), WoW delta, OI ratios
- Analytics functions + REST API endpoints: /commodities/<code>/positioning[/latest]
- Dashboard widget: Managed Money net, COT Index card, dual-axis Chart.js chart
- 23 passing tests (10 unit + 2 SQLMesh model + existing regression suite)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-20 21:57:04 +01:00
parent d09ba91023
commit 0a83b2cb74
19 changed files with 1111 additions and 3 deletions

View File

@@ -0,0 +1,140 @@
-- Serving mart: COT positioning for Coffee C futures, analytics-ready.
--
-- Joins foundation.fct_cot_positioning with foundation.dim_commodity so
-- the coffee filter is driven by the dimension (not a hardcoded CFTC code).
-- Adds derived analytics used by the dashboard and API:
-- - Normalized positioning (% of open interest)
-- - Long/short ratio
-- - Week-over-week momentum
-- - COT Index over 26-week and 52-week trailing windows (0=bearish, 100=bullish)
--
-- Grain: one row per report_date for Coffee C futures.
-- Latest revision per date: MAX(ingest_date) used to deduplicate CFTC corrections.
MODEL (
name serving.cot_positioning,
kind INCREMENTAL_BY_TIME_RANGE (
time_column report_date
),
grain (report_date),
start '2006-06-13',
cron '@daily'
);
WITH latest_revision AS (
-- Pick the most recently ingested row when CFTC issues corrections
SELECT f.*
FROM foundation.fct_cot_positioning f
INNER JOIN foundation.dim_commodity d
ON f.cftc_commodity_code = d.cftc_commodity_code
WHERE d.commodity_name = 'Coffee, Green'
AND f.report_date BETWEEN @start_ds AND @end_ds
QUALIFY ROW_NUMBER() OVER (
PARTITION BY f.report_date, f.cftc_contract_market_code
ORDER BY f.ingest_date DESC
) = 1
),
with_derived AS (
SELECT
report_date,
market_and_exchange_name,
cftc_commodity_code,
cftc_contract_market_code,
contract_units,
ingest_date,
-- Absolute positions (contracts)
open_interest,
managed_money_long,
managed_money_short,
managed_money_spread,
managed_money_net,
prod_merc_long,
prod_merc_short,
prod_merc_net,
swap_long,
swap_short,
swap_spread,
swap_net,
other_reportable_long,
other_reportable_short,
other_reportable_spread,
other_reportable_net,
nonreportable_long,
nonreportable_short,
nonreportable_net,
-- Normalized: managed money net as % of open interest
-- Removes size effects and makes cross-period comparison meaningful
round(
managed_money_net::float / NULLIF(open_interest, 0) * 100,
2
) AS managed_money_net_pct_of_oi,
-- Long/short ratio: >1 = more bulls than bears in managed money
round(
managed_money_long::float / NULLIF(managed_money_short, 0),
3
) AS managed_money_long_short_ratio,
-- Weekly changes
change_open_interest,
change_managed_money_long,
change_managed_money_short,
change_managed_money_net,
change_prod_merc_long,
change_prod_merc_short,
-- Week-over-week momentum in managed money net (via LAG)
managed_money_net - LAG(managed_money_net, 1) OVER (
ORDER BY report_date
) AS managed_money_net_wow,
-- Concentration
concentration_top4_long_pct,
concentration_top4_short_pct,
concentration_top8_long_pct,
concentration_top8_short_pct,
-- Trader counts
traders_total,
traders_managed_money_long,
traders_managed_money_short,
traders_managed_money_spread,
-- COT Index (26-week): where is current net vs. trailing 26 weeks?
-- 0 = most bearish extreme, 100 = most bullish extreme
-- Industry-standard sentiment gauge (equivalent to RSI for positioning)
CASE
WHEN MAX(managed_money_net) OVER w26 = MIN(managed_money_net) OVER w26
THEN 50.0
ELSE round(
(managed_money_net - MIN(managed_money_net) OVER w26)::float
/ (MAX(managed_money_net) OVER w26 - MIN(managed_money_net) OVER w26)
* 100,
1
)
END AS cot_index_26w,
-- COT Index (52-week): longer-term positioning context
CASE
WHEN MAX(managed_money_net) OVER w52 = MIN(managed_money_net) OVER w52
THEN 50.0
ELSE round(
(managed_money_net - MIN(managed_money_net) OVER w52)::float
/ (MAX(managed_money_net) OVER w52 - MIN(managed_money_net) OVER w52)
* 100,
1
)
END AS cot_index_52w
FROM latest_revision
WINDOW
w26 AS (ORDER BY report_date ROWS BETWEEN 25 PRECEDING AND CURRENT ROW),
w52 AS (ORDER BY report_date ROWS BETWEEN 51 PRECEDING AND CURRENT ROW)
)
SELECT *
FROM with_derived
ORDER BY report_date