Files
beanflows/transform/sqlmesh_materia/models/raw/cot_disaggregated.sql
Deeman 2962bf5e3b Fix COT pipeline: TRY_CAST nulls, dim_commodity leading zeros, correct CFTC codes
- config.yaml: remove ambiguousorinvalidcolumn linter rule (false positives on read_csv TVFs)
- fct_cot_positioning: use TRY_CAST throughout — CFTC uses '.' as null in many columns
- raw/cot_disaggregated: add columns() declaration for 33 varchar cols
- dim_commodity: switch from SEED to FULL model with SQL VALUES to preserve leading zeros
  Pandas auto-converts '083' → 83 even with varchar column declarations in SEED models
- seeds/dim_commodity.csv: correct cftc_commodity_code from '083731' (contract market code)
  to '083' (3-digit CFTC commodity code); add CSV quoting
- test_cot_foundation.yaml: fix output key name, vars for time range, partial: true,
  and correct cftc_commodity_code to '083'
- analytics.py: COFFEE_CFTC_CODE '083731' → '083' to match actual data

Result: serving.cot_positioning has 685 rows (2013-01-08 to 2026-02-17), 23/23 tests pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-20 23:28:10 +01:00

121 lines
4.8 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- Raw CFTC Commitment of Traders — Disaggregated Futures Only.
--
-- Technical ingestion layer only: reads gzip CSVs from the landing directory
-- and surfaces the columns needed by downstream foundation models.
-- All values are varchar; casting happens in foundation.
--
-- Source: CFTC yearly ZIPs at
-- https://www.cftc.gov/files/dea/history/fut_disagg_txt_{year}.zip
-- Coverage: June 2006 to present (new file every Friday at 3:30 PM ET)
MODEL (
  /* Model metadata for the raw COT ingestion table.
     kind FULL: the model is declared as a full (non-incremental) model.
     grain: one row per commodity code, report date and contract market code.
     columns: every output column is declared explicitly as varchar; the
     names and order below mirror the aliases produced by the model query. */
  name raw.cot_disaggregated,
  kind FULL,
  grain (
    cftc_commodity_code,
    report_date_as_yyyy_mm_dd,
    cftc_contract_market_code
  ),
  start '2006-06-13',
  cron '@daily',
  columns (
    market_and_exchange_names       varchar,
    report_date_as_yyyy_mm_dd       varchar,
    cftc_commodity_code             varchar,
    cftc_contract_market_code       varchar,
    contract_units                  varchar,
    open_interest_all               varchar,
    prod_merc_positions_long_all    varchar,
    prod_merc_positions_short_all   varchar,
    swap_positions_long_all         varchar,
    swap_positions_short_all        varchar,
    swap_positions_spread_all       varchar,
    m_money_positions_long_all      varchar,
    m_money_positions_short_all     varchar,
    m_money_positions_spread_all    varchar,
    other_rept_positions_long_all   varchar,
    other_rept_positions_short_all  varchar,
    other_rept_positions_spread_all varchar,
    nonrept_positions_long_all      varchar,
    nonrept_positions_short_all     varchar,
    change_in_open_interest_all     varchar,
    change_in_m_money_long_all      varchar,
    change_in_m_money_short_all     varchar,
    change_in_prod_merc_long_all    varchar,
    change_in_prod_merc_short_all   varchar,
    conc_gross_le_4_tdr_long_all    varchar,
    conc_gross_le_4_tdr_short_all   varchar,
    conc_gross_le_8_tdr_long_all    varchar,
    conc_gross_le_8_tdr_short_all   varchar,
    traders_tot_all                 varchar,
    traders_m_money_long_all        varchar,
    traders_m_money_short_all       varchar,
    traders_m_money_spread_all      varchar,
    filename                        varchar
  )
);
/* Projects the mixed-case CSV headers onto snake_case column names, without
   casting or filtering; every field is read as text (all_varchar).
   NOTE(review): the swap short/spread source headers are spelled with a double
   underscore ("Swap__..."), unlike the long column. Presumably this mirrors
   the CFTC file header; verify against a landed file before normalizing. */
SELECT
  -- Identifiers
  "Market_and_Exchange_Names"       AS market_and_exchange_names,
  "Report_Date_as_YYYY-MM-DD"       AS report_date_as_yyyy_mm_dd,
  "CFTC_Commodity_Code"             AS cftc_commodity_code,
  "CFTC_Contract_Market_Code"       AS cftc_contract_market_code,
  "Contract_Units"                  AS contract_units,

  -- Open interest
  "Open_Interest_All"               AS open_interest_all,

  -- Producer / Merchant / Processor / User (commercial hedgers)
  "Prod_Merc_Positions_Long_All"    AS prod_merc_positions_long_all,
  "Prod_Merc_Positions_Short_All"   AS prod_merc_positions_short_all,

  -- Swap dealers
  "Swap_Positions_Long_All"         AS swap_positions_long_all,
  "Swap__Positions_Short_All"       AS swap_positions_short_all,
  "Swap__Positions_Spread_All"      AS swap_positions_spread_all,

  -- Managed money (hedge funds, CTAs — key speculative signal)
  "M_Money_Positions_Long_All"      AS m_money_positions_long_all,
  "M_Money_Positions_Short_All"     AS m_money_positions_short_all,
  "M_Money_Positions_Spread_All"    AS m_money_positions_spread_all,

  -- Other reportables
  "Other_Rept_Positions_Long_All"   AS other_rept_positions_long_all,
  "Other_Rept_Positions_Short_All"  AS other_rept_positions_short_all,
  "Other_Rept_Positions_Spread_All" AS other_rept_positions_spread_all,

  -- Non-reportable (small speculators)
  "NonRept_Positions_Long_All"      AS nonrept_positions_long_all,
  "NonRept_Positions_Short_All"     AS nonrept_positions_short_all,

  -- Week-over-week changes
  "Change_in_Open_Interest_All"     AS change_in_open_interest_all,
  "Change_in_M_Money_Long_All"      AS change_in_m_money_long_all,
  "Change_in_M_Money_Short_All"     AS change_in_m_money_short_all,
  "Change_in_Prod_Merc_Long_All"    AS change_in_prod_merc_long_all,
  "Change_in_Prod_Merc_Short_All"   AS change_in_prod_merc_short_all,

  -- Concentration (% of OI held by top 4 and top 8 traders)
  "Conc_Gross_LE_4_TDR_Long_All"    AS conc_gross_le_4_tdr_long_all,
  "Conc_Gross_LE_4_TDR_Short_All"   AS conc_gross_le_4_tdr_short_all,
  "Conc_Gross_LE_8_TDR_Long_All"    AS conc_gross_le_8_tdr_long_all,
  "Conc_Gross_LE_8_TDR_Short_All"   AS conc_gross_le_8_tdr_short_all,

  -- Trader counts
  "Traders_Tot_All"                 AS traders_tot_all,
  "Traders_M_Money_Long_All"        AS traders_m_money_long_all,
  "Traders_M_Money_Short_All"       AS traders_m_money_short_all,
  "Traders_M_Money_Spread_All"      AS traders_m_money_spread_all,

  -- Lineage
  filename
FROM read_csv(
  @cot_glob(),                 /* project macro; presumably expands to the landing-file glob (defined elsewhere) */
  delim = ',',
  encoding = 'utf-8',
  compression = 'gzip',
  header = true,
  union_by_name = true,        /* align columns across files by header name rather than position */
  filename = true,             /* exposes the source file path as the filename column selected above */
  all_varchar = true,          /* no type inference: every field stays text */
  max_line_size = 10000000,
  ignore_errors = true         /* rows that fail to parse are skipped instead of failing the load */
)