feat(cot): add combined (futures+options) COT extractor and transform models
- extract/cftc_cot: refactor extract_cot_year() to accept url_template and
landing_subdir params; add _extract_cot() shared loop; add extract_cot_combined()
entry point using com_disagg_txt_{year}.zip → landing/cot_combined/
- pyproject.toml: add extract_cot_combined script entry point
- macros/__init__.py: add @cot_combined_glob() for cot_combined/**/*.csv.gzip
- fct_cot_positioning.sql: union cot_glob and cot_combined_glob in the src CTE;
add a report_type column (trimmed from the raw FutOnly_or_Combined field) to the
cast_and_clean and deduplicated CTEs; include FutOnly_or_Combined in hkey so that
futures-only and combined rows never share a dedup key; add report_type to the grain
- obt_cot_positioning.sql: add a report_type = 'FutOnly' filter so the model keeps
its existing serving behavior now that the fact table also contains combined rows
- obt_cot_positioning_combined.sql: new serving model filtered to report_type =
'Combined'; identical analytics (COT index, net %, windows) on combined data
- pipelines.py: register extract_cot_combined; add to extract_all meta-pipeline
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,7 +4,7 @@ MODEL (
|
||||
kind INCREMENTAL_BY_TIME_RANGE (
|
||||
time_column report_date
|
||||
),
|
||||
grain (cftc_commodity_code, report_date, cftc_contract_market_code, ingest_date),
|
||||
grain (cftc_commodity_code, report_date, cftc_contract_market_code, ingest_date, report_type),
|
||||
start '2006-06-13',
|
||||
cron '@daily'
|
||||
);
|
||||
@@ -21,6 +21,18 @@ WITH src AS (
|
||||
all_varchar = TRUE,
|
||||
max_line_size = 10000000
|
||||
)
|
||||
UNION ALL BY NAME
|
||||
SELECT
|
||||
*
|
||||
FROM READ_CSV(
|
||||
@cot_combined_glob(),
|
||||
compression = 'gzip',
|
||||
header = TRUE,
|
||||
union_by_name = TRUE,
|
||||
filename = TRUE,
|
||||
all_varchar = TRUE,
|
||||
max_line_size = 10000000
|
||||
)
|
||||
), cast_and_clean AS (
|
||||
SELECT
|
||||
TRIM(market_and_exchange_names) AS market_and_exchange_name, /* Identifiers */
|
||||
@@ -28,6 +40,7 @@ WITH src AS (
|
||||
TRIM(cftc_commodity_code) AS cftc_commodity_code,
|
||||
TRIM(cftc_contract_market_code) AS cftc_contract_market_code,
|
||||
TRIM(contract_units) AS contract_units,
|
||||
TRIM("FutOnly_or_Combined") AS report_type, /* 'FutOnly' or 'Combined' — discriminates the two CFTC report variants */
|
||||
TRY_CAST(open_interest_all AS INT) AS open_interest, /* Open interest */ /* CFTC uses '.' as null for any field — use TRY_CAST throughout */
|
||||
TRY_CAST(prod_merc_positions_long_all AS INT) AS prod_merc_long, /* Producer / Merchant (commercial hedgers: exporters, processors) */
|
||||
TRY_CAST(prod_merc_positions_short_all AS INT) AS prod_merc_short,
|
||||
@@ -66,12 +79,13 @@ WITH src AS (
|
||||
cftc_commodity_code,
|
||||
report_date_as_yyyy_mm_dd,
|
||||
cftc_contract_market_code,
|
||||
"FutOnly_or_Combined",
|
||||
open_interest_all,
|
||||
m_money_positions_long_all,
|
||||
m_money_positions_short_all,
|
||||
prod_merc_positions_long_all,
|
||||
prod_merc_positions_short_all
|
||||
) AS hkey /* Dedup key: hash of business grain + key metrics */
|
||||
) AS hkey /* Dedup key: hash of business grain + key metrics; includes report variant so fut-only and combined rows get distinct keys */
|
||||
FROM src
|
||||
/* Reject rows with null commodity code or malformed date */
|
||||
WHERE
|
||||
@@ -119,6 +133,7 @@ WITH src AS (
|
||||
ANY_VALUE(traders_managed_money_short) AS traders_managed_money_short,
|
||||
ANY_VALUE(traders_managed_money_spread) AS traders_managed_money_spread,
|
||||
ANY_VALUE(ingest_date) AS ingest_date,
|
||||
ANY_VALUE(report_type) AS report_type,
|
||||
hkey
|
||||
FROM cast_and_clean
|
||||
GROUP BY
|
||||
|
||||
Reference in New Issue
Block a user