refactor(transform): remove raw layer, read landing zone directly

- Delete 6 raw data models (coffee_prices, cot_disaggregated, ice_*,
  psd_data) — pure read_csv passthroughs with no added value
- Move 3 PSD seed models raw/ → seeds/, rename schema raw.* → seeds.*
- Update staging.psdalldata__commodity: read_csv(@psd_glob()) directly,
  join seeds.psd_* instead of raw.psd_*
- Update 5 foundation models: inline read_csv() with src CTE, removing
  raw.* dependency (fct_coffee_prices, fct_cot_positioning, fct_ice_*)
- Remove fixture-based SQLMesh test that depended on raw.cot_disaggregated
  (unit tests incompatible with inline read_csv; integration run covers this)
- Update readme.md: 3-layer architecture (staging/foundation → serving)

Landing files are immutable and content-addressed — the landing directory
is the audit trail. A raw SQL layer duplicated file bytes into DuckDB
with no added value.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-22 17:30:18 +01:00
parent 1814a76e74
commit c3c8333407
18 changed files with 266 additions and 643 deletions

View File

@@ -1,108 +0,0 @@
# Feeds a single raw.cot_disaggregated row (every column supplied as a string)
# through foundation.fct_cot_positioning and checks the typed output columns
# and the derived *_net values.
test_fct_cot_positioning_types_and_net_positions:
  model: foundation.fct_cot_positioning
  vars:
    start: "2024-01-01"
    end: "2024-01-31"
  inputs:
    raw.cot_disaggregated:
      rows:
        - market_and_exchange_names: "COFFEE C - ICE FUTURES U.S."
          report_date_as_yyyy_mm_dd: "2024-01-02"
          cftc_commodity_code: "083"
          cftc_contract_market_code: "083731"
          contract_units: "37,500 POUNDS"
          open_interest_all: "250000"
          prod_merc_positions_long_all: "80000"
          prod_merc_positions_short_all: "90000"
          swap_positions_long_all: "30000"
          swap_positions_short_all: "35000"
          swap_positions_spread_all: "10000"
          m_money_positions_long_all: "60000"
          m_money_positions_short_all: "40000"
          m_money_positions_spread_all: "15000"
          other_rept_positions_long_all: "20000"
          other_rept_positions_short_all: "18000"
          other_rept_positions_spread_all: "5000"
          nonrept_positions_long_all: "12000"
          nonrept_positions_short_all: "14000"
          change_in_open_interest_all: "5000"
          change_in_m_money_long_all: "2000"
          change_in_m_money_short_all: "-1000"
          change_in_prod_merc_long_all: "1000"
          change_in_prod_merc_short_all: "500"
          conc_gross_le_4_tdr_long_all: "35.5"
          conc_gross_le_4_tdr_short_all: "28.3"
          conc_gross_le_8_tdr_long_all: "52.1"
          conc_gross_le_8_tdr_short_all: "44.7"
          traders_tot_all: "450"
          traders_m_money_long_all: "85"
          traders_m_money_short_all: "62"
          traders_m_money_spread_all: "20"
          filename: "data/landing/cot/2024/abc123.csv.gzip"
  outputs:
    # Partial match: only the columns listed in the expected row are compared.
    partial: true
    query:
      rows:
        - report_date: "2024-01-02"
          cftc_commodity_code: "083"
          open_interest: 250000
          managed_money_long: 60000
          managed_money_short: 40000
          managed_money_net: 20000  # 60000 - 40000
          prod_merc_long: 80000
          prod_merc_short: 90000
          prod_merc_net: -10000  # 80000 - 90000
          swap_long: 30000
          swap_short: 35000
          swap_net: -5000  # 30000 - 35000
          nonreportable_long: 12000
          nonreportable_short: 14000
          nonreportable_net: -2000  # 12000 - 14000
          change_managed_money_net: 3000  # 2000 - (-1000)
          traders_managed_money_long: 85
          traders_managed_money_short: 62
# Feeds a single raw.cot_disaggregated row whose cftc_commodity_code is the
# empty string and expects foundation.fct_cot_positioning to emit no rows.
test_fct_cot_positioning_rejects_null_commodity:
  model: foundation.fct_cot_positioning
  vars:
    start: "2024-01-01"
    end: "2024-01-31"
  inputs:
    raw.cot_disaggregated:
      rows:
        - market_and_exchange_names: "SOME OTHER CONTRACT"
          report_date_as_yyyy_mm_dd: "2024-01-02"
          # Empty commodity code is the condition under test.
          cftc_commodity_code: ""
          cftc_contract_market_code: "999999"
          contract_units: "N/A"
          open_interest_all: "1000"
          prod_merc_positions_long_all: "500"
          prod_merc_positions_short_all: "500"
          swap_positions_long_all: "0"
          swap_positions_short_all: "0"
          swap_positions_spread_all: "0"
          m_money_positions_long_all: "0"
          m_money_positions_short_all: "0"
          m_money_positions_spread_all: "0"
          other_rept_positions_long_all: "0"
          other_rept_positions_short_all: "0"
          other_rept_positions_spread_all: "0"
          nonrept_positions_long_all: "0"
          nonrept_positions_short_all: "0"
          change_in_open_interest_all: "0"
          change_in_m_money_long_all: "0"
          change_in_m_money_short_all: "0"
          change_in_prod_merc_long_all: "0"
          change_in_prod_merc_short_all: "0"
          conc_gross_le_4_tdr_long_all: "0"
          conc_gross_le_4_tdr_short_all: "0"
          conc_gross_le_8_tdr_long_all: "0"
          conc_gross_le_8_tdr_short_all: "0"
          traders_tot_all: "10"
          traders_m_money_long_all: "0"
          traders_m_money_short_all: "0"
          traders_m_money_spread_all: "0"
          filename: "data/landing/cot/2024/abc123.csv.gzip"
  outputs:
    query:
      # The row above must be filtered out entirely.
      rows: []