refactor: rename materia → beanflows throughout codebase
Some checks failed
CI / test-cli (push) Failing after 5s
CI / test-sqlmesh (push) Failing after 4s
CI / test-web (push) Failing after 5s
CI / tag (push) Has been skipped

- Rename src/materia/ → src/beanflows/ (Python package)
- Rename transform/sqlmesh_materia/ → transform/sqlmesh_beanflows/
- Rename infra/supervisor/materia-supervisor.service → beanflows-supervisor.service
- Rename infra/backup/materia-backup.{service,timer} → beanflows-backup.{service,timer}
- Update all path strings: /opt/materia → /opt/beanflows, /data/materia → /data/beanflows
- Update pyproject.toml: project name, CLI entrypoint, workspace source key
- Update all internal imports from materia.* → beanflows.*
- Update infra scripts: REPO_DIR, service names, systemctl references
- Fix docker-compose.prod.yml: /data/materia → /data/beanflows (bind mount path)

Intentionally left unchanged: Pulumi stack name (materia-infrastructure) and
Hetzner resource names ("materia-key", "managed_by: materia") — these reference
live cloud infrastructure and require separate cloud-side renames.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-28 23:00:52 +01:00
parent 9ea4f09600
commit d14990bb01
58 changed files with 128 additions and 93 deletions

View File

@@ -0,0 +1,9 @@
/* Audit: selects every row of the audited model (@this_model) whose item_id is negative. */
/* NOTE(review): the audit name refers to order ids while the predicate checks item_id — confirm which column is intended. */
AUDIT (
name assert_positive_order_ids
);
SELECT
*
FROM @this_model
WHERE
item_id < 0

View File

@@ -0,0 +1,41 @@
# --- Gateway Connection ---
# Single local DuckDB gateway
# Local dev uses virtual environments (e.g., dev_<username>)
# Production uses the 'prod' environment
gateways:
duckdb:
connection:
type: duckdb
catalogs:
local: '{{ env_var("DUCKDB_PATH", "local.duckdb") }}'
default_gateway: duckdb
# --- Variables ---
variables:
LANDING_DIR: '{{ env_var("LANDING_DIR", "data/landing") }}'
# --- Model Defaults ---
# https://sqlmesh.readthedocs.io/en/stable/reference/model_configuration/#model-defaults
model_defaults:
dialect: duckdb
start: 2025-07-07 # Start date for backfill history
cron: '@daily' # Run models daily at 12am UTC (can override per model)
# --- Linting Rules ---
# https://sqlmesh.readthedocs.io/en/stable/guides/linter/
linter:
enabled: true
rules:
# ambiguousorinvalidcolumn removed: sqlglot cannot introspect read_csv() TVF
# schemas at lint time, causing false positives on all raw models. Cross-model
# column validation is handled by SQLMesh at plan time via columns() declarations.
- invalidselectstarexpansion
# --- Default Target Environment ---
# Prevents accidentally applying plans to prod during local development.
# https://sqlmesh.readthedocs.io/en/stable/guides/configuration/#default-target-environment
default_target_environment: dev_{{ user() }}

View File

@@ -0,0 +1 @@
[]

View File

@@ -0,0 +1,63 @@
import os
from sqlmesh import macro
def _landing_glob(evaluator, pattern: str) -> str:
    """Return ``pattern`` rooted at LANDING_DIR as a single-quoted SQL string.

    LANDING_DIR is taken from the SQLMesh variable of that name; when that is
    unset or empty, the LANDING_DIR environment variable is used, falling back
    to "data/landing".
    """
    landing_dir = evaluator.var("LANDING_DIR") or os.environ.get("LANDING_DIR", "data/landing")
    return f"'{landing_dir}/{pattern}'"


@macro()
def psd_glob(evaluator) -> str:
    """Return a quoted glob path for all PSD CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "psd/**/*.csv.gzip")


@macro()
def cot_glob(evaluator) -> str:
    """Return a quoted glob path for all COT CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "cot/**/*.csv.gzip")


@macro()
def cot_combined_glob(evaluator) -> str:
    """Return a quoted glob path for all COT combined (futures+options) CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "cot_combined/**/*.csv.gzip")


@macro()
def prices_glob(evaluator) -> str:
    """Return a quoted glob path for all coffee price CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "prices/coffee_kc/**/*.csv.gzip")


@macro()
def ice_stocks_glob(evaluator) -> str:
    """Return a quoted glob path for all ICE warehouse stock CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "ice_stocks/**/*.csv.gzip")


@macro()
def ice_aging_glob(evaluator) -> str:
    """Return a quoted glob path for all ICE aging report CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "ice_aging/**/*.csv.gzip")


@macro()
def ice_stocks_by_port_glob(evaluator) -> str:
    """Return a quoted glob path for all ICE historical by-port CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "ice_stocks_by_port/**/*.csv.gzip")


@macro()
def weather_glob(evaluator) -> str:
    """Return a quoted glob path for all OWM weather JSON gzip files under LANDING_DIR.

    Pattern: weather/{location_id}/{year}/{date}.json.gz
    The double-star catches all location_id subdirectories.
    """
    return _landing_glob(evaluator, "weather/**/*.json.gz")

View File

@@ -0,0 +1,59 @@
/* Cleaned layer: pivots PSD attribute rows into one wide row per */
/* (commodity_code, country_code, market_year, ingest_date). */
/* Each listed attribute becomes its own column; an attribute with no rows in a group is reported as 0. */
MODEL (
  name cleaned.psdalldata__commodity_pivoted,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column ingest_date
  ),
  start '2006-08-01',
  cron '@daily'
);

SELECT
  MAX(hkey) AS hkey,
  commodity_code,
  MAX(commodity_name) AS commodity_name,
  country_code,
  MAX(country_name) AS country_name,
  market_year,
  ingest_date,
  COALESCE(SUM(CASE WHEN attribute_name = 'Production' THEN value END), 0) AS Production,
  COALESCE(SUM(CASE WHEN attribute_name = 'Imports' THEN value END), 0) AS Imports,
  COALESCE(SUM(CASE WHEN attribute_name = 'Exports' THEN value END), 0) AS Exports,
  COALESCE(SUM(CASE WHEN attribute_name = 'Total Distribution' THEN value END), 0) AS Total_Distribution,
  COALESCE(SUM(CASE WHEN attribute_name = 'Ending Stocks' THEN value END), 0) AS Ending_Stocks,
  COALESCE(SUM(CASE WHEN attribute_name = 'Beginning Stocks' THEN value END), 0) AS Beginning_Stocks,
  COALESCE(SUM(CASE WHEN attribute_name = 'Total Supply' THEN value END), 0) AS Total_Supply,
  COALESCE(SUM(CASE WHEN attribute_name = 'Domestic Consumption' THEN value END), 0) AS Domestic_Consumption,
  COALESCE(SUM(CASE WHEN attribute_name = 'Domestic Demand' THEN value END), 0) AS Domestic_Demand,
  COALESCE(SUM(CASE WHEN attribute_name = 'Food Use' THEN value END), 0) AS Food_Use,
  COALESCE(SUM(CASE WHEN attribute_name = 'Industrial Use' THEN value END), 0) AS Industrial_Use,
  COALESCE(SUM(CASE WHEN attribute_name = 'Seed Use' THEN value END), 0) AS Seed_Use,
  COALESCE(SUM(CASE WHEN attribute_name = 'Waste' THEN value END), 0) AS Waste,
  COALESCE(SUM(CASE WHEN attribute_name = 'Feed Use' THEN value END), 0) AS Feed_Use
FROM staging.psdalldata__commodity
WHERE
  /* Limit the upstream scan to the interval being processed, as the other */
  /* incremental models in this project do; ingest_date is a GROUP BY key, */
  /* so filtering before aggregation does not change any group's contents. */
  ingest_date BETWEEN @start_ds AND @end_ds
  AND attribute_name IN (
    'Production',
    'Imports',
    'Exports',
    'Total Distribution',
    'Ending Stocks',
    'Beginning Stocks',
    'Total Supply',
    'Domestic Consumption',
    'Domestic Demand',
    'Food Use',
    'Industrial Use',
    'Seed Use',
    'Waste',
    'Feed Use'
  )
GROUP BY
  commodity_code,
  country_code,
  market_year,
  ingest_date
ORDER BY
  commodity_code,
  country_code,
  market_year,
  ingest_date

View File

@@ -0,0 +1,15 @@
/* Commodity dimension: conforms identifiers across source systems. */
/* This is the ontology. Each row is a commodity tracked by BeanFlows. */
/* As new sources are added (ICO, futures prices, satellite), their */
/* commodity identifiers are added as columns here — not as separate tables. */
/* As new commodities are added (cocoa, sugar), rows are added here. */
/* References: */
/*   usda_commodity_code → staging.psdalldata__commodity.commodity_code (numeric string, e.g. '0711100') */
/*   cftc_commodity_code → foundation.fct_cot_positioning.cftc_commodity_code (3-char, e.g. '083') */
/* NOTE: Defined as FULL model (not SEED) to guarantee leading-zero preservation. */
/* Pandas CSV loading converts '083' → 83 even with varchar column declarations. */
/* The rows are inlined as quoted string literals in the VALUES list below; it currently holds a single row. */
MODEL (
name foundation.dim_commodity,
kind FULL
);
SELECT
usda_commodity_code,
cftc_commodity_code,
ticker,
ice_stock_report_code,
commodity_name,
commodity_group
FROM (VALUES
('0711100', '083', 'KC=F', 'COFFEE-C', 'Coffee, Green', 'Softs')) AS t(usda_commodity_code, cftc_commodity_code, ticker, ice_stock_report_code, commodity_name, commodity_group)

View File

@@ -0,0 +1,58 @@
/* Foundation fact: daily KC=F Coffee C futures prices. */
/* Reads directly from the landing zone, casts varchar columns to proper types, */
/* and deduplicates via hash key. */
/* Covers all available history from the landing directory. */
/* Grain: one row per trade_date. */
/* Dedup: hash of (trade_date, close) — if Yahoo Finance corrects a price, */
/* the new hash triggers a re-ingest on the next incremental run. */
/* NOTE(review): because the dedup key includes close, two landed files that disagree on the close */
/* for the same date produce two rows for that trade_date, which does not match the declared grain — */
/* confirm whether downstream consumers expect this. */
MODEL (
name foundation.fct_coffee_prices,
kind INCREMENTAL_BY_TIME_RANGE (
time_column trade_date
),
grain (
trade_date
),
start '1971-08-16',
cron '@daily'
);
WITH src AS (
/* Every file matched by @prices_glob() is read as text (all_varchar = TRUE); */
/* filename = TRUE adds the source path as a column. */
SELECT
*
FROM READ_CSV(
@prices_glob(),
compression = 'gzip',
header = TRUE,
union_by_name = TRUE,
filename = TRUE,
all_varchar = TRUE
)
), cast_and_clean AS (
/* TRY_CAST yields NULL instead of failing on unparseable text; rows without a valid date or close are dropped. */
SELECT
TRY_CAST(Date AS DATE) AS trade_date,
TRY_CAST(Open AS DOUBLE) AS open,
TRY_CAST(High AS DOUBLE) AS high,
TRY_CAST(Low AS DOUBLE) AS low,
TRY_CAST(Close AS DOUBLE) AS close,
TRY_CAST("Adj Close" AS DOUBLE) AS adj_close,
TRY_CAST(Volume AS BIGINT) AS volume,
filename AS source_file, /* Filename encodes the content hash — use as ingest identifier */
HASH(Date, Close) AS hkey /* Dedup key: trade date + close price */
FROM src
WHERE
NOT TRY_CAST(Date AS DATE) IS NULL AND NOT TRY_CAST(Close AS DOUBLE) IS NULL
), deduplicated AS (
/* One output row per hkey; ANY_VALUE picks an arbitrary representative from each group of duplicates. */
SELECT
ANY_VALUE(trade_date) AS trade_date,
ANY_VALUE(open) AS open,
ANY_VALUE(high) AS high,
ANY_VALUE(low) AS low,
ANY_VALUE(close) AS close,
ANY_VALUE(adj_close) AS adj_close,
ANY_VALUE(volume) AS volume,
ANY_VALUE(source_file) AS source_file,
hkey
FROM cast_and_clean
GROUP BY
hkey
)
/* Only rows inside the interval being processed are emitted. */
SELECT
*
FROM deduplicated
WHERE
trade_date BETWEEN @start_ds AND @end_ds

View File

@@ -0,0 +1,146 @@
/* Foundation fact: CFTC COT positioning, weekly grain, all commodities. */
/* Reads directly from the landing zone, casts varchar columns to proper types, */
/* cleans column names, computes net positions (long - short) per trader category, */
/* and deduplicates via hash key. Covers all commodities — filtering to */
/* a specific commodity happens in the serving layer. */
/* Grain: one row per (cftc_commodity_code, report_date, cftc_contract_market_code, ingest_date, report_type). */
/* History: revisions appear as new rows with a later ingest_date. */
/* Serving layer picks max(ingest_date) per grain for latest view. */
MODEL (
  name foundation.fct_cot_positioning,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (cftc_commodity_code, report_date, cftc_contract_market_code, ingest_date, report_type),
  start '2006-06-13',
  cron '@daily'
);

WITH src AS (
  /* Futures-only files and combined (futures+options) files are read as text and stacked by column name. */
  SELECT
    *
  FROM READ_CSV(
    @cot_glob(),
    compression = 'gzip',
    header = TRUE,
    union_by_name = TRUE,
    filename = TRUE,
    all_varchar = TRUE,
    max_line_size = 10000000
  )
  UNION ALL BY NAME
  SELECT
    *
  FROM READ_CSV(
    @cot_combined_glob(),
    compression = 'gzip',
    header = TRUE,
    union_by_name = TRUE,
    filename = TRUE,
    all_varchar = TRUE,
    max_line_size = 10000000
  )
), cast_and_clean AS (
  SELECT
    /* Identifiers */
    TRIM(market_and_exchange_names) AS market_and_exchange_name,
    /* TRY_CAST (not ::DATE) so a malformed date becomes NULL and the row is */
    /* rejected by the WHERE clause below instead of aborting the whole query. */
    TRY_CAST("Report_Date_as_YYYY-MM-DD" AS DATE) AS report_date,
    TRIM(cftc_commodity_code) AS cftc_commodity_code,
    TRIM(cftc_contract_market_code) AS cftc_contract_market_code,
    TRIM(contract_units) AS contract_units,
    TRIM("FutOnly_or_Combined") AS report_type, /* 'FutOnly' or 'Combined' — discriminates the two CFTC report variants */
    /* Open interest */
    /* CFTC uses '.' as null for any field — use TRY_CAST throughout */
    TRY_CAST(open_interest_all AS INT) AS open_interest,
    /* Producer / Merchant (commercial hedgers: exporters, processors) */
    TRY_CAST(prod_merc_positions_long_all AS INT) AS prod_merc_long,
    TRY_CAST(prod_merc_positions_short_all AS INT) AS prod_merc_short,
    /* Swap dealers */
    TRY_CAST(swap_positions_long_all AS INT) AS swap_long,
    TRY_CAST("Swap__Positions_Short_All" AS INT) AS swap_short,
    TRY_CAST("Swap__Positions_Spread_All" AS INT) AS swap_spread,
    /* Managed money (hedge funds, CTAs — the primary speculative signal) */
    TRY_CAST(m_money_positions_long_all AS INT) AS managed_money_long,
    TRY_CAST(m_money_positions_short_all AS INT) AS managed_money_short,
    TRY_CAST(m_money_positions_spread_all AS INT) AS managed_money_spread,
    /* Other reportables */
    TRY_CAST(other_rept_positions_long_all AS INT) AS other_reportable_long,
    TRY_CAST(other_rept_positions_short_all AS INT) AS other_reportable_short,
    TRY_CAST(other_rept_positions_spread_all AS INT) AS other_reportable_spread,
    /* Non-reportable (small speculators, below reporting threshold) */
    TRY_CAST(nonrept_positions_long_all AS INT) AS nonreportable_long,
    TRY_CAST(nonrept_positions_short_all AS INT) AS nonreportable_short,
    /* Net positions (long minus short per category) */
    TRY_CAST(prod_merc_positions_long_all AS INT) - TRY_CAST(prod_merc_positions_short_all AS INT) AS prod_merc_net,
    TRY_CAST(m_money_positions_long_all AS INT) - TRY_CAST(m_money_positions_short_all AS INT) AS managed_money_net,
    TRY_CAST(swap_positions_long_all AS INT) - TRY_CAST("Swap__Positions_Short_All" AS INT) AS swap_net,
    TRY_CAST(other_rept_positions_long_all AS INT) - TRY_CAST(other_rept_positions_short_all AS INT) AS other_reportable_net,
    TRY_CAST(nonrept_positions_long_all AS INT) - TRY_CAST(nonrept_positions_short_all AS INT) AS nonreportable_net,
    /* Week-over-week changes */
    TRY_CAST(change_in_open_interest_all AS INT) AS change_open_interest,
    TRY_CAST(change_in_m_money_long_all AS INT) AS change_managed_money_long,
    TRY_CAST(change_in_m_money_short_all AS INT) AS change_managed_money_short,
    TRY_CAST(change_in_m_money_long_all AS INT) - TRY_CAST(change_in_m_money_short_all AS INT) AS change_managed_money_net,
    TRY_CAST(change_in_prod_merc_long_all AS INT) AS change_prod_merc_long,
    TRY_CAST(change_in_prod_merc_short_all AS INT) AS change_prod_merc_short,
    /* Concentration ratios (% of OI held by top 4 / top 8 traders) */
    TRY_CAST(conc_gross_le_4_tdr_long_all AS REAL) AS concentration_top4_long_pct,
    TRY_CAST(conc_gross_le_4_tdr_short_all AS REAL) AS concentration_top4_short_pct,
    TRY_CAST(conc_gross_le_8_tdr_long_all AS REAL) AS concentration_top8_long_pct,
    TRY_CAST(conc_gross_le_8_tdr_short_all AS REAL) AS concentration_top8_short_pct,
    /* Trader counts */
    TRY_CAST(traders_tot_all AS INT) AS traders_total,
    TRY_CAST(traders_m_money_long_all AS INT) AS traders_managed_money_long,
    TRY_CAST(traders_m_money_short_all AS INT) AS traders_managed_money_short,
    TRY_CAST(traders_m_money_spread_all AS INT) AS traders_managed_money_spread,
    /* Ingest date: derived from landing path year directory */
    /* Path: .../cot/{year}/{etag}.csv.gzip → extract year from [-2] */
    MAKE_DATE(STR_SPLIT(filename, '/')[-2]::INT, 1, 1) AS ingest_date,
    /* Dedup key: hash of business grain + key metrics; includes report variant so fut-only and combined rows get distinct keys */
    HASH(
      cftc_commodity_code,
      "Report_Date_as_YYYY-MM-DD",
      cftc_contract_market_code,
      "FutOnly_or_Combined",
      open_interest_all,
      m_money_positions_long_all,
      m_money_positions_short_all,
      prod_merc_positions_long_all,
      prod_merc_positions_short_all
    ) AS hkey
  FROM src
  /* Reject rows with null commodity code or malformed date */
  WHERE
    NOT TRIM(cftc_commodity_code) IS NULL
    AND LENGTH(TRIM(cftc_commodity_code)) > 0
    AND NOT TRY_CAST("Report_Date_as_YYYY-MM-DD" AS DATE) IS NULL
), deduplicated AS (
  /* One output row per hkey; ANY_VALUE picks an arbitrary representative from each group of duplicates. */
  SELECT
    ANY_VALUE(market_and_exchange_name) AS market_and_exchange_name,
    ANY_VALUE(report_date) AS report_date,
    ANY_VALUE(cftc_commodity_code) AS cftc_commodity_code,
    ANY_VALUE(cftc_contract_market_code) AS cftc_contract_market_code,
    ANY_VALUE(contract_units) AS contract_units,
    ANY_VALUE(open_interest) AS open_interest,
    ANY_VALUE(prod_merc_long) AS prod_merc_long,
    ANY_VALUE(prod_merc_short) AS prod_merc_short,
    ANY_VALUE(prod_merc_net) AS prod_merc_net,
    ANY_VALUE(swap_long) AS swap_long,
    ANY_VALUE(swap_short) AS swap_short,
    ANY_VALUE(swap_spread) AS swap_spread,
    ANY_VALUE(swap_net) AS swap_net,
    ANY_VALUE(managed_money_long) AS managed_money_long,
    ANY_VALUE(managed_money_short) AS managed_money_short,
    ANY_VALUE(managed_money_spread) AS managed_money_spread,
    ANY_VALUE(managed_money_net) AS managed_money_net,
    ANY_VALUE(other_reportable_long) AS other_reportable_long,
    ANY_VALUE(other_reportable_short) AS other_reportable_short,
    ANY_VALUE(other_reportable_spread) AS other_reportable_spread,
    ANY_VALUE(other_reportable_net) AS other_reportable_net,
    ANY_VALUE(nonreportable_long) AS nonreportable_long,
    ANY_VALUE(nonreportable_short) AS nonreportable_short,
    ANY_VALUE(nonreportable_net) AS nonreportable_net,
    ANY_VALUE(change_open_interest) AS change_open_interest,
    ANY_VALUE(change_managed_money_long) AS change_managed_money_long,
    ANY_VALUE(change_managed_money_short) AS change_managed_money_short,
    ANY_VALUE(change_managed_money_net) AS change_managed_money_net,
    ANY_VALUE(change_prod_merc_long) AS change_prod_merc_long,
    ANY_VALUE(change_prod_merc_short) AS change_prod_merc_short,
    ANY_VALUE(concentration_top4_long_pct) AS concentration_top4_long_pct,
    ANY_VALUE(concentration_top4_short_pct) AS concentration_top4_short_pct,
    ANY_VALUE(concentration_top8_long_pct) AS concentration_top8_long_pct,
    ANY_VALUE(concentration_top8_short_pct) AS concentration_top8_short_pct,
    ANY_VALUE(traders_total) AS traders_total,
    ANY_VALUE(traders_managed_money_long) AS traders_managed_money_long,
    ANY_VALUE(traders_managed_money_short) AS traders_managed_money_short,
    ANY_VALUE(traders_managed_money_spread) AS traders_managed_money_spread,
    ANY_VALUE(ingest_date) AS ingest_date,
    ANY_VALUE(report_type) AS report_type,
    hkey
  FROM cast_and_clean
  GROUP BY
    hkey
)
SELECT
  *
FROM deduplicated
WHERE
  report_date BETWEEN @start_ds AND @end_ds

View File

@@ -0,0 +1,62 @@
/* Foundation fact: ICE certified Coffee C (Arabica) aging report. */
/* Reads directly from the landing zone, casts varchar columns to proper types, */
/* and deduplicates via hash key. */
/* Grain: one row per (report_date, age_bucket). */
/* Age buckets represent how long coffee has been in certified storage. */
/* Port columns are in bags (60kg). */
MODEL (
name foundation.fct_ice_aging_stocks,
kind INCREMENTAL_BY_TIME_RANGE (
time_column report_date
),
grain (report_date, age_bucket),
start '2020-01-01',
cron '@daily'
);
WITH src AS (
/* Every file matched by @ice_aging_glob() is read as text (all_varchar = TRUE); */
/* filename = TRUE adds the source path as a column. */
SELECT
*
FROM READ_CSV(
@ice_aging_glob(),
compression = 'gzip',
header = TRUE,
union_by_name = TRUE,
filename = TRUE,
all_varchar = TRUE
)
), cast_and_clean AS (
/* TRY_CAST yields NULL instead of failing on unparseable text. */
/* Rows without a valid report_date or with a missing/empty age_bucket are dropped. */
SELECT
TRY_CAST(report_date AS DATE) AS report_date,
age_bucket,
TRY_CAST(antwerp_bags AS BIGINT) AS antwerp_bags,
TRY_CAST(hamburg_bremen_bags AS BIGINT) AS hamburg_bremen_bags,
TRY_CAST(houston_bags AS BIGINT) AS houston_bags,
TRY_CAST(miami_bags AS BIGINT) AS miami_bags,
TRY_CAST(new_orleans_bags AS BIGINT) AS new_orleans_bags,
TRY_CAST(new_york_bags AS BIGINT) AS new_york_bags,
TRY_CAST(total_bags AS BIGINT) AS total_bags,
filename AS source_file,
HASH(report_date, age_bucket, total_bags) AS hkey /* Dedup key: report date + age bucket + total bags */
FROM src
WHERE
NOT TRY_CAST(report_date AS DATE) IS NULL
AND NOT age_bucket IS NULL
AND age_bucket <> ''
), deduplicated AS (
/* One output row per hkey; ANY_VALUE picks an arbitrary representative from each group of duplicates. */
SELECT
ANY_VALUE(report_date) AS report_date,
ANY_VALUE(age_bucket) AS age_bucket,
ANY_VALUE(antwerp_bags) AS antwerp_bags,
ANY_VALUE(hamburg_bremen_bags) AS hamburg_bremen_bags,
ANY_VALUE(houston_bags) AS houston_bags,
ANY_VALUE(miami_bags) AS miami_bags,
ANY_VALUE(new_orleans_bags) AS new_orleans_bags,
ANY_VALUE(new_york_bags) AS new_york_bags,
ANY_VALUE(total_bags) AS total_bags,
ANY_VALUE(source_file) AS source_file,
hkey
FROM cast_and_clean
GROUP BY
hkey
)
/* Only rows inside the interval being processed are emitted. */
SELECT
*
FROM deduplicated
WHERE
report_date BETWEEN @start_ds AND @end_ds

View File

@@ -0,0 +1,51 @@
/* Foundation fact: ICE certified Coffee C (Arabica) warehouse stocks. */
/* Reads directly from the landing zone, casts varchar columns to proper types, */
/* and deduplicates via hash key. */
/* "Certified" means Coffee C graded and stamped as delivery-eligible */
/* against ICE futures contracts — a key physical supply indicator. */
/* Grain: one row per report_date. */
MODEL (
name foundation.fct_ice_warehouse_stocks,
kind INCREMENTAL_BY_TIME_RANGE (
time_column report_date
),
grain (
report_date
),
start '2000-01-01',
cron '@daily'
);
WITH src AS (
/* Every file matched by @ice_stocks_glob() is read as text (all_varchar = TRUE); */
/* filename = TRUE adds the source path as a column. */
SELECT
*
FROM READ_CSV(
@ice_stocks_glob(),
compression = 'gzip',
header = TRUE,
union_by_name = TRUE,
filename = TRUE,
all_varchar = TRUE
)
), cast_and_clean AS (
/* TRY_CAST yields NULL instead of failing on unparseable text. */
/* Rows without a valid report_date or total_certified_bags are dropped; pending_grading_bags may be NULL. */
SELECT
TRY_CAST(report_date AS DATE) AS report_date,
TRY_CAST(total_certified_bags AS BIGINT) AS total_certified_bags,
TRY_CAST(pending_grading_bags AS BIGINT) AS pending_grading_bags,
filename AS source_file,
HASH(report_date, total_certified_bags) AS hkey /* Dedup key: report date + total bags */
FROM src
WHERE
NOT TRY_CAST(report_date AS DATE) IS NULL
AND NOT TRY_CAST(total_certified_bags AS BIGINT) IS NULL
), deduplicated AS (
/* One output row per hkey; ANY_VALUE picks an arbitrary representative from each group of duplicates. */
SELECT
ANY_VALUE(report_date) AS report_date,
ANY_VALUE(total_certified_bags) AS total_certified_bags,
ANY_VALUE(pending_grading_bags) AS pending_grading_bags,
ANY_VALUE(source_file) AS source_file,
hkey
FROM cast_and_clean
GROUP BY
hkey
)
/* Only rows inside the interval being processed are emitted. */
SELECT
*
FROM deduplicated
WHERE
report_date BETWEEN @start_ds AND @end_ds

View File

@@ -0,0 +1,65 @@
/* Foundation fact: ICE historical end-of-month Coffee C certified warehouse stocks by port. */
/* Reads directly from the landing zone, casts varchar columns to proper types, */
/* and deduplicates via hash key. */
/* Covers November 1996 to present (30-year history). */
/* Grain: one row per report_date (end-of-month). */
/* Port columns are in bags (60kg). */
MODEL (
name foundation.fct_ice_warehouse_stocks_by_port,
kind INCREMENTAL_BY_TIME_RANGE (
time_column report_date
),
grain (
report_date
),
start '1996-11-01',
cron '@daily'
);
WITH src AS (
/* Every file matched by @ice_stocks_by_port_glob() is read as text (all_varchar = TRUE); */
/* filename = TRUE adds the source path as a column. */
SELECT
*
FROM READ_CSV(
@ice_stocks_by_port_glob(),
compression = 'gzip',
header = TRUE,
union_by_name = TRUE,
filename = TRUE,
all_varchar = TRUE
)
), cast_and_clean AS (
/* TRY_CAST yields NULL instead of failing on unparseable text. */
/* Rows without a valid report_date or total_bags are dropped; individual port columns may be NULL. */
SELECT
TRY_CAST(report_date AS DATE) AS report_date,
TRY_CAST(new_york_bags AS BIGINT) AS new_york_bags,
TRY_CAST(new_orleans_bags AS BIGINT) AS new_orleans_bags,
TRY_CAST(houston_bags AS BIGINT) AS houston_bags,
TRY_CAST(miami_bags AS BIGINT) AS miami_bags,
TRY_CAST(antwerp_bags AS BIGINT) AS antwerp_bags,
TRY_CAST(hamburg_bremen_bags AS BIGINT) AS hamburg_bremen_bags,
TRY_CAST(barcelona_bags AS BIGINT) AS barcelona_bags,
TRY_CAST(virginia_bags AS BIGINT) AS virginia_bags,
TRY_CAST(total_bags AS BIGINT) AS total_bags,
filename AS source_file,
HASH(report_date, total_bags) AS hkey /* Dedup key: report date + total bags */
FROM src
WHERE
NOT TRY_CAST(report_date AS DATE) IS NULL
AND NOT TRY_CAST(total_bags AS BIGINT) IS NULL
), deduplicated AS (
/* One output row per hkey; ANY_VALUE picks an arbitrary representative from each group of duplicates. */
SELECT
ANY_VALUE(report_date) AS report_date,
ANY_VALUE(new_york_bags) AS new_york_bags,
ANY_VALUE(new_orleans_bags) AS new_orleans_bags,
ANY_VALUE(houston_bags) AS houston_bags,
ANY_VALUE(miami_bags) AS miami_bags,
ANY_VALUE(antwerp_bags) AS antwerp_bags,
ANY_VALUE(hamburg_bremen_bags) AS hamburg_bremen_bags,
ANY_VALUE(barcelona_bags) AS barcelona_bags,
ANY_VALUE(virginia_bags) AS virginia_bags,
ANY_VALUE(total_bags) AS total_bags,
ANY_VALUE(source_file) AS source_file,
hkey
FROM cast_and_clean
GROUP BY
hkey
)
/* Only rows inside the interval being processed are emitted. */
SELECT
*
FROM deduplicated
WHERE
report_date BETWEEN @start_ds AND @end_ds

View File

@@ -0,0 +1,125 @@
/* Foundation fact: daily weather observations for 12 coffee-growing regions. */
/* Source: Open-Meteo (ERA5 reanalysis archive + forecast model for recent days) */
/* Landing: LANDING_DIR/weather/{location_id}/{year}/{date}.json.gz */
/* One file per (location_id, date). Content: flat Open-Meteo JSON per day. */
/* Open-Meteo returns parallel arrays; execute.py splits them into per-day files. */
/* Grain: (location_id, observation_date) — one row per location per day. */
/* Dedup key: hash(location_id, date) — past weather is immutable. */
/* location_id is parsed from filename: split(filename, '/')[-3] */
/* Path structure: .../weather/{location_id}/{year}/{date}.json.gz */
/* Crop stress flags: */
/* is_frost — temp_min_c < 2.0°C (ICO Arabica frost damage threshold) */
/* is_heat_stress — temp_max_c > 35.0°C (photosynthesis impairment) */
/* is_drought — precipitation_mm < 1.0 (agronomic dry day) */
/* is_high_vpd — vpd_max_kpa > 1.5 (significant plant water stress) */
/* in_growing_season — simplified month-range flag by variety */
MODEL (
name foundation.fct_weather_daily,
kind INCREMENTAL_BY_TIME_RANGE (
time_column observation_date
),
grain (location_id, observation_date),
start '2020-01-01',
cron '@daily'
);
WITH src AS (
/* Open-Meteo files are flat JSON: all variables at top level (no nested structs). */
/* read_json(format='auto') infers column types directly from the numeric values. */
SELECT
*
FROM READ_JSON(@weather_glob(), format = 'auto', compression = 'gzip', filename = TRUE)
), located AS (
/* Adds location_id (from the file path) and observation_date (from the JSON "date" field) to every source row. */
SELECT
src.*,
STR_SPLIT(filename, '/')[-3] AS location_id, /* location_id is the 3rd-from-last path segment: */
/* e.g. .../weather/brazil_minas_gerais/2024/2024-01-15.json.gz → 'brazil_minas_gerais' */
TRY_CAST(src."date" AS DATE) AS observation_date
FROM src
), cast_and_clean AS (
/* Casts each measurement to DOUBLE and derives the stress flags. */
/* A missing precipitation_sum is treated as 0.0, so such a day also counts as is_drought. */
/* The other flags are NULL when their input measurement is NULL. */
SELECT
location_id,
observation_date,
/* Temperature (°C) */
TRY_CAST(located.temperature_2m_min AS DOUBLE) AS temp_min_c,
TRY_CAST(located.temperature_2m_max AS DOUBLE) AS temp_max_c,
TRY_CAST(located.temperature_2m_mean AS DOUBLE) AS temp_mean_c,
/* Precipitation (mm total for the day) */
COALESCE(TRY_CAST(located.precipitation_sum AS DOUBLE), 0.0) AS precipitation_mm,
/* Humidity (% — daily max) */
TRY_CAST(located.relative_humidity_2m_max AS DOUBLE) AS humidity_max_pct,
/* Cloud cover (% — daily mean) */
TRY_CAST(located.cloud_cover_mean AS DOUBLE) AS cloud_cover_mean_pct,
/* Wind (m/s max — Open-Meteo requested with wind_speed_unit=ms) */
TRY_CAST(located.wind_speed_10m_max AS DOUBLE) AS wind_max_speed_ms,
/* ET₀ (mm/day — FAO Penman-Monteith; direct crop water demand signal) */
TRY_CAST(located.et0_fao_evapotranspiration AS DOUBLE) AS et0_mm,
/* VPD (kPa — max; >1.5 kPa = significant plant water stress) */
TRY_CAST(located.vapour_pressure_deficit_max AS DOUBLE) AS vpd_max_kpa,
/* Crop stress flags */
TRY_CAST(located.temperature_2m_min AS DOUBLE) < 2.0 AS is_frost,
TRY_CAST(located.temperature_2m_max AS DOUBLE) > 35.0 AS is_heat_stress,
COALESCE(TRY_CAST(located.precipitation_sum AS DOUBLE), 0.0) < 1.0 AS is_drought,
TRY_CAST(located.vapour_pressure_deficit_max AS DOUBLE) > 1.5 AS is_high_vpd,
HASH(location_id, located."date") AS hkey,
filename /* carried through here but not selected by the deduplicated CTE below */
FROM located
WHERE
NOT observation_date IS NULL AND NOT location_id IS NULL AND location_id <> ''
), deduplicated AS (
/* One output row per hkey; ANY_VALUE picks an arbitrary representative from each group of duplicates. */
SELECT
ANY_VALUE(location_id) AS location_id,
ANY_VALUE(observation_date) AS observation_date,
ANY_VALUE(temp_min_c) AS temp_min_c,
ANY_VALUE(temp_max_c) AS temp_max_c,
ANY_VALUE(temp_mean_c) AS temp_mean_c,
ANY_VALUE(precipitation_mm) AS precipitation_mm,
ANY_VALUE(humidity_max_pct) AS humidity_max_pct,
ANY_VALUE(cloud_cover_mean_pct) AS cloud_cover_mean_pct,
ANY_VALUE(wind_max_speed_ms) AS wind_max_speed_ms,
ANY_VALUE(et0_mm) AS et0_mm,
ANY_VALUE(vpd_max_kpa) AS vpd_max_kpa,
ANY_VALUE(is_frost) AS is_frost,
ANY_VALUE(is_heat_stress) AS is_heat_stress,
ANY_VALUE(is_drought) AS is_drought,
ANY_VALUE(is_high_vpd) AS is_high_vpd,
hkey
FROM cast_and_clean
GROUP BY
hkey
)
/* Location attributes come from the seeds.weather_locations seed. The LEFT JOIN keeps */
/* observations whose location_id has no seed row; their location columns are NULL and */
/* in_growing_season falls through to FALSE. */
SELECT
d.observation_date,
d.location_id,
loc.name AS location_name,
loc.country,
loc.lat,
loc.lon,
loc.variety,
d.temp_min_c,
d.temp_max_c,
d.temp_mean_c,
d.precipitation_mm,
d.humidity_max_pct,
d.cloud_cover_mean_pct,
d.wind_max_speed_ms,
d.et0_mm,
d.vpd_max_kpa,
d.is_frost,
d.is_heat_stress,
d.is_drought,
d.is_high_vpd,
CASE loc.variety
WHEN 'Arabica'
THEN EXTRACT(MONTH FROM d.observation_date) BETWEEN 4 AND 10
WHEN 'Robusta'
THEN EXTRACT(MONTH FROM d.observation_date) BETWEEN 4 AND 11
ELSE FALSE
END AS in_growing_season /* Growing season: simplified month-range flag by variety. */
/* Arabica: Apr–Oct (covers northern + southern hemisphere risk windows). */
/* Robusta: Apr–Nov (Vietnam/Indonesia main cycle). */
FROM deduplicated AS d
LEFT JOIN seeds.weather_locations AS loc
ON d.location_id = loc.location_id
WHERE
d.observation_date BETWEEN @start_ds AND @end_ds

View File

@@ -0,0 +1,7 @@
/* Seed model: loads seeds/psd_attribute_codes.csv (path given relative to $root); the CSV is semicolon-delimited. */
MODEL (
name seeds.psd_attribute_codes,
kind SEED (
path '$root/seeds/psd_attribute_codes.csv',
csv_settings (delimiter = ';')
)
)

View File

@@ -0,0 +1,7 @@
/* Seed model: loads seeds/psd_commodity_codes.csv (path given relative to $root); the CSV is semicolon-delimited. */
MODEL (
name seeds.psd_commodity_codes,
kind SEED (
path '$root/seeds/psd_commodity_codes.csv',
csv_settings (delimiter = ';')
)
)

View File

@@ -0,0 +1,7 @@
/* Seed model: loads seeds/psd_unit_of_measure_codes.csv (path given relative to $root); the CSV is semicolon-delimited. */
MODEL (
name seeds.psd_unit_of_measure_codes,
kind SEED (
path '$root/seeds/psd_unit_of_measure_codes.csv',
csv_settings (delimiter = ';')
)
)

View File

@@ -0,0 +1,7 @@
/* Seed model: loads seeds/weather_locations.csv (path given relative to $root); the CSV is semicolon-delimited. */
MODEL (
name seeds.weather_locations,
kind SEED (
path '$root/seeds/weather_locations.csv',
csv_settings (delimiter = ';')
)
)

View File

@@ -0,0 +1,63 @@
/* Serving mart: KC=F Coffee C futures prices, analytics-ready. */
/* Adds moving averages (20-day, 50-day SMA) and 52-week high/low range. */
/* Filtered to trading days only (NULL close rows excluded upstream). */
/* Grain: one row per trade_date. */
/* Incremental note: the window metrics need up to 251 prior trading rows, so the */
/* base CTE reads a 400-calendar-day lookback before @start_ds and the final SELECT */
/* trims the output back to the interval being processed. Without the lookback, a */
/* one-day incremental run would compute every window over a single row. */
MODEL (
  name serving.coffee_prices,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column trade_date
  ),
  grain (
    trade_date
  ),
  start '1971-08-16',
  cron '@daily'
);

WITH base AS (
  SELECT
    f.trade_date,
    f.open,
    f.high,
    f.low,
    f.close,
    f.adj_close,
    f.volume,
    /* Daily return: (close - prev_close) / prev_close * 100 */
    ROUND(
      (
        f.close - LAG(f.close, 1) OVER (ORDER BY f.trade_date)
      ) / NULLIF(LAG(f.close, 1) OVER (ORDER BY f.trade_date), 0) * 100,
      4
    ) AS daily_return_pct,
    /* 20-day simple moving average (1 trading month) */
    ROUND(
      AVG(f.close) OVER (ORDER BY f.trade_date ROWS BETWEEN 19 PRECEDING AND CURRENT ROW),
      4
    ) AS sma_20d,
    /* 50-day simple moving average (2.5 trading months) */
    ROUND(
      AVG(f.close) OVER (ORDER BY f.trade_date ROWS BETWEEN 49 PRECEDING AND CURRENT ROW),
      4
    ) AS sma_50d,
    /* 52-week high (approximately 252 trading days) */
    MAX(f.high) OVER (ORDER BY f.trade_date ROWS BETWEEN 251 PRECEDING AND CURRENT ROW) AS high_52w,
    /* 52-week low */
    MIN(f.low) OVER (ORDER BY f.trade_date ROWS BETWEEN 251 PRECEDING AND CURRENT ROW) AS low_52w
  FROM foundation.fct_coffee_prices AS f
  WHERE
    /* Lookback: 400 calendar days comfortably covers the 251 preceding trading rows */
    /* required by the widest (52-week) window. */
    f.trade_date >= CAST(@start_ds AS DATE) - INTERVAL '400' DAY
    AND f.trade_date <= @end_ds
)
SELECT
  b.trade_date,
  d.commodity_name,
  d.ticker,
  b.open,
  b.high,
  b.low,
  b.close,
  b.adj_close,
  b.volume,
  b.daily_return_pct,
  b.sma_20d,
  b.sma_50d,
  b.high_52w,
  b.low_52w
FROM base AS b
CROSS JOIN foundation.dim_commodity AS d
WHERE
  d.ticker = 'KC=F'
  /* Emit only the interval being processed; lookback rows exist solely to feed the windows. */
  AND b.trade_date BETWEEN @start_ds AND @end_ds
ORDER BY
  b.trade_date

View File

@@ -0,0 +1,51 @@
/* Serving mart: ICE certified Coffee C stock aging report, analytics-ready. */
/* Shows the age distribution of certified stocks across delivery ports. */
/* Age buckets represent how long coffee has been in certified storage. */
/* Older stock approaching certificate limits is a supply quality signal. */
/* Source: ICE Certified Stock Aging Report (monthly) */
/* Grain: one row per (report_date, age_bucket). */
MODEL (
name serving.ice_aging_stocks,
kind INCREMENTAL_BY_TIME_RANGE (
time_column report_date
),
grain (report_date, age_bucket),
start '2020-01-01',
cron '@daily'
);
WITH base AS (
/* Rows for the interval being processed, with numeric bucket bounds split out of the age_bucket label. */
/* TRY_CAST leaves the bounds NULL when the label does not follow the "<start> to <end>" shape. */
SELECT
f.report_date,
f.age_bucket,
TRY_CAST(SPLIT_PART(f.age_bucket, ' to ', 1) AS INT) AS age_bucket_start_days, /* Parse age range from "0000 to 0120" format for correct sort order */
TRY_CAST(SPLIT_PART(f.age_bucket, ' to ', 2) AS INT) AS age_bucket_end_days,
f.antwerp_bags,
f.hamburg_bremen_bags,
f.houston_bags,
f.miami_bags,
f.new_orleans_bags,
f.new_york_bags,
f.total_bags,
f.source_file
FROM foundation.fct_ice_aging_stocks AS f
WHERE
f.report_date BETWEEN @start_ds AND @end_ds
)
/* The CROSS JOIN, filtered to the 'COFFEE-C' report code, attaches that commodity's name and report code to every row. */
SELECT
b.report_date,
d.commodity_name,
d.ice_stock_report_code,
b.age_bucket,
b.age_bucket_start_days,
b.age_bucket_end_days,
b.antwerp_bags,
b.hamburg_bremen_bags,
b.houston_bags,
b.miami_bags,
b.new_orleans_bags,
b.new_york_bags,
b.total_bags,
b.source_file
FROM base AS b
CROSS JOIN foundation.dim_commodity AS d
WHERE
d.ice_stock_report_code = 'COFFEE-C'
ORDER BY
b.report_date,
b.age_bucket_start_days

View File

@@ -0,0 +1,53 @@
/* Serving mart: ICE certified Coffee C warehouse stocks, analytics-ready. */
/* Adds a 30-row rolling average, the change versus the previous report, and the */
/* drawdown from the trailing 365-row high. Physical supply indicator used */
/* alongside S/D and positioning. */
/* "Certified stocks" = coffee graded and stamped as eligible for delivery */
/* against ICE Coffee C futures — traders watch this as a squeeze indicator. */
/* Grain: one row per report_date. */
/* Incremental note: the rolling metrics need rows from BEFORE the interval being */
/* loaded. The source is therefore scanned with a warm-up period ahead of @start_ds */
/* and only rows inside @start_ds..@end_ds are emitted. Filtering the input to the */
/* interval alone would make a one-day run compute every window over a single row. */
MODEL (
  name serving.ice_warehouse_stocks,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (
    report_date
  ),
  start '2000-01-01',
  cron '@daily'
);

WITH base AS (
  SELECT
    f.report_date,
    f.total_certified_bags,
    f.pending_grading_bags,
    /* Change versus the previous report row (LAG 1). */
    /* NOTE(review): the column is named "wow" (week-over-week) but LAG(1) on daily data */
    /* compares to the previous reporting day; name kept so downstream consumers do not break. */
    f.total_certified_bags - LAG(f.total_certified_bags, 1) OVER (ORDER BY f.report_date) AS wow_change_bags,
    /* 30-row rolling average (smooths daily noise) */
    ROUND(AVG(f.total_certified_bags::DOUBLE) OVER w30, 0) AS avg_30d_bags,
    /* Trailing high over a 365-row window, used as a proxy for the 52-week high */
    MAX(f.total_certified_bags) OVER w365 AS high_52w_bags,
    /* Drawdown from that trailing high, in percent (<= 0; NULL when the high is 0) */
    ROUND(
      (
        f.total_certified_bags::DOUBLE - CAST(MAX(f.total_certified_bags) OVER w365 AS DOUBLE)
      ) / NULLIF(CAST(MAX(f.total_certified_bags) OVER w365 AS DOUBLE), 0) * 100,
      2
    ) AS drawdown_from_52w_high_pct
  FROM foundation.fct_ice_warehouse_stocks AS f
  WHERE
    /* Warm-up: the widest window needs 364 preceding rows. 600 calendar days covers that */
    /* assuming roughly one report per business day — TODO confirm against the source cadence. */
    f.report_date >= CAST(@start_ds AS DATE) - INTERVAL '600' DAY
    AND f.report_date <= @end_ds
  WINDOW
    w30 AS (ORDER BY f.report_date ROWS BETWEEN 29 PRECEDING AND CURRENT ROW),
    w365 AS (ORDER BY f.report_date ROWS BETWEEN 364 PRECEDING AND CURRENT ROW)
)
SELECT
  b.report_date,
  d.commodity_name,
  d.ice_stock_report_code,
  b.total_certified_bags,
  b.pending_grading_bags,
  b.wow_change_bags,
  b.avg_30d_bags,
  b.high_52w_bags,
  b.drawdown_from_52w_high_pct
FROM base AS b
CROSS JOIN foundation.dim_commodity AS d
WHERE
  d.ice_stock_report_code = 'COFFEE-C'
  /* Emit only the interval being loaded; warm-up rows exist solely to feed the windows */
  AND b.report_date BETWEEN @start_ds AND @end_ds
ORDER BY
  b.report_date

View File

@@ -0,0 +1,64 @@
/* Serving mart: ICE certified Coffee C warehouse stocks by port, analytics-ready. */
/* End-of-month certified stock levels broken down by delivery port. */
/* Covers November 1996 to present (~30 years). Useful for understanding */
/* geographic shifts in the certified supply base over time. */
/* Source: ICE historical by-port XLS (EOM_KC_cert_stox_by_port_nov96-present.xls) */
/* Grain: one row per report_date (end-of-month). */
/* Incremental note: LAG and the 12-row average need rows from before the interval */
/* being loaded, so the source is scanned with a warm-up period ahead of @start_ds */
/* and only rows inside @start_ds..@end_ds are emitted. */
MODEL (
  name serving.ice_warehouse_stocks_by_port,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (
    report_date
  ),
  start '1996-11-01',
  cron '@daily'
);

WITH history AS (
  SELECT
    f.report_date,
    f.new_york_bags,
    f.new_orleans_bags,
    f.houston_bags,
    f.miami_bags,
    f.antwerp_bags,
    f.hamburg_bremen_bags,
    f.barcelona_bags,
    f.virginia_bags,
    f.total_bags,
    /* Previous report's total, computed once and reused by both month-over-month metrics */
    LAG(f.total_bags, 1) OVER (ORDER BY f.report_date) AS prev_total_bags,
    /* 12-row rolling average (12 months when there is one row per month) */
    ROUND(
      AVG(f.total_bags::DOUBLE) OVER (ORDER BY f.report_date ROWS BETWEEN 11 PRECEDING AND CURRENT ROW),
      0
    ) AS avg_12m_bags,
    f.source_file
  FROM foundation.fct_ice_warehouse_stocks_by_port AS f
  WHERE
    /* Warm-up: the widest window needs 11 preceding rows; 400 days covers that */
    /* assuming one end-of-month row per month — TODO confirm against the source. */
    f.report_date >= CAST(@start_ds AS DATE) - INTERVAL '400' DAY
    AND f.report_date <= @end_ds
), base AS (
  SELECT
    h.report_date,
    h.new_york_bags,
    h.new_orleans_bags,
    h.houston_bags,
    h.miami_bags,
    h.antwerp_bags,
    h.hamburg_bremen_bags,
    h.barcelona_bags,
    h.virginia_bags,
    h.total_bags,
    /* Month-over-month change in total certified bags (NULL for the first row in history) */
    h.total_bags - h.prev_total_bags AS mom_change_bags,
    /* Month-over-month percent change (NULL when the previous total is 0 or missing) */
    ROUND(
      (
        h.total_bags::DOUBLE - h.prev_total_bags::DOUBLE
      ) / NULLIF(h.prev_total_bags::DOUBLE, 0) * 100,
      2
    ) AS mom_change_pct,
    h.avg_12m_bags,
    h.source_file
  FROM history AS h
  WHERE
    /* Emit only the interval being loaded; warm-up rows exist solely to feed the windows */
    h.report_date BETWEEN @start_ds AND @end_ds
)
SELECT
  b.report_date,
  d.commodity_name,
  d.ice_stock_report_code,
  b.new_york_bags,
  b.new_orleans_bags,
  b.houston_bags,
  b.miami_bags,
  b.antwerp_bags,
  b.hamburg_bremen_bags,
  b.barcelona_bags,
  b.virginia_bags,
  b.total_bags,
  b.mom_change_bags,
  b.mom_change_pct,
  b.avg_12m_bags,
  b.source_file
FROM base AS b
CROSS JOIN foundation.dim_commodity AS d
WHERE
  d.ice_stock_report_code = 'COFFEE-C'
ORDER BY
  b.report_date

View File

@@ -0,0 +1,126 @@
/* Serving mart: supply/demand metrics per commodity, per country and as a 'Global' total. */
/* Reads cleaned.psdalldata__commodity_pivoted and adds derived columns (net supply, */
/* trade balance, supply/demand balance, stock-to-use ratio, production change). */
/* Output = country-level rows UNION ALL global rows (country_code NULL, country_name 'Global'). */
/* NOTE(review): the query has no explicit ingest_date filter on @start_ds/@end_ds, so each run */
/* reads the whole source table. The LAG windows below do need earlier rows — confirm this is intentional. */
MODEL (
  name serving.commodity_metrics,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column ingest_date
  ),
  start '2006-08-01',
  cron '@daily'
);
/* CTE to calculate country-level derived metrics */
WITH country_metrics AS (
  SELECT
    commodity_code,
    commodity_name,
    country_code,
    country_name,
    market_year,
    ingest_date,
    Production,
    Imports,
    Exports,
    Total_Distribution,
    Ending_Stocks,
    /* Derived metrics per country, mirroring Python script */
    (
      Production + Imports - Exports
    ) AS Net_Supply,
    (
      Exports - Imports
    ) AS Trade_Balance,
    (
      Production + Imports - Exports
    ) - Total_Distribution AS Supply_Demand_Balance,
    /* Stock-to-use ratio in percent; NULLIF turns a zero denominator into NULL instead of an error */
    (
      Ending_Stocks / NULLIF(Total_Distribution, 0)
    ) * 100 AS Stock_to_Use_Ratio_pct,
    /* Production percentage change versus the previous row of the same (commodity, country), */
    /* ordered by (market_year, ingest_date). LAG defaults to 0 and NULLIF maps 0 to NULL, */
    /* so the first row of each partition (and any row following zero production) yields NULL. */
    /* NOTE(review): the previous row is the prior market year only if there is one row per */
    /* market_year per country; with several ingest_dates per market_year it is the prior ingest — confirm. */
    (
      Production - LAG(Production, 1, 0) OVER (PARTITION BY commodity_code, country_code ORDER BY market_year, ingest_date)
    ) / NULLIF(
      LAG(Production, 1, 0) OVER (PARTITION BY commodity_code, country_code ORDER BY market_year, ingest_date),
      0
    ) * 100 AS Production_YoY_pct
  FROM cleaned.psdalldata__commodity_pivoted
), global_aggregates AS (
  /* Sum the base measures across all countries per (commodity, market_year, ingest_date) */
  SELECT
    commodity_code,
    commodity_name,
    NULL::TEXT AS country_code, /* Use NULL for global aggregates */
    'Global' AS country_name,
    market_year,
    ingest_date,
    SUM(Production) AS Production,
    SUM(Imports) AS Imports,
    SUM(Exports) AS Exports,
    SUM(Total_Distribution) AS Total_Distribution,
    SUM(Ending_Stocks) AS Ending_Stocks
  FROM cleaned.psdalldata__commodity_pivoted
  GROUP BY
    commodity_code,
    commodity_name,
    market_year,
    ingest_date
), global_metrics AS (
  /* CTE to calculate derived metrics for global aggregates. */
  /* Same formulas as country_metrics; windows are partitioned by commodity_code only. */
  /* The column order must match country_metrics because the two are combined with SELECT * below. */
  SELECT
    commodity_code,
    commodity_name,
    country_code,
    country_name,
    market_year,
    ingest_date,
    Production,
    Imports,
    Exports,
    Total_Distribution,
    Ending_Stocks,
    (
      Production + Imports - Exports
    ) AS Net_Supply,
    (
      Exports - Imports
    ) AS Trade_Balance,
    (
      Production + Imports - Exports
    ) - Total_Distribution AS Supply_Demand_Balance,
    (
      Ending_Stocks / NULLIF(Total_Distribution, 0)
    ) * 100 AS Stock_to_Use_Ratio_pct,
    (
      Production - LAG(Production, 1, 0) OVER (PARTITION BY commodity_code ORDER BY market_year, ingest_date)
    ) / NULLIF(
      LAG(Production, 1, 0) OVER (PARTITION BY commodity_code ORDER BY market_year, ingest_date),
      0
    ) * 100 AS Production_YoY_pct
  FROM global_aggregates
)
/* Combine country-level and global-level data into a single output */
SELECT
  commodity_code,
  commodity_name,
  country_code,
  country_name,
  market_year,
  ingest_date,
  Production,
  Imports,
  Exports,
  Total_Distribution,
  Ending_Stocks,
  Net_Supply,
  Trade_Balance,
  Supply_Demand_Balance,
  Stock_to_Use_Ratio_pct,
  Production_YoY_pct
FROM (
  /* UNION ALL matches columns by position, not by name */
  SELECT
    *
  FROM country_metrics
  UNION ALL
  SELECT
    *
  FROM global_metrics
) AS combined_data
ORDER BY
  commodity_name,
  country_name,
  market_year,
  ingest_date

View File

@@ -0,0 +1,148 @@
/* Serving mart: COT positioning for Coffee C futures, analytics-ready. */
/* Joins foundation.fct_cot_positioning with foundation.dim_commodity so */
/* the coffee filter is driven by the dimension (not a hardcoded CFTC code). */
/* Adds derived analytics used by the dashboard and API: */
/*   - Normalized positioning (% of open interest) */
/*   - Long/short ratio */
/*   - Week-over-week momentum */
/*   - COT Index over 26-row and 52-row trailing windows (0=bearish, 100=bullish) */
/* Grain: one row per report_date for Coffee C futures. */
/* Latest revision per date: the row with the highest ingest_date is kept, which deduplicates CFTC corrections. */
/* Incremental note: LAG and the trailing windows need rows from before the interval */
/* being loaded, so the fact table is scanned with a warm-up period ahead of @start_ds */
/* and only rows inside @start_ds..@end_ds are emitted. */
MODEL (
  name serving.cot_positioning,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (
    report_date
  ),
  start '2006-06-13',
  cron '@daily'
);

WITH latest_revision AS (
  /* Pick the most recently ingested row when CFTC issues corrections */
  SELECT
    f.*
  FROM foundation.fct_cot_positioning AS f
  INNER JOIN foundation.dim_commodity AS d
    ON f.cftc_commodity_code = d.cftc_commodity_code
  WHERE
    d.commodity_name = 'Coffee, Green'
    AND f.report_type = 'FutOnly'
    /* Warm-up: the widest window needs 51 preceding rows; 400 days covers that */
    /* assuming one report per week — TODO confirm against the source cadence. */
    AND f.report_date >= CAST(@start_ds AS DATE) - INTERVAL '400' DAY
    AND f.report_date <= @end_ds
  QUALIFY
    ROW_NUMBER() OVER (
      PARTITION BY f.report_date, f.cftc_contract_market_code
      ORDER BY f.ingest_date DESC
    ) = 1
), with_derived AS (
  SELECT
    report_date,
    market_and_exchange_name,
    cftc_commodity_code,
    cftc_contract_market_code,
    contract_units,
    ingest_date,
    /* Absolute positions (contracts) */
    open_interest,
    managed_money_long,
    managed_money_short,
    managed_money_spread,
    managed_money_net,
    prod_merc_long,
    prod_merc_short,
    prod_merc_net,
    swap_long,
    swap_short,
    swap_spread,
    swap_net,
    other_reportable_long,
    other_reportable_short,
    other_reportable_spread,
    other_reportable_net,
    nonreportable_long,
    nonreportable_short,
    nonreportable_net,
    /* Normalized: managed money net as % of open interest. */
    /* Removes size effects and makes cross-period comparison meaningful. NULL when open interest is 0. */
    ROUND(managed_money_net::REAL / NULLIF(open_interest, 0) * 100, 2) AS managed_money_net_pct_of_oi,
    /* Long/short ratio: >1 = more bulls than bears in managed money. NULL when there are no shorts. */
    ROUND(managed_money_long::REAL / NULLIF(managed_money_short, 0), 3) AS managed_money_long_short_ratio,
    /* Weekly changes as reported in the source */
    change_open_interest,
    change_managed_money_long,
    change_managed_money_short,
    change_managed_money_net,
    change_prod_merc_long,
    change_prod_merc_short,
    /* Week-over-week momentum in managed money net (current minus previous report) */
    managed_money_net - LAG(managed_money_net, 1) OVER w_prev AS managed_money_net_wow,
    /* Concentration */
    concentration_top4_long_pct,
    concentration_top4_short_pct,
    concentration_top8_long_pct,
    concentration_top8_short_pct,
    /* Trader counts */
    traders_total,
    traders_managed_money_long,
    traders_managed_money_short,
    traders_managed_money_spread,
    /* COT Index (26-week): where is current net vs. the trailing 26 reports? */
    /* 0 = most bearish extreme, 100 = most bullish extreme; 50 when the window has no range. */
    /* Industry-standard sentiment gauge (equivalent to RSI for positioning). */
    CASE
      WHEN MAX(managed_money_net) OVER w26 = MIN(managed_money_net) OVER w26
      THEN 50.0
      ELSE ROUND(
        (
          managed_money_net - MIN(managed_money_net) OVER w26
        )::REAL / (
          MAX(managed_money_net) OVER w26 - MIN(managed_money_net) OVER w26
        ) * 100,
        1
      )
    END AS cot_index_26w,
    /* COT Index (52-week): longer-term positioning context */
    CASE
      WHEN MAX(managed_money_net) OVER w52 = MIN(managed_money_net) OVER w52
      THEN 50.0
      ELSE ROUND(
        (
          managed_money_net - MIN(managed_money_net) OVER w52
        )::REAL / (
          MAX(managed_money_net) OVER w52 - MIN(managed_money_net) OVER w52
        ) * 100,
        1
      )
    END AS cot_index_52w
  FROM latest_revision
  /* Windows are partitioned by contract so that, if the commodity maps to more than one */
  /* contract market code, each contract's history is evaluated on its own. With a single */
  /* contract this is identical to an unpartitioned window. */
  WINDOW
    w_prev AS (PARTITION BY cftc_contract_market_code ORDER BY report_date),
    w26 AS (
      PARTITION BY cftc_contract_market_code
      ORDER BY report_date
      ROWS BETWEEN 25 PRECEDING AND CURRENT ROW
    ),
    w52 AS (
      PARTITION BY cftc_contract_market_code
      ORDER BY report_date
      ROWS BETWEEN 51 PRECEDING AND CURRENT ROW
    )
)
SELECT
  report_date,
  market_and_exchange_name,
  cftc_commodity_code,
  cftc_contract_market_code,
  contract_units,
  ingest_date,
  open_interest,
  managed_money_long,
  managed_money_short,
  managed_money_spread,
  managed_money_net,
  prod_merc_long,
  prod_merc_short,
  prod_merc_net,
  swap_long,
  swap_short,
  swap_spread,
  swap_net,
  other_reportable_long,
  other_reportable_short,
  other_reportable_spread,
  other_reportable_net,
  nonreportable_long,
  nonreportable_short,
  nonreportable_net,
  managed_money_net_pct_of_oi,
  managed_money_long_short_ratio,
  change_open_interest,
  change_managed_money_long,
  change_managed_money_short,
  change_managed_money_net,
  change_prod_merc_long,
  change_prod_merc_short,
  managed_money_net_wow,
  concentration_top4_long_pct,
  concentration_top4_short_pct,
  concentration_top8_long_pct,
  concentration_top8_short_pct,
  traders_total,
  traders_managed_money_long,
  traders_managed_money_short,
  traders_managed_money_spread,
  cot_index_26w,
  cot_index_52w
FROM with_derived
WHERE
  /* Emit only the interval being loaded; warm-up rows exist solely to feed the windows */
  report_date BETWEEN @start_ds AND @end_ds
ORDER BY
  report_date

View File

@@ -0,0 +1,148 @@
/* Serving mart: COT positioning (combined futures+options) for Coffee C futures. */
/* Same analytics as serving.cot_positioning, but filtered to the combined */
/* report variant (report_type = 'Combined'). Positions include */
/* options delta-equivalent exposure, showing total directional market bet. */
/* Grain: one row per report_date for Coffee C futures. */
/* Latest revision per date: the row with the highest ingest_date is kept, which deduplicates CFTC corrections. */
/* Incremental note: LAG and the trailing windows need rows from before the interval */
/* being loaded, so the fact table is scanned with a warm-up period ahead of @start_ds */
/* and only rows inside @start_ds..@end_ds are emitted. */
MODEL (
  name serving.cot_positioning_combined,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (
    report_date
  ),
  start '2006-06-13',
  cron '@daily'
);

WITH latest_revision AS (
  /* Pick the most recently ingested row when CFTC issues corrections */
  SELECT
    f.*
  FROM foundation.fct_cot_positioning AS f
  INNER JOIN foundation.dim_commodity AS d
    ON f.cftc_commodity_code = d.cftc_commodity_code
  WHERE
    d.commodity_name = 'Coffee, Green'
    AND f.report_type = 'Combined'
    /* Warm-up: the widest window needs 51 preceding rows; 400 days covers that */
    /* assuming one report per week — TODO confirm against the source cadence. */
    AND f.report_date >= CAST(@start_ds AS DATE) - INTERVAL '400' DAY
    AND f.report_date <= @end_ds
  QUALIFY
    ROW_NUMBER() OVER (
      PARTITION BY f.report_date, f.cftc_contract_market_code
      ORDER BY f.ingest_date DESC
    ) = 1
), with_derived AS (
  SELECT
    report_date,
    market_and_exchange_name,
    cftc_commodity_code,
    cftc_contract_market_code,
    contract_units,
    ingest_date,
    /* Absolute positions (contracts, delta-equivalent for options) */
    open_interest,
    managed_money_long,
    managed_money_short,
    managed_money_spread,
    managed_money_net,
    prod_merc_long,
    prod_merc_short,
    prod_merc_net,
    swap_long,
    swap_short,
    swap_spread,
    swap_net,
    other_reportable_long,
    other_reportable_short,
    other_reportable_spread,
    other_reportable_net,
    nonreportable_long,
    nonreportable_short,
    nonreportable_net,
    /* Normalized: managed money net as % of open interest. */
    /* Removes size effects and makes cross-period comparison meaningful. NULL when open interest is 0. */
    ROUND(managed_money_net::REAL / NULLIF(open_interest, 0) * 100, 2) AS managed_money_net_pct_of_oi,
    /* Long/short ratio: >1 = more bulls than bears in managed money. NULL when there are no shorts. */
    ROUND(managed_money_long::REAL / NULLIF(managed_money_short, 0), 3) AS managed_money_long_short_ratio,
    /* Weekly changes as reported in the source */
    change_open_interest,
    change_managed_money_long,
    change_managed_money_short,
    change_managed_money_net,
    change_prod_merc_long,
    change_prod_merc_short,
    /* Week-over-week momentum in managed money net (current minus previous report) */
    managed_money_net - LAG(managed_money_net, 1) OVER w_prev AS managed_money_net_wow,
    /* Concentration */
    concentration_top4_long_pct,
    concentration_top4_short_pct,
    concentration_top8_long_pct,
    concentration_top8_short_pct,
    /* Trader counts */
    traders_total,
    traders_managed_money_long,
    traders_managed_money_short,
    traders_managed_money_spread,
    /* COT Index (26-week): where is current net vs. the trailing 26 reports? */
    /* 0 = most bearish extreme, 100 = most bullish extreme; 50 when the window has no range. */
    /* Includes options delta-equivalent exposure. */
    CASE
      WHEN MAX(managed_money_net) OVER w26 = MIN(managed_money_net) OVER w26
      THEN 50.0
      ELSE ROUND(
        (
          managed_money_net - MIN(managed_money_net) OVER w26
        )::REAL / (
          MAX(managed_money_net) OVER w26 - MIN(managed_money_net) OVER w26
        ) * 100,
        1
      )
    END AS cot_index_26w,
    /* COT Index (52-week): longer-term positioning context */
    CASE
      WHEN MAX(managed_money_net) OVER w52 = MIN(managed_money_net) OVER w52
      THEN 50.0
      ELSE ROUND(
        (
          managed_money_net - MIN(managed_money_net) OVER w52
        )::REAL / (
          MAX(managed_money_net) OVER w52 - MIN(managed_money_net) OVER w52
        ) * 100,
        1
      )
    END AS cot_index_52w
  FROM latest_revision
  /* Windows are partitioned by contract so that, if the commodity maps to more than one */
  /* contract market code, each contract's history is evaluated on its own. With a single */
  /* contract this is identical to an unpartitioned window. */
  WINDOW
    w_prev AS (PARTITION BY cftc_contract_market_code ORDER BY report_date),
    w26 AS (
      PARTITION BY cftc_contract_market_code
      ORDER BY report_date
      ROWS BETWEEN 25 PRECEDING AND CURRENT ROW
    ),
    w52 AS (
      PARTITION BY cftc_contract_market_code
      ORDER BY report_date
      ROWS BETWEEN 51 PRECEDING AND CURRENT ROW
    )
)
SELECT
  report_date,
  market_and_exchange_name,
  cftc_commodity_code,
  cftc_contract_market_code,
  contract_units,
  ingest_date,
  open_interest,
  managed_money_long,
  managed_money_short,
  managed_money_spread,
  managed_money_net,
  prod_merc_long,
  prod_merc_short,
  prod_merc_net,
  swap_long,
  swap_short,
  swap_spread,
  swap_net,
  other_reportable_long,
  other_reportable_short,
  other_reportable_spread,
  other_reportable_net,
  nonreportable_long,
  nonreportable_short,
  nonreportable_net,
  managed_money_net_pct_of_oi,
  managed_money_long_short_ratio,
  change_open_interest,
  change_managed_money_long,
  change_managed_money_short,
  change_managed_money_net,
  change_prod_merc_long,
  change_prod_merc_short,
  managed_money_net_wow,
  concentration_top4_long_pct,
  concentration_top4_short_pct,
  concentration_top8_long_pct,
  concentration_top8_short_pct,
  traders_total,
  traders_managed_money_long,
  traders_managed_money_short,
  traders_managed_money_spread,
  cot_index_26w,
  cot_index_52w
FROM with_derived
WHERE
  /* Emit only the interval being loaded; warm-up rows exist solely to feed the windows */
  report_date BETWEEN @start_ds AND @end_ds
ORDER BY
  report_date

View File

@@ -0,0 +1,187 @@
/* Serving mart: daily weather analytics for 12 coffee-growing regions. */
/* Source: foundation.fct_weather_daily (already has seed join for location metadata). */
/* Adds rolling aggregates, water balance, gaps-and-islands streak counters, */
/* and a composite crop stress index (0-100) as a single severity gauge. */
/* Grain: (location_id, observation_date) */
/* History handling: rolling windows reach back 30 rows and streak counters further. */
/* The base CTE therefore reads a 90-day warm-up period BEFORE @start_ds and the final */
/* SELECT emits only @start_ds..@end_ds. Without the warm-up, the oldest rows of every run */
/* (including rows re-processed through `lookback`) would be computed over truncated */
/* windows and overwrite previously correct values. */
/* Limitation: a streak longer than the warm-up is reported truncated at the scan start. */
MODEL (
  name serving.weather_daily,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column observation_date,
    lookback 90
  ),
  grain (location_id, observation_date),
  start '2020-01-01',
  cron '@daily'
);

WITH base AS (
  SELECT
    observation_date,
    location_id,
    location_name,
    country,
    lat,
    lon,
    variety,
    temp_min_c,
    temp_max_c,
    temp_mean_c,
    precipitation_mm,
    humidity_max_pct,
    cloud_cover_mean_pct,
    wind_max_speed_ms,
    et0_mm,
    vpd_max_kpa,
    is_frost,
    is_heat_stress,
    is_drought,
    is_high_vpd,
    in_growing_season,
    /* Rolling precipitation — w7 = trailing 7 rows, w30 = trailing 30 rows (per location) */
    SUM(precipitation_mm) OVER w7 AS precip_sum_7d_mm,
    SUM(precipitation_mm) OVER w30 AS precip_sum_30d_mm,
    /* Rolling temperature baseline */
    AVG(temp_mean_c) OVER w30 AS temp_mean_30d_c,
    /* Temperature anomaly: today vs trailing 30-row mean */
    temp_mean_c - AVG(temp_mean_c) OVER w30 AS temp_anomaly_c,
    /* Water balance: net daily water gain/loss (precipitation minus evapotranspiration) */
    precipitation_mm - et0_mm AS water_balance_mm,
    SUM(precipitation_mm - et0_mm) OVER w7 AS water_balance_7d_mm,
    /* Gaps-and-islands group markers for streak counting. */
    /* marker = ROW_NUMBER() - running count of TRUE = number of non-TRUE rows seen so far. */
    /* It is constant across a consecutive run of TRUE rows and differs between runs. */
    /* A non-TRUE row shares its marker with the TRUE run that FOLLOWS it, so the marker */
    /* alone is not enough: with_streaks partitions on the flag as well. */
    ROW_NUMBER() OVER seq - SUM(CASE WHEN is_drought THEN 1 ELSE 0 END) OVER running AS _drought_group,
    ROW_NUMBER() OVER seq - SUM(CASE WHEN is_heat_stress THEN 1 ELSE 0 END) OVER running AS _heat_group,
    ROW_NUMBER() OVER seq - SUM(CASE WHEN is_high_vpd THEN 1 ELSE 0 END) OVER running AS _vpd_group
  FROM foundation.fct_weather_daily
  WHERE
    /* 90-day warm-up ahead of the interval so windows and streaks have history to look at */
    observation_date >= CAST(@start_ds AS DATE) - INTERVAL '90' DAY
    AND observation_date <= @end_ds
  WINDOW
    seq AS (
      PARTITION BY location_id
      ORDER BY observation_date
    ),
    running AS (
      PARTITION BY location_id
      ORDER BY observation_date
      ROWS UNBOUNDED PRECEDING
    ),
    w7 AS (
      PARTITION BY location_id
      ORDER BY observation_date
      ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ),
    w30 AS (
      PARTITION BY location_id
      ORDER BY observation_date
      ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
    )
), with_streaks AS (
  SELECT
    base.*,
    /* Streak = position of the row inside its run of consecutive TRUE days. */
    /* Partitioning on the flag keeps the non-TRUE row that precedes a run out of the count; */
    /* counting it would overstate every such streak by one. */
    /* FALSE and NULL flags both yield 0. */
    /* Drought streak: number of consecutive drought days ending on observation_date. */
    CASE
      WHEN is_drought
      THEN ROW_NUMBER() OVER (
        PARTITION BY location_id, is_drought, _drought_group
        ORDER BY observation_date
      )
      ELSE 0
    END AS drought_streak_days,
    /* Heat stress streak: consecutive days flagged is_heat_stress */
    CASE
      WHEN is_heat_stress
      THEN ROW_NUMBER() OVER (
        PARTITION BY location_id, is_heat_stress, _heat_group
        ORDER BY observation_date
      )
      ELSE 0
    END AS heat_streak_days,
    /* VPD stress streak: consecutive days flagged is_high_vpd */
    CASE
      WHEN is_high_vpd
      THEN ROW_NUMBER() OVER (
        PARTITION BY location_id, is_high_vpd, _vpd_group
        ORDER BY observation_date
      )
      ELSE 0
    END AS vpd_streak_days
  FROM base
)
SELECT
  observation_date,
  location_id,
  location_name,
  country,
  lat,
  lon,
  variety,
  temp_min_c,
  temp_max_c,
  temp_mean_c,
  precipitation_mm,
  humidity_max_pct,
  cloud_cover_mean_pct,
  wind_max_speed_ms,
  et0_mm,
  vpd_max_kpa,
  is_frost,
  is_heat_stress,
  is_drought,
  is_high_vpd,
  in_growing_season,
  ROUND(precip_sum_7d_mm, 2) AS precip_sum_7d_mm,
  ROUND(precip_sum_30d_mm, 2) AS precip_sum_30d_mm,
  ROUND(temp_mean_30d_c, 2) AS temp_mean_30d_c,
  ROUND(temp_anomaly_c, 2) AS temp_anomaly_c,
  ROUND(water_balance_mm, 2) AS water_balance_mm,
  ROUND(water_balance_7d_mm, 2) AS water_balance_7d_mm,
  drought_streak_days,
  heat_streak_days,
  vpd_streak_days,
  /* Composite crop stress index (0-100).
     Weights: drought streak 30%, water deficit 25%, heat streak 20%,
     VPD streak 15%, frost (binary) 10%.
     Each component is normalized to [0,1] then capped before weighting:
       drought: 14 days = fully stressed
       water:   20mm 7d deficit = fully stressed
       heat:    7 days = fully stressed
       vpd:     7 days = fully stressed
       frost:   binary (Arabica highland catastrophic event) */
  ROUND(
    GREATEST(0.0, LEAST(100.0,
      LEAST(1.0, drought_streak_days / 14.0) * 30.0
      + LEAST(1.0, GREATEST(0.0, -water_balance_7d_mm) / 20.0) * 25.0
      + LEAST(1.0, heat_streak_days / 7.0) * 20.0
      + LEAST(1.0, vpd_streak_days / 7.0) * 15.0
      + CASE WHEN is_frost THEN 10.0 ELSE 0.0 END
    )),
    1
  ) AS crop_stress_index
FROM with_streaks
WHERE
  /* Emit only the interval being loaded; warm-up rows exist solely to feed the windows */
  observation_date BETWEEN @start_ds AND @end_ds
ORDER BY
  location_id,
  observation_date

View File

@@ -0,0 +1,95 @@
/* Staging: USDA PSD commodity data read straight from the landing CSVs. */
/* Steps: read all files matched by @psd_glob() as text, join the seed lookup tables */
/* for names, cast types, then deduplicate identical observations across files. */
/* ingest_date is derived from the landing path (.../{year}/{month}/{file}) and is the */
/* first day of that month. */
MODEL (
  name staging.psdalldata__commodity,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column ingest_date
  ),
  start '2006-08-01',
  cron '@daily'
);

WITH cast_dtypes AS (
  SELECT
    src.commodity_code::INT AS commodity_code,
    COALESCE(commodity_name, commodity_description) AS commodity_name,
    country_code::TEXT AS country_code,
    country_name,
    market_year::INT AS market_year,
    calendar_year::INT AS calendar_year,
    month::INT AS month,
    src.attribute_id::INT AS attribute_id,
    COALESCE(attribute_name, attribute_description) AS attribute_name,
    src.unit_id::INT AS unit_id,
    COALESCE(unit_name, unit_description) AS unit_name,
    value::REAL AS value,
    filename
  FROM READ_CSV(
    @psd_glob(),
    compression = 'gzip',
    header = TRUE,
    union_by_name = TRUE,
    filename = TRUE,
    /* Everything is read as text; the casts above are the single place types are assigned */
    all_varchar = TRUE,
    max_line_size = 10000000
  ) AS src
  LEFT JOIN seeds.psd_commodity_codes
    ON seeds.psd_commodity_codes.commodity_code = src.commodity_code::INT
  LEFT JOIN seeds.psd_unit_of_measure_codes
    ON seeds.psd_unit_of_measure_codes.unit_id = src.unit_id::INT
  LEFT JOIN seeds.psd_attribute_codes
    ON seeds.psd_attribute_codes.attribute_id = src.attribute_id::INT
), metadata_and_deduplication AS (
  /* One row per distinct observation. hkey hashes every business column, so all rows */
  /* in a group carry the same values and ANY_VALUE is safe for them. */
  SELECT
    ANY_VALUE(commodity_code) AS commodity_code,
    ANY_VALUE(commodity_name) AS commodity_name,
    ANY_VALUE(country_code) AS country_code,
    ANY_VALUE(country_name) AS country_name,
    ANY_VALUE(market_year) AS market_year,
    ANY_VALUE(calendar_year) AS calendar_year,
    ANY_VALUE(month) AS month,
    ANY_VALUE(attribute_id) AS attribute_id,
    ANY_VALUE(attribute_name) AS attribute_name,
    ANY_VALUE(unit_id) AS unit_id,
    ANY_VALUE(unit_name) AS unit_name,
    ANY_VALUE(value) AS value,
    HASH(
      commodity_code,
      commodity_name,
      country_code,
      country_name,
      market_year,
      calendar_year,
      month,
      attribute_id,
      attribute_name,
      unit_id,
      unit_name,
      value
    ) AS hkey,
    /* The same observation can appear in several files, so ingest_date is NOT constant */
    /* within a group. MIN pins each observation to the first file it appeared in. */
    /* That is deterministic and does not move when later files arrive, so a row always */
    /* lands in the same incremental interval. */
    MIN(
      MAKE_DATE(STR_SPLIT(filename, '/')[-3]::INT, STR_SPLIT(filename, '/')[-2]::INT, 1)
    ) AS ingest_date
  FROM cast_dtypes
  GROUP BY
    hkey
)
SELECT
  hkey,
  commodity_code,
  commodity_name,
  country_code,
  country_name,
  market_year,
  calendar_year,
  month,
  attribute_id,
  attribute_name,
  unit_id,
  unit_name,
  value,
  ingest_date
FROM metadata_and_deduplication
WHERE
  ingest_date BETWEEN @start_ds AND @end_ds

View File

@@ -0,0 +1,19 @@
[project]
name = "sqlmesh_beanflows"
version = "0.1.0"
description = "SQLMesh + DuckDB data transformation pipeline for BeanFlows"
authors = [
{ name = "Deeman", email = "hendriknote@gmail.com" }
]
requires-python = ">=3.13"
dependencies = [
"sqlmesh[duckdb,lsp]>=0.200.0",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["sqlmesh_beanflows"]

View File

@@ -0,0 +1,82 @@
# BeanFlows SQLMesh Transform Layer
Data transformation pipeline using SQLMesh and DuckDB, implementing a 3-layer architecture.
## Quick Start
```bash
# From repo root
# Plan changes (dev environment)
uv run sqlmesh -p transform/sqlmesh_beanflows plan
# Apply to production
uv run sqlmesh -p transform/sqlmesh_beanflows plan prod
# Run model tests
uv run sqlmesh -p transform/sqlmesh_beanflows test
# Format SQL
uv run sqlmesh -p transform/sqlmesh_beanflows format
```
## Architecture
### 3-Layer Data Model
```
landing/ ← immutable files (extraction output)
├── psd/{year}/{month}/ ← USDA PSD
├── cot/{year}/ ← CFTC COT
├── prices/coffee_kc/ ← KC=F daily prices
├── ice_stocks/ ← ICE daily warehouse stocks
├── ice_aging/ ← ICE monthly aging report
└── ice_stocks_by_port/ ← ICE historical EOM by port
staging/ ← read_csv + seed joins + cast (PSD)
└── staging.psdalldata__commodity
seeds/ ← static lookup CSVs (PSD code mappings)
├── seeds.psd_commodity_codes
├── seeds.psd_attribute_codes
└── seeds.psd_unit_of_measure_codes
foundation/ ← read_csv + cast + dedup (prices, COT, ICE)
├── foundation.fct_coffee_prices
├── foundation.fct_cot_positioning
├── foundation.fct_ice_warehouse_stocks
├── foundation.fct_ice_aging_stocks
├── foundation.fct_ice_warehouse_stocks_by_port
└── foundation.dim_commodity
serving/ ← pre-aggregated for web app
├── serving.coffee_prices
├── serving.cot_positioning
├── serving.cot_positioning_combined
├── serving.ice_warehouse_stocks
├── serving.ice_aging_stocks
├── serving.ice_warehouse_stocks_by_port
├── serving.commodity_metrics
└── serving.weather_daily
```
### Layer responsibilities
**staging/** — PSD only: reads landing CSVs directly via `@psd_glob()`, joins seed lookup tables, casts types, deduplicates. Uses INCREMENTAL_BY_TIME_RANGE (ingest_date derived from filename path).
**seeds/** — Static lookup tables (commodity codes, attribute codes, unit of measure) loaded from `seeds/*.csv`. Referenced by staging.
**foundation/** — All other sources (prices, COT, ICE): reads landing data (e.g. CSVs) directly via glob macros, casts types, deduplicates. Uses INCREMENTAL_BY_TIME_RANGE. Also holds `dim_commodity` (the cross-source identity mapping).
**serving/** — Analytics-ready aggregates consumed by the web app via `analytics.duckdb`. Pre-computes moving averages, COT indices, MoM changes. These are the only tables the web app reads.
### Why no raw layer?
Landing files are immutable and content-addressed — the landing directory is the audit trail. A SQL raw layer would just duplicate file bytes into DuckDB with no added value. The first SQL layer reads directly from landing.
## Environment Variables
| Variable | Default | Description |
|----------|---------|-------------|
| `LANDING_DIR` | `data/landing` | Root of the landing zone |
| `DUCKDB_PATH` | `local.duckdb` | DuckDB file (SQLMesh exclusive write access) |
The web app reads from a separate `analytics.duckdb` via `export_serving.py`.

View File

@@ -0,0 +1,57 @@
commodity_name,exchange_code,exchange
Crude Oil WTI,CL,CME
Crude Oil Brent,BZ,ICE
Gasoline RBOB,RB,CME
Heating Oil,HO,CME
Natural Gas,NG,CME
Ethanol,CU,CME
Cocoa,CC,ICE
Cotton,CT,ICE
Orange Juice,FCOJ-A,ICE
Coffee,KC,ICE
Lumber,LBR,ICE
Sugar,SB,ICE
European Gas TTF,TTF,ICE
European Union Emissions Allowance,ECF,ICE
Gold,GC,CME
Silver,SI,CME
Platinum,PL,CME
Copper,HG,CME
Palladium,PA,CME
Live Cattle,LE,CME
Feeder Cattle,GF,CME
Lean Hogs,HE,CME
Corn,ZC,CME
Soybean Oil,ZL,CME
Soybean meal,ZM,CME
Oats,ZO,CME
Rough Rice,ZR,CME
Soybeans,ZS,CME
Wheat,ZW,CME
Canola,RS,ICE
Rebar,RB,SHFE
Hot-Rolled Coil,HC,SHFE
Nickel,NI,SHFE
Tin,SN,SHFE
Aluminum,AL,SHFE
Zinc,ZN,SHFE
Natural Rubber,RU,SHFE
Bitumen,BU,SHFE
Iron Ore,I,DCE
Palm Oil,P,DCE
Eggs,JD,DCE
Coking Coal,JM,DCE
Polyvinyl Chloride (PVC),V,DCE
White Sugar,SR,ZCE
Cotton,CF,ZCE
Apple,AP,ZCE
PTA,TA,ZCE
Methanol,MA,ZCE
LME Aluminum,AH,LME
LME Copper,CA,LME
LME Lead,PB,LME
LME Nickel,NI,LME
LME Tin,SN,LME
LME Zinc,ZS,LME
Iron Ore,TIO,SGX
Rubber,TSR,SGX
1 commodity_name exchange_code exchange
2 Crude Oil WTI CL CME
3 Crude Oil Brent BZ ICE
4 Gasoline RBOB RB CME
5 Heating Oil HO CME
6 Natural Gas NG CME
7 Ethanol CU CME
8 Cocoa CC ICE
9 Cotton CT ICE
10 Orange Juice FCOJ-A ICE
11 Coffee KC ICE
12 Lumber LBR ICE
13 Sugar SB ICE
14 European Gas TTF TTF ICE
15 European Union Emissions Allowance ECF ICE
16 Gold GC CME
17 Silver SI CME
18 Platinum PL CME
19 Copper HG CME
20 Palladium PA CME
21 Live Cattle LE CME
22 Feeder Cattle GF CME
23 Lean Hogs HE CME
24 Corn ZC CME
25 Soybean Oil ZL CME
26 Soybean meal ZM CME
27 Oats ZO CME
28 Rough Rice ZR CME
29 Soybeans ZS CME
30 Wheat ZW CME
31 Canola RS ICE
32 Rebar RB SHFE
33 Hot-Rolled Coil HC SHFE
34 Nickel NI SHFE
35 Tin SN SHFE
36 Aluminum AL SHFE
37 Zinc ZN SHFE
38 Natural Rubber RU SHFE
39 Bitumen BU SHFE
40 Iron Ore I DCE
41 Palm Oil P DCE
42 Eggs JD DCE
43 Coking Coal JM DCE
44 Polyvinyl Chloride (PVC) V DCE
45 White Sugar SR ZCE
46 Cotton CF ZCE
47 Apple AP ZCE
48 PTA TA ZCE
49 Methanol MA ZCE
50 LME Aluminum AH LME
51 LME Copper CA LME
52 LME Lead PB LME
53 LME Nickel NI LME
54 LME Tin SN LME
55 LME Zinc ZS LME
56 Iron Ore TIO SGX
57 Rubber TSR SGX

View File

@@ -0,0 +1,2 @@
usda_commodity_code;cftc_commodity_code;commodity_name;commodity_group
"0711100";"083";"Coffee, Green";"Softs"
1 usda_commodity_code cftc_commodity_code commodity_name commodity_group
2 0711100 083 Coffee, Green Softs

View File

@@ -0,0 +1,133 @@
attribute_id;attribute_name
1;Area Planted
4;Area Harvested
5;Catch For Reduction
6;Cows In Milk
7;Crush
10;Total Grape Crush
13;Deliv. To Processors
16;Total Trees
17;Bearing Trees
19;Non-Bearing Trees
20;Beginning Stocks
22;Sow Beginning Stocks
23;Dairy Cows Beg. Stocks
24;Begin Stock (Ctrl App)
25;Beef Cows Beg. Stocks
26;Begin Stock (Other)
28;Production
29;Arabica Production
30;Beet Sugar Production
31;Commercial Production
32;Cows Milk Production
33;Farm Sales Weight Prod
34;Filter Production
40;Prod. from Wine Grapes
43;Cane Sugar Production
47;Non-Comm. Production
48;Non-Filter Production
49;Other Milk Production
51;Prod. from Tabl Grapes
53;Robusta Production
54;Rough Production
56;Other Production
57;Imports
58;Bean Imports
62;Intra-EU Imports
63;MY Imp. from U.S.
64;Raw Imports
65;U.S. Leaf Imports
70;MY Imp. from EU
71;Other Imports
74;Refined Imp.(Raw Val)
75;Roast & Ground Imports
78;CY Imports
81;TY Imports
82;Soluble Imports
83;CY Imp. from U.S.
84;TY Imp. from U.S.
86;Total Supply
87;CY Exp. to U.S.
88;Exports
89;Raw Exports
90;Bean Exports
94;Intra EU Exports
95;Intra-EU Exports
97;MY Exp. to EU
99;Refined Exp.(Raw Val)
104;Other Exports
107;Roast & Ground Exports
110;CY Exports
113;TY Exports
114;Soluble Exports
116;Slaughter (Reference)
117;Total Slaughter
118;Cow Slaughter
120;Inventory (Reference)
121;Sow Slaughter
122;Calf Slaughter
124;Other Slaughter
125;Domestic Consumption
126;Total Disappearance
128;Dom. Leaf Consumption
129;Dom.Consump(Cntrl App)
130;Feed Dom. Consumption
131;Fluid Use Dom. Consum.
132;For Processing
133;Fresh Dom. Consumption
135;Fresh Dom. Consumption
138;Human Consumption
139;Human Dom. Consumption
140;Industrial Dom. Cons.
141;Rst,Ground Dom. Consum
142;Domestic Use
143;Utilization for Sugar
145;Dom.Consump(Other)
147;Factory Use Consum.
149;Food Use Dom. Cons.
150;Loss
151;Other Disappearance
152;Other Use, Losses
154;Soluble Dom. Cons.
155;U.S. Leaf Dom. Cons.
157;Utilizatn for Alcohol
158;Feed Use Dom. Consum.
161;Feed Waste Dom. Cons.
167;Other Foreign Cons.
169;Withdrawal From Market
172;Loss and Residual
173;Total Disappearance
174;Total Use
175;Total Utilization
176;Ending Stocks
177;End Stocks (Cntrl App)
178;Total Distribution
179;End Stocks (Other)
181;Extr. Rate, 999.9999
182;Milling Rate (.9999)
183;Seed to Lint Ratio
184;YieldYield (Rough)
192;FSI Consumption
194;SME
195;Stocks-to-UseStock to Use %
196;Exportable Production
198;Balance
199;Inventory Balance
200;Inventory Change
201;Import Change
202;Export Change
203;Consumption Change
204;Production Change
205;Sow Change
206;Cow Change
207;Production to Cows
208;Production to Sows
209;Slaughter to Inventory
210;Weights
211;Population
212;Per Capita Consumption
213;Slaughter to Total Supply
214;Imports Percent Consumption
215;Exports Percent Production
220;Annual % Change Per Cap. Cons.
223;Stocks to Use (Months)
1 attribute_id attribute_name
2 1 Area Planted
3 4 Area Harvested
4 5 Catch For Reduction
5 6 Cows In Milk
6 7 Crush
7 10 Total Grape Crush
8 13 Deliv. To Processors
9 16 Total Trees
10 17 Bearing Trees
11 19 Non-Bearing Trees
12 20 Beginning Stocks
13 22 Sow Beginning Stocks
14 23 Dairy Cows Beg. Stocks
15 24 Begin Stock (Ctrl App)
16 25 Beef Cows Beg. Stocks
17 26 Begin Stock (Other)
18 28 Production
19 29 Arabica Production
20 30 Beet Sugar Production
21 31 Commercial Production
22 32 Cows Milk Production
23 33 Farm Sales Weight Prod
24 34 Filter Production
25 40 Prod. from Wine Grapes
26 43 Cane Sugar Production
27 47 Non-Comm. Production
28 48 Non-Filter Production
29 49 Other Milk Production
30 51 Prod. from Tabl Grapes
31 53 Robusta Production
32 54 Rough Production
33 56 Other Production
34 57 Imports
35 58 Bean Imports
36 62 Intra-EU Imports
37 63 MY Imp. from U.S.
38 64 Raw Imports
39 65 U.S. Leaf Imports
40 70 MY Imp. from EU
41 71 Other Imports
42 74 Refined Imp.(Raw Val)
43 75 Roast & Ground Imports
44 78 CY Imports
45 81 TY Imports
46 82 Soluble Imports
47 83 CY Imp. from U.S.
48 84 TY Imp. from U.S.
49 86 Total Supply
50 87 CY Exp. to U.S.
51 88 Exports
52 89 Raw Exports
53 90 Bean Exports
54 94 Intra EU Exports
55 95 Intra-EU Exports
56 97 MY Exp. to EU
57 99 Refined Exp.(Raw Val)
58 104 Other Exports
59 107 Roast & Ground Exports
60 110 CY Exports
61 113 TY Exports
62 114 Soluble Exports
63 116 Slaughter (Reference)
64 117 Total Slaughter
65 118 Cow Slaughter
66 120 Inventory (Reference)
67 121 Sow Slaughter
68 122 Calf Slaughter
69 124 Other Slaughter
70 125 Domestic Consumption
71 126 Total Disappearance
72 128 Dom. Leaf Consumption
73 129 Dom.Consump(Cntrl App)
74 130 Feed Dom. Consumption
75 131 Fluid Use Dom. Consum.
76 132 For Processing
77 133 Fresh Dom. Consumption
78 135 Fresh Dom. Consumption
79 138 Human Consumption
80 139 Human Dom. Consumption
81 140 Industrial Dom. Cons.
82 141 Rst,Ground Dom. Consum
83 142 Domestic Use
84 143 Utilization for Sugar
85 145 Dom.Consump(Other)
86 147 Factory Use Consum.
87 149 Food Use Dom. Cons.
88 150 Loss
89 151 Other Disappearance
90 152 Other Use, Losses
91 154 Soluble Dom. Cons.
92 155 U.S. Leaf Dom. Cons.
93 157 Utilizatn for Alcohol
94 158 Feed Use Dom. Consum.
95 161 Feed Waste Dom. Cons.
96 167 Other Foreign Cons.
97 169 Withdrawal From Market
98 172 Loss and Residual
99 173 Total Disappearance
100 174 Total Use
101 175 Total Utilization
102 176 Ending Stocks
103 177 End Stocks (Cntrl App)
104 178 Total Distribution
105 179 End Stocks (Other)
106 181 Extr. Rate, 999.9999
107 182 Milling Rate (.9999)
108 183 Seed to Lint Ratio
109 184 YieldYield (Rough)
110 192 FSI Consumption
111 194 SME
112 195 Stocks-to-UseStock to Use %
113 196 Exportable Production
114 198 Balance
115 199 Inventory Balance
116 200 Inventory Change
117 201 Import Change
118 202 Export Change
119 203 Consumption Change
120 204 Production Change
121 205 Sow Change
122 206 Cow Change
123 207 Production to Cows
124 208 Production to Sows
125 209 Slaughter to Inventory
126 210 Weights
127 211 Population
128 212 Per Capita Consumption
129 213 Slaughter to Total Supply
130 214 Imports Percent Consumption
131 215 Exports Percent Production
132 220 Annual % Change Per Cap. Cons.
133 223 Stocks to Use (Months)

View File

@@ -0,0 +1,57 @@
commodity_name,exchange_code,exchange,commodity_code
Crude Oil WTI,CL,CME,NA
Crude Oil Brent,BZ,ICE,NA
Gasoline RBOB,RB,CME,NA
Heating Oil,HO,CME,NA
Natural Gas,NG,CME,NA
Ethanol,CU,CME,NA
Cocoa,CC,ICE,NA
Cotton,CT,ICE,2631000
Orange Juice,FCOJ-A,ICE,0585100
Coffee,KC,ICE,0711100
Lumber,LBR,ICE,NA
Sugar,SB,ICE,0612000
European Gas TTF,TTF,ICE,NA
European Union Emissions Allowance,ECF,ICE,NA
Gold,GC,CME,NA
Silver,SI,CME,NA
Platinum,PL,CME,NA
Copper,HG,CME,NA
Palladium,PA,CME,NA
Live Cattle,LE,CME,0011000
Feeder Cattle,GF,CME,0011000
Lean Hogs,HE,CME,NA
Corn,ZC,CME,0440000
Soybean Oil,ZL,CME,4232000
Soybean meal,ZM,CME,0813100
Oats,ZO,CME,0452000
Rough Rice,ZR,CME,0422110
Soybeans,ZS,CME,NA
Wheat,ZW,CME,0410000
Canola,RS,ICE,2226000
Rebar,RB,SHFE,NA
Hot-Rolled Coil,HC,SHFE,NA
Nickel,NI,SHFE,NA
Tin,SN,SHFE,NA
Aluminum,AL,SHFE,NA
Zinc,ZN,SHFE,NA
Natural Rubber,RU,SHFE,NA
Bitumen,BU,SHFE,NA
Iron Ore,I,DCE,NA
Palm Oil,P,DCE,4243000
Eggs,JD,DCE,NA
Coking Coal,JM,DCE,NA
Polyvinyl Chloride (PVC),V,DCE,NA
White Sugar,SR,ZCE,0612000
Cotton,CF,ZCE,2631000
Apple,AP,ZCE,0574000
PTA,TA,ZCE,NA
Methanol,MA,ZCE,NA
LME Aluminum,AH,LME,NA
LME Copper,CA,LME,NA
LME Lead,PB,LME,NA
LME Nickel,NI,LME,NA
LME Tin,SN,LME,NA
LME Zinc,ZS,LME,NA
Iron Ore,TIO,SGX,NA
Rubber,TSR,SGX,NA
1 commodity_name exchange_code exchange commodity_code
2 Crude Oil WTI CL CME NA
3 Crude Oil Brent BZ ICE NA
4 Gasoline RBOB RB CME NA
5 Heating Oil HO CME NA
6 Natural Gas NG CME NA
7 Ethanol CU CME NA
8 Cocoa CC ICE NA
9 Cotton CT ICE 2631000
10 Orange Juice FCOJ-A ICE 0585100
11 Coffee KC ICE 0711100
12 Lumber LBR ICE NA
13 Sugar SB ICE 0612000
14 European Gas TTF TTF ICE NA
15 European Union Emissions Allowance ECF ICE NA
16 Gold GC CME NA
17 Silver SI CME NA
18 Platinum PL CME NA
19 Copper HG CME NA
20 Palladium PA CME NA
21 Live Cattle LE CME 0011000
22 Feeder Cattle GF CME 0011000
23 Lean Hogs HE CME NA
24 Corn ZC CME 0440000
25 Soybean Oil ZL CME 4232000
26 Soybean meal ZM CME 0813100
27 Oats ZO CME 0452000
28 Rough Rice ZR CME 0422110
29 Soybeans ZS CME NA
30 Wheat ZW CME 0410000
31 Canola RS ICE 2226000
32 Rebar RB SHFE NA
33 Hot-Rolled Coil HC SHFE NA
34 Nickel NI SHFE NA
35 Tin SN SHFE NA
36 Aluminum AL SHFE NA
37 Zinc ZN SHFE NA
38 Natural Rubber RU SHFE NA
39 Bitumen BU SHFE NA
40 Iron Ore I DCE NA
41 Palm Oil P DCE 4243000
42 Eggs JD DCE NA
43 Coking Coal JM DCE NA
44 Polyvinyl Chloride (PVC) V DCE NA
45 White Sugar SR ZCE 0612000
46 Cotton CF ZCE 2631000
47 Apple AP ZCE 0574000
48 PTA TA ZCE NA
49 Methanol MA ZCE NA
50 LME Aluminum AH LME NA
51 LME Copper CA LME NA
52 LME Lead PB LME NA
53 LME Nickel NI LME NA
54 LME Tin SN LME NA
55 LME Zinc ZS LME NA
56 Iron Ore TIO SGX NA
57 Rubber TSR SGX NA

View File

@@ -0,0 +1,65 @@
commodity_code; commodity_name
0577400;Almonds, Shelled Basis
0011000;Animal Numbers, Cattle
0013000;Animal Numbers, Swine
0574000;Apples, Fresh
0430000;Barley
0579305;Cherries (Sweet&Sour), Fresh
0711100;Coffee, Green
0440000;Corn
2631000;Cotton
0000000;Cotton (Metric Tons)
0230000;Dairy, Butter
0240000;Dairy, Cheese
0224400;Dairy, Dry Whole Milk Powder
0223000;Dairy, Milk, Fluid
0224200;Dairy, Milk, Nonfat Dry
0572220;Grapefruit, Fresh
0575100;Grapes, Fresh Table
0572120;Lemons/Limes, Fresh
0813700;Meal, Copra
0813300;Meal, Cottonseed
0814200;Meal, Fish
0813800;Meal, Palm Kernel
0813200;Meal, Peanut
0813600;Meal, Rapeseed
0813100;Meal, Soybean
0813101;Meal, Soybean (Local)
0813500;Meal, Sunflowerseed
0111000;Meat, Beef and Veal
0115000;Meat, Chicken
0113000;Meat, Swine
0459100;Millet
0459900;Mixed Grain
0452000;Oats
4242000;Oil, Coconut
4233000;Oil, Cottonseed
4235000;Oil, Olive
4243000;Oil, Palm
4244000;Oil, Palm Kernel
4234000;Oil, Peanut
4239100;Oil, Rapeseed
4232000;Oil, Soybean
4232001;Oil, Soybean (Local)
4236000;Oil, Sunflowerseed
2231000;Oilseed, Copra
2223000;Oilseed, Cottonseed
2232000;Oilseed, Palm Kernel
2221000;Oilseed, Peanut
2226000;Oilseed, Rapeseed
2222000;Oilseed, Soybean
2222001;Oilseed, Soybean (Local)
2224000;Oilseed, Sunflowerseed
0585100;Orange Juice
0571120;Oranges, Fresh
0579309;Peaches & Nectarines, Fresh
0579220;Pears, Fresh
0577907;Pistachios, Inshell Basis
0114200;Poultry, Meat, Broiler
0422110;Rice, Milled
0451000;Rye
0459200;Sorghum
0612000;Sugar, Centrifugal
0571220;Tangerines/Mandarins, Fresh
0577901;Walnuts, Inshell Basis
0410000;Wheat
1 commodity_code commodity_name
2 0577400 Almonds, Shelled Basis
3 0011000 Animal Numbers, Cattle
4 0013000 Animal Numbers, Swine
5 0574000 Apples, Fresh
6 0430000 Barley
7 0579305 Cherries (Sweet&Sour), Fresh
8 0711100 Coffee, Green
9 0440000 Corn
10 2631000 Cotton
11 0000000 Cotton (Metric Tons)
12 0230000 Dairy, Butter
13 0240000 Dairy, Cheese
14 0224400 Dairy, Dry Whole Milk Powder
15 0223000 Dairy, Milk, Fluid
16 0224200 Dairy, Milk, Nonfat Dry
17 0572220 Grapefruit, Fresh
18 0575100 Grapes, Fresh Table
19 0572120 Lemons/Limes, Fresh
20 0813700 Meal, Copra
21 0813300 Meal, Cottonseed
22 0814200 Meal, Fish
23 0813800 Meal, Palm Kernel
24 0813200 Meal, Peanut
25 0813600 Meal, Rapeseed
26 0813100 Meal, Soybean
27 0813101 Meal, Soybean (Local)
28 0813500 Meal, Sunflowerseed
29 0111000 Meat, Beef and Veal
30 0115000 Meat, Chicken
31 0113000 Meat, Swine
32 0459100 Millet
33 0459900 Mixed Grain
34 0452000 Oats
35 4242000 Oil, Coconut
36 4233000 Oil, Cottonseed
37 4235000 Oil, Olive
38 4243000 Oil, Palm
39 4244000 Oil, Palm Kernel
40 4234000 Oil, Peanut
41 4239100 Oil, Rapeseed
42 4232000 Oil, Soybean
43 4232001 Oil, Soybean (Local)
44 4236000 Oil, Sunflowerseed
45 2231000 Oilseed, Copra
46 2223000 Oilseed, Cottonseed
47 2232000 Oilseed, Palm Kernel
48 2221000 Oilseed, Peanut
49 2226000 Oilseed, Rapeseed
50 2222000 Oilseed, Soybean
51 2222001 Oilseed, Soybean (Local)
52 2224000 Oilseed, Sunflowerseed
53 0585100 Orange Juice
54 0571120 Oranges, Fresh
55 0579309 Peaches & Nectarines, Fresh
56 0579220 Pears, Fresh
57 0577907 Pistachios, Inshell Basis
58 0114200 Poultry, Meat, Broiler
59 0422110 Rice, Milled
60 0451000 Rye
61 0459200 Sorghum
62 0612000 Sugar, Centrifugal
63 0571220 Tangerines/Mandarins, Fresh
64 0577901 Walnuts, Inshell Basis
65 0410000 Wheat

View File

@@ -0,0 +1,43 @@
unit_id; unit_name
1;(1000 BUSHES)
2;(1000 60 KG BAGS)
3;(1000 COLONIES)
4;(1000 HA)
5;(1000 HEAD)
6;(1000 HL)
7;(1000 MT CWE)
8;(1000 MT)
9;(1000 PCS)
10;(1000 TREES)
11;(Dec. Fraction)
12;(HA)
13;(HECTARES)
14;(KG)
15;(MIL HEAD)
16;(MIL PCS)
17;(MILLION TREES)
18;(MT RAW EQ)
19;(MT RAW EW)
20;(MT RE)
21;(MT)
22;(MT, Net Weight)
23;(PERCENT)
24;(RATIO)
25;(1000 CUBIC METERS)
26;(MT/HA)
27;1000 480 lb. Bales
28;(Bales/HA)
29;(KG/HA)
30;ACRES
31;BUSHELS
32;HUNDREDWEIGHT
33;MILLING RATE
34;BUSHELS/TON
35;IMPORT MILLING RATE
36;Bushels
37;SHORT TONS
38;MILLION LBS
39;BILLION LBS
40;(HEAD)
41;(PEOPLE)
42;(MONTHS)
1 unit_id unit_name
2 1 (1000 BUSHES)
3 2 (1000 60 KG BAGS)
4 3 (1000 COLONIES)
5 4 (1000 HA)
6 5 (1000 HEAD)
7 6 (1000 HL)
8 7 (1000 MT CWE)
9 8 (1000 MT)
10 9 (1000 PCS)
11 10 (1000 TREES)
12 11 (Dec. Fraction)
13 12 (HA)
14 13 (HECTARES)
15 14 (KG)
16 15 (MIL HEAD)
17 16 (MIL PCS)
18 17 (MILLION TREES)
19 18 (MT RAW EQ)
20 19 (MT RAW EW)
21 20 (MT RE)
22 21 (MT)
23 22 (MT, Net Weight)
24 23 (PERCENT)
25 24 (RATIO)
26 25 (1000 CUBIC METERS)
27 26 (MT/HA)
28 27 1000 480 lb. Bales
29 28 (Bales/HA)
30 29 (KG/HA)
31 30 ACRES
32 31 BUSHELS
33 32 HUNDREDWEIGHT
34 33 MILLING RATE
35 34 BUSHELS/TON
36 35 IMPORT MILLING RATE
37 36 Bushels
38 37 SHORT TONS
39 38 MILLION LBS
40 39 BILLION LBS
41 40 (HEAD)
42 41 (PEOPLE)
43 42 (MONTHS)

View File

@@ -0,0 +1,13 @@
location_id;name;country;lat;lon;variety
brazil_minas_gerais;Minas Gerais;BR;-19.9167;-43.9345;Arabica
brazil_parana;Paraná;BR;-23.4205;-51.9330;Arabica
vietnam_highlands;Central Highlands;VN;12.6667;108.0500;Robusta
colombia_huila;Huila;CO;2.5359;-75.5277;Arabica
ethiopia_sidama;Sidama;ET;6.7612;38.4721;Arabica
honduras_copan;Copán;HN;14.8333;-89.1500;Arabica
guatemala_antigua;Antigua;GT;14.5586;-90.7295;Arabica
indonesia_sumatra;Sumatra;ID;3.5952;98.6722;Robusta
brazil_espirito_santo;Espírito Santo;BR;-19.3908;-40.0668;Robusta
peru_jaen;Jaén;PE;-5.7064;-78.8077;Arabica
uganda_elgon;Mount Elgon;UG;1.0826;34.1751;Robusta
ivory_coast_daloa;Daloa;CI;6.8774;-6.4502;Robusta
1 location_id name country lat lon variety
2 brazil_minas_gerais Minas Gerais BR -19.9167 -43.9345 Arabica
3 brazil_parana Paraná BR -23.4205 -51.9330 Arabica
4 vietnam_highlands Central Highlands VN 12.6667 108.0500 Robusta
5 colombia_huila Huila CO 2.5359 -75.5277 Arabica
6 ethiopia_sidama Sidama ET 6.7612 38.4721 Arabica
7 honduras_copan Copán HN 14.8333 -89.1500 Arabica
8 guatemala_antigua Antigua GT 14.5586 -90.7295 Arabica
9 indonesia_sumatra Sumatra ID 3.5952 98.6722 Robusta
10 brazil_espirito_santo Espírito Santo BR -19.3908 -40.0668 Robusta
11 peru_jaen Jaén PE -5.7064 -78.8077 Arabica
12 uganda_elgon Mount Elgon UG 1.0826 34.1751 Robusta
13 ivory_coast_daloa Daloa CI 6.8774 -6.4502 Robusta