refactor: rename materia → beanflows throughout codebase
- Rename src/materia/ → src/beanflows/ (Python package)
- Rename transform/sqlmesh_materia/ → transform/sqlmesh_beanflows/
- Rename infra/supervisor/materia-supervisor.service → beanflows-supervisor.service
- Rename infra/backup/materia-backup.{service,timer} → beanflows-backup.{service,timer}
- Update all path strings: /opt/materia → /opt/beanflows, /data/materia → /data/beanflows
- Update pyproject.toml: project name, CLI entrypoint, workspace source key
- Update all internal imports from materia.* → beanflows.*
- Update infra scripts: REPO_DIR, service names, systemctl references
- Fix docker-compose.prod.yml: /data/materia → /data/beanflows (bind mount path)
Intentionally left unchanged: Pulumi stack name (materia-infrastructure) and
Hetzner resource names ("materia-key", "managed_by: materia") — these reference
live cloud infrastructure and require separate cloud-side renames.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
0
transform/sqlmesh_beanflows/audits/.gitkeep
Normal file
0
transform/sqlmesh_beanflows/audits/.gitkeep
Normal file
@@ -0,0 +1,9 @@
|
||||
/* Audit: selects every row of the audited model whose item_id is negative. */
/* Any row returned by an audit query is reported by SQLMesh as a failure. */
/* NOTE(review): the audit name refers to order ids while the predicate checks item_id — confirm which column is intended. */
AUDIT (name assert_positive_order_ids);

SELECT * FROM @this_model WHERE item_id < 0
|
||||
41
transform/sqlmesh_beanflows/config.yaml
Normal file
41
transform/sqlmesh_beanflows/config.yaml
Normal file
@@ -0,0 +1,41 @@
|
||||
# --- Gateway Connection ---
|
||||
# Single local DuckDB gateway
|
||||
# Local dev uses virtual environments (e.g., dev_<username>)
|
||||
# Production uses the 'prod' environment
|
||||
gateways:
|
||||
duckdb:
|
||||
connection:
|
||||
type: duckdb
|
||||
catalogs:
|
||||
local: '{{ env_var("DUCKDB_PATH", "local.duckdb") }}'
|
||||
|
||||
default_gateway: duckdb
|
||||
|
||||
# --- Variables ---
|
||||
variables:
|
||||
LANDING_DIR: '{{ env_var("LANDING_DIR", "data/landing") }}'
|
||||
|
||||
# --- Model Defaults ---
|
||||
# https://sqlmesh.readthedocs.io/en/stable/reference/model_configuration/#model-defaults
|
||||
|
||||
model_defaults:
|
||||
dialect: duckdb
|
||||
start: 2025-07-07 # Start date for backfill history
|
||||
cron: '@daily' # Run models daily at 12am UTC (can override per model)
|
||||
|
||||
# --- Linting Rules ---
|
||||
# https://sqlmesh.readthedocs.io/en/stable/guides/linter/
|
||||
|
||||
linter:
|
||||
enabled: true
|
||||
rules:
|
||||
# ambiguousorinvalidcolumn removed: sqlglot cannot introspect read_csv() TVF
|
||||
# schemas at lint time, causing false positives on all raw models. Cross-model
|
||||
# column validation is handled by SQLMesh at plan time via columns() declarations.
|
||||
- invalidselectstarexpansion
|
||||
|
||||
# --- Default Target Environment ---
|
||||
# Prevents accidentally applying plans to prod during local development.
|
||||
# https://sqlmesh.readthedocs.io/en/stable/guides/configuration/#default-target-environment
|
||||
|
||||
default_target_environment: dev_{{ user() }}
|
||||
1
transform/sqlmesh_beanflows/external_models.yaml
Normal file
1
transform/sqlmesh_beanflows/external_models.yaml
Normal file
@@ -0,0 +1 @@
|
||||
[]
|
||||
0
transform/sqlmesh_beanflows/macros/.gitkeep
Normal file
0
transform/sqlmesh_beanflows/macros/.gitkeep
Normal file
63
transform/sqlmesh_beanflows/macros/__init__.py
Normal file
63
transform/sqlmesh_beanflows/macros/__init__.py
Normal file
@@ -0,0 +1,63 @@
|
||||
import os
|
||||
|
||||
from sqlmesh import macro
|
||||
|
||||
|
||||
def _landing_glob(evaluator, relative_pattern: str) -> str:
    """Return a single-quoted glob path rooted at the landing directory.

    The landing directory is taken from the SQLMesh ``LANDING_DIR`` variable;
    when that is unset or empty, the ``LANDING_DIR`` environment variable is
    used, and finally the relative default ``data/landing``.

    Args:
        evaluator: SQLMesh macro evaluator; only ``evaluator.var`` is called.
        relative_pattern: Glob pattern relative to the landing directory.

    Returns:
        The full glob wrapped in single quotes, ready to be used as a SQL
        string literal.
    """
    landing_dir = evaluator.var("LANDING_DIR") or os.environ.get("LANDING_DIR", "data/landing")
    return f"'{landing_dir}/{relative_pattern}'"


@macro()
def psd_glob(evaluator) -> str:
    """Return a quoted glob path for all PSD CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "psd/**/*.csv.gzip")


@macro()
def cot_glob(evaluator) -> str:
    """Return a quoted glob path for all COT CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "cot/**/*.csv.gzip")


@macro()
def cot_combined_glob(evaluator) -> str:
    """Return a quoted glob path for all COT combined (futures+options) CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "cot_combined/**/*.csv.gzip")


@macro()
def prices_glob(evaluator) -> str:
    """Return a quoted glob path for all coffee price CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "prices/coffee_kc/**/*.csv.gzip")


@macro()
def ice_stocks_glob(evaluator) -> str:
    """Return a quoted glob path for all ICE warehouse stock CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "ice_stocks/**/*.csv.gzip")


@macro()
def ice_aging_glob(evaluator) -> str:
    """Return a quoted glob path for all ICE aging report CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "ice_aging/**/*.csv.gzip")


@macro()
def ice_stocks_by_port_glob(evaluator) -> str:
    """Return a quoted glob path for all ICE historical by-port CSV gzip files under LANDING_DIR."""
    return _landing_glob(evaluator, "ice_stocks_by_port/**/*.csv.gzip")


@macro()
def weather_glob(evaluator) -> str:
    """Return a quoted glob path for all Open-Meteo weather JSON gzip files under LANDING_DIR.

    Pattern: weather/{location_id}/{year}/{date}.json.gz
    The double-star catches all location_id subdirectories.
    """
    return _landing_glob(evaluator, "weather/**/*.json.gz")
|
||||
0
transform/sqlmesh_beanflows/models/.gitkeep
Normal file
0
transform/sqlmesh_beanflows/models/.gitkeep
Normal file
@@ -0,0 +1,59 @@
|
||||
/* Pivots PSD commodity attributes from one row per attribute into one column per attribute. */
/* Output rows are grouped by (commodity_code, country_code, market_year, ingest_date). */
/* An attribute with no matching rows in a group is reported as 0. */
MODEL (
  name cleaned.psdalldata__commodity_pivoted,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column ingest_date
  ),
  start '2006-08-01',
  cron '@daily'
);

SELECT
  MAX(hkey) AS hkey,
  commodity_code,
  MAX(commodity_name) AS commodity_name,
  country_code,
  MAX(country_name) AS country_name,
  market_year,
  ingest_date,
  COALESCE(SUM(CASE WHEN attribute_name = 'Production' THEN value END), 0) AS Production,
  COALESCE(SUM(CASE WHEN attribute_name = 'Imports' THEN value END), 0) AS Imports,
  COALESCE(SUM(CASE WHEN attribute_name = 'Exports' THEN value END), 0) AS Exports,
  COALESCE(SUM(CASE WHEN attribute_name = 'Total Distribution' THEN value END), 0) AS Total_Distribution,
  COALESCE(SUM(CASE WHEN attribute_name = 'Ending Stocks' THEN value END), 0) AS Ending_Stocks,
  COALESCE(SUM(CASE WHEN attribute_name = 'Beginning Stocks' THEN value END), 0) AS Beginning_Stocks,
  COALESCE(SUM(CASE WHEN attribute_name = 'Total Supply' THEN value END), 0) AS Total_Supply,
  COALESCE(SUM(CASE WHEN attribute_name = 'Domestic Consumption' THEN value END), 0) AS Domestic_Consumption,
  COALESCE(SUM(CASE WHEN attribute_name = 'Domestic Demand' THEN value END), 0) AS Domestic_Demand,
  COALESCE(SUM(CASE WHEN attribute_name = 'Food Use' THEN value END), 0) AS Food_Use,
  COALESCE(SUM(CASE WHEN attribute_name = 'Industrial Use' THEN value END), 0) AS Industrial_Use,
  COALESCE(SUM(CASE WHEN attribute_name = 'Seed Use' THEN value END), 0) AS Seed_Use,
  COALESCE(SUM(CASE WHEN attribute_name = 'Waste' THEN value END), 0) AS Waste,
  COALESCE(SUM(CASE WHEN attribute_name = 'Feed Use' THEN value END), 0) AS Feed_Use
FROM staging.psdalldata__commodity
WHERE
  /* Only read the interval being processed; without this every incremental run */
  /* would aggregate the full upstream table. */
  /* NOTE(review): assumes ingest_date is a DATE upstream — confirm against staging.psdalldata__commodity. */
  ingest_date BETWEEN @start_ds AND @end_ds
  AND attribute_name IN (
    'Production',
    'Imports',
    'Exports',
    'Total Distribution',
    'Ending Stocks',
    'Beginning Stocks',
    'Total Supply',
    'Domestic Consumption',
    'Domestic Demand',
    'Food Use',
    'Industrial Use',
    'Seed Use',
    'Waste',
    'Feed Use'
  )
GROUP BY
  commodity_code,
  country_code,
  market_year,
  ingest_date
ORDER BY
  commodity_code,
  country_code,
  market_year,
  ingest_date
|
||||
@@ -0,0 +1,15 @@
|
||||
/* Commodity dimension: conforms identifiers across source systems. */
/* This is the ontology. Each row is a commodity tracked by BeanFlows. */
/* New sources (ICO, futures prices, satellite) add their commodity identifiers */
/* as columns here rather than as separate tables. */
/* New commodities (cocoa, sugar) are added as rows here. */
/* References: */
/*   usda_commodity_code → staging.psdalldata__commodity.commodity_code (numeric string, e.g. '0711100') */
/*   cftc_commodity_code → foundation.fct_cot_positioning.cftc_commodity_code (3-char, e.g. '083') */
/* NOTE: Defined as FULL model (not SEED) to guarantee leading-zero preservation. */
/* Pandas CSV loading converts '083' → 83 even with varchar column declarations. */
MODEL (
  name foundation.dim_commodity,
  kind FULL
);

SELECT
  commodity_rows.usda_commodity_code,
  commodity_rows.cftc_commodity_code,
  commodity_rows.ticker,
  commodity_rows.ice_stock_report_code,
  commodity_rows.commodity_name,
  commodity_rows.commodity_group
FROM (
  VALUES
    /* One tuple per commodity; keep every code quoted so leading zeros survive. */
    ('0711100', '083', 'KC=F', 'COFFEE-C', 'Coffee, Green', 'Softs')
) AS commodity_rows(
  usda_commodity_code,
  cftc_commodity_code,
  ticker,
  ice_stock_report_code,
  commodity_name,
  commodity_group
)
|
||||
@@ -0,0 +1,58 @@
|
||||
/* Foundation fact: daily KC=F Coffee C futures prices. */
/* Reads the landing-zone CSV files as text, casts each column to its proper type, */
/* and collapses duplicate rows that share the same hash key. */
/* Covers all available history from the landing directory. */
/* Declared grain: one row per trade_date. */
/* Dedup key: hash of the raw (Date, Close) values — if Yahoo Finance corrects a price, */
/* the corrected row hashes differently and is picked up on the next incremental run. */
/* NOTE(review): a corrected price and the original share a trade_date but not a hash key, */
/* so both rows pass the dedup step — confirm the one-row-per-trade_date grain still holds. */
MODEL (
  name foundation.fct_coffee_prices,
  kind INCREMENTAL_BY_TIME_RANGE (time_column trade_date),
  grain (trade_date),
  start '1971-08-16',
  cron '@daily'
);

WITH raw_files AS (
  /* Every column arrives as VARCHAR; the filename column records the source file of each row. */
  SELECT
    *
  FROM READ_CSV(
    @prices_glob(),
    compression = 'gzip',
    header = TRUE,
    union_by_name = TRUE,
    filename = TRUE,
    all_varchar = TRUE
  )
), typed AS (
  SELECT
    TRY_CAST(Date AS DATE) AS trade_date,
    TRY_CAST(Open AS DOUBLE) AS open,
    TRY_CAST(High AS DOUBLE) AS high,
    TRY_CAST(Low AS DOUBLE) AS low,
    TRY_CAST(Close AS DOUBLE) AS close,
    TRY_CAST("Adj Close" AS DOUBLE) AS adj_close,
    TRY_CAST(Volume AS BIGINT) AS volume,
    /* Filename encodes the content hash — use as ingest identifier */
    filename AS source_file,
    /* Dedup key: trade date + close price */
    HASH(Date, Close) AS hkey
  FROM raw_files
  WHERE
    /* Drop rows whose date or close cannot be parsed. */
    TRY_CAST(Date AS DATE) IS NOT NULL
    AND TRY_CAST(Close AS DOUBLE) IS NOT NULL
), unique_rows AS (
  SELECT
    ANY_VALUE(trade_date) AS trade_date,
    ANY_VALUE(open) AS open,
    ANY_VALUE(high) AS high,
    ANY_VALUE(low) AS low,
    ANY_VALUE(close) AS close,
    ANY_VALUE(adj_close) AS adj_close,
    ANY_VALUE(volume) AS volume,
    ANY_VALUE(source_file) AS source_file,
    hkey
  FROM typed
  GROUP BY
    hkey
)
SELECT
  trade_date,
  open,
  high,
  low,
  close,
  adj_close,
  volume,
  source_file,
  hkey
FROM unique_rows
WHERE
  trade_date BETWEEN @start_ds AND @end_ds
|
||||
@@ -0,0 +1,146 @@
|
||||
/* Foundation fact: CFTC COT positioning, weekly grain, all commodities. */
/* Reads directly from the landing zone, casts varchar columns to proper types, */
/* cleans column names, computes net positions (long - short) per trader category, */
/* and deduplicates via hash key. Covers all commodities — filtering to */
/* a specific commodity happens in the serving layer. */
/* Business key: (cftc_commodity_code, report_date, cftc_contract_market_code, report_type). */
/* Declared grain adds ingest_date, so revisions appear as new rows with a later ingest_date. */
/* Serving layer picks max(ingest_date) per business key for latest view. */
MODEL (
  name foundation.fct_cot_positioning,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (cftc_commodity_code, report_date, cftc_contract_market_code, ingest_date, report_type),
  start '2006-06-13',
  cron '@daily'
);

WITH src AS (
  /* Futures-only files and combined (futures+options) files are stacked by column name. */
  SELECT
    *
  FROM READ_CSV(
    @cot_glob(),
    compression = 'gzip',
    header = TRUE,
    union_by_name = TRUE,
    filename = TRUE,
    all_varchar = TRUE,
    max_line_size = 10000000
  )
  UNION ALL BY NAME
  SELECT
    *
  FROM READ_CSV(
    @cot_combined_glob(),
    compression = 'gzip',
    header = TRUE,
    union_by_name = TRUE,
    filename = TRUE,
    all_varchar = TRUE,
    max_line_size = 10000000
  )
), cast_and_clean AS (
  SELECT
    /* Identifiers */
    TRIM(market_and_exchange_names) AS market_and_exchange_name,
    /* TRY_CAST (not ::DATE) so a malformed date becomes NULL and is filtered out below */
    /* instead of aborting the whole load. */
    TRY_CAST("Report_Date_as_YYYY-MM-DD" AS DATE) AS report_date,
    TRIM(cftc_commodity_code) AS cftc_commodity_code,
    TRIM(cftc_contract_market_code) AS cftc_contract_market_code,
    TRIM(contract_units) AS contract_units,
    /* 'FutOnly' or 'Combined' — discriminates the two CFTC report variants */
    TRIM("FutOnly_or_Combined") AS report_type,
    /* Open interest */
    /* CFTC uses '.' as null for any field — use TRY_CAST throughout */
    TRY_CAST(open_interest_all AS INT) AS open_interest,
    /* Producer / Merchant (commercial hedgers: exporters, processors) */
    TRY_CAST(prod_merc_positions_long_all AS INT) AS prod_merc_long,
    TRY_CAST(prod_merc_positions_short_all AS INT) AS prod_merc_short,
    /* Swap dealers */
    TRY_CAST(swap_positions_long_all AS INT) AS swap_long,
    TRY_CAST("Swap__Positions_Short_All" AS INT) AS swap_short,
    TRY_CAST("Swap__Positions_Spread_All" AS INT) AS swap_spread,
    /* Managed money (hedge funds, CTAs — the primary speculative signal) */
    TRY_CAST(m_money_positions_long_all AS INT) AS managed_money_long,
    TRY_CAST(m_money_positions_short_all AS INT) AS managed_money_short,
    TRY_CAST(m_money_positions_spread_all AS INT) AS managed_money_spread,
    /* Other reportables */
    TRY_CAST(other_rept_positions_long_all AS INT) AS other_reportable_long,
    TRY_CAST(other_rept_positions_short_all AS INT) AS other_reportable_short,
    TRY_CAST(other_rept_positions_spread_all AS INT) AS other_reportable_spread,
    /* Non-reportable (small speculators, below reporting threshold) */
    TRY_CAST(nonrept_positions_long_all AS INT) AS nonreportable_long,
    TRY_CAST(nonrept_positions_short_all AS INT) AS nonreportable_short,
    /* Net positions (long minus short per category) */
    TRY_CAST(prod_merc_positions_long_all AS INT) - TRY_CAST(prod_merc_positions_short_all AS INT) AS prod_merc_net,
    TRY_CAST(m_money_positions_long_all AS INT) - TRY_CAST(m_money_positions_short_all AS INT) AS managed_money_net,
    TRY_CAST(swap_positions_long_all AS INT) - TRY_CAST("Swap__Positions_Short_All" AS INT) AS swap_net,
    TRY_CAST(other_rept_positions_long_all AS INT) - TRY_CAST(other_rept_positions_short_all AS INT) AS other_reportable_net,
    TRY_CAST(nonrept_positions_long_all AS INT) - TRY_CAST(nonrept_positions_short_all AS INT) AS nonreportable_net,
    /* Week-over-week changes */
    TRY_CAST(change_in_open_interest_all AS INT) AS change_open_interest,
    TRY_CAST(change_in_m_money_long_all AS INT) AS change_managed_money_long,
    TRY_CAST(change_in_m_money_short_all AS INT) AS change_managed_money_short,
    TRY_CAST(change_in_m_money_long_all AS INT) - TRY_CAST(change_in_m_money_short_all AS INT) AS change_managed_money_net,
    TRY_CAST(change_in_prod_merc_long_all AS INT) AS change_prod_merc_long,
    TRY_CAST(change_in_prod_merc_short_all AS INT) AS change_prod_merc_short,
    /* Concentration ratios (% of OI held by top 4 / top 8 traders) */
    TRY_CAST(conc_gross_le_4_tdr_long_all AS REAL) AS concentration_top4_long_pct,
    TRY_CAST(conc_gross_le_4_tdr_short_all AS REAL) AS concentration_top4_short_pct,
    TRY_CAST(conc_gross_le_8_tdr_long_all AS REAL) AS concentration_top8_long_pct,
    TRY_CAST(conc_gross_le_8_tdr_short_all AS REAL) AS concentration_top8_short_pct,
    /* Trader counts */
    TRY_CAST(traders_tot_all AS INT) AS traders_total,
    TRY_CAST(traders_m_money_long_all AS INT) AS traders_managed_money_long,
    TRY_CAST(traders_m_money_short_all AS INT) AS traders_managed_money_short,
    TRY_CAST(traders_m_money_spread_all AS INT) AS traders_managed_money_spread,
    /* Ingest date: derived from landing path year directory */
    /* Path: .../cot/{year}/{etag}.csv.gzip → extract year from [-2] */
    MAKE_DATE(STR_SPLIT(filename, '/')[-2]::INT, 1, 1) AS ingest_date,
    /* Dedup key: hash of business grain + key metrics; includes report variant */
    /* so fut-only and combined rows get distinct keys */
    HASH(
      cftc_commodity_code,
      "Report_Date_as_YYYY-MM-DD",
      cftc_contract_market_code,
      "FutOnly_or_Combined",
      open_interest_all,
      m_money_positions_long_all,
      m_money_positions_short_all,
      prod_merc_positions_long_all,
      prod_merc_positions_short_all
    ) AS hkey
  FROM src
  /* Reject rows with null commodity code or malformed date */
  WHERE
    NOT TRIM(cftc_commodity_code) IS NULL
    AND LENGTH(TRIM(cftc_commodity_code)) > 0
    AND NOT TRY_CAST("Report_Date_as_YYYY-MM-DD" AS DATE) IS NULL
), deduplicated AS (
  SELECT
    ANY_VALUE(market_and_exchange_name) AS market_and_exchange_name,
    ANY_VALUE(report_date) AS report_date,
    ANY_VALUE(cftc_commodity_code) AS cftc_commodity_code,
    ANY_VALUE(cftc_contract_market_code) AS cftc_contract_market_code,
    ANY_VALUE(contract_units) AS contract_units,
    ANY_VALUE(open_interest) AS open_interest,
    ANY_VALUE(prod_merc_long) AS prod_merc_long,
    ANY_VALUE(prod_merc_short) AS prod_merc_short,
    ANY_VALUE(prod_merc_net) AS prod_merc_net,
    ANY_VALUE(swap_long) AS swap_long,
    ANY_VALUE(swap_short) AS swap_short,
    ANY_VALUE(swap_spread) AS swap_spread,
    ANY_VALUE(swap_net) AS swap_net,
    ANY_VALUE(managed_money_long) AS managed_money_long,
    ANY_VALUE(managed_money_short) AS managed_money_short,
    ANY_VALUE(managed_money_spread) AS managed_money_spread,
    ANY_VALUE(managed_money_net) AS managed_money_net,
    ANY_VALUE(other_reportable_long) AS other_reportable_long,
    ANY_VALUE(other_reportable_short) AS other_reportable_short,
    ANY_VALUE(other_reportable_spread) AS other_reportable_spread,
    ANY_VALUE(other_reportable_net) AS other_reportable_net,
    ANY_VALUE(nonreportable_long) AS nonreportable_long,
    ANY_VALUE(nonreportable_short) AS nonreportable_short,
    ANY_VALUE(nonreportable_net) AS nonreportable_net,
    ANY_VALUE(change_open_interest) AS change_open_interest,
    ANY_VALUE(change_managed_money_long) AS change_managed_money_long,
    ANY_VALUE(change_managed_money_short) AS change_managed_money_short,
    ANY_VALUE(change_managed_money_net) AS change_managed_money_net,
    ANY_VALUE(change_prod_merc_long) AS change_prod_merc_long,
    ANY_VALUE(change_prod_merc_short) AS change_prod_merc_short,
    ANY_VALUE(concentration_top4_long_pct) AS concentration_top4_long_pct,
    ANY_VALUE(concentration_top4_short_pct) AS concentration_top4_short_pct,
    ANY_VALUE(concentration_top8_long_pct) AS concentration_top8_long_pct,
    ANY_VALUE(concentration_top8_short_pct) AS concentration_top8_short_pct,
    ANY_VALUE(traders_total) AS traders_total,
    ANY_VALUE(traders_managed_money_long) AS traders_managed_money_long,
    ANY_VALUE(traders_managed_money_short) AS traders_managed_money_short,
    ANY_VALUE(traders_managed_money_spread) AS traders_managed_money_spread,
    ANY_VALUE(ingest_date) AS ingest_date,
    ANY_VALUE(report_type) AS report_type,
    hkey
  FROM cast_and_clean
  GROUP BY
    hkey
)
SELECT
  *
FROM deduplicated
WHERE
  report_date BETWEEN @start_ds AND @end_ds
|
||||
@@ -0,0 +1,62 @@
|
||||
/* Foundation fact: ICE certified Coffee C (Arabica) aging report. */
/* Reads the landing-zone CSV files as text, casts each column to its proper type, */
/* and collapses duplicate rows that share the same hash key. */
/* Grain: one row per (report_date, age_bucket). */
/* Age buckets represent how long coffee has been in certified storage. */
/* Port columns are in bags (60kg). */
MODEL (
  name foundation.fct_ice_aging_stocks,
  kind INCREMENTAL_BY_TIME_RANGE (time_column report_date),
  grain (report_date, age_bucket),
  start '2020-01-01',
  cron '@daily'
);

WITH raw_files AS (
  /* Every column arrives as VARCHAR; the filename column records the source file of each row. */
  SELECT
    *
  FROM READ_CSV(
    @ice_aging_glob(),
    compression = 'gzip',
    header = TRUE,
    union_by_name = TRUE,
    filename = TRUE,
    all_varchar = TRUE
  )
), typed AS (
  SELECT
    TRY_CAST(report_date AS DATE) AS report_date,
    age_bucket,
    TRY_CAST(antwerp_bags AS BIGINT) AS antwerp_bags,
    TRY_CAST(hamburg_bremen_bags AS BIGINT) AS hamburg_bremen_bags,
    TRY_CAST(houston_bags AS BIGINT) AS houston_bags,
    TRY_CAST(miami_bags AS BIGINT) AS miami_bags,
    TRY_CAST(new_orleans_bags AS BIGINT) AS new_orleans_bags,
    TRY_CAST(new_york_bags AS BIGINT) AS new_york_bags,
    TRY_CAST(total_bags AS BIGINT) AS total_bags,
    filename AS source_file,
    /* Dedup key: report date + age bucket + total bags */
    HASH(report_date, age_bucket, total_bags) AS hkey
  FROM raw_files
  WHERE
    /* Drop rows without a parseable date or without an age bucket. */
    TRY_CAST(report_date AS DATE) IS NOT NULL
    AND age_bucket IS NOT NULL
    AND age_bucket <> ''
), unique_rows AS (
  SELECT
    ANY_VALUE(report_date) AS report_date,
    ANY_VALUE(age_bucket) AS age_bucket,
    ANY_VALUE(antwerp_bags) AS antwerp_bags,
    ANY_VALUE(hamburg_bremen_bags) AS hamburg_bremen_bags,
    ANY_VALUE(houston_bags) AS houston_bags,
    ANY_VALUE(miami_bags) AS miami_bags,
    ANY_VALUE(new_orleans_bags) AS new_orleans_bags,
    ANY_VALUE(new_york_bags) AS new_york_bags,
    ANY_VALUE(total_bags) AS total_bags,
    ANY_VALUE(source_file) AS source_file,
    hkey
  FROM typed
  GROUP BY
    hkey
)
SELECT
  report_date,
  age_bucket,
  antwerp_bags,
  hamburg_bremen_bags,
  houston_bags,
  miami_bags,
  new_orleans_bags,
  new_york_bags,
  total_bags,
  source_file,
  hkey
FROM unique_rows
WHERE
  report_date BETWEEN @start_ds AND @end_ds
|
||||
@@ -0,0 +1,51 @@
|
||||
/* Foundation fact: ICE certified Coffee C (Arabica) warehouse stocks. */
/* Reads the landing-zone CSV files as text, casts each column to its proper type, */
/* and collapses duplicate rows that share the same hash key. */
/* "Certified" means Coffee C graded and stamped as delivery-eligible */
/* against ICE futures contracts — a key physical supply indicator. */
/* Grain: one row per report_date. */
MODEL (
  name foundation.fct_ice_warehouse_stocks,
  kind INCREMENTAL_BY_TIME_RANGE (time_column report_date),
  grain (report_date),
  start '2000-01-01',
  cron '@daily'
);

WITH raw_files AS (
  /* Every column arrives as VARCHAR; the filename column records the source file of each row. */
  SELECT
    *
  FROM READ_CSV(
    @ice_stocks_glob(),
    compression = 'gzip',
    header = TRUE,
    union_by_name = TRUE,
    filename = TRUE,
    all_varchar = TRUE
  )
), typed AS (
  SELECT
    TRY_CAST(report_date AS DATE) AS report_date,
    TRY_CAST(total_certified_bags AS BIGINT) AS total_certified_bags,
    TRY_CAST(pending_grading_bags AS BIGINT) AS pending_grading_bags,
    filename AS source_file,
    /* Dedup key: report date + total bags */
    HASH(report_date, total_certified_bags) AS hkey
  FROM raw_files
  WHERE
    /* Drop rows whose date or certified total cannot be parsed. */
    TRY_CAST(report_date AS DATE) IS NOT NULL
    AND TRY_CAST(total_certified_bags AS BIGINT) IS NOT NULL
), unique_rows AS (
  SELECT
    ANY_VALUE(report_date) AS report_date,
    ANY_VALUE(total_certified_bags) AS total_certified_bags,
    ANY_VALUE(pending_grading_bags) AS pending_grading_bags,
    ANY_VALUE(source_file) AS source_file,
    hkey
  FROM typed
  GROUP BY
    hkey
)
SELECT
  report_date,
  total_certified_bags,
  pending_grading_bags,
  source_file,
  hkey
FROM unique_rows
WHERE
  report_date BETWEEN @start_ds AND @end_ds
|
||||
@@ -0,0 +1,65 @@
|
||||
/* Foundation fact: ICE historical end-of-month Coffee C certified warehouse stocks by port. */
/* Reads the landing-zone CSV files as text, casts each column to its proper type, */
/* and collapses duplicate rows that share the same hash key. */
/* Covers November 1996 to present (30-year history). */
/* Grain: one row per report_date (end-of-month). */
/* Port columns are in bags (60kg). */
MODEL (
  name foundation.fct_ice_warehouse_stocks_by_port,
  kind INCREMENTAL_BY_TIME_RANGE (time_column report_date),
  grain (report_date),
  start '1996-11-01',
  cron '@daily'
);

WITH raw_files AS (
  /* Every column arrives as VARCHAR; the filename column records the source file of each row. */
  SELECT
    *
  FROM READ_CSV(
    @ice_stocks_by_port_glob(),
    compression = 'gzip',
    header = TRUE,
    union_by_name = TRUE,
    filename = TRUE,
    all_varchar = TRUE
  )
), typed AS (
  SELECT
    TRY_CAST(report_date AS DATE) AS report_date,
    TRY_CAST(new_york_bags AS BIGINT) AS new_york_bags,
    TRY_CAST(new_orleans_bags AS BIGINT) AS new_orleans_bags,
    TRY_CAST(houston_bags AS BIGINT) AS houston_bags,
    TRY_CAST(miami_bags AS BIGINT) AS miami_bags,
    TRY_CAST(antwerp_bags AS BIGINT) AS antwerp_bags,
    TRY_CAST(hamburg_bremen_bags AS BIGINT) AS hamburg_bremen_bags,
    TRY_CAST(barcelona_bags AS BIGINT) AS barcelona_bags,
    TRY_CAST(virginia_bags AS BIGINT) AS virginia_bags,
    TRY_CAST(total_bags AS BIGINT) AS total_bags,
    filename AS source_file,
    /* Dedup key: report date + total bags */
    HASH(report_date, total_bags) AS hkey
  FROM raw_files
  WHERE
    /* Drop rows whose date or total cannot be parsed. */
    TRY_CAST(report_date AS DATE) IS NOT NULL
    AND TRY_CAST(total_bags AS BIGINT) IS NOT NULL
), unique_rows AS (
  SELECT
    ANY_VALUE(report_date) AS report_date,
    ANY_VALUE(new_york_bags) AS new_york_bags,
    ANY_VALUE(new_orleans_bags) AS new_orleans_bags,
    ANY_VALUE(houston_bags) AS houston_bags,
    ANY_VALUE(miami_bags) AS miami_bags,
    ANY_VALUE(antwerp_bags) AS antwerp_bags,
    ANY_VALUE(hamburg_bremen_bags) AS hamburg_bremen_bags,
    ANY_VALUE(barcelona_bags) AS barcelona_bags,
    ANY_VALUE(virginia_bags) AS virginia_bags,
    ANY_VALUE(total_bags) AS total_bags,
    ANY_VALUE(source_file) AS source_file,
    hkey
  FROM typed
  GROUP BY
    hkey
)
SELECT
  report_date,
  new_york_bags,
  new_orleans_bags,
  houston_bags,
  miami_bags,
  antwerp_bags,
  hamburg_bremen_bags,
  barcelona_bags,
  virginia_bags,
  total_bags,
  source_file,
  hkey
FROM unique_rows
WHERE
  report_date BETWEEN @start_ds AND @end_ds
|
||||
@@ -0,0 +1,125 @@
|
||||
/* Foundation fact: daily weather observations for 12 coffee-growing regions. */
/* Source: Open-Meteo (ERA5 reanalysis archive + forecast model for recent days) */
/* Landing: LANDING_DIR/weather/{location_id}/{year}/{date}.json.gz */
/*   One file per (location_id, date). Content: flat Open-Meteo JSON per day. */
/*   Open-Meteo returns parallel arrays; execute.py splits them into per-day files. */
/* Grain: (location_id, observation_date) — one row per location per day. */
/* Dedup key: hash(location_id, date) — past weather is immutable. */
/* location_id comes from the file path: split(filename, '/')[-3] */
/* Crop stress flags: */
/*   is_frost          — temp_min_c < 2.0°C (ICO Arabica frost damage threshold) */
/*   is_heat_stress    — temp_max_c > 35.0°C (photosynthesis impairment) */
/*   is_drought        — precipitation_mm < 1.0 (agronomic dry day) */
/*   is_high_vpd       — vpd_max_kpa > 1.5 (significant plant water stress) */
/*   in_growing_season — simplified month-range flag by variety */
MODEL (
  name foundation.fct_weather_daily,
  kind INCREMENTAL_BY_TIME_RANGE (time_column observation_date),
  grain (location_id, observation_date),
  start '2020-01-01',
  cron '@daily'
);

WITH raw_files AS (
  /* Open-Meteo files are flat JSON: all variables at top level (no nested structs). */
  /* read_json(format='auto') infers column types directly from the numeric values. */
  SELECT
    *
  FROM READ_JSON(@weather_glob(), format = 'auto', compression = 'gzip', filename = TRUE)
), located AS (
  SELECT
    raw_files.*,
    /* 3rd-from-last path segment, e.g. */
    /* .../weather/brazil_minas_gerais/2024/2024-01-15.json.gz → 'brazil_minas_gerais' */
    STR_SPLIT(filename, '/')[-3] AS location_id,
    TRY_CAST(raw_files."date" AS DATE) AS observation_date
  FROM raw_files
), typed AS (
  SELECT
    location_id,
    observation_date,
    /* Temperature (°C) */
    TRY_CAST(located.temperature_2m_min AS DOUBLE) AS temp_min_c,
    TRY_CAST(located.temperature_2m_max AS DOUBLE) AS temp_max_c,
    TRY_CAST(located.temperature_2m_mean AS DOUBLE) AS temp_mean_c,
    /* Precipitation (mm total for the day); a missing value is stored as 0.0 */
    COALESCE(TRY_CAST(located.precipitation_sum AS DOUBLE), 0.0) AS precipitation_mm,
    /* Humidity (% — daily max) */
    TRY_CAST(located.relative_humidity_2m_max AS DOUBLE) AS humidity_max_pct,
    /* Cloud cover (% — daily mean) */
    TRY_CAST(located.cloud_cover_mean AS DOUBLE) AS cloud_cover_mean_pct,
    /* Wind (m/s max — Open-Meteo requested with wind_speed_unit=ms) */
    TRY_CAST(located.wind_speed_10m_max AS DOUBLE) AS wind_max_speed_ms,
    /* ET₀ (mm/day — FAO Penman-Monteith; direct crop water demand signal) */
    TRY_CAST(located.et0_fao_evapotranspiration AS DOUBLE) AS et0_mm,
    /* VPD (kPa — max; >1.5 kPa = significant plant water stress) */
    TRY_CAST(located.vapour_pressure_deficit_max AS DOUBLE) AS vpd_max_kpa,
    /* Crop stress flags; a flag is NULL when its input is NULL, except is_drought, */
    /* which treats missing precipitation as 0.0 */
    TRY_CAST(located.temperature_2m_min AS DOUBLE) < 2.0 AS is_frost,
    TRY_CAST(located.temperature_2m_max AS DOUBLE) > 35.0 AS is_heat_stress,
    COALESCE(TRY_CAST(located.precipitation_sum AS DOUBLE), 0.0) < 1.0 AS is_drought,
    TRY_CAST(located.vapour_pressure_deficit_max AS DOUBLE) > 1.5 AS is_high_vpd,
    HASH(location_id, located."date") AS hkey,
    filename
  FROM located
  WHERE
    /* Drop rows without a parseable date or without a location. */
    observation_date IS NOT NULL
    AND location_id IS NOT NULL
    AND location_id <> ''
), unique_obs AS (
  SELECT
    ANY_VALUE(location_id) AS location_id,
    ANY_VALUE(observation_date) AS observation_date,
    ANY_VALUE(temp_min_c) AS temp_min_c,
    ANY_VALUE(temp_max_c) AS temp_max_c,
    ANY_VALUE(temp_mean_c) AS temp_mean_c,
    ANY_VALUE(precipitation_mm) AS precipitation_mm,
    ANY_VALUE(humidity_max_pct) AS humidity_max_pct,
    ANY_VALUE(cloud_cover_mean_pct) AS cloud_cover_mean_pct,
    ANY_VALUE(wind_max_speed_ms) AS wind_max_speed_ms,
    ANY_VALUE(et0_mm) AS et0_mm,
    ANY_VALUE(vpd_max_kpa) AS vpd_max_kpa,
    ANY_VALUE(is_frost) AS is_frost,
    ANY_VALUE(is_heat_stress) AS is_heat_stress,
    ANY_VALUE(is_drought) AS is_drought,
    ANY_VALUE(is_high_vpd) AS is_high_vpd,
    hkey
  FROM typed
  GROUP BY
    hkey
)
SELECT
  obs.observation_date,
  obs.location_id,
  loc.name AS location_name,
  loc.country,
  loc.lat,
  loc.lon,
  loc.variety,
  obs.temp_min_c,
  obs.temp_max_c,
  obs.temp_mean_c,
  obs.precipitation_mm,
  obs.humidity_max_pct,
  obs.cloud_cover_mean_pct,
  obs.wind_max_speed_ms,
  obs.et0_mm,
  obs.vpd_max_kpa,
  obs.is_frost,
  obs.is_heat_stress,
  obs.is_drought,
  obs.is_high_vpd,
  /* Growing season: simplified month-range flag by variety. */
  /* Arabica: Apr–Oct (covers northern + southern hemisphere risk windows). */
  /* Robusta: Apr–Nov (Vietnam/Indonesia main cycle). */
  /* Any other or unmatched variety yields FALSE. */
  CASE
    WHEN loc.variety = 'Arabica'
    THEN EXTRACT(MONTH FROM obs.observation_date) BETWEEN 4 AND 10
    WHEN loc.variety = 'Robusta'
    THEN EXTRACT(MONTH FROM obs.observation_date) BETWEEN 4 AND 11
    ELSE FALSE
  END AS in_growing_season
FROM unique_obs AS obs
LEFT JOIN seeds.weather_locations AS loc
  ON obs.location_id = loc.location_id
WHERE
  obs.observation_date BETWEEN @start_ds AND @end_ds
|
||||
@@ -0,0 +1,7 @@
|
||||
/* Seed model: loads seeds/psd_attribute_codes.csv (semicolon-delimited) */
/* into the table seeds.psd_attribute_codes. */
MODEL (
  name seeds.psd_attribute_codes,
  kind SEED (
    path '$root/seeds/psd_attribute_codes.csv',
    csv_settings (delimiter = ';')
  )
)
|
||||
@@ -0,0 +1,7 @@
|
||||
/* Seed model: loads seeds/psd_commodity_codes.csv (semicolon-delimited) */
/* into the table seeds.psd_commodity_codes. */
MODEL (
  name seeds.psd_commodity_codes,
  kind SEED (
    path '$root/seeds/psd_commodity_codes.csv',
    csv_settings (delimiter = ';')
  )
)
|
||||
@@ -0,0 +1,7 @@
|
||||
/* Seed model: loads seeds/psd_unit_of_measure_codes.csv (semicolon-delimited) */
/* into the table seeds.psd_unit_of_measure_codes. */
MODEL (
  name seeds.psd_unit_of_measure_codes,
  kind SEED (
    path '$root/seeds/psd_unit_of_measure_codes.csv',
    csv_settings (delimiter = ';')
  )
)
|
||||
@@ -0,0 +1,7 @@
|
||||
/* Seed model: loads seeds/weather_locations.csv (semicolon-delimited) */
/* into the table seeds.weather_locations. */
MODEL (
  name seeds.weather_locations,
  kind SEED (
    path '$root/seeds/weather_locations.csv',
    csv_settings (delimiter = ';')
  )
)
|
||||
63
transform/sqlmesh_beanflows/models/serving/coffee_prices.sql
Normal file
63
transform/sqlmesh_beanflows/models/serving/coffee_prices.sql
Normal file
@@ -0,0 +1,63 @@
|
||||
/* Serving mart: KC=F Coffee C futures prices, analytics-ready. */
/* Adds daily return, moving averages (20-day, 50-day SMA) and 52-week high/low range. */
/* Filtered to trading days only (NULL close rows excluded upstream). */
/* Grain: one row per trade_date. */
/* Incremental safety: the widest window needs 251 prior trading rows, so the base CTE */
/* reads a 400-calendar-day history buffer before @start_ds and the final SELECT trims */
/* the output back to [@start_ds, @end_ds]. Without the buffer a one-day incremental */
/* run sees a single row: daily_return_pct is NULL and the SMAs and 52-week range */
/* collapse to that row's own values. */
MODEL (
  name serving.coffee_prices,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column trade_date
  ),
  grain (
    trade_date
  ),
  start '1971-08-16',
  cron '@daily'
);

WITH base AS (
  SELECT
    f.trade_date,
    f.open,
    f.high,
    f.low,
    f.close,
    f.adj_close,
    f.volume,
    /* Daily return: (close - prev_close) / prev_close * 100; NULL when prev_close is 0 or missing */
    ROUND(
      (f.close - LAG(f.close, 1) OVER w_prev)
      / NULLIF(LAG(f.close, 1) OVER w_prev, 0) * 100,
      4
    ) AS daily_return_pct,
    /* 20-day simple moving average (1 trading month) */
    ROUND(AVG(f.close) OVER w20, 4) AS sma_20d,
    /* 50-day simple moving average (2.5 trading months) */
    ROUND(AVG(f.close) OVER w50, 4) AS sma_50d,
    /* 52-week high / low (approximately 252 trading days) */
    MAX(f.high) OVER w252 AS high_52w,
    MIN(f.low) OVER w252 AS low_52w
  FROM foundation.fct_coffee_prices AS f
  WHERE
    /* History buffer: 400 calendar days comfortably covers 252 trading rows */
    f.trade_date >= CAST(@start_ds AS DATE) - INTERVAL 400 DAY
    AND f.trade_date <= @end_ds
  WINDOW
    w_prev AS (ORDER BY f.trade_date),
    w20 AS (ORDER BY f.trade_date ROWS BETWEEN 19 PRECEDING AND CURRENT ROW),
    w50 AS (ORDER BY f.trade_date ROWS BETWEEN 49 PRECEDING AND CURRENT ROW),
    w252 AS (ORDER BY f.trade_date ROWS BETWEEN 251 PRECEDING AND CURRENT ROW)
)
SELECT
  b.trade_date,
  d.commodity_name,
  d.ticker,
  b.open,
  b.high,
  b.low,
  b.close,
  b.adj_close,
  b.volume,
  b.daily_return_pct,
  b.sma_20d,
  b.sma_50d,
  b.high_52w,
  b.low_52w
FROM base AS b
CROSS JOIN foundation.dim_commodity AS d
WHERE
  d.ticker = 'KC=F'
  /* Emit only the interval being processed; buffer rows exist solely to feed the windows */
  AND b.trade_date BETWEEN @start_ds AND @end_ds
ORDER BY
  b.trade_date
|
||||
@@ -0,0 +1,51 @@
|
||||
/* Serving mart: ICE certified Coffee C stock aging report, analytics-ready. */
/* Shows the age distribution of certified stocks across delivery ports. */
/* Age buckets represent how long coffee has been in certified storage. */
/* Older stock approaching certificate limits is a supply quality signal. */
/* Source: ICE Certified Stock Aging Report (monthly) */
/* Grain: one row per (report_date, age_bucket). */
MODEL (
  name serving.ice_aging_stocks,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (report_date, age_bucket),
  start '2020-01-01',
  cron '@daily'
);

SELECT
  f.report_date,
  d.commodity_name,
  d.ice_stock_report_code,
  f.age_bucket,
  /* age_bucket looks like "0000 to 0120"; split it into numeric bounds so rows sort by age */
  TRY_CAST(SPLIT_PART(f.age_bucket, ' to ', 1) AS INT) AS age_bucket_start_days,
  TRY_CAST(SPLIT_PART(f.age_bucket, ' to ', 2) AS INT) AS age_bucket_end_days,
  f.antwerp_bags,
  f.hamburg_bremen_bags,
  f.houston_bags,
  f.miami_bags,
  f.new_orleans_bags,
  f.new_york_bags,
  f.total_bags,
  f.source_file
FROM foundation.fct_ice_aging_stocks AS f
/* Attach the commodity dimension row(s) for Coffee C to every fact row */
CROSS JOIN foundation.dim_commodity AS d
WHERE
  f.report_date BETWEEN @start_ds AND @end_ds
  AND d.ice_stock_report_code = 'COFFEE-C'
ORDER BY
  f.report_date,
  TRY_CAST(SPLIT_PART(f.age_bucket, ' to ', 1) AS INT)
|
||||
@@ -0,0 +1,53 @@
|
||||
/* Serving mart: ICE certified Coffee C warehouse stocks, analytics-ready. */
/* Adds 30-row rolling average, change vs. the previous report, and drawdown from */
/* 52-week high. Physical supply indicator used alongside S/D and positioning. */
/* "Certified stocks" = coffee graded and stamped as eligible for delivery */
/* against ICE Coffee C futures — traders watch this as a squeeze indicator. */
/* Grain: one row per report_date. */
/* Incremental safety: the widest window spans 365 rows, so the base CTE reads a */
/* 550-calendar-day history buffer before @start_ds (enough for 365 rows whether the */
/* feed is calendar-daily or business-daily — NOTE(review): confirm cadence) and the */
/* final SELECT trims the output to [@start_ds, @end_ds]. Without the buffer a */
/* one-day run sees one row: the change is NULL and average/high equal the row itself. */
MODEL (
  name serving.ice_warehouse_stocks,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (
    report_date
  ),
  start '2000-01-01',
  cron '@daily'
);

WITH base AS (
  SELECT
    f.report_date,
    f.total_certified_bags,
    f.pending_grading_bags,
    /* Named "wow" for interface stability, but LAG(1) compares to the previous */
    /* reporting row (previous report day), not to 7 calendar days earlier. */
    f.total_certified_bags - LAG(f.total_certified_bags, 1) OVER w_prev AS wow_change_bags,
    /* 30-row rolling average (smooths daily noise) */
    ROUND(AVG(f.total_certified_bags::DOUBLE) OVER w30, 0) AS avg_30d_bags,
    /* 52-week high, using a 365-row window as a proxy for one year */
    MAX(f.total_certified_bags) OVER w365 AS high_52w_bags
  FROM foundation.fct_ice_warehouse_stocks AS f
  WHERE
    f.report_date >= CAST(@start_ds AS DATE) - INTERVAL 550 DAY
    AND f.report_date <= @end_ds
  WINDOW
    w_prev AS (ORDER BY f.report_date),
    w30 AS (ORDER BY f.report_date ROWS BETWEEN 29 PRECEDING AND CURRENT ROW),
    w365 AS (ORDER BY f.report_date ROWS BETWEEN 364 PRECEDING AND CURRENT ROW)
)
SELECT
  b.report_date,
  d.commodity_name,
  d.ice_stock_report_code,
  b.total_certified_bags,
  b.pending_grading_bags,
  b.wow_change_bags,
  b.avg_30d_bags,
  b.high_52w_bags,
  /* Drawdown from 52-week high (pct below peak — squeeze indicator); NULL when the high is 0 */
  ROUND(
    (b.total_certified_bags::DOUBLE - b.high_52w_bags::DOUBLE)
    / NULLIF(b.high_52w_bags::DOUBLE, 0) * 100,
    2
  ) AS drawdown_from_52w_high_pct
FROM base AS b
CROSS JOIN foundation.dim_commodity AS d
WHERE
  d.ice_stock_report_code = 'COFFEE-C'
  /* Emit only the interval being processed; buffer rows exist solely to feed the windows */
  AND b.report_date BETWEEN @start_ds AND @end_ds
ORDER BY
  b.report_date
|
||||
@@ -0,0 +1,64 @@
|
||||
/* Serving mart: ICE certified Coffee C warehouse stocks by port, analytics-ready. */
/* End-of-month certified stock levels broken down by delivery port. */
/* Covers November 1996 to present (~30 years). Useful for understanding */
/* geographic shifts in the certified supply base over time. */
/* Source: ICE historical by-port XLS (EOM_KC_cert_stox_by_port_nov96-present.xls) */
/* Grain: one row per report_date (end-of-month). */
/* Incremental safety: the 12-row average and the LAG need up to 11 prior monthly */
/* rows, so the base CTE reads a 400-calendar-day history buffer before @start_ds */
/* and the final SELECT trims the output to [@start_ds, @end_ds]. Without the */
/* buffer a short incremental run yields NULL month-over-month changes and an */
/* "average" of a single row. */
MODEL (
  name serving.ice_warehouse_stocks_by_port,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (
    report_date
  ),
  start '1996-11-01',
  cron '@daily'
);

WITH base AS (
  SELECT
    f.report_date,
    f.new_york_bags,
    f.new_orleans_bags,
    f.houston_bags,
    f.miami_bags,
    f.antwerp_bags,
    f.hamburg_bremen_bags,
    f.barcelona_bags,
    f.virginia_bags,
    f.total_bags,
    /* Month-over-month change in total certified bags (vs. previous row) */
    f.total_bags - LAG(f.total_bags, 1) OVER w_prev AS mom_change_bags,
    /* Month-over-month percent change; NULL when the previous total is 0 or missing */
    ROUND(
      (f.total_bags::DOUBLE - LAG(f.total_bags, 1) OVER w_prev::DOUBLE)
      / NULLIF(LAG(f.total_bags, 1) OVER w_prev::DOUBLE, 0) * 100,
      2
    ) AS mom_change_pct,
    /* 12-month rolling average (12 rows at end-of-month grain) */
    ROUND(AVG(f.total_bags::DOUBLE) OVER w12, 0) AS avg_12m_bags,
    f.source_file
  FROM foundation.fct_ice_warehouse_stocks_by_port AS f
  WHERE
    f.report_date >= CAST(@start_ds AS DATE) - INTERVAL 400 DAY
    AND f.report_date <= @end_ds
  WINDOW
    w_prev AS (ORDER BY f.report_date),
    w12 AS (ORDER BY f.report_date ROWS BETWEEN 11 PRECEDING AND CURRENT ROW)
)
SELECT
  b.report_date,
  d.commodity_name,
  d.ice_stock_report_code,
  b.new_york_bags,
  b.new_orleans_bags,
  b.houston_bags,
  b.miami_bags,
  b.antwerp_bags,
  b.hamburg_bremen_bags,
  b.barcelona_bags,
  b.virginia_bags,
  b.total_bags,
  b.mom_change_bags,
  b.mom_change_pct,
  b.avg_12m_bags,
  b.source_file
FROM base AS b
CROSS JOIN foundation.dim_commodity AS d
WHERE
  d.ice_stock_report_code = 'COFFEE-C'
  /* Emit only the interval being processed; buffer rows exist solely to feed the windows */
  AND b.report_date BETWEEN @start_ds AND @end_ds
ORDER BY
  b.report_date
|
||||
@@ -0,0 +1,126 @@
|
||||
/* Serving mart: supply/demand metrics from cleaned.psdalldata__commodity_pivoted, */
/* emitted per country plus a synthetic 'Global' row per commodity/market_year/ingest_date. */
/* NOTE(review): the query has no @start_ds/@end_ds predicate on ingest_date, so every run */
/* reads the full source; the LAG windows therefore see complete history. Confirm this is intended. */
/* NOTE(review): Production_YoY_pct compares against the previous row ordered by */
/* (market_year, ingest_date); if a market_year has several ingest_dates this is not */
/* strictly year-over-year — verify against the source grain. */
MODEL (
  name serving.commodity_metrics,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column ingest_date
  ),
  start '2006-08-01',
  cron '@daily'
);

WITH country_metrics AS (
  /* Country-level rows with derived metrics */
  SELECT
    commodity_code,
    commodity_name,
    country_code,
    country_name,
    market_year,
    ingest_date,
    Production,
    Imports,
    Exports,
    Total_Distribution,
    Ending_Stocks,
    (Production + Imports - Exports) AS Net_Supply,
    (Exports - Imports) AS Trade_Balance,
    (Production + Imports - Exports) - Total_Distribution AS Supply_Demand_Balance,
    /* NULLIF guards the division when Total_Distribution is 0 */
    (Ending_Stocks / NULLIF(Total_Distribution, 0)) * 100 AS Stock_to_Use_Ratio_pct,
    /* Percent change vs. the previous row; LAG defaults to 0, which NULLIF turns into NULL */
    (Production - LAG(Production, 1, 0) OVER prior_row)
      / NULLIF(LAG(Production, 1, 0) OVER prior_row, 0) * 100 AS Production_YoY_pct
  FROM cleaned.psdalldata__commodity_pivoted
  WINDOW prior_row AS (
    PARTITION BY commodity_code, country_code
    ORDER BY market_year, ingest_date
  )
), global_aggregates AS (
  /* Sum all countries into one row per commodity / market_year / ingest_date */
  SELECT
    commodity_code,
    commodity_name,
    NULL::TEXT AS country_code, /* NULL marks the global aggregate */
    'Global' AS country_name,
    market_year,
    ingest_date,
    SUM(Production) AS Production,
    SUM(Imports) AS Imports,
    SUM(Exports) AS Exports,
    SUM(Total_Distribution) AS Total_Distribution,
    SUM(Ending_Stocks) AS Ending_Stocks
  FROM cleaned.psdalldata__commodity_pivoted
  GROUP BY
    commodity_code,
    commodity_name,
    market_year,
    ingest_date
), global_metrics AS (
  /* Same derived metrics as country_metrics, applied to the global sums */
  SELECT
    commodity_code,
    commodity_name,
    country_code,
    country_name,
    market_year,
    ingest_date,
    Production,
    Imports,
    Exports,
    Total_Distribution,
    Ending_Stocks,
    (Production + Imports - Exports) AS Net_Supply,
    (Exports - Imports) AS Trade_Balance,
    (Production + Imports - Exports) - Total_Distribution AS Supply_Demand_Balance,
    (Ending_Stocks / NULLIF(Total_Distribution, 0)) * 100 AS Stock_to_Use_Ratio_pct,
    (Production - LAG(Production, 1, 0) OVER prior_row)
      / NULLIF(LAG(Production, 1, 0) OVER prior_row, 0) * 100 AS Production_YoY_pct
  FROM global_aggregates
  WINDOW prior_row AS (
    PARTITION BY commodity_code
    ORDER BY market_year, ingest_date
  )
), combined_data AS (
  /* Both inputs list their columns in the same order, so positional UNION ALL lines up */
  SELECT
    *
  FROM country_metrics
  UNION ALL
  SELECT
    *
  FROM global_metrics
)
SELECT
  commodity_code,
  commodity_name,
  country_code,
  country_name,
  market_year,
  ingest_date,
  Production,
  Imports,
  Exports,
  Total_Distribution,
  Ending_Stocks,
  Net_Supply,
  Trade_Balance,
  Supply_Demand_Balance,
  Stock_to_Use_Ratio_pct,
  Production_YoY_pct
FROM combined_data
ORDER BY
  commodity_name,
  country_name,
  market_year,
  ingest_date
|
||||
@@ -0,0 +1,148 @@
|
||||
/* Serving mart: COT positioning for Coffee C futures, analytics-ready. */
/* Joins foundation.fct_cot_positioning with foundation.dim_commodity so */
/* the coffee filter is driven by the dimension (not a hardcoded CFTC code). */
/* Adds derived analytics used by the dashboard and API: */
/*   - Normalized positioning (% of open interest) */
/*   - Long/short ratio */
/*   - Week-over-week momentum */
/*   - COT Index over 26-row and 52-row trailing windows (0=bearish, 100=bullish) */
/* Grain: one row per report_date for Coffee C futures. */
/* Latest revision per date: the row with the greatest ingest_date wins (CFTC corrections). */
/* Incremental safety: the 52-row window and the LAG need prior weekly reports, so */
/* latest_revision reads a 400-calendar-day history buffer before @start_ds and the */
/* final SELECT trims the output to [@start_ds, @end_ds]. Without the buffer a */
/* one-day run sees at most one report: managed_money_net_wow is NULL and both COT */
/* indexes fall into the degenerate max = min branch (50.0). */
MODEL (
  name serving.cot_positioning,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (
    report_date
  ),
  start '2006-06-13',
  cron '@daily'
);

WITH latest_revision AS (
  /* Pick the most recently ingested row when CFTC issues corrections */
  SELECT
    f.*
  FROM foundation.fct_cot_positioning AS f
  INNER JOIN foundation.dim_commodity AS d
    ON f.cftc_commodity_code = d.cftc_commodity_code
  WHERE
    d.commodity_name = 'Coffee, Green'
    AND f.report_type = 'FutOnly'
    /* History buffer: 400 calendar days covers the 51 preceding weekly reports */
    AND f.report_date >= CAST(@start_ds AS DATE) - INTERVAL 400 DAY
    AND f.report_date <= @end_ds
  QUALIFY
    ROW_NUMBER() OVER (
      PARTITION BY f.report_date, f.cftc_contract_market_code
      ORDER BY f.ingest_date DESC
    ) = 1
), with_derived AS (
  SELECT
    report_date,
    market_and_exchange_name,
    cftc_commodity_code,
    cftc_contract_market_code,
    contract_units,
    ingest_date,
    /* Absolute positions (contracts) */
    open_interest,
    managed_money_long,
    managed_money_short,
    managed_money_spread,
    managed_money_net,
    prod_merc_long,
    prod_merc_short,
    prod_merc_net,
    swap_long,
    swap_short,
    swap_spread,
    swap_net,
    other_reportable_long,
    other_reportable_short,
    other_reportable_spread,
    other_reportable_net,
    nonreportable_long,
    nonreportable_short,
    nonreportable_net,
    /* Normalized: managed money net as % of open interest. */
    /* Removes size effects and makes cross-period comparison meaningful. */
    ROUND(managed_money_net::REAL / NULLIF(open_interest, 0) * 100, 2) AS managed_money_net_pct_of_oi,
    /* Long/short ratio: >1 = more bulls than bears in managed money */
    ROUND(managed_money_long::REAL / NULLIF(managed_money_short, 0), 3) AS managed_money_long_short_ratio,
    /* Weekly changes */
    change_open_interest,
    change_managed_money_long,
    change_managed_money_short,
    change_managed_money_net,
    change_prod_merc_long,
    change_prod_merc_short,
    /* Week-over-week momentum in managed money net (vs. previous report row) */
    managed_money_net - LAG(managed_money_net, 1) OVER (ORDER BY report_date) AS managed_money_net_wow,
    /* Concentration */
    concentration_top4_long_pct,
    concentration_top4_short_pct,
    concentration_top8_long_pct,
    concentration_top8_short_pct,
    /* Trader counts */
    traders_total,
    traders_managed_money_long,
    traders_managed_money_short,
    traders_managed_money_spread,
    /* COT Index (26-row): where is current net vs. the trailing 26 reports? */
    /* 0 = most bearish extreme, 100 = most bullish extreme. */
    /* Returns 50.0 when the window has no range (max = min) to avoid dividing by zero. */
    CASE
      WHEN MAX(managed_money_net) OVER w26 = MIN(managed_money_net) OVER w26
      THEN 50.0
      ELSE ROUND(
        (
          managed_money_net - MIN(managed_money_net) OVER w26
        )::REAL / (
          MAX(managed_money_net) OVER w26 - MIN(managed_money_net) OVER w26
        ) * 100,
        1
      )
    END AS cot_index_26w,
    /* COT Index (52-row): longer-term positioning context */
    CASE
      WHEN MAX(managed_money_net) OVER w52 = MIN(managed_money_net) OVER w52
      THEN 50.0
      ELSE ROUND(
        (
          managed_money_net - MIN(managed_money_net) OVER w52
        )::REAL / (
          MAX(managed_money_net) OVER w52 - MIN(managed_money_net) OVER w52
        ) * 100,
        1
      )
    END AS cot_index_52w
  FROM latest_revision
  WINDOW
    w26 AS (ORDER BY report_date ROWS BETWEEN 25 PRECEDING AND CURRENT ROW),
    w52 AS (ORDER BY report_date ROWS BETWEEN 51 PRECEDING AND CURRENT ROW)
)
SELECT
  report_date,
  market_and_exchange_name,
  cftc_commodity_code,
  cftc_contract_market_code,
  contract_units,
  ingest_date,
  open_interest,
  managed_money_long,
  managed_money_short,
  managed_money_spread,
  managed_money_net,
  prod_merc_long,
  prod_merc_short,
  prod_merc_net,
  swap_long,
  swap_short,
  swap_spread,
  swap_net,
  other_reportable_long,
  other_reportable_short,
  other_reportable_spread,
  other_reportable_net,
  nonreportable_long,
  nonreportable_short,
  nonreportable_net,
  managed_money_net_pct_of_oi,
  managed_money_long_short_ratio,
  change_open_interest,
  change_managed_money_long,
  change_managed_money_short,
  change_managed_money_net,
  change_prod_merc_long,
  change_prod_merc_short,
  managed_money_net_wow,
  concentration_top4_long_pct,
  concentration_top4_short_pct,
  concentration_top8_long_pct,
  concentration_top8_short_pct,
  traders_total,
  traders_managed_money_long,
  traders_managed_money_short,
  traders_managed_money_spread,
  cot_index_26w,
  cot_index_52w
FROM with_derived
WHERE
  /* Emit only the interval being processed; buffer rows exist solely to feed the windows */
  report_date BETWEEN @start_ds AND @end_ds
ORDER BY
  report_date
|
||||
@@ -0,0 +1,148 @@
|
||||
/* Serving mart: COT positioning (combined futures+options) for Coffee C futures. */
/* Same analytics as serving.cot_positioning, but filtered to the combined */
/* report variant (report_type = 'Combined'). Positions include */
/* options delta-equivalent exposure, showing total directional market bet. */
/* Grain: one row per report_date for Coffee C futures. */
/* Latest revision per date: the row with the greatest ingest_date wins (CFTC corrections). */
/* Incremental safety: the 52-row window and the LAG need prior weekly reports, so */
/* latest_revision reads a 400-calendar-day history buffer before @start_ds and the */
/* final SELECT trims the output to [@start_ds, @end_ds]. Without the buffer a */
/* one-day run sees at most one report: managed_money_net_wow is NULL and both COT */
/* indexes fall into the degenerate max = min branch (50.0). */
MODEL (
  name serving.cot_positioning_combined,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (
    report_date
  ),
  start '2006-06-13',
  cron '@daily'
);

WITH latest_revision AS (
  /* Pick the most recently ingested row when CFTC issues corrections */
  SELECT
    f.*
  FROM foundation.fct_cot_positioning AS f
  INNER JOIN foundation.dim_commodity AS d
    ON f.cftc_commodity_code = d.cftc_commodity_code
  WHERE
    d.commodity_name = 'Coffee, Green'
    AND f.report_type = 'Combined'
    /* History buffer: 400 calendar days covers the 51 preceding weekly reports */
    AND f.report_date >= CAST(@start_ds AS DATE) - INTERVAL 400 DAY
    AND f.report_date <= @end_ds
  QUALIFY
    ROW_NUMBER() OVER (
      PARTITION BY f.report_date, f.cftc_contract_market_code
      ORDER BY f.ingest_date DESC
    ) = 1
), with_derived AS (
  SELECT
    report_date,
    market_and_exchange_name,
    cftc_commodity_code,
    cftc_contract_market_code,
    contract_units,
    ingest_date,
    /* Absolute positions (contracts, delta-equivalent for options) */
    open_interest,
    managed_money_long,
    managed_money_short,
    managed_money_spread,
    managed_money_net,
    prod_merc_long,
    prod_merc_short,
    prod_merc_net,
    swap_long,
    swap_short,
    swap_spread,
    swap_net,
    other_reportable_long,
    other_reportable_short,
    other_reportable_spread,
    other_reportable_net,
    nonreportable_long,
    nonreportable_short,
    nonreportable_net,
    /* Normalized: managed money net as % of open interest. */
    /* Removes size effects and makes cross-period comparison meaningful. */
    ROUND(managed_money_net::REAL / NULLIF(open_interest, 0) * 100, 2) AS managed_money_net_pct_of_oi,
    /* Long/short ratio: >1 = more bulls than bears in managed money */
    ROUND(managed_money_long::REAL / NULLIF(managed_money_short, 0), 3) AS managed_money_long_short_ratio,
    /* Weekly changes */
    change_open_interest,
    change_managed_money_long,
    change_managed_money_short,
    change_managed_money_net,
    change_prod_merc_long,
    change_prod_merc_short,
    /* Week-over-week momentum in managed money net (vs. previous report row) */
    managed_money_net - LAG(managed_money_net, 1) OVER (ORDER BY report_date) AS managed_money_net_wow,
    /* Concentration */
    concentration_top4_long_pct,
    concentration_top4_short_pct,
    concentration_top8_long_pct,
    concentration_top8_short_pct,
    /* Trader counts */
    traders_total,
    traders_managed_money_long,
    traders_managed_money_short,
    traders_managed_money_spread,
    /* COT Index (26-row): where is current net vs. the trailing 26 reports? */
    /* 0 = most bearish extreme, 100 = most bullish extreme. */
    /* Includes options delta-equivalent exposure. */
    /* Returns 50.0 when the window has no range (max = min) to avoid dividing by zero. */
    CASE
      WHEN MAX(managed_money_net) OVER w26 = MIN(managed_money_net) OVER w26
      THEN 50.0
      ELSE ROUND(
        (
          managed_money_net - MIN(managed_money_net) OVER w26
        )::REAL / (
          MAX(managed_money_net) OVER w26 - MIN(managed_money_net) OVER w26
        ) * 100,
        1
      )
    END AS cot_index_26w,
    /* COT Index (52-row): longer-term positioning context */
    CASE
      WHEN MAX(managed_money_net) OVER w52 = MIN(managed_money_net) OVER w52
      THEN 50.0
      ELSE ROUND(
        (
          managed_money_net - MIN(managed_money_net) OVER w52
        )::REAL / (
          MAX(managed_money_net) OVER w52 - MIN(managed_money_net) OVER w52
        ) * 100,
        1
      )
    END AS cot_index_52w
  FROM latest_revision
  WINDOW
    w26 AS (ORDER BY report_date ROWS BETWEEN 25 PRECEDING AND CURRENT ROW),
    w52 AS (ORDER BY report_date ROWS BETWEEN 51 PRECEDING AND CURRENT ROW)
)
SELECT
  report_date,
  market_and_exchange_name,
  cftc_commodity_code,
  cftc_contract_market_code,
  contract_units,
  ingest_date,
  open_interest,
  managed_money_long,
  managed_money_short,
  managed_money_spread,
  managed_money_net,
  prod_merc_long,
  prod_merc_short,
  prod_merc_net,
  swap_long,
  swap_short,
  swap_spread,
  swap_net,
  other_reportable_long,
  other_reportable_short,
  other_reportable_spread,
  other_reportable_net,
  nonreportable_long,
  nonreportable_short,
  nonreportable_net,
  managed_money_net_pct_of_oi,
  managed_money_long_short_ratio,
  change_open_interest,
  change_managed_money_long,
  change_managed_money_short,
  change_managed_money_net,
  change_prod_merc_long,
  change_prod_merc_short,
  managed_money_net_wow,
  concentration_top4_long_pct,
  concentration_top4_short_pct,
  concentration_top8_long_pct,
  concentration_top8_short_pct,
  traders_total,
  traders_managed_money_long,
  traders_managed_money_short,
  traders_managed_money_spread,
  cot_index_26w,
  cot_index_52w
FROM with_derived
WHERE
  /* Emit only the interval being processed; buffer rows exist solely to feed the windows */
  report_date BETWEEN @start_ds AND @end_ds
ORDER BY
  report_date
|
||||
187
transform/sqlmesh_beanflows/models/serving/weather_daily.sql
Normal file
187
transform/sqlmesh_beanflows/models/serving/weather_daily.sql
Normal file
@@ -0,0 +1,187 @@
|
||||
/* Serving mart: daily weather analytics for 12 coffee-growing regions. */
/* Source: foundation.fct_weather_daily (already has seed join for location metadata). */
/* Adds rolling aggregates, water balance, gaps-and-islands streak counters, */
/* and a composite crop stress index (0–100) as a single severity gauge. */
/* Grain: (location_id, observation_date) */
/* Lookback 90: each run reprocesses the trailing 90 intervals. */
/* History buffer: the base CTE additionally reads 90 days BEFORE @start_ds and the */
/* final SELECT trims the output to [@start_ds, @end_ds]. Without it, rows at the */
/* old edge of the reprocessed range are recomputed from truncated windows (a 30-day */
/* sum over 1 row, streaks restarting at 1) and overwrite previously correct values. */
/* Streak counters are therefore exact up to roughly 90 days. */
MODEL (
  name serving.weather_daily,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column observation_date,
    lookback 90
  ),
  grain (location_id, observation_date),
  start '2020-01-01',
  cron '@daily'
);

WITH base AS (
  SELECT
    observation_date,
    location_id,
    location_name,
    country,
    lat,
    lon,
    variety,
    temp_min_c,
    temp_max_c,
    temp_mean_c,
    precipitation_mm,
    humidity_max_pct,
    cloud_cover_mean_pct,
    wind_max_speed_ms,
    et0_mm,
    vpd_max_kpa,
    is_frost,
    is_heat_stress,
    is_drought,
    is_high_vpd,
    in_growing_season,
    /* Rolling precipitation — w7 = trailing 7 rows, w30 = trailing 30 rows per location */
    SUM(precipitation_mm) OVER w7 AS precip_sum_7d_mm,
    SUM(precipitation_mm) OVER w30 AS precip_sum_30d_mm,
    /* Rolling temperature baseline */
    AVG(temp_mean_c) OVER w30 AS temp_mean_30d_c,
    /* Temperature anomaly: today vs trailing 30-row mean */
    temp_mean_c - AVG(temp_mean_c) OVER w30 AS temp_anomaly_c,
    /* Water balance: net daily water gain/loss (precipitation minus evapotranspiration) */
    precipitation_mm - et0_mm AS water_balance_mm,
    SUM(precipitation_mm - et0_mm) OVER w7 AS water_balance_7d_mm,
    /* Gaps-and-islands group markers for streak counting. */
    /* ROW_NUMBER() - running_count_of_true is constant across a consecutive run of TRUE. */
    /* Caveat: the non-TRUE row immediately before a run gets the SAME value as that run, */
    /* so with_streaks must also partition by the flag itself to count only TRUE rows. */
    ROW_NUMBER() OVER w_seq
      - SUM(CASE WHEN is_drought THEN 1 ELSE 0 END) OVER w_running AS _drought_group,
    ROW_NUMBER() OVER w_seq
      - SUM(CASE WHEN is_heat_stress THEN 1 ELSE 0 END) OVER w_running AS _heat_group,
    ROW_NUMBER() OVER w_seq
      - SUM(CASE WHEN is_high_vpd THEN 1 ELSE 0 END) OVER w_running AS _vpd_group
  FROM foundation.fct_weather_daily
  WHERE
    /* Read 90 extra days of history so windows and streaks at @start_ds are complete */
    observation_date >= CAST(@start_ds AS DATE) - INTERVAL 90 DAY
    AND observation_date <= @end_ds
  WINDOW
    w7 AS (
      PARTITION BY location_id
      ORDER BY observation_date
      ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ),
    w30 AS (
      PARTITION BY location_id
      ORDER BY observation_date
      ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
    ),
    w_seq AS (
      PARTITION BY location_id
      ORDER BY observation_date
    ),
    w_running AS (
      PARTITION BY location_id
      ORDER BY observation_date
      ROWS UNBOUNDED PRECEDING
    )
), with_streaks AS (
  SELECT
    base.*,
    /* Drought streak: number of consecutive drought days ending on observation_date. */
    /* 0 when the flag is FALSE or NULL. The flag is part of the partition so the */
    /* non-TRUE row that shares the run's group id is not counted (the previous */
    /* version numbered the first day of such a run 2 instead of 1). */
    CASE
      WHEN is_drought
      THEN ROW_NUMBER() OVER (
        PARTITION BY location_id, _drought_group, is_drought
        ORDER BY observation_date
      )
      ELSE 0
    END AS drought_streak_days,
    /* Heat stress streak: consecutive is_heat_stress days */
    /* (original note: temp_max > 35°C — flag is defined upstream, threshold not visible here) */
    CASE
      WHEN is_heat_stress
      THEN ROW_NUMBER() OVER (
        PARTITION BY location_id, _heat_group, is_heat_stress
        ORDER BY observation_date
      )
      ELSE 0
    END AS heat_streak_days,
    /* VPD stress streak: consecutive is_high_vpd days */
    /* (original note: vpd_max > 1.5 kPa — flag is defined upstream, threshold not visible here) */
    CASE
      WHEN is_high_vpd
      THEN ROW_NUMBER() OVER (
        PARTITION BY location_id, _vpd_group, is_high_vpd
        ORDER BY observation_date
      )
      ELSE 0
    END AS vpd_streak_days
  FROM base
)
SELECT
  observation_date,
  location_id,
  location_name,
  country,
  lat,
  lon,
  variety,
  temp_min_c,
  temp_max_c,
  temp_mean_c,
  precipitation_mm,
  humidity_max_pct,
  cloud_cover_mean_pct,
  wind_max_speed_ms,
  et0_mm,
  vpd_max_kpa,
  is_frost,
  is_heat_stress,
  is_drought,
  is_high_vpd,
  in_growing_season,
  ROUND(precip_sum_7d_mm, 2) AS precip_sum_7d_mm,
  ROUND(precip_sum_30d_mm, 2) AS precip_sum_30d_mm,
  ROUND(temp_mean_30d_c, 2) AS temp_mean_30d_c,
  ROUND(temp_anomaly_c, 2) AS temp_anomaly_c,
  ROUND(water_balance_mm, 2) AS water_balance_mm,
  ROUND(water_balance_7d_mm, 2) AS water_balance_7d_mm,
  drought_streak_days,
  heat_streak_days,
  vpd_streak_days,
  /* Composite crop stress index (0–100).
     Weights: drought streak 30%, water deficit 25%, heat streak 20%,
     VPD streak 15%, frost (binary) 10%.
     Each component is normalized to [0,1] then capped before weighting:
       drought: 14 days = fully stressed
       water:   20mm 7d deficit = fully stressed
       heat:    7 days = fully stressed
       vpd:     7 days = fully stressed
       frost:   binary (Arabica highland catastrophic event) */
  ROUND(
    GREATEST(0.0, LEAST(100.0,
      LEAST(1.0, drought_streak_days / 14.0) * 30.0
      + LEAST(1.0, GREATEST(0.0, -water_balance_7d_mm) / 20.0) * 25.0
      + LEAST(1.0, heat_streak_days / 7.0) * 20.0
      + LEAST(1.0, vpd_streak_days / 7.0) * 15.0
      + CASE WHEN is_frost THEN 10.0 ELSE 0.0 END
    )),
    1
  ) AS crop_stress_index
FROM with_streaks
WHERE
  /* Emit only the interval being processed; buffer rows exist solely to feed the windows */
  observation_date BETWEEN @start_ds AND @end_ds
ORDER BY
  location_id,
  observation_date
|
||||
@@ -0,0 +1,95 @@
|
||||
MODEL (
|
||||
name staging.psdalldata__commodity,
|
||||
kind INCREMENTAL_BY_TIME_RANGE (
|
||||
time_column ingest_date
|
||||
),
|
||||
start '2006-08-01',
|
||||
cron '@daily'
|
||||
);
|
||||
|
||||
/* Staging model for the PSD landing files.
   Step 1 (cast_dtypes): read the gzip CSVs matched by @psd_glob() with every
   column as VARCHAR, cast to typed columns and join the three seed lookup
   tables on their integer codes.
   Step 2 (metadata_and_deduplication): collapse rows that are identical on all
   business columns into one row keyed by hkey and derive ingest_date from the
   file path.
   Step 3 (final SELECT): keep only rows whose ingest_date falls inside the
   interval currently being processed (@start_ds .. @end_ds). */
WITH cast_dtypes AS (
|
||||
SELECT
|
||||
src.commodity_code::INT AS commodity_code,
|
||||
COALESCE(commodity_name, commodity_description) AS commodity_name,
|
||||
country_code::TEXT AS country_code,
|
||||
country_name,
|
||||
market_year::INT AS market_year,
|
||||
calendar_year::INT AS calendar_year,
|
||||
month::INT AS month,
|
||||
src.attribute_id::INT AS attribute_id,
|
||||
COALESCE(attribute_name, attribute_description) AS attribute_name,
|
||||
src.unit_id::INT AS unit_id,
|
||||
COALESCE(unit_name, unit_description) AS unit_name,
|
||||
value::REAL AS value,
|
||||
filename
|
||||
/* all_varchar = TRUE disables type sniffing, which is why every column is cast
   explicitly above; filename = TRUE adds the source path column that
   ingest_date is parsed from in the next CTE. */
FROM READ_CSV(
|
||||
@psd_glob(),
|
||||
compression = 'gzip',
|
||||
header = TRUE,
|
||||
union_by_name = TRUE,
|
||||
filename = TRUE,
|
||||
all_varchar = TRUE,
|
||||
max_line_size = 10000000
|
||||
) AS src
|
||||
/* LEFT JOINs keep source rows whose code has no match in the seed tables. */
LEFT JOIN seeds.psd_commodity_codes
|
||||
ON seeds.psd_commodity_codes.commodity_code = src.commodity_code::INT
|
||||
LEFT JOIN seeds.psd_unit_of_measure_codes
|
||||
ON seeds.psd_unit_of_measure_codes.unit_id = src.unit_id::INT
|
||||
LEFT JOIN seeds.psd_attribute_codes
|
||||
ON seeds.psd_attribute_codes.attribute_id = src.attribute_id::INT
|
||||
), metadata_and_deduplication AS (
|
||||
/* One output row per distinct hkey. Every grouped column is also an input to
   the hash, so ANY_VALUE on those columns just picks the group's shared value. */
SELECT
|
||||
ANY_VALUE(commodity_code) AS commodity_code,
|
||||
ANY_VALUE(commodity_name) AS commodity_name,
|
||||
ANY_VALUE(country_code) AS country_code,
|
||||
ANY_VALUE(country_name) AS country_name,
|
||||
ANY_VALUE(market_year) AS market_year,
|
||||
ANY_VALUE(calendar_year) AS calendar_year,
|
||||
ANY_VALUE(month) AS month,
|
||||
ANY_VALUE(attribute_id) AS attribute_id,
|
||||
ANY_VALUE(attribute_name) AS attribute_name,
|
||||
ANY_VALUE(unit_id) AS unit_id,
|
||||
ANY_VALUE(unit_name) AS unit_name,
|
||||
ANY_VALUE(value) AS value,
|
||||
/* Row hash over all business columns; filename is deliberately not part of it,
   so the same record appearing in several files collapses into one row. */
HASH(
|
||||
commodity_code,
|
||||
commodity_name,
|
||||
country_code,
|
||||
country_name,
|
||||
market_year,
|
||||
calendar_year,
|
||||
month,
|
||||
attribute_id,
|
||||
attribute_name,
|
||||
unit_id,
|
||||
unit_name,
|
||||
value
|
||||
) AS hkey,
|
||||
/* ingest_date = first day of the month encoded in the file path: the third-
   and second-to-last '/'-separated segments are read as year and month.
   NOTE(review): filename is not in the hash, so when identical rows come from
   several files ANY_VALUE picks one of their ingest dates arbitrarily - confirm
   this is intended (MIN would make it deterministic). */
ANY_VALUE(
|
||||
MAKE_DATE(STR_SPLIT(filename, '/')[-3]::INT, STR_SPLIT(filename, '/')[-2]::INT, 1)
|
||||
) AS ingest_date,
|
||||
/* Month-end date built from market_year and month; NULL when month = 0.
   NOTE(review): this column is not selected by the final query below. */
ANY_VALUE(
|
||||
CASE WHEN month <> 0 THEN LAST_DAY(MAKE_DATE(market_year, month, 1)) ELSE NULL END
|
||||
) AS market_date_month_end
|
||||
FROM cast_dtypes
|
||||
GROUP BY
|
||||
hkey
|
||||
)
|
||||
SELECT
|
||||
hkey,
|
||||
commodity_code,
|
||||
commodity_name,
|
||||
country_code,
|
||||
country_name,
|
||||
market_year,
|
||||
calendar_year,
|
||||
month,
|
||||
attribute_id,
|
||||
attribute_name,
|
||||
unit_id,
|
||||
unit_name,
|
||||
value,
|
||||
ingest_date
|
||||
FROM metadata_and_deduplication
|
||||
/* Restrict to the interval being processed; ingest_date is the model's time_column. */
WHERE
|
||||
ingest_date BETWEEN @start_ds AND @end_ds
|
||||
19
transform/sqlmesh_beanflows/pyproject.toml
Normal file
19
transform/sqlmesh_beanflows/pyproject.toml
Normal file
@@ -0,0 +1,19 @@
|
||||
[project]
|
||||
name = "sqlmesh_beanflows"
|
||||
version = "0.1.0"
|
||||
description = "Beanflows data transformation pipeline using SQLMesh and DuckDB"
|
||||
authors = [
|
||||
{ name = "Deeman", email = "hendriknote@gmail.com" }
|
||||
]
|
||||
requires-python = ">=3.13"
|
||||
dependencies = [
|
||||
"sqlmesh[duckdb,lsp]>=0.200.0",
|
||||
]
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["sqlmesh_beanflows"]
|
||||
82
transform/sqlmesh_beanflows/readme.md
Normal file
82
transform/sqlmesh_beanflows/readme.md
Normal file
@@ -0,0 +1,82 @@
|
||||
# Beanflows SQLMesh Transform Layer
|
||||
|
||||
Data transformation pipeline using SQLMesh and DuckDB, implementing a 3-layer architecture.
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# From repo root
|
||||
|
||||
# Plan changes (dev environment)
|
||||
uv run sqlmesh -p transform/sqlmesh_beanflows plan
|
||||
|
||||
# Apply to production
|
||||
uv run sqlmesh -p transform/sqlmesh_beanflows plan prod
|
||||
|
||||
# Run model tests
|
||||
uv run sqlmesh -p transform/sqlmesh_beanflows test
|
||||
|
||||
# Format SQL
|
||||
uv run sqlmesh -p transform/sqlmesh_beanflows format
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
### 3-Layer Data Model
|
||||
|
||||
```
|
||||
landing/ ← immutable files (extraction output)
|
||||
├── psd/{year}/{month}/ ← USDA PSD
|
||||
├── cot/{year}/ ← CFTC COT
|
||||
├── prices/coffee_kc/ ← KC=F daily prices
|
||||
├── ice_stocks/ ← ICE daily warehouse stocks
|
||||
├── ice_aging/ ← ICE monthly aging report
|
||||
└── ice_stocks_by_port/ ← ICE historical EOM by port
|
||||
|
||||
staging/ ← read_csv + seed joins + cast (PSD)
|
||||
└── staging.psdalldata__commodity
|
||||
|
||||
seeds/ ← static lookup CSVs (PSD code mappings)
|
||||
├── seeds.psd_commodity_codes
|
||||
├── seeds.psd_attribute_codes
|
||||
└── seeds.psd_unit_of_measure_codes
|
||||
|
||||
foundation/ ← read_csv + cast + dedup (prices, COT, ICE)
|
||||
├── foundation.fct_coffee_prices
|
||||
├── foundation.fct_cot_positioning
|
||||
├── foundation.fct_ice_warehouse_stocks
|
||||
├── foundation.fct_ice_aging_stocks
|
||||
├── foundation.fct_ice_warehouse_stocks_by_port
|
||||
└── foundation.dim_commodity
|
||||
|
||||
serving/ ← pre-aggregated for web app
|
||||
├── serving.coffee_prices
|
||||
├── serving.cot_positioning
|
||||
├── serving.ice_warehouse_stocks
|
||||
├── serving.ice_aging_stocks
|
||||
├── serving.ice_warehouse_stocks_by_port
|
||||
└── serving.commodity_metrics
|
||||
```
|
||||
|
||||
### Layer responsibilities
|
||||
|
||||
**staging/** — PSD only: reads landing CSVs directly via `@psd_glob()`, joins seed lookup tables, casts types, deduplicates. Uses INCREMENTAL_BY_TIME_RANGE (ingest_date derived from filename path).
|
||||
|
||||
**seeds/** — Static lookup tables (commodity codes, attribute codes, unit of measure) loaded from `seeds/*.csv`. Referenced by staging.
|
||||
|
||||
**foundation/** — All other sources (prices, COT, ICE): reads landing data (e.g. CSVs) directly via glob macros, casts types, deduplicates. Uses INCREMENTAL_BY_TIME_RANGE. Also holds `dim_commodity` (the cross-source identity mapping).
|
||||
|
||||
**serving/** — Analytics-ready aggregates consumed by the web app via `analytics.duckdb`. Pre-computes moving averages, COT indices, MoM changes. These are the only tables the web app reads.
|
||||
|
||||
### Why no raw layer?
|
||||
|
||||
Landing files are immutable and content-addressed — the landing directory is the audit trail. A SQL raw layer would just duplicate file bytes into DuckDB with no added value. The first SQL layer reads directly from landing.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `LANDING_DIR` | `data/landing` | Root of the landing zone |
|
||||
| `DUCKDB_PATH` | `local.duckdb` | DuckDB file (SQLMesh exclusive write access) |
|
||||
|
||||
The web app reads from a separate `analytics.duckdb` via `export_serving.py`.
|
||||
0
transform/sqlmesh_beanflows/seeds/.gitkeep
Normal file
0
transform/sqlmesh_beanflows/seeds/.gitkeep
Normal file
@@ -0,0 +1,57 @@
|
||||
commodity_name,exchange_code,exchange
|
||||
Crude Oil WTI,CL,CME
|
||||
Crude Oil Brent,BZ,ICE
|
||||
Gasoline RBOB,RB,CME
|
||||
Heating Oil,HO,CME
|
||||
Natural Gas,NG,CME
|
||||
Ethanol,CU,CME
|
||||
Cocoa,CC,ICE
|
||||
Cotton,CT,ICE
|
||||
Orange Juice,FCOJ-A,ICE
|
||||
Coffee,KC,ICE
|
||||
Lumber,LBR,ICE
|
||||
Sugar,SB,ICE
|
||||
European Gas TTF,TTF,ICE
|
||||
European Union Emissions Allowance,ECF,ICE
|
||||
Gold,GC,CME
|
||||
Silver,SI,CME
|
||||
Platinum,PL,CME
|
||||
Copper,HG,CME
|
||||
Palladium,PA,CME
|
||||
Live Cattle,LE,CME
|
||||
Feeder Cattle,GF,CME
|
||||
Lean Hogs,HE,CME
|
||||
Corn,ZC,CME
|
||||
Soybean Oil,ZL,CME
|
||||
Soybean meal,ZM,CME
|
||||
Oats,ZO,CME
|
||||
Rough Rice,ZR,CME
|
||||
Soybeans,ZS,CME
|
||||
Wheat,ZW,CME
|
||||
Canola,RS,ICE
|
||||
Rebar,RB,SHFE
|
||||
Hot-Rolled Coil,HC,SHFE
|
||||
Nickel,NI,SHFE
|
||||
Tin,SN,SHFE
|
||||
Aluminum,AL,SHFE
|
||||
Zinc,ZN,SHFE
|
||||
Natural Rubber,RU,SHFE
|
||||
Bitumen,BU,SHFE
|
||||
Iron Ore,I,DCE
|
||||
Palm Oil,P,DCE
|
||||
Eggs,JD,DCE
|
||||
Coking Coal,JM,DCE
|
||||
Polyvinyl Chloride (PVC),V,DCE
|
||||
White Sugar,SR,ZCE
|
||||
Cotton,CF,ZCE
|
||||
Apple,AP,ZCE
|
||||
PTA,TA,ZCE
|
||||
Methanol,MA,ZCE
|
||||
LME Aluminum,AH,LME
|
||||
LME Copper,CA,LME
|
||||
LME Lead,PB,LME
|
||||
LME Nickel,NI,LME
|
||||
LME Tin,SN,LME
|
||||
LME Zinc,ZS,LME
|
||||
Iron Ore,TIO,SGX
|
||||
Rubber,TSR,SGX
|
||||
|
2
transform/sqlmesh_beanflows/seeds/dim_commodity.csv
Normal file
2
transform/sqlmesh_beanflows/seeds/dim_commodity.csv
Normal file
@@ -0,0 +1,2 @@
|
||||
usda_commodity_code;cftc_commodity_code;commodity_name;commodity_group
|
||||
"0711100";"083";"Coffee, Green";"Softs"
|
||||
|
133
transform/sqlmesh_beanflows/seeds/psd_attribute_codes.csv
Normal file
133
transform/sqlmesh_beanflows/seeds/psd_attribute_codes.csv
Normal file
@@ -0,0 +1,133 @@
|
||||
attribute_id;attribute_name
|
||||
1;Area Planted
|
||||
4;Area Harvested
|
||||
5;Catch For Reduction
|
||||
6;Cows In Milk
|
||||
7;Crush
|
||||
10;Total Grape Crush
|
||||
13;Deliv. To Processors
|
||||
16;Total Trees
|
||||
17;Bearing Trees
|
||||
19;Non-Bearing Trees
|
||||
20;Beginning Stocks
|
||||
22;Sow Beginning Stocks
|
||||
23;Dairy Cows Beg. Stocks
|
||||
24;Begin Stock (Ctrl App)
|
||||
25;Beef Cows Beg. Stocks
|
||||
26;Begin Stock (Other)
|
||||
28;Production
|
||||
29;Arabica Production
|
||||
30;Beet Sugar Production
|
||||
31;Commercial Production
|
||||
32;Cows Milk Production
|
||||
33;Farm Sales Weight Prod
|
||||
34;Filter Production
|
||||
40;Prod. from Wine Grapes
|
||||
43;Cane Sugar Production
|
||||
47;Non-Comm. Production
|
||||
48;Non-Filter Production
|
||||
49;Other Milk Production
|
||||
51;Prod. from Tabl Grapes
|
||||
53;Robusta Production
|
||||
54;Rough Production
|
||||
56;Other Production
|
||||
57;Imports
|
||||
58;Bean Imports
|
||||
62;Intra-EU Imports
|
||||
63;MY Imp. from U.S.
|
||||
64;Raw Imports
|
||||
65;U.S. Leaf Imports
|
||||
70;MY Imp. from EU
|
||||
71;Other Imports
|
||||
74;Refined Imp.(Raw Val)
|
||||
75;Roast & Ground Imports
|
||||
78;CY Imports
|
||||
81;TY Imports
|
||||
82;Soluble Imports
|
||||
83;CY Imp. from U.S.
|
||||
84;TY Imp. from U.S.
|
||||
86;Total Supply
|
||||
87;CY Exp. to U.S.
|
||||
88;Exports
|
||||
89;Raw Exports
|
||||
90;Bean Exports
|
||||
94;Intra EU Exports
|
||||
95;Intra-EU Exports
|
||||
97;MY Exp. to EU
|
||||
99;Refined Exp.(Raw Val)
|
||||
104;Other Exports
|
||||
107;Roast & Ground Exports
|
||||
110;CY Exports
|
||||
113;TY Exports
|
||||
114;Soluble Exports
|
||||
116;Slaughter (Reference)
|
||||
117;Total Slaughter
|
||||
118;Cow Slaughter
|
||||
120;Inventory (Reference)
|
||||
121;Sow Slaughter
|
||||
122;Calf Slaughter
|
||||
124;Other Slaughter
|
||||
125;Domestic Consumption
|
||||
126;Total Disappearance
|
||||
128;Dom. Leaf Consumption
|
||||
129;Dom.Consump(Cntrl App)
|
||||
130;Feed Dom. Consumption
|
||||
131;Fluid Use Dom. Consum.
|
||||
132;For Processing
|
||||
133;Fresh Dom. Consumption
|
||||
135;Fresh Dom. Consumption
|
||||
138;Human Consumption
|
||||
139;Human Dom. Consumption
|
||||
140;Industrial Dom. Cons.
|
||||
141;Rst,Ground Dom. Consum
|
||||
142;Domestic Use
|
||||
143;Utilization for Sugar
|
||||
145;Dom.Consump(Other)
|
||||
147;Factory Use Consum.
|
||||
149;Food Use Dom. Cons.
|
||||
150;Loss
|
||||
151;Other Disappearance
|
||||
152;Other Use, Losses
|
||||
154;Soluble Dom. Cons.
|
||||
155;U.S. Leaf Dom. Cons.
|
||||
157;Utilizatn for Alcohol
|
||||
158;Feed Use Dom. Consum.
|
||||
161;Feed Waste Dom. Cons.
|
||||
167;Other Foreign Cons.
|
||||
169;Withdrawal From Market
|
||||
172;Loss and Residual
|
||||
173;Total Disappearance
|
||||
174;Total Use
|
||||
175;Total Utilization
|
||||
176;Ending Stocks
|
||||
177;End Stocks (Cntrl App)
|
||||
178;Total Distribution
|
||||
179;End Stocks (Other)
|
||||
181;Extr. Rate, 999.9999
|
||||
182;Milling Rate (.9999)
|
||||
183;Seed to Lint Ratio
|
||||
184;YieldYield (Rough)
|
||||
192;FSI Consumption
|
||||
194;SME
|
||||
195;Stocks-to-UseStock to Use %
|
||||
196;Exportable Production
|
||||
198;Balance
|
||||
199;Inventory Balance
|
||||
200;Inventory Change
|
||||
201;Import Change
|
||||
202;Export Change
|
||||
203;Consumption Change
|
||||
204;Production Change
|
||||
205;Sow Change
|
||||
206;Cow Change
|
||||
207;Production to Cows
|
||||
208;Production to Sows
|
||||
209;Slaughter to Inventory
|
||||
210;Weights
|
||||
211;Population
|
||||
212;Per Capita Consumption
|
||||
213;Slaughter to Total Supply
|
||||
214;Imports Percent Consumption
|
||||
215;Exports Percent Production
|
||||
220;Annual % Change Per Cap. Cons.
|
||||
223;Stocks to Use (Months)
|
||||
|
@@ -0,0 +1,57 @@
|
||||
commodity_name,exchange_code,exchange,commodity_code
|
||||
Crude Oil WTI,CL,CME,NA
|
||||
Crude Oil Brent,BZ,ICE,NA
|
||||
Gasoline RBOB,RB,CME,NA
|
||||
Heating Oil,HO,CME,NA
|
||||
Natural Gas,NG,CME,NA
|
||||
Ethanol,CU,CME,NA
|
||||
Cocoa,CC,ICE,NA
|
||||
Cotton,CT,ICE,2631000
|
||||
Orange Juice,FCOJ-A,ICE,0585100
|
||||
Coffee,KC,ICE,0711100
|
||||
Lumber,LBR,ICE,NA
|
||||
Sugar,SB,ICE,0612000
|
||||
European Gas TTF,TTF,ICE,NA
|
||||
European Union Emissions Allowance,ECF,ICE,NA
|
||||
Gold,GC,CME,NA
|
||||
Silver,SI,CME,NA
|
||||
Platinum,PL,CME,NA
|
||||
Copper,HG,CME,NA
|
||||
Palladium,PA,CME,NA
|
||||
Live Cattle,LE,CME,0011000
|
||||
Feeder Cattle,GF,CME,0011000
|
||||
Lean Hogs,HE,CME,NA
|
||||
Corn,ZC,CME,0440000
|
||||
Soybean Oil,ZL,CME,4232000
|
||||
Soybean meal,ZM,CME,0813100
|
||||
Oats,ZO,CME,0452000
|
||||
Rough Rice,ZR,CME,0422110
|
||||
Soybeans,ZS,CME,NA
|
||||
Wheat,ZW,CME,0410000
|
||||
Canola,RS,ICE,2226000
|
||||
Rebar,RB,SHFE,NA
|
||||
Hot-Rolled Coil,HC,SHFE,NA
|
||||
Nickel,NI,SHFE,NA
|
||||
Tin,SN,SHFE,NA
|
||||
Aluminum,AL,SHFE,NA
|
||||
Zinc,ZN,SHFE,NA
|
||||
Natural Rubber,RU,SHFE,NA
|
||||
Bitumen,BU,SHFE,NA
|
||||
Iron Ore,I,DCE,NA
|
||||
Palm Oil,P,DCE,4243000
|
||||
Eggs,JD,DCE,NA
|
||||
Coking Coal,JM,DCE,NA
|
||||
Polyvinyl Chloride (PVC),V,DCE,NA
|
||||
White Sugar,SR,ZCE,0612000
|
||||
Cotton,CF,ZCE,2631000
|
||||
Apple,AP,ZCE,0574000
|
||||
PTA,TA,ZCE,NA
|
||||
Methanol,MA,ZCE,NA
|
||||
LME Aluminum,AH,LME,NA
|
||||
LME Copper,CA,LME,NA
|
||||
LME Lead,PB,LME,NA
|
||||
LME Nickel,NI,LME,NA
|
||||
LME Tin,SN,LME,NA
|
||||
LME Zinc,ZS,LME,NA
|
||||
Iron Ore,TIO,SGX,NA
|
||||
Rubber,TSR,SGX,NA
|
||||
|
65
transform/sqlmesh_beanflows/seeds/psd_commodity_codes.csv
Normal file
65
transform/sqlmesh_beanflows/seeds/psd_commodity_codes.csv
Normal file
@@ -0,0 +1,65 @@
|
||||
commodity_code;commodity_name
|
||||
0577400;Almonds, Shelled Basis
|
||||
0011000;Animal Numbers, Cattle
|
||||
0013000;Animal Numbers, Swine
|
||||
0574000;Apples, Fresh
|
||||
0430000;Barley
|
||||
0579305;Cherries (Sweet&Sour), Fresh
|
||||
0711100;Coffee, Green
|
||||
0440000;Corn
|
||||
2631000;Cotton
|
||||
0000000;Cotton (Metric Tons)
|
||||
0230000;Dairy, Butter
|
||||
0240000;Dairy, Cheese
|
||||
0224400;Dairy, Dry Whole Milk Powder
|
||||
0223000;Dairy, Milk, Fluid
|
||||
0224200;Dairy, Milk, Nonfat Dry
|
||||
0572220;Grapefruit, Fresh
|
||||
0575100;Grapes, Fresh Table
|
||||
0572120;Lemons/Limes, Fresh
|
||||
0813700;Meal, Copra
|
||||
0813300;Meal, Cottonseed
|
||||
0814200;Meal, Fish
|
||||
0813800;Meal, Palm Kernel
|
||||
0813200;Meal, Peanut
|
||||
0813600;Meal, Rapeseed
|
||||
0813100;Meal, Soybean
|
||||
0813101;Meal, Soybean (Local)
|
||||
0813500;Meal, Sunflowerseed
|
||||
0111000;Meat, Beef and Veal
|
||||
0115000;Meat, Chicken
|
||||
0113000;Meat, Swine
|
||||
0459100;Millet
|
||||
0459900;Mixed Grain
|
||||
0452000;Oats
|
||||
4242000;Oil, Coconut
|
||||
4233000;Oil, Cottonseed
|
||||
4235000;Oil, Olive
|
||||
4243000;Oil, Palm
|
||||
4244000;Oil, Palm Kernel
|
||||
4234000;Oil, Peanut
|
||||
4239100;Oil, Rapeseed
|
||||
4232000;Oil, Soybean
|
||||
4232001;Oil, Soybean (Local)
|
||||
4236000;Oil, Sunflowerseed
|
||||
2231000;Oilseed, Copra
|
||||
2223000;Oilseed, Cottonseed
|
||||
2232000;Oilseed, Palm Kernel
|
||||
2221000;Oilseed, Peanut
|
||||
2226000;Oilseed, Rapeseed
|
||||
2222000;Oilseed, Soybean
|
||||
2222001;Oilseed, Soybean (Local)
|
||||
2224000;Oilseed, Sunflowerseed
|
||||
0585100;Orange Juice
|
||||
0571120;Oranges, Fresh
|
||||
0579309;Peaches & Nectarines, Fresh
|
||||
0579220;Pears, Fresh
|
||||
0577907;Pistachios, Inshell Basis
|
||||
0114200;Poultry, Meat, Broiler
|
||||
0422110;Rice, Milled
|
||||
0451000;Rye
|
||||
0459200;Sorghum
|
||||
0612000;Sugar, Centrifugal
|
||||
0571220;Tangerines/Mandarins, Fresh
|
||||
0577901;Walnuts, Inshell Basis
|
||||
0410000;Wheat
|
||||
|
@@ -0,0 +1,43 @@
|
||||
unit_id;unit_name
|
||||
1;(1000 BUSHES)
|
||||
2;(1000 60 KG BAGS)
|
||||
3;(1000 COLONIES)
|
||||
4;(1000 HA)
|
||||
5;(1000 HEAD)
|
||||
6;(1000 HL)
|
||||
7;(1000 MT CWE)
|
||||
8;(1000 MT)
|
||||
9;(1000 PCS)
|
||||
10;(1000 TREES)
|
||||
11;(Dec. Fraction)
|
||||
12;(HA)
|
||||
13;(HECTARES)
|
||||
14;(KG)
|
||||
15;(MIL HEAD)
|
||||
16;(MIL PCS)
|
||||
17;(MILLION TREES)
|
||||
18;(MT RAW EQ)
|
||||
19;(MT RAW EW)
|
||||
20;(MT RE)
|
||||
21;(MT)
|
||||
22;(MT, Net Weight)
|
||||
23;(PERCENT)
|
||||
24;(RATIO)
|
||||
25;(1000 CUBIC METERS)
|
||||
26;(MT/HA)
|
||||
27;1000 480 lb. Bales
|
||||
28;(Bales/HA)
|
||||
29;(KG/HA)
|
||||
30;ACRES
|
||||
31;BUSHELS
|
||||
32;HUNDREDWEIGHT
|
||||
33;MILLING RATE
|
||||
34;BUSHELS/TON
|
||||
35;IMPORT MILLING RATE
|
||||
36;Bushels
|
||||
37;SHORT TONS
|
||||
38;MILLION LBS
|
||||
39;BILLION LBS
|
||||
40;(HEAD)
|
||||
41;(PEOPLE)
|
||||
42;(MONTHS)
|
||||
|
13
transform/sqlmesh_beanflows/seeds/weather_locations.csv
Normal file
13
transform/sqlmesh_beanflows/seeds/weather_locations.csv
Normal file
@@ -0,0 +1,13 @@
|
||||
location_id;name;country;lat;lon;variety
|
||||
brazil_minas_gerais;Minas Gerais;BR;-19.9167;-43.9345;Arabica
|
||||
brazil_parana;Paraná;BR;-23.4205;-51.9330;Arabica
|
||||
vietnam_highlands;Central Highlands;VN;12.6667;108.0500;Robusta
|
||||
colombia_huila;Huila;CO;2.5359;-75.5277;Arabica
|
||||
ethiopia_sidama;Sidama;ET;6.7612;38.4721;Arabica
|
||||
honduras_copan;Copán;HN;14.8333;-89.1500;Arabica
|
||||
guatemala_antigua;Antigua;GT;14.5586;-90.7295;Arabica
|
||||
indonesia_sumatra;Sumatra;ID;3.5952;98.6722;Robusta
|
||||
brazil_espirito_santo;Espírito Santo;BR;-19.3908;-40.0668;Robusta
|
||||
peru_jaen;Jaén;PE;-5.7064;-78.8077;Arabica
|
||||
uganda_elgon;Mount Elgon;UG;1.0826;34.1751;Robusta
|
||||
ivory_coast_daloa;Daloa;CI;6.8774;-6.4502;Robusta
|
||||
|
0
transform/sqlmesh_beanflows/tests/.gitkeep
Normal file
0
transform/sqlmesh_beanflows/tests/.gitkeep
Normal file
Reference in New Issue
Block a user