feat(extract): add OpenWeatherMap daily weather extractor

Adds extract/openweathermap package with daily weather extraction for 8
coffee-growing regions in Brazil, Vietnam, Colombia, Ethiopia, Honduras,
Guatemala, and Indonesia. Feeds the crop stress signal for the commodity
sentiment score.

Extractor:
- OWM One Call API 3.0 / Day Summary — one JSON.gz per (location, date)
- extract_weather: daily, fetches yesterday + today (16 calls max)
- extract_weather_backfill: fills 2020-01-01 to yesterday, capped at 500
  calls/run with resume cursor '{location_id}:{date}' for crash safety
- Full idempotency via file existence check; state tracking via extract_core

SQLMesh:
- seeds.weather_locations (8 regions with lat/lon/variety)
- foundation.fct_weather_daily: INCREMENTAL_BY_TIME_RANGE, grain
  (location_id, observation_date), dedup via hash key, crop stress flags:
  is_frost (<2°C), is_heat_stress (>35°C), is_drought (<1mm), in_growing_season

Landing path: LANDING_DIR/weather/{location_id}/{year}/{date}.json.gz

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-25 22:40:27 +01:00
parent c3c8333407
commit 08e74665bb
31 changed files with 1377 additions and 915 deletions

View File

@@ -1,41 +1,32 @@
-- Serving mart: COT positioning for Coffee C futures, analytics-ready.
--
-- Joins foundation.fct_cot_positioning with foundation.dim_commodity so
-- the coffee filter is driven by the dimension (not a hardcoded CFTC code).
-- Adds derived analytics used by the dashboard and API:
--   - Normalized positioning (% of open interest)
--   - Long/short ratio
--   - Week-over-week momentum
--   - COT Index over 26-week and 52-week trailing windows (0=bearish, 100=bullish)
--
-- Grain: one row per report_date for Coffee C futures.
-- Latest revision per date: when CFTC issues corrections, only the row with
-- the most recent ingest_date is kept.
-- SQLMesh model declaration: incremental by report_date, refreshed daily.
-- `grain` is declared once; the model emits one row per report_date.
MODEL (
  name serving.cot_positioning,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (
    report_date
  ),
  start '2006-06-13',
  cron '@daily'
);
WITH latest_revision AS (
  /*
   * Coffee rows from the COT fact table, one per
   * (report_date, cftc_contract_market_code): when CFTC issues corrections,
   * only the most recently ingested row is kept.
   *
   * The lower bound reaches 371 days (53 weeks) before the interval start.
   * The downstream CTE computes LAG(1) and trailing 26-/52-row windows over
   * this result; if the input were cut at @start_ds, an incremental run would
   * see no history, leaving week-over-week deltas NULL and pinning the COT
   * indexes to the 50.0 fallback. 53 weeks assumes the weekly report cadence
   * implied by the 26w/52w metrics, with one week of slack.
   *
   * The extra lookback rows are not written to the target: SQLMesh applies
   * its own time_column range filter to the output of
   * INCREMENTAL_BY_TIME_RANGE models.
   */
  SELECT
    f.*
  FROM foundation.fct_cot_positioning AS f
  INNER JOIN foundation.dim_commodity AS d
    ON f.cftc_commodity_code = d.cftc_commodity_code
  WHERE
    d.commodity_name = 'Coffee, Green'
    AND f.report_date >= CAST(@start_ds AS DATE) - INTERVAL 371 DAY
    AND f.report_date <= @end_ds
  QUALIFY
    ROW_NUMBER() OVER (
      PARTITION BY f.report_date, f.cftc_contract_market_code
      ORDER BY f.ingest_date DESC
    ) = 1
), with_derived AS (
SELECT
report_date,
market_and_exchange_name,
@@ -43,9 +34,7 @@ with_derived AS (
cftc_contract_market_code,
contract_units,
ingest_date,
-- Absolute positions (contracts)
open_interest,
open_interest, /* Absolute positions (contracts) */
managed_money_long,
managed_money_short,
managed_money_spread,
@@ -64,77 +53,52 @@ with_derived AS (
nonreportable_long,
nonreportable_short,
nonreportable_net,
-- Normalized: managed money net as % of open interest
-- Removes size effects and makes cross-period comparison meaningful
round(
managed_money_net::float / NULLIF(open_interest, 0) * 100,
2
) AS managed_money_net_pct_of_oi,
-- Long/short ratio: >1 = more bulls than bears in managed money
round(
managed_money_long::float / NULLIF(managed_money_short, 0),
3
) AS managed_money_long_short_ratio,
-- Weekly changes
change_open_interest,
ROUND(managed_money_net::REAL / NULLIF(open_interest, 0) * 100, 2) AS managed_money_net_pct_of_oi, /* Normalized: managed money net as % of open interest */ /* Removes size effects and makes cross-period comparison meaningful */
ROUND(managed_money_long::REAL / NULLIF(managed_money_short, 0), 3) AS managed_money_long_short_ratio, /* Long/short ratio: >1 = more bulls than bears in managed money */
change_open_interest, /* Weekly changes */
change_managed_money_long,
change_managed_money_short,
change_managed_money_net,
change_prod_merc_long,
change_prod_merc_short,
-- Week-over-week momentum in managed money net (via LAG)
managed_money_net - LAG(managed_money_net, 1) OVER (
ORDER BY report_date
) AS managed_money_net_wow,
-- Concentration
concentration_top4_long_pct,
managed_money_net /* Week-over-week momentum in managed money net (via LAG) */ - LAG(managed_money_net, 1) OVER (ORDER BY report_date) AS managed_money_net_wow,
concentration_top4_long_pct, /* Concentration */
concentration_top4_short_pct,
concentration_top8_long_pct,
concentration_top8_short_pct,
-- Trader counts
traders_total,
traders_total, /* Trader counts */
traders_managed_money_long,
traders_managed_money_short,
traders_managed_money_spread,
-- COT Index (26-week): where is current net vs. trailing 26 weeks?
-- 0 = most bearish extreme, 100 = most bullish extreme
-- Industry-standard sentiment gauge (equivalent to RSI for positioning)
CASE
WHEN MAX(managed_money_net) OVER w26 = MIN(managed_money_net) OVER w26
THEN 50.0
ELSE round(
(managed_money_net - MIN(managed_money_net) OVER w26)::float
/ (MAX(managed_money_net) OVER w26 - MIN(managed_money_net) OVER w26)
* 100,
THEN 50.0
ELSE ROUND(
(
managed_money_net - MIN(managed_money_net) OVER w26
)::REAL / (
MAX(managed_money_net) OVER w26 - MIN(managed_money_net) OVER w26
) * 100,
1
)
END AS cot_index_26w,
-- COT Index (52-week): longer-term positioning context
END AS cot_index_26w, /* COT Index (26-week): where is current net vs. trailing 26 weeks? */ /* 0 = most bearish extreme, 100 = most bullish extreme */ /* Industry-standard sentiment gauge (equivalent to RSI for positioning) */
CASE
WHEN MAX(managed_money_net) OVER w52 = MIN(managed_money_net) OVER w52
THEN 50.0
ELSE round(
(managed_money_net - MIN(managed_money_net) OVER w52)::float
/ (MAX(managed_money_net) OVER w52 - MIN(managed_money_net) OVER w52)
* 100,
THEN 50.0
ELSE ROUND(
(
managed_money_net - MIN(managed_money_net) OVER w52
)::REAL / (
MAX(managed_money_net) OVER w52 - MIN(managed_money_net) OVER w52
) * 100,
1
)
END AS cot_index_52w
END AS cot_index_52w /* COT Index (52-week): longer-term positioning context */
FROM latest_revision
WINDOW
w26 AS (ORDER BY report_date ROWS BETWEEN 25 PRECEDING AND CURRENT ROW),
w52 AS (ORDER BY report_date ROWS BETWEEN 51 PRECEDING AND CURRENT ROW)
WINDOW w26 AS (ORDER BY report_date ROWS BETWEEN 25 PRECEDING AND CURRENT ROW), w52 AS (ORDER BY report_date ROWS BETWEEN 51 PRECEDING AND CURRENT ROW)
)
/* Emit every column produced by with_derived, in chronological order. */
SELECT
  *
FROM with_derived
ORDER BY
  report_date