feat(extract): add OpenWeatherMap daily weather extractor
Adds extract/openweathermap package with daily weather extraction for 8
coffee-growing regions (Brazil, Vietnam, Colombia, Ethiopia, Honduras,
Guatemala, Indonesia). Feeds crop stress signal for commodity sentiment score.
Extractor:
- OWM One Call API 3.0 / Day Summary — one JSON.gz per (location, date)
- extract_weather: daily, fetches yesterday + today (16 calls max)
- extract_weather_backfill: fills 2020-01-01 to yesterday, capped at 500
calls/run with resume cursor '{location_id}:{date}' for crash safety
- Full idempotency via file existence check; state tracking via extract_core
SQLMesh:
- seeds.weather_locations (8 regions with lat/lon/variety)
- foundation.fct_weather_daily: INCREMENTAL_BY_TIME_RANGE, grain
(location_id, observation_date), dedup via hash key, crop stress flags:
is_frost (<2°C), is_heat_stress (>35°C), is_drought (<1mm), in_growing_season
Landing path: LANDING_DIR/weather/{location_id}/{year}/{date}.json.gz
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,106 +1,126 @@
|
||||
MODEL (
|
||||
name serving.commodity_metrics,
|
||||
kind INCREMENTAL_BY_TIME_RANGE (
|
||||
time_column ingest_date
|
||||
),
|
||||
start '2006-08-01',
|
||||
cron '@daily'
|
||||
);
|
||||
|
||||
-- CTE to calculate country-level derived metrics
|
||||
WITH country_metrics AS (
|
||||
SELECT
|
||||
commodity_code,
|
||||
commodity_name,
|
||||
country_code,
|
||||
country_name,
|
||||
market_year,
|
||||
ingest_date,
|
||||
Production,
|
||||
Imports,
|
||||
Exports,
|
||||
Total_Distribution,
|
||||
Ending_Stocks,
|
||||
-- Derived metrics per country, mirroring Python script
|
||||
(Production + Imports - Exports) AS Net_Supply,
|
||||
(Exports - Imports) AS Trade_Balance,
|
||||
(Production + Imports - Exports) - Total_Distribution AS Supply_Demand_Balance,
|
||||
-- Handle division by zero for Stock-to-Use Ratio
|
||||
(Ending_Stocks / NULLIF(Total_Distribution, 0)) * 100 AS Stock_to_Use_Ratio_pct,
|
||||
-- Calculate Production YoY percentage change using a window function
|
||||
(Production - LAG(Production, 1, 0) OVER (PARTITION BY commodity_code, country_code ORDER BY market_year, ingest_date)) / NULLIF(LAG(Production, 1, 0) OVER (PARTITION BY commodity_code, country_code ORDER BY market_year, ingest_date), 0) * 100 AS Production_YoY_pct
|
||||
FROM cleaned.psdalldata__commodity_pivoted
|
||||
),
|
||||
global_aggregates AS (
|
||||
SELECT
|
||||
commodity_code,
|
||||
commodity_name,
|
||||
NULL::TEXT AS country_code, -- Use NULL for global aggregates
|
||||
'Global' AS country_name,
|
||||
market_year,
|
||||
ingest_date,
|
||||
SUM(Production) AS Production,
|
||||
SUM(Imports) AS Imports,
|
||||
SUM(Exports) AS Exports,
|
||||
SUM(Total_Distribution) AS Total_Distribution,
|
||||
SUM(Ending_Stocks) AS Ending_Stocks
|
||||
FROM cleaned.psdalldata__commodity_pivoted
|
||||
GROUP BY
|
||||
commodity_code,
|
||||
commodity_name,
|
||||
market_year,
|
||||
ingest_date
|
||||
),
|
||||
-- CTE to calculate derived metrics for global aggregates
|
||||
global_metrics AS (
|
||||
SELECT
|
||||
commodity_code,
|
||||
commodity_name,
|
||||
country_code,
|
||||
country_name,
|
||||
market_year,
|
||||
ingest_date,
|
||||
Production,
|
||||
Imports,
|
||||
Exports,
|
||||
Total_Distribution,
|
||||
Ending_Stocks,
|
||||
(Production + Imports - Exports) AS Net_Supply,
|
||||
(Exports - Imports) AS Trade_Balance,
|
||||
(Production + Imports - Exports) - Total_Distribution AS Supply_Demand_Balance,
|
||||
(Ending_Stocks / NULLIF(Total_Distribution, 0)) * 100 AS Stock_to_Use_Ratio_pct,
|
||||
(Production - LAG(Production, 1, 0) OVER (PARTITION BY commodity_code ORDER BY market_year, ingest_date)) / NULLIF(LAG(Production, 1, 0) OVER (PARTITION BY commodity_code ORDER BY market_year, ingest_date), 0) * 100 AS Production_YoY_pct
|
||||
FROM global_aggregates
|
||||
)
|
||||
-- Combine country-level and global-level data into a single output
|
||||
SELECT
|
||||
commodity_code,
|
||||
commodity_name,
|
||||
country_code,
|
||||
country_name,
|
||||
market_year,
|
||||
ingest_date,
|
||||
Production,
|
||||
Imports,
|
||||
Exports,
|
||||
Total_Distribution,
|
||||
Ending_Stocks,
|
||||
Net_Supply,
|
||||
Trade_Balance,
|
||||
Supply_Demand_Balance,
|
||||
Stock_to_Use_Ratio_pct,
|
||||
Production_YoY_pct
|
||||
FROM (
|
||||
SELECT
|
||||
*
|
||||
FROM country_metrics
|
||||
UNION ALL
|
||||
SELECT
|
||||
*
|
||||
FROM global_metrics
|
||||
) AS combined_data
|
||||
ORDER BY
|
||||
commodity_name,
|
||||
country_name,
|
||||
market_year,
|
||||
ingest_date;
|
||||
MODEL (
|
||||
name serving.commodity_metrics,
|
||||
kind INCREMENTAL_BY_TIME_RANGE (
|
||||
time_column ingest_date
|
||||
),
|
||||
start '2006-08-01',
|
||||
cron '@daily'
|
||||
);
|
||||
|
||||
/* CTE to calculate country-level derived metrics */
|
||||
WITH country_metrics AS (
|
||||
SELECT
|
||||
commodity_code,
|
||||
commodity_name,
|
||||
country_code,
|
||||
country_name,
|
||||
market_year,
|
||||
ingest_date,
|
||||
Production,
|
||||
Imports,
|
||||
Exports,
|
||||
Total_Distribution,
|
||||
Ending_Stocks,
|
||||
(
|
||||
Production + Imports - Exports
|
||||
) AS Net_Supply, /* Derived metrics per country, mirroring Python script */
|
||||
(
|
||||
Exports - Imports
|
||||
) AS Trade_Balance,
|
||||
(
|
||||
Production + Imports - Exports
|
||||
) - Total_Distribution AS Supply_Demand_Balance,
|
||||
(
|
||||
Ending_Stocks / NULLIF(Total_Distribution, 0)
|
||||
) /* Handle division by zero for Stock-to-Use Ratio */ * 100 AS Stock_to_Use_Ratio_pct,
|
||||
(
|
||||
Production - LAG(Production, 1, 0) OVER (PARTITION BY commodity_code, country_code ORDER BY market_year, ingest_date)
|
||||
) /* Calculate Production YoY percentage change using a window function */ / NULLIF(
|
||||
LAG(Production, 1, 0) OVER (PARTITION BY commodity_code, country_code ORDER BY market_year, ingest_date),
|
||||
0
|
||||
) * 100 AS Production_YoY_pct
|
||||
FROM cleaned.psdalldata__commodity_pivoted
|
||||
), global_aggregates AS (
|
||||
SELECT
|
||||
commodity_code,
|
||||
commodity_name,
|
||||
NULL::TEXT AS country_code, /* Use NULL for global aggregates */
|
||||
'Global' AS country_name,
|
||||
market_year,
|
||||
ingest_date,
|
||||
SUM(Production) AS Production,
|
||||
SUM(Imports) AS Imports,
|
||||
SUM(Exports) AS Exports,
|
||||
SUM(Total_Distribution) AS Total_Distribution,
|
||||
SUM(Ending_Stocks) AS Ending_Stocks
|
||||
FROM cleaned.psdalldata__commodity_pivoted
|
||||
GROUP BY
|
||||
commodity_code,
|
||||
commodity_name,
|
||||
market_year,
|
||||
ingest_date
|
||||
), global_metrics /* CTE to calculate derived metrics for global aggregates */ AS (
|
||||
SELECT
|
||||
commodity_code,
|
||||
commodity_name,
|
||||
country_code,
|
||||
country_name,
|
||||
market_year,
|
||||
ingest_date,
|
||||
Production,
|
||||
Imports,
|
||||
Exports,
|
||||
Total_Distribution,
|
||||
Ending_Stocks,
|
||||
(
|
||||
Production + Imports - Exports
|
||||
) AS Net_Supply,
|
||||
(
|
||||
Exports - Imports
|
||||
) AS Trade_Balance,
|
||||
(
|
||||
Production + Imports - Exports
|
||||
) - Total_Distribution AS Supply_Demand_Balance,
|
||||
(
|
||||
Ending_Stocks / NULLIF(Total_Distribution, 0)
|
||||
) * 100 AS Stock_to_Use_Ratio_pct,
|
||||
(
|
||||
Production - LAG(Production, 1, 0) OVER (PARTITION BY commodity_code ORDER BY market_year, ingest_date)
|
||||
) / NULLIF(
|
||||
LAG(Production, 1, 0) OVER (PARTITION BY commodity_code ORDER BY market_year, ingest_date),
|
||||
0
|
||||
) * 100 AS Production_YoY_pct
|
||||
FROM global_aggregates
|
||||
)
|
||||
/* Combine country-level and global-level data into a single output */
|
||||
SELECT
|
||||
commodity_code,
|
||||
commodity_name,
|
||||
country_code,
|
||||
country_name,
|
||||
market_year,
|
||||
ingest_date,
|
||||
Production,
|
||||
Imports,
|
||||
Exports,
|
||||
Total_Distribution,
|
||||
Ending_Stocks,
|
||||
Net_Supply,
|
||||
Trade_Balance,
|
||||
Supply_Demand_Balance,
|
||||
Stock_to_Use_Ratio_pct,
|
||||
Production_YoY_pct
|
||||
FROM (
|
||||
SELECT
|
||||
*
|
||||
FROM country_metrics
|
||||
UNION ALL
|
||||
SELECT
|
||||
*
|
||||
FROM global_metrics
|
||||
) AS combined_data
|
||||
ORDER BY
|
||||
commodity_name,
|
||||
country_name,
|
||||
market_year,
|
||||
ingest_date
|
||||
Reference in New Issue
Block a user