feat(transform): add serving.weather_daily with rolling analytics and crop stress index
Incremental serving model for 12 coffee-growing locations. Adds: - Rolling aggregates: precip_sum_7d/30d, temp_mean_30d, temp_anomaly, water_balance_7d - Gaps-and-islands streak counters: drought_streak_days, heat_streak_days, vpd_streak_days - Composite crop_stress_index 0–100 (drought 30%, water deficit 25%, heat 20%, VPD 15%, frost 10%) - lookback 90: ensures rolling windows and streak counters see sufficient history on daily runs Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
187
transform/sqlmesh_materia/models/serving/weather_daily.sql
Normal file
187
transform/sqlmesh_materia/models/serving/weather_daily.sql
Normal file
@@ -0,0 +1,187 @@
|
||||
/* Serving mart: daily weather analytics for 12 coffee-growing regions. */
|
||||
/* Source: foundation.fct_weather_daily (already has seed join for location metadata). */
|
||||
/* Adds rolling aggregates, water balance, gaps-and-islands streak counters, */
|
||||
/* and a composite crop stress index (0–100) as a single severity gauge. */
|
||||
/* Grain: (location_id, observation_date) */
|
||||
/* Lookback 90: rolling windows reach up to 30 days, streak counters can extend */
|
||||
/* up to ~90 days; without lookback a daily run sees only 1 row and all window */
|
||||
/* functions degrade to single-row values. */
|
||||
MODEL (
|
||||
name serving.weather_daily,
|
||||
kind INCREMENTAL_BY_TIME_RANGE (
|
||||
time_column observation_date,
|
||||
lookback 90
|
||||
),
|
||||
grain (location_id, observation_date),
|
||||
start '2020-01-01',
|
||||
cron '@daily'
|
||||
);
|
||||
|
||||
WITH base AS (
|
||||
SELECT
|
||||
observation_date,
|
||||
location_id,
|
||||
location_name,
|
||||
country,
|
||||
lat,
|
||||
lon,
|
||||
variety,
|
||||
temp_min_c,
|
||||
temp_max_c,
|
||||
temp_mean_c,
|
||||
precipitation_mm,
|
||||
humidity_max_pct,
|
||||
cloud_cover_mean_pct,
|
||||
wind_max_speed_ms,
|
||||
et0_mm,
|
||||
vpd_max_kpa,
|
||||
is_frost,
|
||||
is_heat_stress,
|
||||
is_drought,
|
||||
is_high_vpd,
|
||||
in_growing_season,
|
||||
/* Rolling precipitation — w7 = trailing 7 days, w30 = trailing 30 days */
|
||||
SUM(precipitation_mm) OVER w7 AS precip_sum_7d_mm,
|
||||
SUM(precipitation_mm) OVER w30 AS precip_sum_30d_mm,
|
||||
/* Rolling temperature baseline */
|
||||
AVG(temp_mean_c) OVER w30 AS temp_mean_30d_c,
|
||||
/* Temperature anomaly: today vs trailing 30-day mean */
|
||||
temp_mean_c - AVG(temp_mean_c) OVER w30 AS temp_anomaly_c,
|
||||
/* Water balance: net daily water gain/loss (precipitation minus evapotranspiration) */
|
||||
precipitation_mm - et0_mm AS water_balance_mm,
|
||||
SUM(precipitation_mm - et0_mm) OVER w7 AS water_balance_7d_mm,
|
||||
/* Gaps-and-islands group markers for streak counting. */
|
||||
/* Pattern: ROW_NUMBER() - running_count_of_true creates a stable group ID */
|
||||
/* for each consecutive run of TRUE. Rows where flag=FALSE get a unique group ID */
|
||||
/* (so their streak length stays 0 after the CASE in with_streaks). */
|
||||
ROW_NUMBER() OVER (
|
||||
PARTITION BY location_id
|
||||
ORDER BY observation_date
|
||||
) - SUM(
|
||||
CASE WHEN is_drought THEN 1 ELSE 0 END
|
||||
) OVER (
|
||||
PARTITION BY location_id
|
||||
ORDER BY observation_date
|
||||
ROWS UNBOUNDED PRECEDING
|
||||
) AS _drought_group,
|
||||
ROW_NUMBER() OVER (
|
||||
PARTITION BY location_id
|
||||
ORDER BY observation_date
|
||||
) - SUM(
|
||||
CASE WHEN is_heat_stress THEN 1 ELSE 0 END
|
||||
) OVER (
|
||||
PARTITION BY location_id
|
||||
ORDER BY observation_date
|
||||
ROWS UNBOUNDED PRECEDING
|
||||
) AS _heat_group,
|
||||
ROW_NUMBER() OVER (
|
||||
PARTITION BY location_id
|
||||
ORDER BY observation_date
|
||||
) - SUM(
|
||||
CASE WHEN is_high_vpd THEN 1 ELSE 0 END
|
||||
) OVER (
|
||||
PARTITION BY location_id
|
||||
ORDER BY observation_date
|
||||
ROWS UNBOUNDED PRECEDING
|
||||
) AS _vpd_group
|
||||
FROM foundation.fct_weather_daily
|
||||
WHERE
|
||||
observation_date BETWEEN @start_ds AND @end_ds
|
||||
WINDOW
|
||||
w7 AS (
|
||||
PARTITION BY location_id
|
||||
ORDER BY observation_date
|
||||
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
|
||||
),
|
||||
w30 AS (
|
||||
PARTITION BY location_id
|
||||
ORDER BY observation_date
|
||||
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
|
||||
)
|
||||
), with_streaks AS (
|
||||
SELECT
|
||||
base.*,
|
||||
/* Drought streak: number of consecutive dry days ending on observation_date. */
|
||||
/* Returns 0 when flag is FALSE (not a drought day). */
|
||||
CASE
|
||||
WHEN NOT is_drought
|
||||
THEN 0
|
||||
ELSE ROW_NUMBER() OVER (
|
||||
PARTITION BY location_id, _drought_group
|
||||
ORDER BY observation_date
|
||||
)
|
||||
END AS drought_streak_days,
|
||||
/* Heat stress streak: consecutive days with temp_max > 35°C */
|
||||
CASE
|
||||
WHEN NOT is_heat_stress
|
||||
THEN 0
|
||||
ELSE ROW_NUMBER() OVER (
|
||||
PARTITION BY location_id, _heat_group
|
||||
ORDER BY observation_date
|
||||
)
|
||||
END AS heat_streak_days,
|
||||
/* VPD stress streak: consecutive days with vpd_max > 1.5 kPa */
|
||||
CASE
|
||||
WHEN NOT is_high_vpd
|
||||
THEN 0
|
||||
ELSE ROW_NUMBER() OVER (
|
||||
PARTITION BY location_id, _vpd_group
|
||||
ORDER BY observation_date
|
||||
)
|
||||
END AS vpd_streak_days
|
||||
FROM base
|
||||
)
|
||||
SELECT
|
||||
observation_date,
|
||||
location_id,
|
||||
location_name,
|
||||
country,
|
||||
lat,
|
||||
lon,
|
||||
variety,
|
||||
temp_min_c,
|
||||
temp_max_c,
|
||||
temp_mean_c,
|
||||
precipitation_mm,
|
||||
humidity_max_pct,
|
||||
cloud_cover_mean_pct,
|
||||
wind_max_speed_ms,
|
||||
et0_mm,
|
||||
vpd_max_kpa,
|
||||
is_frost,
|
||||
is_heat_stress,
|
||||
is_drought,
|
||||
is_high_vpd,
|
||||
in_growing_season,
|
||||
ROUND(precip_sum_7d_mm, 2) AS precip_sum_7d_mm,
|
||||
ROUND(precip_sum_30d_mm, 2) AS precip_sum_30d_mm,
|
||||
ROUND(temp_mean_30d_c, 2) AS temp_mean_30d_c,
|
||||
ROUND(temp_anomaly_c, 2) AS temp_anomaly_c,
|
||||
ROUND(water_balance_mm, 2) AS water_balance_mm,
|
||||
ROUND(water_balance_7d_mm, 2) AS water_balance_7d_mm,
|
||||
drought_streak_days,
|
||||
heat_streak_days,
|
||||
vpd_streak_days,
|
||||
/* Composite crop stress index (0–100).
|
||||
Weights: drought streak 30%, water deficit 25%, heat streak 20%,
|
||||
VPD streak 15%, frost (binary) 10%.
|
||||
Each component is normalized to [0,1] then capped before weighting:
|
||||
drought: 14 days = fully stressed
|
||||
water: 20mm 7d deficit = fully stressed
|
||||
heat: 7 days = fully stressed
|
||||
vpd: 7 days = fully stressed
|
||||
frost: binary (Arabica highland catastrophic event) */
|
||||
ROUND(
|
||||
GREATEST(0.0, LEAST(100.0,
|
||||
LEAST(1.0, drought_streak_days / 14.0) * 30.0
|
||||
+ LEAST(1.0, GREATEST(0.0, -water_balance_7d_mm) / 20.0) * 25.0
|
||||
+ LEAST(1.0, heat_streak_days / 7.0) * 20.0
|
||||
+ LEAST(1.0, vpd_streak_days / 7.0) * 15.0
|
||||
+ CASE WHEN is_frost THEN 10.0 ELSE 0.0 END
|
||||
)),
|
||||
1
|
||||
) AS crop_stress_index
|
||||
FROM with_streaks
|
||||
ORDER BY
|
||||
location_id,
|
||||
observation_date
|
||||
Reference in New Issue
Block a user