From 918b0071b13358c30f73c428984a51271233e750 Mon Sep 17 00:00:00 2001 From: Simon Dmsn Date: Fri, 1 Aug 2025 14:22:01 +0000 Subject: [PATCH 1/6] Update file Commodity Exchange Codes.xls --- .../staging/Commodity Exchange Codes.xls | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 transform/sqlmesh_materia/models/staging/Commodity Exchange Codes.xls diff --git a/transform/sqlmesh_materia/models/staging/Commodity Exchange Codes.xls b/transform/sqlmesh_materia/models/staging/Commodity Exchange Codes.xls new file mode 100644 index 0000000..e457cea --- /dev/null +++ b/transform/sqlmesh_materia/models/staging/Commodity Exchange Codes.xls @@ -0,0 +1,57 @@ +Commodity Name Exchange Code Exchange +Crude Oil WTI CL CME +Crude Oil Brent BZ ICE +Gasoline RBOB RB CME +Heating Oil HO CME +Natural Gas NG CME +Ethanol CU CME +Cocoa CC ICE +Cotton CT ICE +Orange Juice FCOJ-A ICE +Coffee KC ICE +Lumber LBR ICE +Sugar SB ICE +European Gas TTF TTC ICE +European Union Emissions Allowance ECF ICE +Gold GC CME +Silver SI CME +Platinum PL CME +Copper HG CME +Palladium PA CME +Live Cattle LE CME +Feeder Cattle GF CME +Lean Hogs HE CME +Corn ZC CME +Soybean oil ZL CME +Soybean meal ZM CME +Oats ZO CME +Rough Rice ZR CME +Soybeans ZS CME +Wheat ZW CME +Canola RS ICE +Rebar RB SHFE +Hot-Rolled Coil HC SHFE +Nickel NI SHFE +Tin SN SHFE +Aluminum AL SHFE +Zinc ZN SHFE +Natural Rubber RU SHFE +Bitumen BU SHFE +Iron Ore I DCE +Palm Oil P DCE +Eggs JD DCE +Coking Coal JM DCE +Polyvinyl Chloride (PVC) V DCE +White Sugar SR ZCE +Cotton CF ZCE +Apple AP ZCE +PTA TA ZCE +Methanol MA ZCE +LME Aluminum AH LME +LME Copper CA LME +LME Lead PB LME +LME Nickel NI LME +LME Tin SN LME +LME Zinc ZS LME +Iron Ore TIO SGX +Rubber TSR SGX \ No newline at end of file From 4ad4386ccc4bb7967383099ef19ae3416378b07f Mon Sep 17 00:00:00 2001 From: Simon Dmsn Date: Fri, 1 Aug 2025 14:24:26 +0000 Subject: [PATCH 2/6] Update 2 files - /transform/sqlmesh_materia/models/staging/Commodity Exchange 
Codes.xls - /transform/sqlmesh_materia/seeds/commodity_exchange_codes.csv --- .../commodity_exchange_codes.csv} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename transform/sqlmesh_materia/{models/staging/Commodity Exchange Codes.xls => seeds/commodity_exchange_codes.csv} (100%) diff --git a/transform/sqlmesh_materia/models/staging/Commodity Exchange Codes.xls b/transform/sqlmesh_materia/seeds/commodity_exchange_codes.csv similarity index 100% rename from transform/sqlmesh_materia/models/staging/Commodity Exchange Codes.xls rename to transform/sqlmesh_materia/seeds/commodity_exchange_codes.csv From 9d7cc4e1fb60adb444f7b8b7b304d416664e9049 Mon Sep 17 00:00:00 2001 From: Simon Dmsn Date: Fri, 1 Aug 2025 14:26:19 +0000 Subject: [PATCH 3/6] Update file commodity_exchange_codes.csv --- .../seeds/commodity_exchange_codes.csv | 114 +++++++++--------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/transform/sqlmesh_materia/seeds/commodity_exchange_codes.csv b/transform/sqlmesh_materia/seeds/commodity_exchange_codes.csv index e457cea..4ac822f 100644 --- a/transform/sqlmesh_materia/seeds/commodity_exchange_codes.csv +++ b/transform/sqlmesh_materia/seeds/commodity_exchange_codes.csv @@ -1,57 +1,57 @@ -Commodity Name Exchange Code Exchange -Crude Oil WTI CL CME -Crude Oil Brent BZ ICE -Gasoline RBOB RB CME -Heating Oil HO CME -Natural Gas NG CME -Ethanol CU CME -Cocoa CC ICE -Cotton CT ICE -Orange Juice FCOJ-A ICE -Coffee KC ICE -Lumber LBR ICE -Sugar SB ICE -European Gas TTF TTC ICE -European Union Emissions Allowance ECF ICE -Gold GC CME -Silver SI CME -Platinum PL CME -Copper HG CME -Palladium PA CME -Live Cattle LE CME -Feeder Cattle GF CME -Lean Hogs HE CME -Corn ZC CME -Soybean oil ZL CME -Soybean meal ZM CME -Oats ZO CME -Rough Rice ZR CME -Soybeans ZS CME -Wheat ZW CME -Canola RS ICE -Rebar RB SHFE -Hot-Rolled Coil HC SHFE -Nickel NI SHFE -Tin SN SHFE -Aluminum AL SHFE -Zinc ZN SHFE -Natural Rubber RU SHFE -Bitumen BU SHFE -Iron Ore I DCE -Palm 
Oil P DCE -Eggs JD DCE -Coking Coal JM DCE -Polyvinyl Chloride (PVC) V DCE -White Sugar SR ZCE -Cotton CF ZCE -Apple AP ZCE -PTA TA ZCE -Methanol MA ZCE -LME Aluminum AH LME -LME Copper CA LME -LME Lead PB LME -LME Nickel NI LME -LME Tin SN LME -LME Zinc ZS LME -Iron Ore TIO SGX -Rubber TSR SGX \ No newline at end of file +Commodity Name,Exchange Code,Exchange +Crude Oil WTI,CL,CME +Crude Oil Brent,BZ,ICE +Gasoline RBOB,RB,CME +Heating Oil,HO,CME +Natural Gas,NG,CME +Ethanol,CU,CME +Cocoa,CC,ICE +Cotton,CT,ICE +Orange Juice,FCOJ-A,ICE +Coffee,KC,ICE +Lumber,LBR,ICE +Sugar,SB,ICE +European Gas TTF,TTF,ICE +European Union Emissions Allowance,ECF,ICE +Gold,GC,CME +Silver,SI,CME +Platinum,PL,CME +Copper,HG,CME +Palladium,PA,CME +Live Cattle,LE,CME +Feeder Cattle,GF,CME +Lean Hogs,HE,CME +Corn,ZC,CME +Soybean Oil,ZL,CME +Soybean meal,ZM,CME +Oats,ZO,CME +Rough Rice,ZR,CME +Soybeans,ZS,CME +Wheat,ZW,CME +Canola,RS,ICE +Rebar,RB,SHFE +Hot-Rolled Coil,HC,SHFE +Nickel,NI,SHFE +Tin,SN,SHFE +Aluminum,AL,SHFE +Zinc,ZN,SHFE +Natural Rubber,RU,SHFE +Bitumen,BU,SHFE +Iron Ore,I,DCE +Palm Oil,P,DCE +Eggs,JD,DCE +Coking Coal,JM,DCE +Polyvinyl Chloride (PVC),V,DCE +White Sugar,SR,ZCE +Cotton,CF,ZCE +Apple,AP,ZCE +PTA,TA,ZCE +Methanol,MA,ZCE +LME Aluminum,AH,LME +LME Copper,CA,LME +LME Lead,PB,LME +LME Nickel,NI,LME +LME Tin,SN,LME +LME Zinc,ZS,LME +Iron Ore,TIO,SGX +Rubber,TSR,SGX \ No newline at end of file From 82b27e7c558e9be3e799a2398259a916c65a4e69 Mon Sep 17 00:00:00 2001 From: Simon Dmsn Date: Fri, 1 Aug 2025 14:41:48 +0000 Subject: [PATCH 4/6] Update 2 files - /transform/sqlmesh_materia/seeds/commodity_exchange_codes.csv - /transform/sqlmesh_materia/seeds/psd_codes_exchange_codes_merge.csv --- .../seeds/commodity_exchange_codes.csv | 2 +- .../seeds/psd_codes_exchange_codes_merge.csv | 57 +++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 transform/sqlmesh_materia/seeds/psd_codes_exchange_codes_merge.csv diff --git 
a/transform/sqlmesh_materia/seeds/commodity_exchange_codes.csv b/transform/sqlmesh_materia/seeds/commodity_exchange_codes.csv index 4ac822f..30fbb84 100644 --- a/transform/sqlmesh_materia/seeds/commodity_exchange_codes.csv +++ b/transform/sqlmesh_materia/seeds/commodity_exchange_codes.csv @@ -1,4 +1,4 @@ -Commodity Name,Exchange Code,Exchange +commodity_name,exchange_code,exchange Crude Oil WTI,CL,CME Crude Oil Brent,BZ,ICE Gasoline RBOB,RB,CME diff --git a/transform/sqlmesh_materia/seeds/psd_codes_exchange_codes_merge.csv b/transform/sqlmesh_materia/seeds/psd_codes_exchange_codes_merge.csv new file mode 100644 index 0000000..625c6cf --- /dev/null +++ b/transform/sqlmesh_materia/seeds/psd_codes_exchange_codes_merge.csv @@ -0,0 +1,57 @@ +commodity_name,exchange_code,exchange,commodity_code +Crude Oil WTI,CL,CME,NA +Crude Oil Brent,BZ,ICE,NA +Gasoline RBOB,RB,CME,NA +Heating Oil,HO,CME,NA +Natural Gas,NG,CME,NA +Ethanol,CU,CME,NA +Cocoa,CC,ICE,NA +Cotton,CT,ICE,2631000 +Orange Juice,FCOJ-A,ICE,0585100 +Coffee,KC,ICE,0711100 +Lumber,LBR,ICE,NA +Sugar,SB,ICE,0612000 +European Gas TTF,TTF,ICE,NA +European Union Emissions Allowance,ECF,ICE,NA +Gold,GC,CME,NA +Silver,SI,CME,NA +Platinum,PL,CME,NA +Copper,HG,CME,NA +Palladium,PA,CME,NA +Live Cattle,LE,CME,0011000 +Feeder Cattle,GF,CME,0011000 +Lean Hogs,HE,CME,NA +Corn,ZC,CME,0440000 +Soybean Oil,ZL,CME,4232000 +Soybean meal,ZM,CME,0813100 +Oats,ZO,CME,0452000 +Rough Rice,ZR,CME,0422110 +Soybeans,ZS,CME,NA +Wheat,ZW,CME,0410000 +Canola,RS,ICE,2226000 +Rebar,RB,SHFE,NA +Hot-Rolled Coil,HC,SHFE,NA +Nickel,NI,SHFE,NA +Tin,SN,SHFE,NA +Aluminum,AL,SHFE,NA +Zinc,ZN,SHFE,NA +Natural Rubber,RU,SHFE,NA +Bitumen,BU,SHFE,NA +Iron Ore,I,DCE,NA +Palm Oil,P,DCE,4243000 +Eggs,JD,DCE,NA +Coking Coal,JM,DCE,NA +Polyvinyl Chloride (PVC),V,DCE,NA +White Sugar,SR,ZCE,0612000 +Cotton,CF,ZCE,2631000 +Apple,AP,ZCE,0574000 +PTA,TA,ZCE,NA +Methanol,MA,ZCE,NA +LME Aluminum,AH,LME,NA +LME Copper,CA,LME,NA +LME Lead,PB,LME,NA +LME Nickel,NI,LME,NA 
+LME Tin,SN,LME,NA +LME Zinc,ZS,LME,NA +Iron Ore,TIO,SGX,NA +Rubber,TSR,SGX,NA \ No newline at end of file From 1c87488cc736009c7ec2bdf101136db3197dd812 Mon Sep 17 00:00:00 2001 From: Simon Dmsn Date: Fri, 1 Aug 2025 14:45:34 +0000 Subject: [PATCH 5/6] Update 4 files - /transform/sqlmesh_materia/models/staging/stg_psd_alldata.sql - /transform/sqlmesh_materia/models/staging/stg_psd_alldata_1_filter_silver_layer.sql - /transform/sqlmesh_materia/models/staging/stg_psd_alldata_2_filter_gold_layer.sql - /transform/sqlmesh_materia/models/staging/stg_psd_alldata_0.sql --- .../models/staging/{stg_psd_alldata.sql => stg_psd_alldata_0.sql} | 0 .../models/staging/stg_psd_alldata_1_filter_silver_layer.sql | 0 .../models/staging/stg_psd_alldata_2_filter_gold_layer.sql | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename transform/sqlmesh_materia/models/staging/{stg_psd_alldata.sql => stg_psd_alldata_0.sql} (100%) create mode 100644 transform/sqlmesh_materia/models/staging/stg_psd_alldata_1_filter_silver_layer.sql create mode 100644 transform/sqlmesh_materia/models/staging/stg_psd_alldata_2_filter_gold_layer.sql diff --git a/transform/sqlmesh_materia/models/staging/stg_psd_alldata.sql b/transform/sqlmesh_materia/models/staging/stg_psd_alldata_0.sql similarity index 100% rename from transform/sqlmesh_materia/models/staging/stg_psd_alldata.sql rename to transform/sqlmesh_materia/models/staging/stg_psd_alldata_0.sql diff --git a/transform/sqlmesh_materia/models/staging/stg_psd_alldata_1_filter_silver_layer.sql b/transform/sqlmesh_materia/models/staging/stg_psd_alldata_1_filter_silver_layer.sql new file mode 100644 index 0000000..e69de29 diff --git a/transform/sqlmesh_materia/models/staging/stg_psd_alldata_2_filter_gold_layer.sql b/transform/sqlmesh_materia/models/staging/stg_psd_alldata_2_filter_gold_layer.sql new file mode 100644 index 0000000..e69de29 From 5588be152b92242990c4c0790dd1772145923412 Mon Sep 17 00:00:00 2001 From: Simon Dmsn Date: Fri, 1 Aug 2025 14:52:55 
+0000
Subject: [PATCH 6/6] Update 3 files

- /notebooks/03_Extraction.ipynb
- /transform/sqlmesh_materia/models/staging/stg_psd_alldata_1_filter_silver_layer.sql
- /transform/sqlmesh_materia/models/staging/stg_psd_alldata_2_filter_gold_layer.sql
---
 notebooks/03_Extraction.ipynb | 5 +-
 .../stg_psd_alldata_1_filter_silver_layer.sql | 64 ++++++++++
 .../stg_psd_alldata_2_filter_gold_layer.sql | 110 ++++++++++++++++++
 3 files changed, 176 insertions(+), 3 deletions(-)

diff --git a/notebooks/03_Extraction.ipynb b/notebooks/03_Extraction.ipynb
index 9f2ec50..cebf785 100644
--- a/notebooks/03_Extraction.ipynb
+++ b/notebooks/03_Extraction.ipynb
@@ -24,7 +24,7 @@
  "\n",
  "data = \"../data/\"\n",
  "df = pd.read_csv(\"../data/psd_alldata.csv\", encoding=\"latin1\")\n",
- "\"\"\"\n",
+ "\n",
  "df.rename(columns={\n",
  " 'Commodity_Description': 'commodity',\n",
  " 'Country_Name': 'country',\n",
@@ -115,8 +115,7 @@
  " 'Total Distribution', 'Ending Stocks', 'Net Supply',\n",
  " 'Supply-Demand Balance', 'Stock-to-Use Ratio (%)']]\n",
  "combined_global.to_csv(\"global_summary_all.csv\", index=False)\n",
- "print(\"🌐 Combined global summary saved as 'global_summary_all.csv'\")\n",
- "\"\"\""
+ "print(\"🌐 Combined global summary saved as 'global_summary_all.csv'\")\n"
  ]
  },
  {
diff --git a/transform/sqlmesh_materia/models/staging/stg_psd_alldata_1_filter_silver_layer.sql b/transform/sqlmesh_materia/models/staging/stg_psd_alldata_1_filter_silver_layer.sql
index e69de29..0d77482 100644
--- a/transform/sqlmesh_materia/models/staging/stg_psd_alldata_1_filter_silver_layer.sql
+++ b/transform/sqlmesh_materia/models/staging/stg_psd_alldata_1_filter_silver_layer.sql
@@ -0,0 +1,64 @@
+/*
+ * Silver layer: pivots the long-format PSD rows of stg_psd_alldata_0 into one
+ * wide row per commodity / country / ingest_date, one column per attribute.
+ * This is equivalent to step 2 of the Python script 03_Extraction.
+ * Attributes with no matching source rows come out as 0 rather than NULL.
+ * NOTE(review): the model names used here have five dot-separated parts, while
+ * SQLMesh documents [catalog.]schema.table - confirm they resolve as intended.
+ * NOTE(review): rows are grouped by ingest_date only; if the upstream table holds
+ * several market years per ingest_date they are summed together - confirm grain.
+ */
+MODEL (
+  name transform.sqlmesh_materia.models.staging.stg_psd_alldata_1_filter_silver_layer,
+  kind INCREMENTAL_BY_TIME_RANGE (
+    time_column ingest_date
+  ),
+  start '2006-08-01',
+  cron '@daily'
+);
+
+SELECT
+  commodity_code,
+  commodity_name,
+  country_code,
+  country_name,
+  ingest_date,
+  -- Conditional aggregation reproduces the notebook pivot: one SUM per attribute
+  COALESCE(SUM(CASE WHEN attribute_name = 'Production' THEN value END), 0) AS Production,
+  COALESCE(SUM(CASE WHEN attribute_name = 'Imports' THEN value END), 0) AS Imports,
+  COALESCE(SUM(CASE WHEN attribute_name = 'Exports' THEN value END), 0) AS Exports,
+  COALESCE(SUM(CASE WHEN attribute_name = 'Total Distribution' THEN value END), 0) AS Total_Distribution,
+  COALESCE(SUM(CASE WHEN attribute_name = 'Ending Stocks' THEN value END), 0) AS Ending_Stocks,
+  COALESCE(SUM(CASE WHEN attribute_name = 'Beginning Stocks' THEN value END), 0) AS Beginning_Stocks,
+  COALESCE(SUM(CASE WHEN attribute_name = 'Total Supply' THEN value END), 0) AS Total_Supply,
+  COALESCE(SUM(CASE WHEN attribute_name = 'Domestic Consumption' THEN value END), 0) AS Domestic_Consumption,
+  COALESCE(SUM(CASE WHEN attribute_name = 'Domestic Demand' THEN value END), 0) AS Domestic_Demand,
+  COALESCE(SUM(CASE WHEN attribute_name = 'Food Use' THEN value END), 0) AS Food_Use,
+  COALESCE(SUM(CASE WHEN attribute_name = 'Industrial Use' THEN value END), 0) AS Industrial_Use,
+  COALESCE(SUM(CASE WHEN attribute_name = 'Seed Use' THEN value END), 0) AS Seed_Use,
+  COALESCE(SUM(CASE WHEN attribute_name = 'Waste' THEN value END), 0) AS Waste,
+  COALESCE(SUM(CASE WHEN attribute_name = 'Feed Use' THEN value END), 0) AS Feed_Use
+FROM transform.sqlmesh_materia.models.staging.stg_psd_alldata_0
+WHERE
+  -- Restrict the upstream scan to the interval being loaded, as SQLMesh expects
+  -- for INCREMENTAL_BY_TIME_RANGE; without it every run re-reads all history.
+  -- NOTE(review): assumes ingest_date compares cleanly to 'YYYY-MM-DD' - confirm.
+  ingest_date BETWEEN @start_ds AND @end_ds
+  -- Keep only the attributes that are pivoted above
+  AND attribute_name IN (
+    'Production', 'Imports', 'Exports',
+    'Total Distribution', 'Ending Stocks', 'Beginning Stocks',
+    'Total Supply', 'Domestic Consumption', 'Domestic Demand',
+    'Food Use', 'Industrial Use', 'Seed Use',
+    'Waste', 'Feed Use'
+  )
+GROUP BY
+  commodity_code,
+  commodity_name,
+  country_code,
+  country_name,
+  ingest_date
+ORDER BY
+  commodity_name,
+  country_name,
+  ingest_date;
\ No newline at end of file
diff --git a/transform/sqlmesh_materia/models/staging/stg_psd_alldata_2_filter_gold_layer.sql b/transform/sqlmesh_materia/models/staging/stg_psd_alldata_2_filter_gold_layer.sql
index e69de29..ccf5ca7 100644
--- a/transform/sqlmesh_materia/models/staging/stg_psd_alldata_2_filter_gold_layer.sql
+++ b/transform/sqlmesh_materia/models/staging/stg_psd_alldata_2_filter_gold_layer.sql
@@ -0,0 +1,110 @@
+/*
+ * Gold layer: Calculates derived metrics like Net Supply, Trade Balance,
+ * and Stock-to-Use Ratio based on the pivoted silver layer data.
+ * This also includes the global aggregates, mimicking steps 3 and 4
+ * of the Python script 03_Extraction.
+ * Country rows and per-commodity global totals are stacked first; the derived
+ * metrics are then computed once so both levels share the same formulas.
+ * NOTE(review): the silver reads are not limited to the interval being loaded; LAG
+ * needs the row before it, so a plain BETWEEN filter would null the first value.
+ * NOTE(review): Production_YoY_pct compares against the previous ingest_date
+ * row of the same series, which is only year-over-year if rows are yearly.
+ */
+MODEL (
+  name transform.sqlmesh_materia.models.staging.stg_psd_alldata_2_filter_gold_layer,
+  kind INCREMENTAL_BY_TIME_RANGE (
+    time_column ingest_date
+  ),
+  start '2006-08-01',
+  cron '@daily'
+);
+
+-- Country-level rows from the silver layer, with surrogate key and previous production
+WITH country_level AS (
+  SELECT
+    @GENERATE_SURROGATE_KEY(commodity_code, country_code, ingest_date) AS hkey,
+    commodity_code,
+    commodity_name,
+    country_code,
+    country_name,
+    ingest_date,
+    Production,
+    Imports,
+    Exports,
+    Total_Distribution,
+    Ending_Stocks,
+    LAG(Production) OVER (PARTITION BY commodity_code, country_code ORDER BY ingest_date) AS Previous_Production
+  FROM transform.sqlmesh_materia.models.staging.stg_psd_alldata_1_filter_silver_layer
+),
+-- Global totals: silver measures summed over all countries per commodity and date
+global_totals AS (
+  SELECT
+    commodity_code,
+    commodity_name,
+    NULL::TEXT AS country_code, -- the aggregate row has no country code
+    'Global' AS country_name,
+    ingest_date,
+    SUM(Production) AS Production,
+    SUM(Imports) AS Imports,
+    SUM(Exports) AS Exports,
+    SUM(Total_Distribution) AS Total_Distribution,
+    SUM(Ending_Stocks) AS Ending_Stocks
+  FROM transform.sqlmesh_materia.models.staging.stg_psd_alldata_1_filter_silver_layer
+  GROUP BY
+    commodity_code,
+    commodity_name,
+    ingest_date
+),
+-- Global rows shaped like country_level; keyed on country_name because the code is NULL
+global_level AS (
+  SELECT
+    @GENERATE_SURROGATE_KEY(commodity_code, country_name, ingest_date) AS hkey,
+    commodity_code,
+    commodity_name,
+    country_code,
+    country_name,
+    ingest_date,
+    Production,
+    Imports,
+    Exports,
+    Total_Distribution,
+    Ending_Stocks,
+    LAG(Production) OVER (PARTITION BY commodity_code ORDER BY ingest_date) AS Previous_Production
+  FROM global_totals
+),
+-- Both levels stacked; columns are named explicitly so the UNION cannot misalign
+all_levels AS (
+  SELECT
+    hkey, commodity_code, commodity_name, country_code, country_name, ingest_date,
+    Production, Imports, Exports, Total_Distribution, Ending_Stocks, Previous_Production
+  FROM country_level
+  UNION ALL
+  SELECT
+    hkey, commodity_code, commodity_name, country_code, country_name, ingest_date,
+    Production, Imports, Exports, Total_Distribution, Ending_Stocks, Previous_Production
+  FROM global_level
+)
+-- Derived metrics, computed once for country and global rows alike
+SELECT
+  hkey,
+  commodity_code,
+  commodity_name,
+  country_code,
+  country_name,
+  ingest_date,
+  Production,
+  Imports,
+  Exports,
+  Total_Distribution,
+  Ending_Stocks,
+  (Production + Imports - Exports) AS Net_Supply,
+  (Exports - Imports) AS Trade_Balance,
+  (Production + Imports - Exports) - Total_Distribution AS Supply_Demand_Balance,
+  -- NULLIF turns a zero denominator into NULL instead of a division error
+  (Ending_Stocks / NULLIF(Total_Distribution, 0)) * 100 AS Stock_to_Use_Ratio_pct,
+  (Production - Previous_Production) / NULLIF(Previous_Production, 0) * 100 AS Production_YoY_pct
+FROM all_levels
+ORDER BY
+  commodity_name,
+  country_name,
+  ingest_date;
\ No newline at end of file