diff --git a/transform/sqlmesh_materia/models/raw/psd_attribute_codes.sql b/transform/sqlmesh_materia/models/raw/psd_attribute_codes.sql
new file mode 100644
index 0000000..b187f53
--- /dev/null
+++ b/transform/sqlmesh_materia/models/raw/psd_attribute_codes.sql
@@ -0,0 +1,10 @@
+/* Seed model: loads the attribute_id -> attribute_name lookup from a ';'-delimited CSV. */
+MODEL (
+  name raw.psd_attribute_codes,
+  kind SEED (
+    path '$root/seeds/psd_attribute_codes.csv',
+    csv_settings (
+      delimiter = ';'
+    )
+  )
+);
diff --git a/transform/sqlmesh_materia/models/raw/psd_commodity_codes.sql b/transform/sqlmesh_materia/models/raw/psd_commodity_codes.sql
new file mode 100644
index 0000000..a50ab04
--- /dev/null
+++ b/transform/sqlmesh_materia/models/raw/psd_commodity_codes.sql
@@ -0,0 +1,11 @@
+/* Seed model: loads the commodity_code -> commodity_name lookup from a ';'-delimited CSV. */
+MODEL (
+  name raw.psd_commodity_codes,
+  kind SEED (
+    path '$root/seeds/psd_commodity_codes.csv',
+    csv_settings (
+      delimiter = ';'
+    )
+  )
+);
+
diff --git a/transform/sqlmesh_materia/models/raw/psd_data.sql b/transform/sqlmesh_materia/models/raw/psd_data.sql
new file mode 100644
index 0000000..fed4822
--- /dev/null
+++ b/transform/sqlmesh_materia/models/raw/psd_data.sql
@@ -0,0 +1,28 @@
+/* Raw model: reads every CSV inside the zip archives matched by the glob below, with explicit column names and types; filename=true adds the source path as a column. */
+MODEL (
+  name raw.psd_alldata,
+  kind FULL,
+  start '2006-08-01',
+  cron '@daily'
+);
+
+SELECT
+  *
+  -- ingest_date is not computed here; staging.psd_alldata derives it from the filename column.
+  -- NOTE(review): the glob below is an absolute, machine-specific path - confirm this is intended outside the author's machine.
+FROM read_csv('zip:///home/deeman/projects/materia/extract/psdonline/src/psdonline/data/**/*.zip/*.csv', header=true, union_by_name=true, filename=true, names = ['commodity_code', 'commodity_description', 'country_code', 'country_name', 'market_year', 'calendar_year', 'month', 'attribute_id', 'attribute_description', 'unit_id', 'unit_description', 'value'], types=
+{
+  'commodity_code': 'VARCHAR',
+  'commodity_description': 'VARCHAR',
+  'country_code': 'VARCHAR',
+  'country_name': 'VARCHAR',
+  'market_year': 'BIGINT',
+  'calendar_year': 'BIGINT',
+  'month': 'VARCHAR',
+  'attribute_id': 'VARCHAR',
+  'attribute_description': 'VARCHAR',
+  'unit_id': 'VARCHAR',
+  'unit_description': 'VARCHAR',
+  'value': 'DOUBLE'
+}
+)
diff --git a/transform/sqlmesh_materia/models/raw/psd_unit_of_measure_codes.sql b/transform/sqlmesh_materia/models/raw/psd_unit_of_measure_codes.sql
new file mode 100644
index 0000000..5de5c1c
--- /dev/null
+++ b/transform/sqlmesh_materia/models/raw/psd_unit_of_measure_codes.sql
@@ -0,0 +1,11 @@
+/* Seed model: loads the unit_id -> unit_name lookup from a ';'-delimited CSV. */
+MODEL (
+  name raw.psd_unit_of_measure_codes,
+  kind SEED (
+    path '$root/seeds/psd_unit_of_measure_codes.csv',
+    csv_settings (
+      delimiter = ';'
+    )
+  )
+);
+
diff --git a/transform/sqlmesh_materia/models/staging/stg_psd_alldata.sql b/transform/sqlmesh_materia/models/staging/stg_psd_alldata.sql
new file mode 100644
index 0000000..1f5a3d3
--- /dev/null
+++ b/transform/sqlmesh_materia/models/staging/stg_psd_alldata.sql
@@ -0,0 +1,70 @@
+/* Staging model: joins raw.psd_alldata to the three code seeds, preferring the seed names over the raw descriptions, and derives ingest_date from the source file path. */
+MODEL (
+  name staging.psd_alldata,
+  kind INCREMENTAL_BY_TIME_RANGE (
+    time_column ingest_date
+  ),
+  start '2006-08-01',
+  cron '@daily'
+);
+
+SELECT
+  @GENERATE_SURROGATE_KEY(commodity_code, country_code, market_year, month, attribute_id) AS hkey,
+  commodity_code,
+  COALESCE(commodity_name, commodity_description) AS commodity_name,
+  country_code,
+  country_name,
+  market_year,
+  calendar_year,
+  month,
+  attribute_id,
+  COALESCE(attribute_name, attribute_description) AS attribute_name,
+  unit_id,
+  COALESCE(unit_name, unit_description) AS unit_name,
+  value,
+  filename,
+  -- Path segments [-4] and [-3] of filename supply the year and month parts; the day is fixed to 01.
+  format('{}-{}-01',split(filename, '/')[-4],split(filename, '/')[-3])::date AS ingest_date
+FROM raw.psd_alldata
+LEFT JOIN raw.psd_commodity_codes USING (commodity_code)
+LEFT JOIN raw.psd_unit_of_measure_codes USING (unit_id)
+LEFT JOIN raw.psd_attribute_codes USING (attribute_id)
+-- Restrict each run to the interval being processed; the derivation is repeated because ingest_date is only an alias of this SELECT.
+WHERE format('{}-{}-01',split(filename, '/')[-4],split(filename, '/')[-3])::date BETWEEN @start_ds AND @end_ds
+
+/*
+Missing commodities
+│ 0579311 │ NULL │
+│ 0411000 │ NULL │
+│ 0545900 │ NULL │
+│ 0577903 │ NULL │
+│ 0572920 │ NULL │
+│ 0114300 │ NULL │
+│ 1211000 │ NULL │
+│ 0585200 │ NULL │
+│ 0565905 │ NULL │
+│ 0589901 │ NULL │
+│ 0579401 │ NULL │
+│ 0585120 │ NULL │
+│ 0585700 │ NULL │
+│ 0566100 │ NULL │
+│ 1222000 │ NULL │
+│ 0589903 │ NULL │
+│ 0585300 │ NULL │
+│ 0579301 │ NULL │
+│ 0586111 │ NULL │
+│ 0579701 │ NULL │
+│ 0575200 │ NULL │
+│ 0579901 │ NULL │
+│ 0579500 │ NULL │
+│ 0565901 │ NULL │
+│ 0577500 │ NULL │
+│ 0565903 │ NULL │
+│ 0577905 │ NULL │
+├
+--------------------
+Missing Attributes
+│ 221 │ NULL │
+│ 219 │ NULL │
+
+*/
diff --git a/transform/sqlmesh_materia/seeds/psd_attribute_codes.csv b/transform/sqlmesh_materia/seeds/psd_attribute_codes.csv
new file mode 100644
index 0000000..96ba4b7
--- /dev/null
+++ b/transform/sqlmesh_materia/seeds/psd_attribute_codes.csv
@@ -0,0 +1,133 @@
+attribute_id;attribute_name
+1;Area Planted
+4;Area Harvested
+5;Catch For Reduction
+6;Cows In Milk
+7;Crush
+10;Total Grape Crush
+13;Deliv. To Processors
+16;Total Trees
+17;Bearing Trees
+19;Non-Bearing Trees
+20;Beginning Stocks
+22;Sow Beginning Stocks
+23;Dairy Cows Beg. Stocks
+24;Begin Stock (Ctrl App)
+25;Beef Cows Beg. Stocks
+26;Begin Stock (Other)
+28;Production
+29;Arabica Production
+30;Beet Sugar Production
+31;Commercial Production
+32;Cows Milk Production
+33;Farm Sales Weight Prod
+34;Filter Production
+40;Prod. from Wine Grapes
+43;Cane Sugar Production
+47;Non-Comm. Production
+48;Non-Filter Production
+49;Other Milk Production
+51;Prod. from Tabl Grapes
+53;Robusta Production
+54;Rough Production
+56;Other Production
+57;Imports
+58;Bean Imports
+62;Intra-EU Imports
+63;MY Imp. from U.S.
+64;Raw Imports
+65;U.S. Leaf Imports
+70;MY Imp. from EU
+71;Other Imports
+74;Refined Imp.(Raw Val)
+75;Roast & Ground Imports
+78;CY Imports
+81;TY Imports
+82;Soluble Imports
+83;CY Imp. from U.S.
+84;TY Imp. from U.S.
+86;Total Supply
+87;CY Exp. to U.S.
+88;Exports
+89;Raw Exports
+90;Bean Exports
+94;Intra EU Exports
+95;Intra-EU Exports
+97;MY Exp. 
to EU +99;Refined Exp.(Raw Val) +104;Other Exports +107;Roast & Ground Exports +110;CY Exports +113;TY Exports +114;Soluble Exports +116;Slaughter (Reference) +117;Total Slaughter +118;Cow Slaughter +120;Inventory (Reference) +121;Sow Slaughter +122;Calf Slaughter +124;Other Slaughter +125;Domestic Consumption +126;Total Disappearance +128;Dom. Leaf Consumption +129;Dom.Consump(Cntrl App) +130;Feed Dom. Consumption +131;Fluid Use Dom. Consum. +132;For Processing +133;Fresh Dom. Consumption +135;Fresh Dom. Consumption +138;Human Consumption +139;Human Dom. Consumption +140;Industrial Dom. Cons. +141;Rst,Ground Dom. Consum +142;Domestic Use +143;Utilization for Sugar +145;Dom.Consump(Other) +147;Factory Use Consum. +149;Food Use Dom. Cons. +150;Loss +151;Other Disappearance +152;Other Use, Losses +154;Soluble Dom. Cons. +155;U.S. Leaf Dom. Cons. +157;Utilizatn for Alcohol +158;Feed Use Dom. Consum. +161;Feed Waste Dom. Cons. +167;Other Foreign Cons. +169;Withdrawal From Market +172;Loss and Residual +173;Total Disappearance +174;Total Use +175;Total Utilization +176;Ending Stocks +177;End Stocks (Cntrl App) +178;Total Distribution +179;End Stocks (Other) +181;Extr. Rate, 999.9999 +182;Milling Rate (.9999) +183;Seed to Lint Ratio +184;YieldYield (Rough) +192;FSI Consumption +194;SME +195;Stocks-to-UseStock to Use % +196;Exportable Production +198;Balance +199;Inventory Balance +200;Inventory Change +201;Import Change +202;Export Change +203;Consumption Change +204;Production Change +205;Sow Change +206;Cow Change +207;Production to Cows +208;Production to Sows +209;Slaughter to Inventory +210;Weights +211;Population +212;Per Capita Consumption +213;Slaughter to Total Supply +214;Imports Percent Consumption +215;Exports Percent Production +220;Annual % Change Per Cap. Cons. 
+223;Stocks to Use (Months) diff --git a/transform/sqlmesh_materia/seeds/psd_commodity_codes.csv b/transform/sqlmesh_materia/seeds/psd_commodity_codes.csv new file mode 100644 index 0000000..d18b41a --- /dev/null +++ b/transform/sqlmesh_materia/seeds/psd_commodity_codes.csv @@ -0,0 +1,65 @@ +commodity_code;commodity_name +0577400;Almonds, Shelled Basis +0011000;Animal Numbers, Cattle +0013000;Animal Numbers, Swine +0574000;Apples, Fresh +0430000;Barley +0579305;Cherries (Sweet&Sour), Fresh +0711100;Coffee, Green +0440000;Corn +2631000;Cotton +0000000;Cotton (Metric Tons) +0230000;Dairy, Butter +0240000;Dairy, Cheese +0224400;Dairy, Dry Whole Milk Powder +0223000;Dairy, Milk, Fluid +0224200;Dairy, Milk, Nonfat Dry +0572220;Grapefruit, Fresh +0575100;Grapes, Fresh Table +0572120;Lemons/Limes, Fresh +0813700;Meal, Copra +0813300;Meal, Cottonseed +0814200;Meal, Fish +0813800;Meal, Palm Kernel +0813200;Meal, Peanut +0813600;Meal, Rapeseed +0813100;Meal, Soybean +0813101;Meal, Soybean (Local) +0813500;Meal, Sunflowerseed +0111000;Meat, Beef and Veal +0115000;Meat, Chicken +0113000;Meat, Swine +0459100;Millet +0459900;Mixed Grain +0452000;Oats +4242000;Oil, Coconut +4233000;Oil, Cottonseed +4235000;Oil, Olive +4243000;Oil, Palm +4244000;Oil, Palm Kernel +4234000;Oil, Peanut +4239100;Oil, Rapeseed +4232000;Oil, Soybean +4232001;Oil, Soybean (Local) +4236000;Oil, Sunflowerseed +2231000;Oilseed, Copra +2223000;Oilseed, Cottonseed +2232000;Oilseed, Palm Kernel +2221000;Oilseed, Peanut +2226000;Oilseed, Rapeseed +2222000;Oilseed, Soybean +2222001;Oilseed, Soybean (Local) +2224000;Oilseed, Sunflowerseed +0585100;Orange Juice +0571120;Oranges, Fresh +0579309;Peaches & Nectarines, Fresh +0579220;Pears, Fresh +0577907;Pistachios, Inshell Basis +0114200;Poultry, Meat, Broiler +0422110;Rice, Milled +0451000;Rye +0459200;Sorghum +0612000;Sugar, Centrifugal +0571220;Tangerines/Mandarins, Fresh +0577901;Walnuts, Inshell Basis +0410000;Wheat diff --git 
a/transform/sqlmesh_materia/seeds/psd_unit_of_measure_codes.csv b/transform/sqlmesh_materia/seeds/psd_unit_of_measure_codes.csv new file mode 100644 index 0000000..bf17fc6 --- /dev/null +++ b/transform/sqlmesh_materia/seeds/psd_unit_of_measure_codes.csv @@ -0,0 +1,43 @@ +unit_id;unit_name +1;(1000 BUSHES) +2;(1000 60 KG BAGS) +3;(1000 COLONIES) +4;(1000 HA) +5;(1000 HEAD) +6;(1000 HL) +7;(1000 MT CWE) +8;(1000 MT) +9;(1000 PCS) +10;(1000 TREES) +11;(Dec. Fraction) +12;(HA) +13;(HECTARES) +14;(KG) +15;(MIL HEAD) +16;(MIL PCS) +17;(MILLION TREES) +18;(MT RAW EQ) +19;(MT RAW EW) +20;(MT RE) +21;(MT) +22;(MT, Net Weight) +23;(PERCENT) +24;(RATIO) +25;(1000 CUBIC METERS) +26;(MT/HA) +27;1000 480 lb. Bales +28;(Bales/HA) +29;(KG/HA) +30;ACRES +31;BUSHELS +32;HUNDREDWEIGHT +33;MILLING RATE +34;BUSHELS/TON +35;IMPORT MILLING RATE +36;Bushels +37;SHORT TONS +38;MILLION LBS +39;BILLION LBS +40;(HEAD) +41;(PEOPLE) +42;(MONTHS)