fix seeds; update models

This commit is contained in:
Deeman
2025-07-27 22:49:37 +02:00
parent c0d8f60d1c
commit 641f794d61
8 changed files with 363 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
/* Seed model: loads the semicolon-delimited psd_attribute_codes.csv seed file. */
MODEL (
  name raw.psd_attribute_codes,
  kind SEED (
    path '$root/seeds/psd_attribute_codes.csv',
    csv_settings (
      delimiter = ';'
    )
  )
);

View File

@@ -0,0 +1,10 @@
/* Seed model: loads the semicolon-delimited psd_commodity_codes.csv seed file. */
MODEL (
  name raw.psd_commodity_codes,
  kind SEED (
    path '$root/seeds/psd_commodity_codes.csv',
    csv_settings (
      delimiter = ';'
    )
  )
);

View File

@@ -0,0 +1,26 @@
/* Raw PSD rows read from every CSV inside the zip archives under the psdonline data directory. */
MODEL (
  name raw.psd_alldata,
  kind FULL,
  start '2006-08-01',
  cron '@daily'
);

-- Column names and types are pinned explicitly instead of being sniffed per file.
-- filename = true keeps the source path of each row; ingest_date is not computed
-- here, staging.psd_alldata derives it from that `filename` column.
-- NOTE(review): the glob is an absolute path on one developer machine; consider
-- moving it to a project variable so the model runs elsewhere.
SELECT
  *
FROM read_csv(
  'zip:///home/deeman/projects/materia/extract/psdonline/src/psdonline/data/**/*.zip/*.csv',
  header = true,
  union_by_name = true,
  filename = true,
  names = [
    'commodity_code',
    'commodity_description',
    'country_code',
    'country_name',
    'market_year',
    'calendar_year',
    'month',
    'attribute_id',
    'attribute_description',
    'unit_id',
    'unit_description',
    'value'
  ],
  types = {
    'commodity_code': 'VARCHAR',
    'commodity_description': 'VARCHAR',
    'country_code': 'VARCHAR',
    'country_name': 'VARCHAR',
    'market_year': 'BIGINT',
    'calendar_year': 'BIGINT',
    'month': 'VARCHAR',
    'attribute_id': 'VARCHAR',
    'attribute_description': 'VARCHAR',
    'unit_id': 'VARCHAR',
    'unit_description': 'VARCHAR',
    'value': 'DOUBLE'
  }
)

View File

@@ -0,0 +1,10 @@
/* Seed model: loads the semicolon-delimited psd_unit_of_measure_codes.csv seed file. */
MODEL (
  name raw.psd_unit_of_measure_codes,
  kind SEED (
    path '$root/seeds/psd_unit_of_measure_codes.csv',
    csv_settings (
      delimiter = ';'
    )
  )
);

View File

@@ -0,0 +1,67 @@
/* PSD rows from raw.psd_alldata enriched with code-list names, loaded incrementally by ingest_date. */
MODEL (
  name staging.psd_alldata,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column ingest_date
  ),
  start '2006-08-01',
  cron '@daily'
);

-- ingest_date is derived once here so the same value feeds both the output
-- column and the time-range filter at the bottom of the query.
-- It is built as <segment -4>-<segment -3>-01 from the '/'-separated source
-- path, i.e. the two directory levels above the zip archive
-- (presumably year and month; confirm against the extract layout).
WITH source AS (
  SELECT
    commodity_code,
    commodity_description,
    country_code,
    country_name,
    market_year,
    calendar_year,
    month,
    attribute_id,
    attribute_description,
    unit_id,
    unit_description,
    value,
    filename,
    format('{}-{}-01', split(filename, '/')[-4], split(filename, '/')[-3])::DATE AS ingest_date
  FROM raw.psd_alldata
)
SELECT
  -- NOTE(review): the key does not include ingest_date, so the same hkey can
  -- appear once per ingested file; confirm that is intended.
  @GENERATE_SURROGATE_KEY(commodity_code, country_code, market_year, month, attribute_id) AS hkey,
  commodity_code,
  -- The lookups are LEFT JOINs, so a code missing from a seed yields NULL;
  -- fall back to the description carried in the raw file in that case.
  COALESCE(commodity_name, commodity_description) AS commodity_name,
  country_code,
  country_name,
  market_year,
  calendar_year,
  month,
  attribute_id,
  COALESCE(attribute_name, attribute_description) AS attribute_name,
  unit_id,
  COALESCE(unit_name, unit_description) AS unit_name,
  value,
  filename,
  ingest_date
FROM source
LEFT JOIN raw.psd_commodity_codes USING (commodity_code)
LEFT JOIN raw.psd_unit_of_measure_codes USING (unit_id)
LEFT JOIN raw.psd_attribute_codes USING (attribute_id)
-- Restrict each run to the interval being processed; without this filter every
-- incremental run would read and join the entire raw table.
WHERE
  ingest_date BETWEEN @start_ds AND @end_ds
/*
Missing commodities
│ 0579311 │ NULL │
│ 0411000 │ NULL │
│ 0545900 │ NULL │
│ 0577903 │ NULL │
│ 0572920 │ NULL │
│ 0114300 │ NULL │
│ 1211000 │ NULL │
│ 0585200 │ NULL │
│ 0565905 │ NULL │
│ 0589901 │ NULL │
│ 0579401 │ NULL │
│ 0585120 │ NULL │
│ 0585700 │ NULL │
│ 0566100 │ NULL │
│ 1222000 │ NULL │
│ 0589903 │ NULL │
│ 0585300 │ NULL │
│ 0579301 │ NULL │
│ 0586111 │ NULL │
│ 0579701 │ NULL │
│ 0575200 │ NULL │
│ 0579901 │ NULL │
│ 0579500 │ NULL │
│ 0565901 │ NULL │
│ 0577500 │ NULL │
│ 0565903 │ NULL │
│ 0577905 │ NULL │
--------------------
Missing Attributes
│ 221 │ NULL │
│ 219 │ NULL │
*/