This commit is contained in:
Deeman
2025-08-25 20:50:25 +02:00
parent a2ffc96aa3
commit f5f2dbc7a5
7 changed files with 318 additions and 486 deletions

View File

@@ -1,26 +1,25 @@
-- Model raw.psd_alldata: kind FULL, scheduled '@daily', selecting every column
-- from the CSV files found inside the zip archives under the psdonline data
-- directory.
-- NOTE(review): this text sits under a "@@ -1,26 +1,25 @@" hunk header and the
-- +/- markers are not present, so lines from before and after the commit appear
-- interleaved (two cron lines, two FROM clauses). It is not one runnable
-- statement as shown; confirm the intended version against the repository.
MODEL (
name raw.psd_alldata,
kind FULL,
grain ( commodity_code, country_code, market_year, calendar_year, month, attribute_id,unit_id ),
start '2006-08-01',
-- The next two cron lines differ only by the trailing comma; presumably the
-- first is the replaced line and the second (with comma, needed because
-- `columns` follows) is its replacement -- TODO confirm.
cron '@daily'
cron '@daily',
-- Declared output schema: the twelve names passed to read_csv below plus
-- `filename`, all typed varchar. `filename` is presumably the column added by
-- the filename=true option -- verify against the reader's documentation.
columns (
commodity_code varchar,
commodity_description varchar,
country_code varchar,
country_name varchar,
market_year varchar,
calendar_year varchar,
month varchar,
attribute_id varchar,
attribute_description varchar,
unit_id varchar,
unit_description varchar,
value varchar,
filename varchar
)
);
SELECT
*
-- Disabled expression kept for reference: it would build an ingest_date of the
-- form '<a>-<b>-01' from the 4th- and 3rd-from-last '/'-separated segments of
-- filename and cast it to date.
--format('{}-{}-01',split(filename, '/')[-4],split(filename, '/')[-3])::date as ingest_date
-- First FROM clause: reads with an explicit per-column type map in which
-- market_year and calendar_year are BIGINT and value is DOUBLE. Those types
-- disagree with the all-varchar `columns` list in the MODEL header, so this is
-- presumably the pre-commit version of the read -- TODO confirm.
FROM read_csv('zip:///home/deeman/projects/materia/extract/psdonline/src/psdonline/data/**/*.zip/*.csv', header=true, union_by_name=true, filename=true, names = ['commodity_code', 'commodity_description', 'country_code', 'country_name', 'market_year', 'calendar_year', 'month', 'attribute_id', 'attribute_description', 'unit_id', 'unit_description', 'value'], types=
{
'commodity_code' : 'VARCHAR',
'commodity_description' :'VARCHAR',
'country_code' : 'VARCHAR',
'country_name' : 'VARCHAR',
'market_year' : 'BIGINT' ,
'calendar_year' : 'BIGINT' ,
'month' : 'VARCHAR',
'attribute_id' : 'VARCHAR',
'attribute_description' :'VARCHAR',
'unit_id' : 'VARCHAR',
'unit_description' : 'VARCHAR',
'value' : 'DOUBLE'
}
)
-- Second FROM clause: same path, header/union_by_name/filename options and
-- column names as above, but with all_varchar=true instead of a type map,
-- which lines up with the varchar-only `columns` declaration.
-- NOTE(review): the source path is an absolute path under one user's home
-- directory; verify it is valid in the environment that runs this model.
FROM read_csv('zip:///home/deeman/projects/materia/extract/psdonline/src/psdonline/data/**/*.zip/*.csv', header=true, union_by_name=true, filename=true, names = ['commodity_code', 'commodity_description', 'country_code', 'country_name', 'market_year', 'calendar_year', 'month', 'attribute_id', 'attribute_description', 'unit_id', 'unit_description', 'value'], all_varchar=true)