-- Raw Eurostat Urban Audit city population (dataset: urb_cpop1). -- Source: data/landing/eurostat/{year}/{month}/urb_cpop1.json.gz -- Format: Eurostat JSON Statistics API (dimensions + flat value array). -- -- The Eurostat JSON format encodes dimensions separately from values: -- dimension.cities.category.index → maps city code to flat array position -- dimension.time.category.index → maps year to flat array position -- values → flat object {position_str: value} -- -- This model stores one row per (city_code, year) by computing positions. -- Reference: https://wikis.ec.europa.eu/display/EUROSTATHELP/API+Statistics MODEL ( name padelnomics.raw_eurostat_population, kind FULL, cron '@daily', grain (city_code, ref_year) ); WITH raw AS ( SELECT raw_json, filename FROM read_json( @LANDING_DIR || '/eurostat/*/*/urb_cpop1.json.gz', format = 'auto', filename = true, columns = { 'raw_json': 'JSON' } ) ), -- Unnest city codes with their ordinal positions cities AS ( SELECT city_code, (city_pos)::INTEGER AS city_pos, filename, raw_json, (json_extract(raw_json, '$.size[1]'))::INTEGER AS n_times FROM raw, LATERAL ( SELECT key AS city_code, value::INTEGER AS city_pos FROM json_each(json_extract(raw_json, '$.dimension.cities.category.index')) ) ), -- Unnest time (year) values with positions times AS ( SELECT key AS ref_year, value::INTEGER AS time_pos FROM (SELECT raw_json FROM raw LIMIT 1), LATERAL ( SELECT key, value FROM json_each(json_extract(raw_json, '$.dimension.time.category.index')) ) ) SELECT c.city_code, t.ref_year, TRY_CAST( json_extract(c.raw_json, '$.' || (c.city_pos * c.n_times + t.time_pos)::TEXT) AS DOUBLE ) AS population, c.filename AS source_file, CURRENT_DATE AS extracted_date FROM cities c CROSS JOIN times t