git mv all tracked files from the nested padelnomics/ workspace directory to the git repo root. Merged .gitignore files. No code changes — pure path rename. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
65 lines
1.9 KiB
SQL
65 lines
1.9 KiB
SQL
-- Raw Eurostat Urban Audit city population (dataset: urb_cpop1).
|
|
-- Source: data/landing/eurostat/{year}/{month}/urb_cpop1.json.gz
|
|
-- Format: Eurostat JSON Statistics API (dimensions + flat value array).
|
|
--
|
|
-- The Eurostat JSON format encodes dimensions separately from values:
|
|
-- dimension.cities.category.index → maps city code to flat array position
|
|
-- dimension.time.category.index → maps year to flat array position
|
|
-- values → flat object {position_str: value}
|
|
--
|
|
-- This model stores one row per (city_code, year) by computing positions.
|
|
-- Reference: https://wikis.ec.europa.eu/display/EUROSTATHELP/API+Statistics
|
|
|
|
MODEL (
|
|
name padelnomics.raw_eurostat_population,
|
|
kind FULL,
|
|
cron '@daily',
|
|
grain (city_code, ref_year)
|
|
);
|
|
|
|
WITH raw AS (
|
|
SELECT
|
|
raw_json,
|
|
filename
|
|
FROM read_json(
|
|
@LANDING_DIR || '/eurostat/*/*/urb_cpop1.json.gz',
|
|
format = 'auto',
|
|
filename = true,
|
|
columns = { 'raw_json': 'JSON' }
|
|
)
|
|
),
|
|
-- Unnest city codes with their ordinal positions
|
|
cities AS (
|
|
SELECT
|
|
city_code,
|
|
(city_pos)::INTEGER AS city_pos,
|
|
filename,
|
|
raw_json,
|
|
(json_extract(raw_json, '$.size[1]'))::INTEGER AS n_times
|
|
FROM raw,
|
|
LATERAL (
|
|
SELECT key AS city_code, value::INTEGER AS city_pos
|
|
FROM json_each(json_extract(raw_json, '$.dimension.cities.category.index'))
|
|
)
|
|
),
|
|
-- Unnest time (year) values with positions
|
|
times AS (
|
|
SELECT key AS ref_year, value::INTEGER AS time_pos
|
|
FROM (SELECT raw_json FROM raw LIMIT 1),
|
|
LATERAL (
|
|
SELECT key, value
|
|
FROM json_each(json_extract(raw_json, '$.dimension.time.category.index'))
|
|
)
|
|
)
|
|
SELECT
|
|
c.city_code,
|
|
t.ref_year,
|
|
TRY_CAST(
|
|
json_extract(c.raw_json, '$.' || (c.city_pos * c.n_times + t.time_pos)::TEXT)
|
|
AS DOUBLE
|
|
) AS population,
|
|
c.filename AS source_file,
|
|
CURRENT_DATE AS extracted_date
|
|
FROM cities c
|
|
CROSS JOIN times t
|