fix: eurostat JSON-stat parsing + staging model corrections
The Eurostat JSON-stat format (a sparse dict spanning 4–7 dimensions with 583K values) causes DuckDB to run out of memory, so the extractor now pre-processes it into flat records. Also fixes the unused-CTE bug in dim_cities and the lat/lon path for Playtomic venues.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -15,7 +15,8 @@ eurostat_cities AS (
|
||||
city_code,
|
||||
country_code,
|
||||
population,
|
||||
ref_year
|
||||
ref_year,
|
||||
LOWER(REPLACE(city_code, country_code, '')) AS city_slug_raw
|
||||
FROM staging.stg_population
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY city_code ORDER BY ref_year DESC) = 1
|
||||
),
|
||||
@@ -30,16 +31,6 @@ venue_counts AS (
|
||||
FROM foundation.dim_venues
|
||||
WHERE city IS NOT NULL AND city != ''
|
||||
GROUP BY country_code, city
|
||||
),
|
||||
-- Eurostat city label mapping to canonical city names
|
||||
-- (Eurostat uses codes like DE001C → Berlin; we keep both)
|
||||
eurostat_labels AS (
|
||||
SELECT DISTINCT
|
||||
city_code,
|
||||
country_code,
|
||||
-- Derive a slug-friendly city name from the code as fallback
|
||||
LOWER(REPLACE(city_code, country_code, '')) AS city_slug_raw
|
||||
FROM eurostat_cities
|
||||
)
|
||||
SELECT
|
||||
ec.city_code,
|
||||
|
||||
Reference in New Issue
Block a user