fix(transform): increase geonames object size limit and remove stale column ref

- stg_population_geonames: pass maximum_object_size = 40000000 (~40MB) to the
  read_json() call; geonames cities_global.json.gz is ~30MB, which exceeds
  DuckDB's 16MB default maximum object size
- dim_locations: remove the stale 'population_year AS population_year' column
  reference; stg_population_geonames exposes ref_year, not population_year, so
  the reference raised a BinderException

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-25 09:56:05 +01:00
parent 3c0f57c0fd
commit 55f179ba54
2 changed files with 2 additions and 2 deletions

View File

@@ -35,7 +35,6 @@ locations AS (
admin1_code, admin1_code,
admin2_code, admin2_code,
population, population,
population_year AS population_year,
ref_year ref_year
FROM staging.stg_population_geonames FROM staging.stg_population_geonames
WHERE lat IS NOT NULL AND lon IS NOT NULL WHERE lat IS NOT NULL AND lon IS NOT NULL

View File

@@ -28,7 +28,8 @@ WITH parsed AS (
SELECT UNNEST(rows) AS row SELECT UNNEST(rows) AS row
FROM read_json( FROM read_json(
@LANDING_DIR || '/geonames/*/*/cities_global.json.gz', @LANDING_DIR || '/geonames/*/*/cities_global.json.gz',
auto_detect = true auto_detect = true,
maximum_object_size = 40000000
) )
) )
WHERE (row ->> 'geoname_id') IS NOT NULL WHERE (row ->> 'geoname_id') IS NOT NULL