fix(transform): increase geonames object size limit and remove stale column ref
- stg_population_geonames: add maximum_object_size = 40000000 (~40MB) to the read_json() call; the geonames cities_global.json.gz is ~30MB, which exceeds DuckDB's 16MB default.
- dim_locations: remove the stale 'population_year AS population_year' column reference; stg_population_geonames has ref_year, not population_year, and the stale reference caused a BinderException.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -35,7 +35,6 @@ locations AS (
|
|||||||
admin1_code,
|
admin1_code,
|
||||||
admin2_code,
|
admin2_code,
|
||||||
population,
|
population,
|
||||||
population_year AS population_year,
|
|
||||||
ref_year
|
ref_year
|
||||||
FROM staging.stg_population_geonames
|
FROM staging.stg_population_geonames
|
||||||
WHERE lat IS NOT NULL AND lon IS NOT NULL
|
WHERE lat IS NOT NULL AND lon IS NOT NULL
|
||||||
|
|||||||
@@ -28,7 +28,8 @@ WITH parsed AS (
|
|||||||
SELECT UNNEST(rows) AS row
|
SELECT UNNEST(rows) AS row
|
||||||
FROM read_json(
|
FROM read_json(
|
||||||
@LANDING_DIR || '/geonames/*/*/cities_global.json.gz',
|
@LANDING_DIR || '/geonames/*/*/cities_global.json.gz',
|
||||||
auto_detect = true
|
auto_detect = true,
|
||||||
|
maximum_object_size = 40000000
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
WHERE (row ->> 'geoname_id') IS NOT NULL
|
WHERE (row ->> 'geoname_id') IS NOT NULL
|
||||||
|
|||||||
Reference in New Issue
Block a user