fix(transform): remove blob compat CTE from stg_tennis_courts
The overpass_tennis extractor has written JSONL-only since it was added. The dual-format UNION ALL was backwards-compat debt that broke the transform once no courts.json.gz files existed on the server:

    IO Error: No files found that match the pattern "data/landing/overpass_tennis/*/*/courts.json.gz"

Remove the blob_elements CTE and the UNION ALL. Only read JSONL.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,12 +2,9 @@
|
|||||||
-- Used as a "racket sport culture" signal in the opportunity score:
|
-- Used as a "racket sport culture" signal in the opportunity score:
|
||||||
-- areas with high tennis court density are prime padel adoption markets.
|
-- areas with high tennis court density are prime padel adoption markets.
|
||||||
--
|
--
|
||||||
-- Supports two landing formats (UNION ALL during migration):
|
-- Source: data/landing/overpass_tennis/{year}/{month}/courts.jsonl.gz
|
||||||
-- New: courts.jsonl.gz — one OSM element per line; nodes have lat/lon directly,
|
-- Format: one OSM element per line; nodes have lat/lon directly,
|
||||||
-- ways/relations have center.lat/center.lon (Overpass out center)
|
-- ways/relations have center.lat/center.lon (Overpass out center)
|
||||||
-- Old: courts.json.gz — {"elements": [...]} blob (UNNEST required)
|
|
||||||
--
|
|
||||||
-- Source: data/landing/overpass_tennis/{year}/{month}/courts.{jsonl,json}.gz
|
|
||||||
|
|
||||||
MODEL (
|
MODEL (
|
||||||
name staging.stg_tennis_courts,
|
name staging.stg_tennis_courts,
|
||||||
@@ -17,8 +14,7 @@ MODEL (
|
|||||||
);
|
);
|
||||||
|
|
||||||
WITH
|
WITH
|
||||||
-- New format: one OSM element per JSONL line
|
parsed AS (
|
||||||
jsonl_elements AS (
|
|
||||||
SELECT
|
SELECT
|
||||||
type AS osm_type,
|
type AS osm_type,
|
||||||
TRY_CAST(id AS BIGINT) AS osm_id,
|
TRY_CAST(id AS BIGINT) AS osm_id,
|
||||||
@@ -47,33 +43,6 @@ jsonl_elements AS (
|
|||||||
)
|
)
|
||||||
WHERE type IS NOT NULL
|
WHERE type IS NOT NULL
|
||||||
),
|
),
|
||||||
-- Old format: {"elements": [...]} blob — kept for transition
|
|
||||||
blob_elements AS (
|
|
||||||
SELECT
|
|
||||||
elem ->> 'type' AS osm_type,
|
|
||||||
(elem ->> 'id')::BIGINT AS osm_id,
|
|
||||||
TRY_CAST(elem ->> 'lat' AS DOUBLE) AS lat,
|
|
||||||
TRY_CAST(elem ->> 'lon' AS DOUBLE) AS lon,
|
|
||||||
elem -> 'tags' ->> 'name' AS name,
|
|
||||||
elem -> 'tags' ->> 'addr:country' AS country_code,
|
|
||||||
elem -> 'tags' ->> 'addr:city' AS city_tag,
|
|
||||||
filename AS source_file,
|
|
||||||
CURRENT_DATE AS extracted_date
|
|
||||||
FROM (
|
|
||||||
SELECT UNNEST(elements) AS elem, filename
|
|
||||||
FROM read_json(
|
|
||||||
@LANDING_DIR || '/overpass_tennis/*/*/courts.json.gz',
|
|
||||||
format = 'auto',
|
|
||||||
filename = true
|
|
||||||
)
|
|
||||||
)
|
|
||||||
WHERE (elem ->> 'type') IS NOT NULL
|
|
||||||
),
|
|
||||||
parsed AS (
|
|
||||||
SELECT * FROM jsonl_elements
|
|
||||||
UNION ALL
|
|
||||||
SELECT * FROM blob_elements
|
|
||||||
),
|
|
||||||
deduped AS (
|
deduped AS (
|
||||||
SELECT *,
|
SELECT *,
|
||||||
ROW_NUMBER() OVER (PARTITION BY osm_id ORDER BY extracted_date DESC) AS rn
|
ROW_NUMBER() OVER (PARTITION BY osm_id ORDER BY extracted_date DESC) AS rn
|
||||||
|
|||||||
Reference in New Issue
Block a user