fix(transform): remove blob compat CTE from stg_tennis_courts
The overpass_tennis extractor has written JSONL only since it was added. The dual-format UNION ALL was backwards-compatibility debt, and it broke the transform once no courts.json.gz files existed on the server, failing with: IO Error: No files found that match the pattern "data/landing/overpass_tennis/*/*/courts.json.gz". Remove the blob_elements CTE and the UNION ALL so that the model reads only JSONL. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,12 +2,9 @@
|
||||
-- Used as a "racket sport culture" signal in the opportunity score:
|
||||
-- areas with high tennis court density are prime padel adoption markets.
|
||||
--
|
||||
-- Supports two landing formats (UNION ALL during migration):
|
||||
-- New: courts.jsonl.gz — one OSM element per line; nodes have lat/lon directly,
|
||||
-- Source: data/landing/overpass_tennis/{year}/{month}/courts.jsonl.gz
|
||||
-- Format: one OSM element per line; nodes have lat/lon directly,
|
||||
-- ways/relations have center.lat/center.lon (Overpass out center)
|
||||
-- Old: courts.json.gz — {"elements": [...]} blob (UNNEST required)
|
||||
--
|
||||
-- Source: data/landing/overpass_tennis/{year}/{month}/courts.{jsonl,json}.gz
|
||||
|
||||
MODEL (
|
||||
name staging.stg_tennis_courts,
|
||||
@@ -17,8 +14,7 @@ MODEL (
|
||||
);
|
||||
|
||||
WITH
|
||||
-- New format: one OSM element per JSONL line
|
||||
jsonl_elements AS (
|
||||
parsed AS (
|
||||
SELECT
|
||||
type AS osm_type,
|
||||
TRY_CAST(id AS BIGINT) AS osm_id,
|
||||
@@ -47,33 +43,6 @@ jsonl_elements AS (
|
||||
)
|
||||
WHERE type IS NOT NULL
|
||||
),
|
||||
-- Old format: {"elements": [...]} blob — kept for transition
|
||||
blob_elements AS (
|
||||
SELECT
|
||||
elem ->> 'type' AS osm_type,
|
||||
(elem ->> 'id')::BIGINT AS osm_id,
|
||||
TRY_CAST(elem ->> 'lat' AS DOUBLE) AS lat,
|
||||
TRY_CAST(elem ->> 'lon' AS DOUBLE) AS lon,
|
||||
elem -> 'tags' ->> 'name' AS name,
|
||||
elem -> 'tags' ->> 'addr:country' AS country_code,
|
||||
elem -> 'tags' ->> 'addr:city' AS city_tag,
|
||||
filename AS source_file,
|
||||
CURRENT_DATE AS extracted_date
|
||||
FROM (
|
||||
SELECT UNNEST(elements) AS elem, filename
|
||||
FROM read_json(
|
||||
@LANDING_DIR || '/overpass_tennis/*/*/courts.json.gz',
|
||||
format = 'auto',
|
||||
filename = true
|
||||
)
|
||||
)
|
||||
WHERE (elem ->> 'type') IS NOT NULL
|
||||
),
|
||||
parsed AS (
|
||||
SELECT * FROM jsonl_elements
|
||||
UNION ALL
|
||||
SELECT * FROM blob_elements
|
||||
),
|
||||
deduped AS (
|
||||
SELECT *,
|
||||
ROW_NUMBER() OVER (PARTITION BY osm_id ORDER BY extracted_date DESC) AS rn
|
||||
|
||||
Reference in New Issue
Block a user