fix(data): filter non-Latin city names + score range clamp (Phase F)
- stg_population_geonames: reject CJK/Cyrillic/Arabic city names via regex (fixes "Seelow" showing Japanese characters on map)
- dim_locations: filter out location names that are empty after trimming
- location_profiles: defensive LEAST/GREATEST clamp on both scores (market_score and opportunity_score) to the 0-100 range

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -48,6 +48,7 @@ locations AS (
|
|||||||
ref_year
|
ref_year
|
||||||
FROM staging.stg_population_geonames
|
FROM staging.stg_population_geonames
|
||||||
WHERE lat IS NOT NULL AND lon IS NOT NULL
|
WHERE lat IS NOT NULL AND lon IS NOT NULL
|
||||||
|
AND LENGTH(TRIM(city_name)) > 0
|
||||||
),
|
),
|
||||||
-- ── EU NUTS-2 income via spatial join ──────────────────────────────────────
|
-- ── EU NUTS-2 income via spatial join ──────────────────────────────────────
|
||||||
-- Each EU location's (lon, lat) is matched against NUTS-2 boundary polygons.
|
-- Each EU location's (lon, lat) is matched against NUTS-2 boundary polygons.
|
||||||
|
|||||||
@@ -278,8 +278,8 @@ SELECT
|
|||||||
THEN ROUND(s.catchment_padel_courts::DOUBLE / s.catchment_population * 100000, 2)
|
THEN ROUND(s.catchment_padel_courts::DOUBLE / s.catchment_population * 100000, 2)
|
||||||
ELSE NULL
|
ELSE NULL
|
||||||
END AS catchment_venues_per_100k,
|
END AS catchment_venues_per_100k,
|
||||||
s.market_score,
|
LEAST(GREATEST(s.market_score, 0), 100) AS market_score,
|
||||||
s.opportunity_score,
|
LEAST(GREATEST(s.opportunity_score, 0), 100) AS opportunity_score,
|
||||||
s.median_hourly_rate,
|
s.median_hourly_rate,
|
||||||
s.median_peak_rate,
|
s.median_peak_rate,
|
||||||
s.median_offpeak_rate,
|
s.median_offpeak_rate,
|
||||||
|
|||||||
@@ -38,3 +38,6 @@ WHERE geoname_id IS NOT NULL
|
|||||||
AND city_name IS NOT NULL
|
AND city_name IS NOT NULL
|
||||||
AND lat IS NOT NULL
|
AND lat IS NOT NULL
|
||||||
AND lon IS NOT NULL
|
AND lon IS NOT NULL
|
||||||
|
-- Reject names with non-Latin characters (CJK, Cyrillic, Arabic, Thai, etc.)
|
||||||
|
-- Allows printable ASCII (U+0020-U+007E) + Latin-1 Supplement from U+00C0, Latin Extended-A/B, and Latin Extended Additional (diacritics: ÄÖÜ, àéî, ñ, ø, etc.)
|
||||||
|
AND regexp_matches(city_name, '^[\x20-\x7E\u00C0-\u024F\u1E00-\u1EFF]+$')
|
||||||
|
|||||||
Reference in New Issue
Block a user