Compare commits
1 Commits
v202603010
...
v202603010
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fea4f85da3 |
@@ -19,8 +19,10 @@
|
||||
-- 4. Country-level income (global fallback from stg_income / ilc_di03)
|
||||
--
|
||||
-- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension).
|
||||
-- A bounding-box pre-filter (~0.5°, ≈55km) reduces the cross-join before the
|
||||
-- exact sphere distance is computed.
|
||||
-- Spatial joins use BETWEEN predicates (not ABS()) to enable DuckDB's IEJoin
|
||||
-- (interval join) optimization: O((N+M) log M) vs O(N×M) nested-loop.
|
||||
-- Country pre-filters restrict the left side to ~20K rows for padel/tennis CTEs
|
||||
-- (~8 countries each), down from ~140K global locations.
|
||||
|
||||
MODEL (
|
||||
name foundation.dim_locations,
|
||||
@@ -147,6 +149,8 @@ padel_courts AS (
|
||||
WHERE lat IS NOT NULL AND lon IS NOT NULL
|
||||
),
|
||||
-- Nearest padel court distance per location (bbox pre-filter → exact sphere distance)
|
||||
-- BETWEEN enables DuckDB IEJoin (O((N+M) log M)) vs ABS() nested-loop (O(N×M)).
|
||||
-- Country pre-filter reduces left side from ~140K to ~20K rows (padel is ~8 countries).
|
||||
nearest_padel AS (
|
||||
SELECT
|
||||
l.geoname_id,
|
||||
@@ -158,9 +162,12 @@ nearest_padel AS (
|
||||
) AS nearest_padel_court_km
|
||||
FROM locations l
|
||||
JOIN padel_courts p
|
||||
-- ~55km bounding box pre-filter to limit cross-join before sphere calc
|
||||
ON ABS(l.lat - p.lat) < 0.5
|
||||
AND ABS(l.lon - p.lon) < 0.5
|
||||
-- ~55km bounding box pre-filter; BETWEEN triggers IEJoin optimization
|
||||
ON l.lat BETWEEN p.lat - 0.5 AND p.lat + 0.5
|
||||
AND l.lon BETWEEN p.lon - 0.5 AND p.lon + 0.5
|
||||
WHERE l.country_code IN (
|
||||
SELECT DISTINCT country_code FROM padel_courts WHERE country_code IS NOT NULL
|
||||
)
|
||||
GROUP BY l.geoname_id
|
||||
),
|
||||
-- Padel venues within 5km of each location (counts as "local padel supply")
|
||||
@@ -170,24 +177,35 @@ padel_local AS (
|
||||
COUNT(*) AS padel_venue_count
|
||||
FROM locations l
|
||||
JOIN padel_courts p
|
||||
ON ABS(l.lat - p.lat) < 0.05 -- ~5km bbox pre-filter
|
||||
AND ABS(l.lon - p.lon) < 0.05
|
||||
WHERE ST_Distance_Sphere(
|
||||
-- ~5km bbox pre-filter; BETWEEN triggers IEJoin optimization
|
||||
ON l.lat BETWEEN p.lat - 0.05 AND p.lat + 0.05
|
||||
AND l.lon BETWEEN p.lon - 0.05 AND p.lon + 0.05
|
||||
WHERE l.country_code IN (
|
||||
SELECT DISTINCT country_code FROM padel_courts WHERE country_code IS NOT NULL
|
||||
)
|
||||
AND ST_Distance_Sphere(
|
||||
ST_Point(l.lon, l.lat),
|
||||
ST_Point(p.lon, p.lat)
|
||||
) / 1000.0 <= 5.0
|
||||
GROUP BY l.geoname_id
|
||||
),
|
||||
-- Tennis courts within 25km of each location (sports culture proxy)
|
||||
-- Country pre-filter reduces left side from ~140K to ~20K rows (tennis courts are European only).
|
||||
tennis_nearby AS (
|
||||
SELECT
|
||||
l.geoname_id,
|
||||
COUNT(*) AS tennis_courts_within_25km
|
||||
FROM locations l
|
||||
JOIN staging.stg_tennis_courts t
|
||||
ON ABS(l.lat - t.lat) < 0.23 -- ~25km bbox pre-filter
|
||||
AND ABS(l.lon - t.lon) < 0.23
|
||||
WHERE ST_Distance_Sphere(
|
||||
-- ~25km bbox pre-filter; BETWEEN triggers IEJoin optimization
|
||||
ON l.lat BETWEEN t.lat - 0.23 AND t.lat + 0.23
|
||||
AND l.lon BETWEEN t.lon - 0.23 AND t.lon + 0.23
|
||||
WHERE l.country_code IN (
|
||||
SELECT DISTINCT country_code
|
||||
FROM staging.stg_tennis_courts
|
||||
WHERE country_code IS NOT NULL
|
||||
)
|
||||
AND ST_Distance_Sphere(
|
||||
ST_Point(l.lon, l.lat),
|
||||
ST_Point(t.lon, t.lat)
|
||||
) / 1000.0 <= 25.0
|
||||
|
||||
Reference in New Issue
Block a user