Compare commits
1 Commits
v202603010
...
v202603010
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fea4f85da3 |
@@ -19,8 +19,10 @@
|
|||||||
-- 4. Country-level income (global fallback from stg_income / ilc_di03)
|
-- 4. Country-level income (global fallback from stg_income / ilc_di03)
|
||||||
--
|
--
|
||||||
-- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension).
|
-- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension).
|
||||||
-- A bounding-box pre-filter (~0.5°, ≈55km) reduces the cross-join before the
|
-- Spatial joins use BETWEEN predicates (not ABS()) to enable DuckDB's IEJoin
|
||||||
-- exact sphere distance is computed.
|
-- (interval join) optimization: O((N+M) log M) vs O(N×M) nested-loop.
|
||||||
|
-- Country pre-filters restrict the left side to ~20K rows for padel/tennis CTEs
|
||||||
|
-- (~8 countries each), down from ~140K global locations.
|
||||||
|
|
||||||
MODEL (
|
MODEL (
|
||||||
name foundation.dim_locations,
|
name foundation.dim_locations,
|
||||||
@@ -147,6 +149,8 @@ padel_courts AS (
|
|||||||
WHERE lat IS NOT NULL AND lon IS NOT NULL
|
WHERE lat IS NOT NULL AND lon IS NOT NULL
|
||||||
),
|
),
|
||||||
-- Nearest padel court distance per location (bbox pre-filter → exact sphere distance)
|
-- Nearest padel court distance per location (bbox pre-filter → exact sphere distance)
|
||||||
|
-- BETWEEN enables DuckDB IEJoin (O((N+M) log M)) vs ABS() nested-loop (O(N×M)).
|
||||||
|
-- Country pre-filter reduces left side from ~140K to ~20K rows (padel is ~8 countries).
|
||||||
nearest_padel AS (
|
nearest_padel AS (
|
||||||
SELECT
|
SELECT
|
||||||
l.geoname_id,
|
l.geoname_id,
|
||||||
@@ -158,9 +162,12 @@ nearest_padel AS (
|
|||||||
) AS nearest_padel_court_km
|
) AS nearest_padel_court_km
|
||||||
FROM locations l
|
FROM locations l
|
||||||
JOIN padel_courts p
|
JOIN padel_courts p
|
||||||
-- ~55km bounding box pre-filter to limit cross-join before sphere calc
|
-- ~55km bounding box pre-filter; BETWEEN triggers IEJoin optimization
|
||||||
ON ABS(l.lat - p.lat) < 0.5
|
ON l.lat BETWEEN p.lat - 0.5 AND p.lat + 0.5
|
||||||
AND ABS(l.lon - p.lon) < 0.5
|
AND l.lon BETWEEN p.lon - 0.5 AND p.lon + 0.5
|
||||||
|
WHERE l.country_code IN (
|
||||||
|
SELECT DISTINCT country_code FROM padel_courts WHERE country_code IS NOT NULL
|
||||||
|
)
|
||||||
GROUP BY l.geoname_id
|
GROUP BY l.geoname_id
|
||||||
),
|
),
|
||||||
-- Padel venues within 5km of each location (counts as "local padel supply")
|
-- Padel venues within 5km of each location (counts as "local padel supply")
|
||||||
@@ -170,24 +177,35 @@ padel_local AS (
|
|||||||
COUNT(*) AS padel_venue_count
|
COUNT(*) AS padel_venue_count
|
||||||
FROM locations l
|
FROM locations l
|
||||||
JOIN padel_courts p
|
JOIN padel_courts p
|
||||||
ON ABS(l.lat - p.lat) < 0.05 -- ~5km bbox pre-filter
|
-- ~5km bbox pre-filter; BETWEEN triggers IEJoin optimization
|
||||||
AND ABS(l.lon - p.lon) < 0.05
|
ON l.lat BETWEEN p.lat - 0.05 AND p.lat + 0.05
|
||||||
WHERE ST_Distance_Sphere(
|
AND l.lon BETWEEN p.lon - 0.05 AND p.lon + 0.05
|
||||||
|
WHERE l.country_code IN (
|
||||||
|
SELECT DISTINCT country_code FROM padel_courts WHERE country_code IS NOT NULL
|
||||||
|
)
|
||||||
|
AND ST_Distance_Sphere(
|
||||||
ST_Point(l.lon, l.lat),
|
ST_Point(l.lon, l.lat),
|
||||||
ST_Point(p.lon, p.lat)
|
ST_Point(p.lon, p.lat)
|
||||||
) / 1000.0 <= 5.0
|
) / 1000.0 <= 5.0
|
||||||
GROUP BY l.geoname_id
|
GROUP BY l.geoname_id
|
||||||
),
|
),
|
||||||
-- Tennis courts within 25km of each location (sports culture proxy)
|
-- Tennis courts within 25km of each location (sports culture proxy)
|
||||||
|
-- Country pre-filter reduces left side from ~140K to ~20K rows (tennis courts are European only).
|
||||||
tennis_nearby AS (
|
tennis_nearby AS (
|
||||||
SELECT
|
SELECT
|
||||||
l.geoname_id,
|
l.geoname_id,
|
||||||
COUNT(*) AS tennis_courts_within_25km
|
COUNT(*) AS tennis_courts_within_25km
|
||||||
FROM locations l
|
FROM locations l
|
||||||
JOIN staging.stg_tennis_courts t
|
JOIN staging.stg_tennis_courts t
|
||||||
ON ABS(l.lat - t.lat) < 0.23 -- ~25km bbox pre-filter
|
-- ~25km bbox pre-filter; BETWEEN triggers IEJoin optimization
|
||||||
AND ABS(l.lon - t.lon) < 0.23
|
ON l.lat BETWEEN t.lat - 0.23 AND t.lat + 0.23
|
||||||
WHERE ST_Distance_Sphere(
|
AND l.lon BETWEEN t.lon - 0.23 AND t.lon + 0.23
|
||||||
|
WHERE l.country_code IN (
|
||||||
|
SELECT DISTINCT country_code
|
||||||
|
FROM staging.stg_tennis_courts
|
||||||
|
WHERE country_code IS NOT NULL
|
||||||
|
)
|
||||||
|
AND ST_Distance_Sphere(
|
||||||
ST_Point(l.lon, l.lat),
|
ST_Point(l.lon, l.lat),
|
||||||
ST_Point(t.lon, t.lat)
|
ST_Point(t.lon, t.lat)
|
||||||
) / 1000.0 <= 25.0
|
) / 1000.0 <= 25.0
|
||||||
|
|||||||
Reference in New Issue
Block a user