fix(transform): tighten H3 catchment to res 5 (~24km radius)
Res 4 + k_ring(1) gave ~50-60km effective radius, causing Oldenburg to absorb Bremen (40km away) and destroying score differentiation. Res 5 + k_ring(1) gives ~24km — captures adjacent Gemeinden (Delmenhorst at 15km) without bleeding into unrelated cities at 40km+. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -215,7 +215,7 @@ SELECT
|
|||||||
l.location_slug,
|
l.location_slug,
|
||||||
l.lat,
|
l.lat,
|
||||||
l.lon,
|
l.lon,
|
||||||
h3_latlng_to_cell(l.lat, l.lon, 4) AS h3_cell_res4,
|
h3_latlng_to_cell(l.lat, l.lon, 5) AS h3_cell_res5,
|
||||||
l.admin1_code,
|
l.admin1_code,
|
||||||
l.admin2_code,
|
l.admin2_code,
|
||||||
l.population,
|
l.population,
|
||||||
|
|||||||
@@ -9,8 +9,9 @@
|
|||||||
-- H3 catchment methodology (v3):
|
-- H3 catchment methodology (v3):
|
||||||
-- Addressable market and supply gap now use a regional catchment lens rather than
|
-- Addressable market and supply gap now use a regional catchment lens rather than
|
||||||
-- the location's own population/court count. Each location is assigned an H3 cell
|
-- the location's own population/court count. Each location is assigned an H3 cell
|
||||||
-- at resolution 4 (~10km center-to-center). Catchment = cell + 6 neighbours (k_ring=1),
|
-- at resolution 5 (~8.5km edge). Catchment = cell + 6 neighbours (k_ring=1),
|
||||||
-- covering ~462km² — roughly a 15-18km radius, matching realistic driving distance.
|
-- giving an effective radius of ~24km — a realistic driving distance that avoids absorbing
|
||||||
|
-- unrelated cities (e.g. Oldenburg stays separate from Bremen at ~40km).
|
||||||
-- Population and court counts are first aggregated per H3 cell (hex_stats CTE), then
|
-- Population and court counts are first aggregated per H3 cell (hex_stats CTE), then
|
||||||
-- summed across the 7-cell ring (catchment CTE) to avoid scanning all 140K locations
|
-- summed across the 7-cell ring (catchment CTE) to avoid scanning all 140K locations
|
||||||
-- per location.
|
-- per location.
|
||||||
@@ -41,27 +42,27 @@ MODEL (
|
|||||||
);
|
);
|
||||||
|
|
||||||
WITH
|
WITH
|
||||||
-- Aggregate population and court counts per H3 cell (res 4, ~10km edge).
|
-- Aggregate population and padel venue counts per H3 cell (res 5, ~8.5km edge).
|
||||||
-- Grouping by cell first (~30-50K distinct cells vs 140K locations) keeps the
|
-- Grouping by cell first (~50-80K distinct cells vs 140K locations) keeps the
|
||||||
-- subsequent lateral join small.
|
-- subsequent lateral join small.
|
||||||
hex_stats AS (
|
hex_stats AS (
|
||||||
SELECT
|
SELECT
|
||||||
h3_cell_res4,
|
h3_cell_res5,
|
||||||
SUM(population) AS hex_population,
|
SUM(population) AS hex_population,
|
||||||
SUM(padel_venue_count) AS hex_padel_courts
|
SUM(padel_venue_count) AS hex_padel_courts
|
||||||
FROM foundation.dim_locations
|
FROM foundation.dim_locations
|
||||||
GROUP BY h3_cell_res4
|
GROUP BY h3_cell_res5
|
||||||
),
|
),
|
||||||
-- For each location, sum hex_stats across the cell + 6 neighbours (k_ring=1).
|
-- For each location, sum hex_stats across the cell + 6 neighbours (k_ring=1).
|
||||||
-- Effective catchment: ~462km², ~15-18km radius — realistic driving distance.
|
-- Effective catchment: ~24km radius — realistic driving distance.
|
||||||
catchment AS (
|
catchment AS (
|
||||||
SELECT
|
SELECT
|
||||||
l.geoname_id,
|
l.geoname_id,
|
||||||
SUM(hs.hex_population) AS catchment_population,
|
SUM(hs.hex_population) AS catchment_population,
|
||||||
SUM(hs.hex_padel_courts) AS catchment_padel_courts
|
SUM(hs.hex_padel_courts) AS catchment_padel_courts
|
||||||
FROM foundation.dim_locations l,
|
FROM foundation.dim_locations l,
|
||||||
LATERAL (SELECT UNNEST(h3_grid_disk(l.h3_cell_res4, 1)) AS cell) ring
|
LATERAL (SELECT UNNEST(h3_grid_disk(l.h3_cell_res5, 1)) AS cell) ring
|
||||||
JOIN hex_stats hs ON hs.h3_cell_res4 = ring.cell
|
JOIN hex_stats hs ON hs.h3_cell_res5 = ring.cell
|
||||||
GROUP BY l.geoname_id
|
GROUP BY l.geoname_id
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
@@ -83,7 +84,7 @@ SELECT
|
|||||||
l.padel_venues_per_100k,
|
l.padel_venues_per_100k,
|
||||||
l.nearest_padel_court_km,
|
l.nearest_padel_court_km,
|
||||||
l.tennis_courts_within_25km,
|
l.tennis_courts_within_25km,
|
||||||
-- Catchment metrics (H3 res-4 cell + 6 neighbours, ~15-18km radius)
|
-- Catchment metrics (H3 res-5 cell + 6 neighbours, ~24km radius)
|
||||||
COALESCE(c.catchment_population, l.population)::BIGINT AS catchment_population,
|
COALESCE(c.catchment_population, l.population)::BIGINT AS catchment_population,
|
||||||
COALESCE(c.catchment_padel_courts, l.padel_venue_count)::INTEGER AS catchment_padel_courts,
|
COALESCE(c.catchment_padel_courts, l.padel_venue_count)::INTEGER AS catchment_padel_courts,
|
||||||
CASE WHEN COALESCE(c.catchment_population, l.population) > 0
|
CASE WHEN COALESCE(c.catchment_population, l.population) > 0
|
||||||
@@ -94,7 +95,7 @@ SELECT
|
|||||||
END AS catchment_venues_per_100k,
|
END AS catchment_venues_per_100k,
|
||||||
ROUND(
|
ROUND(
|
||||||
-- Addressable market (25 pts): log-scaled catchment population, ceiling 500K.
|
-- Addressable market (25 pts): log-scaled catchment population, ceiling 500K.
|
||||||
-- v3: uses H3 catchment population (cell + 6 neighbours, ~15-18km radius) instead
|
-- v3: uses H3 catchment population (cell + 6 neighbours, ~24km radius) instead
|
||||||
-- of local city population, so mid-size cities surrounded by dense Gemeinden score
|
-- of local city population, so mid-size cities surrounded by dense Gemeinden score
|
||||||
-- correctly (e.g. Oldenburg pulls in Ammerland, Wesermarsch, etc.).
|
-- correctly (e.g. Oldenburg pulls in Ammerland, Wesermarsch, etc.).
|
||||||
25.0 * LEAST(1.0, LN(GREATEST(COALESCE(c.catchment_population, l.population), 1)) / LN(500000))
|
25.0 * LEAST(1.0, LN(GREATEST(COALESCE(c.catchment_population, l.population), 1)) / LN(500000))
|
||||||
@@ -109,7 +110,7 @@ SELECT
|
|||||||
|
|
||||||
-- Supply gap (30 pts): INVERTED catchment venue density.
|
-- Supply gap (30 pts): INVERTED catchment venue density.
|
||||||
-- v3: uses catchment courts / catchment population instead of local 5km count / city pop.
|
-- v3: uses catchment courts / catchment population instead of local 5km count / city pop.
|
||||||
-- 0 courts/100K across the ~15-18km ring = full 30 pts (genuine white space).
|
-- 0 courts/100K across the ~24km ring = full 30 pts (genuine white space).
|
||||||
-- ≥8/100K = 0 pts (well-served regional market).
|
-- ≥8/100K = 0 pts (well-served regional market).
|
||||||
+ 30.0 * GREATEST(0.0, 1.0 - COALESCE(
|
+ 30.0 * GREATEST(0.0, 1.0 - COALESCE(
|
||||||
CASE WHEN COALESCE(c.catchment_population, l.population) > 0
|
CASE WHEN COALESCE(c.catchment_population, l.population) > 0
|
||||||
|
|||||||
Reference in New Issue
Block a user