From f81d5f19da827581f255f09383a57bde6490a4c0 Mon Sep 17 00:00:00 2001 From: Deeman Date: Fri, 6 Mar 2026 14:34:56 +0100 Subject: [PATCH] fix(transform): tighten H3 catchment to res 5 (~24km radius) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Res 4 + k_ring(1) gave ~50-60km effective radius, causing Oldenburg to absorb Bremen (40km away) and destroying score differentiation. Res 5 + k_ring(1) gives ~24km — captures adjacent Gemeinden (Delmenhorst at 15km) without bleeding into unrelated cities at 40km+. Co-Authored-By: Claude Sonnet 4.6 --- .../models/foundation/dim_locations.sql | 2 +- .../serving/location_opportunity_profile.sql | 25 ++++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql index 015bb28..b0ddbe3 100644 --- a/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql +++ b/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql @@ -215,7 +215,7 @@ SELECT l.location_slug, l.lat, l.lon, - h3_latlng_to_cell(l.lat, l.lon, 4) AS h3_cell_res4, + h3_latlng_to_cell(l.lat, l.lon, 5) AS h3_cell_res5, l.admin1_code, l.admin2_code, l.population, diff --git a/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql b/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql index bbb5b41..bca33af 100644 --- a/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql +++ b/transform/sqlmesh_padelnomics/models/serving/location_opportunity_profile.sql @@ -9,8 +9,9 @@ -- H3 catchment methodology (v3): -- Addressable market and supply gap now use a regional catchment lens rather than -- the location's own population/court count. Each location is assigned an H3 cell --- at resolution 4 (~10km center-to-center). Catchment = cell + 6 neighbours (k_ring=1), --- covering ~462km² — roughly a 15-18km radius, matching realistic driving distance. +-- at resolution 5 (~8.5km edge). Catchment = cell + 6 neighbours (k_ring=1), +-- covering ~24km effective radius — realistic driving distance without absorbing +-- unrelated cities (e.g. Oldenburg stays separate from Bremen at ~40km). -- Population and court counts are first aggregated per H3 cell (hex_stats CTE), then -- summed across the 7-cell ring (catchment CTE) to avoid scanning all 140K locations -- per location. @@ -41,27 +42,27 @@ MODEL ( ); WITH --- Aggregate population and court counts per H3 cell (res 4, ~10km edge). --- Grouping by cell first (~30-50K distinct cells vs 140K locations) keeps the +-- Aggregate population and court counts per H3 cell (res 5, ~8.5km edge). +-- Grouping by cell first (~50-80K distinct cells vs 140K locations) keeps the -- subsequent lateral join small. hex_stats AS ( SELECT - h3_cell_res4, + h3_cell_res5, SUM(population) AS hex_population, SUM(padel_venue_count) AS hex_padel_courts FROM foundation.dim_locations - GROUP BY h3_cell_res4 + GROUP BY h3_cell_res5 ), -- For each location, sum hex_stats across the cell + 6 neighbours (k_ring=1). --- Effective catchment: ~462km², ~15-18km radius — realistic driving distance. +-- Effective catchment: ~24km radius — realistic driving distance. catchment AS ( SELECT l.geoname_id, SUM(hs.hex_population) AS catchment_population, SUM(hs.hex_padel_courts) AS catchment_padel_courts FROM foundation.dim_locations l, - LATERAL (SELECT UNNEST(h3_grid_disk(l.h3_cell_res4, 1)) AS cell) ring - JOIN hex_stats hs ON hs.h3_cell_res4 = ring.cell + LATERAL (SELECT UNNEST(h3_grid_disk(l.h3_cell_res5, 1)) AS cell) ring + JOIN hex_stats hs ON hs.h3_cell_res5 = ring.cell GROUP BY l.geoname_id ) SELECT @@ -83,7 +84,7 @@ SELECT l.padel_venues_per_100k, l.nearest_padel_court_km, l.tennis_courts_within_25km, - -- Catchment metrics (H3 res-4 cell + 6 neighbours, ~15-18km radius) + -- Catchment metrics (H3 res-5 cell + 6 neighbours, ~24km radius) COALESCE(c.catchment_population, l.population)::BIGINT AS catchment_population, COALESCE(c.catchment_padel_courts, l.padel_venue_count)::INTEGER AS catchment_padel_courts, CASE WHEN COALESCE(c.catchment_population, l.population) > 0 @@ -94,7 +95,7 @@ SELECT END AS catchment_venues_per_100k, ROUND( -- Addressable market (25 pts): log-scaled catchment population, ceiling 500K. - -- v3: uses H3 catchment population (cell + 6 neighbours, ~15-18km radius) instead + -- v3: uses H3 catchment population (cell + 6 neighbours, ~24km radius) instead -- of local city population, so mid-size cities surrounded by dense Gemeinden score -- correctly (e.g. Oldenburg pulls in Ammerland, Wesermarsch, etc.). 25.0 * LEAST(1.0, LN(GREATEST(COALESCE(c.catchment_population, l.population), 1)) / LN(500000)) @@ -109,7 +110,7 @@ SELECT -- Supply gap (30 pts): INVERTED catchment venue density. -- v3: uses catchment courts / catchment population instead of local 5km count / city pop. - -- 0 courts/100K across the ~15-18km ring = full 30 pts (genuine white space). + -- 0 courts/100K across the ~24km ring = full 30 pts (genuine white space). -- ≥8/100K = 0 pts (well-served regional market). + 30.0 * GREATEST(0.0, 1.0 - COALESCE( CASE WHEN COALESCE(c.catchment_population, l.population) > 0