feat(sql): thread opportunity_score from location_opportunity_profile into pSEO serving chain

- dim_cities: add geoname_id to geonames_pop CTE and final SELECT
  Exposes geoname_id as a logical FK (join key) from dim_cities (cities with padel venues)
  to dim_locations (all GeoNames entries), enabling joins to location_opportunity_profile
  for the first time.
- city_market_profile: pass geoname_id through base CTE and final SELECT
- pseo_city_costs_de: LEFT JOIN location_opportunity_profile on (country_code, geoname_id),
  add opportunity_score to output columns
- pseo_country_overview: add avg_opportunity_score, top_opportunity_score, top_opportunity_slugs,
  top_opportunity_names aggregates

Cities whose name matches no GeoNames entry get opportunity_score = NULL; templates guard
with {% if opportunity_score %}.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-26 20:07:25 +01:00
parent 88378cbfa7
commit 7186d4582a
4 changed files with 17 additions and 2 deletions

View File

@@ -75,7 +75,7 @@ uk_pop AS (
), ),
-- GeoNames global fallback (all cities ≥50K) -- GeoNames global fallback (all cities ≥50K)
geonames_pop AS ( geonames_pop AS (
SELECT city_name, country_code, population, ref_year SELECT geoname_id, city_name, country_code, population, ref_year
FROM staging.stg_population_geonames FROM staging.stg_population_geonames
QUALIFY ROW_NUMBER() OVER (PARTITION BY geoname_id ORDER BY ref_year DESC) = 1 QUALIFY ROW_NUMBER() OVER (PARTITION BY geoname_id ORDER BY ref_year DESC) = 1
) )
@@ -153,7 +153,10 @@ SELECT
)::INTEGER AS population_year, )::INTEGER AS population_year,
vc.padel_venue_count, vc.padel_venue_count,
ci.median_income_pps, ci.median_income_pps,
ci.income_year ci.income_year,
-- GeoNames ID: FK to dim_locations / location_opportunity_profile.
-- NULL when city name doesn't match any GeoNames entry.
gn.geoname_id
FROM venue_cities vc FROM venue_cities vc
LEFT JOIN country_income ci ON vc.country_code = ci.country_code LEFT JOIN country_income ci ON vc.country_code = ci.country_code
-- Eurostat EU population (via city code→name lookup) -- Eurostat EU population (via city code→name lookup)

View File

@@ -33,6 +33,7 @@ WITH base AS (
c.padel_venue_count, c.padel_venue_count,
c.median_income_pps, c.median_income_pps,
c.income_year, c.income_year,
c.geoname_id,
-- Venue density: padel venues per 100K residents -- Venue density: padel venues per 100K residents
CASE WHEN c.population > 0 CASE WHEN c.population > 0
THEN ROUND(c.padel_venue_count::DOUBLE / c.population * 100000, 2) THEN ROUND(c.padel_venue_count::DOUBLE / c.population * 100000, 2)
@@ -107,6 +108,7 @@ SELECT
s.median_occupancy_rate, s.median_occupancy_rate,
s.median_daily_revenue_per_venue, s.median_daily_revenue_per_venue,
s.price_currency, s.price_currency,
s.geoname_id,
CURRENT_DATE AS refreshed_date CURRENT_DATE AS refreshed_date
FROM scored s FROM scored s
ORDER BY s.market_score DESC ORDER BY s.market_score DESC

View File

@@ -27,6 +27,7 @@ SELECT
c.padel_venue_count, c.padel_venue_count,
c.venues_per_100k, c.venues_per_100k,
c.market_score, c.market_score,
lop.opportunity_score,
c.data_confidence, c.data_confidence,
-- Pricing (from Playtomic, NULL when no coverage) -- Pricing (from Playtomic, NULL when no coverage)
c.median_hourly_rate, c.median_hourly_rate,
@@ -48,6 +49,9 @@ FROM serving.city_market_profile c
LEFT JOIN serving.planner_defaults p LEFT JOIN serving.planner_defaults p
ON c.country_code = p.country_code ON c.country_code = p.country_code
AND c.city_slug = p.city_slug AND c.city_slug = p.city_slug
LEFT JOIN serving.location_opportunity_profile lop
ON c.country_code = lop.country_code
AND c.geoname_id = lop.geoname_id
-- Only cities with actual padel presence and at least some rate data -- Only cities with actual padel presence and at least some rate data
WHERE c.padel_venue_count > 0 WHERE c.padel_venue_count > 0
AND (p.rate_peak IS NOT NULL OR c.median_peak_rate IS NOT NULL) AND (p.rate_peak IS NOT NULL OR c.median_peak_rate IS NOT NULL)

View File

@@ -23,6 +23,12 @@ SELECT
-- Top 5 cities by market score for internal linking (DuckDB list slice syntax) -- Top 5 cities by market score for internal linking (DuckDB list slice syntax)
LIST(city_slug ORDER BY market_score DESC NULLS LAST)[1:5] AS top_city_slugs, LIST(city_slug ORDER BY market_score DESC NULLS LAST)[1:5] AS top_city_slugs,
LIST(city_name ORDER BY market_score DESC NULLS LAST)[1:5] AS top_city_names, LIST(city_name ORDER BY market_score DESC NULLS LAST)[1:5] AS top_city_names,
-- Opportunity score aggregates (NULL-safe: cities without geoname_id match excluded from AVG)
ROUND(AVG(opportunity_score), 1) AS avg_opportunity_score,
MAX(opportunity_score) AS top_opportunity_score,
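-- Top 5 cities by opportunity score (may differ from top market score cities).
-- NULLS LAST only sorts unscored cities to the end; they can still appear in the
-- [1:5] slice when fewer than 5 cities have a non-NULL opportunity_score.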
LIST(city_slug ORDER BY opportunity_score DESC NULLS LAST)[1:5] AS top_opportunity_slugs,
LIST(city_name ORDER BY opportunity_score DESC NULLS LAST)[1:5] AS top_opportunity_names,
-- Pricing medians across cities (NULL when no Playtomic coverage in country) -- Pricing medians across cities (NULL when no Playtomic coverage in country)
ROUND(MEDIAN(median_hourly_rate), 0) AS median_hourly_rate, ROUND(MEDIAN(median_hourly_rate), 0) AS median_hourly_rate,
ROUND(MEDIAN(median_peak_rate), 0) AS median_peak_rate, ROUND(MEDIAN(median_peak_rate), 0) AS median_peak_rate,