feat(scoring): Score v6 — World Bank global economic data for non-EU countries

Non-EU countries (AR, MX, AE, AU, etc.) previously got NULL for
median_income_pps and pli_construction, falling back to EU-calibrated
defaults (15K PPS, PLI=100) that produced wrong scores.

New World Bank WDI extractor fetches GNI per capita PPP and price level
ratio for 215 countries. dim_countries uses Germany as calibration anchor
to scale WB values into the Eurostat range (dynamic ratio, self-corrects
as both sources update). EU countries keep exact Eurostat values.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-03-08 18:17:33 +01:00
parent fcef47cb22
commit 3c135051fd
7 changed files with 246 additions and 8 deletions

View File

@@ -2,10 +2,14 @@
--
-- Consolidates data previously duplicated across dim_cities and dim_locations:
-- - country_name_en / country_slug (was: ~50-line CASE blocks in both models)
-- - median_income_pps (was: country_income CTE in both models)
-- - energy prices, labour costs, PLI indices (new — from Eurostat datasets)
-- - median_income_pps (Eurostat PPS preferred, World Bank GNI PPP fallback)
-- - energy prices, labour costs, PLI indices (Eurostat, WB price level ratio fallback)
-- - cost override columns for the financial calculator
--
-- World Bank fallback: for countries without Eurostat coverage (mostly non-EU: AR, MX,
-- AE, AU, etc.), median_income_pps and pli_construction are derived from WB WDI
-- indicators calibrated to the Eurostat scale using Germany as anchor. See the
-- de_calibration CTE. Any country with a Eurostat value keeps that exact value.
--
-- Used by: dim_cities, dim_locations, pseo_city_costs_de, planner_defaults.
-- Grain: country_code (one row per ISO 3166-1 alpha-2 country code).
-- Kind: FULL — small table (a few hundred rows at most now that World Bank countries are included), full refresh daily.
@@ -82,6 +86,26 @@ de_elec AS (
-- Germany's gas_eur_gj value from latest_gas; cross-joined into the final SELECT as de_g.
de_gas AS (
    SELECT lg.gas_eur_gj
    FROM latest_gas AS lg
    WHERE lg.country_code = 'DE'
),
-- Latest World Bank WDI per country (GNI PPP + price level ratio).
-- Each indicator is resolved independently to its most recent non-NULL year: the two
-- WDI series can be published with different lags, and picking one whole row for the
-- latest year would discard an older-but-available value of the other indicator.
-- Output grain: one row per country_code (enforced by GROUP BY).
latest_wb AS (
    SELECT
        country_code,
        -- Value from the most recent year in which GNI PPP is populated
        ARG_MAX(gni_ppp, ref_year)
            FILTER (WHERE gni_ppp IS NOT NULL) AS gni_ppp,
        -- Value from the most recent year in which the price level ratio is populated
        ARG_MAX(price_level_ratio, ref_year)
            FILTER (WHERE price_level_ratio IS NOT NULL) AS price_level_ratio,
        -- wb_year is used downstream as the income_year fallback, so report the year
        -- of the GNI value; fall back to the price ratio year when GNI is absent.
        COALESCE(
            MAX(ref_year) FILTER (WHERE gni_ppp IS NOT NULL),
            MAX(ref_year) FILTER (WHERE price_level_ratio IS NOT NULL)
        ) AS wb_year
    FROM staging.stg_worldbank_income
    -- Countries with no usable indicator at all are excluded, as before
    WHERE gni_ppp IS NOT NULL OR price_level_ratio IS NOT NULL
    GROUP BY country_code
),
-- Germany calibration anchor: Eurostat PPS + WB GNI PPP + WB price ratio + Eurostat PLI construction.
-- Used to scale World Bank values into Eurostat-comparable ranges.
-- Always exactly one row: each sub-select is an ungrouped aggregate, which returns a
-- single row even when DE is missing from that source (the value is then NULL and the
-- dependent ratio falls through to NULL). A plain filtered sub-select would return zero
-- rows instead, and because this CTE is CROSS JOINed into the final SELECT, that would
-- silently empty the whole table.
de_calibration AS (
    SELECT
        i.de_eurostat_pps,
        wb.de_gni_ppp,
        wb.de_price_level_ratio,
        p.de_pli_construction
    FROM (
        SELECT MAX(median_income_pps) AS de_eurostat_pps
        FROM latest_income
        WHERE country_code = 'DE'
    ) AS i
    CROSS JOIN (
        SELECT
            MAX(gni_ppp) AS de_gni_ppp,
            MAX(price_level_ratio) AS de_price_level_ratio
        FROM latest_wb
        WHERE country_code = 'DE'
    ) AS wb
    CROSS JOIN (
        SELECT MAX(construction) AS de_pli_construction
        FROM pli_pivoted
        WHERE country_code = 'DE'
    ) AS p
),
-- All distinct country codes from any source
all_countries AS (
SELECT country_code FROM latest_income
@@ -93,6 +117,8 @@ all_countries AS (
SELECT country_code FROM latest_labour
UNION
SELECT country_code FROM pli_pivoted
UNION
SELECT country_code FROM latest_wb
-- Ensure known padel markets appear even if Eurostat doesn't cover them yet
UNION ALL
SELECT unnest(['DE','ES','GB','FR','IT','PT','AT','CH','NL','BE','SE','NO','DK','FI',
@@ -149,15 +175,21 @@ SELECT
ELSE ac.country_code
END, '[^a-zA-Z0-9]+', '-'
)) AS country_slug,
-- Income data
i.median_income_pps,
i.income_year,
-- Income: Eurostat PPS preferred, World Bank GNI PPP scaled to PPS as fallback
COALESCE(
i.median_income_pps,
ROUND(wb.gni_ppp * (de_cal.de_eurostat_pps / NULLIF(de_cal.de_gni_ppp, 0)), 0)
) AS median_income_pps,
COALESCE(i.income_year, wb.wb_year) AS income_year,
-- Raw energy and labour data (for reference / future staffed-scenario use)
e.electricity_eur_kwh,
g.gas_eur_gj,
la.labour_cost_eur_hour,
-- PLI indices per category (EU27=100)
p.construction AS pli_construction,
-- PLI construction: Eurostat preferred, World Bank price level ratio scaled to PLI as fallback
COALESCE(
p.construction,
ROUND(wb.price_level_ratio / NULLIF(de_cal.de_price_level_ratio, 0) * de_cal.de_pli_construction, 1)
) AS pli_construction,
p.housing AS pli_housing,
p.services AS pli_services,
p.misc AS pli_misc,
@@ -278,8 +310,10 @@ LEFT JOIN latest_electricity e ON ac.country_code = e.country_code
LEFT JOIN latest_gas g ON ac.country_code = g.country_code
LEFT JOIN latest_labour la ON ac.country_code = la.country_code
LEFT JOIN pli_pivoted p ON ac.country_code = p.country_code
LEFT JOIN latest_wb wb ON ac.country_code = wb.country_code
CROSS JOIN de_pli de_p
CROSS JOIN de_elec de_e
CROSS JOIN de_gas de_g
CROSS JOIN de_calibration de_cal
-- Enforce grain
QUALIFY ROW_NUMBER() OVER (PARTITION BY ac.country_code ORDER BY ac.country_code) = 1