feat(scoring): Score v6 — World Bank global economic data for non-EU countries
Non-EU countries (AR, MX, AE, AU, etc.) previously got NULL for median_income_pps and pli_construction, falling back to EU-calibrated defaults (15K PPS, PLI=100) that produced wrong scores. New World Bank WDI extractor fetches GNI per capita PPP and price level ratio for 215 countries. dim_countries uses Germany as calibration anchor to scale WB values into the Eurostat range (dynamic ratio, self-corrects as both sources update). EU countries keep exact Eurostat values. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,10 +2,14 @@
|
||||
--
|
||||
-- Consolidates data previously duplicated across dim_cities and dim_locations:
|
||||
-- - country_name_en / country_slug (was: ~50-line CASE blocks in both models)
|
||||
-- - median_income_pps (was: country_income CTE in both models)
|
||||
-- - energy prices, labour costs, PLI indices (new — from Eurostat datasets)
|
||||
-- - median_income_pps (Eurostat PPS preferred, World Bank GNI PPP fallback)
|
||||
-- - energy prices, labour costs, PLI indices (Eurostat, WB price level ratio fallback)
|
||||
-- - cost override columns for the financial calculator
|
||||
--
|
||||
-- World Bank fallback: for non-EU countries (AR, MX, AE, AU, etc.), income and PLI
|
||||
-- are derived from WB WDI indicators calibrated to the Eurostat scale using Germany
|
||||
-- as anchor. See de_calibration CTE. EU countries keep exact Eurostat values.
|
||||
--
|
||||
-- Used by: dim_cities, dim_locations, pseo_city_costs_de, planner_defaults.
|
||||
-- Grain: country_code (one row per ISO 3166-1 alpha-2 country code).
|
||||
-- Kind: FULL — small table (one row per country; roughly 215 rows once World Bank countries are included), full refresh daily.
|
||||
@@ -82,6 +86,26 @@ de_elec AS (
|
||||
-- Germany's gas price (EUR/GJ), CROSS JOINed into the final SELECT as a single-row anchor.
-- The ungrouped MAX guarantees exactly one row: if DE is absent from latest_gas the value
-- is NULL instead of the CTE being empty, which would otherwise drop every row of the
-- final SELECT through the CROSS JOIN.
-- NOTE(review): assumes latest_gas holds at most one row per country_code, so MAX is an
-- identity on the DE row — confirm against the latest_gas definition.
de_gas AS (
    SELECT MAX(gas_eur_gj) AS gas_eur_gj
    FROM latest_gas
    WHERE country_code = 'DE'
),
|
||||
-- Latest World Bank WDI per country (GNI PPP + price level ratio).
-- One row per country_code. Each indicator is taken from its own most recent year with a
-- non-NULL value, because the two series are not necessarily published for the same
-- years: picking a single "latest row" would discard an older-but-valid value of one
-- indicator whenever the newest row only carries the other.
-- wb_year is the reference year of gni_ppp (it labels the income fallback downstream);
-- for countries with no GNI observation it falls back to the price level ratio's year.
latest_wb AS (
    SELECT
        country_code,
        arg_max(gni_ppp, ref_year)
            FILTER (WHERE gni_ppp IS NOT NULL)           AS gni_ppp,
        arg_max(price_level_ratio, ref_year)
            FILTER (WHERE price_level_ratio IS NOT NULL) AS price_level_ratio,
        COALESCE(
            MAX(ref_year) FILTER (WHERE gni_ppp IS NOT NULL),
            MAX(ref_year) FILTER (WHERE price_level_ratio IS NOT NULL)
        )                                                AS wb_year
    FROM staging.stg_worldbank_income
    -- Countries with no usable observation at all stay out of the result (and therefore
    -- out of all_countries), as before.
    WHERE gni_ppp IS NOT NULL OR price_level_ratio IS NOT NULL
    GROUP BY country_code
),
|
||||
-- Germany calibration anchor: Eurostat PPS + WB GNI PPP + WB price ratio + Eurostat PLI construction.
-- Used to scale World Bank values into Eurostat-comparable ranges.
-- Always exactly one row. Each column is an ungrouped aggregate over the DE rows of its
-- source, so if DE is missing from a source that column is NULL and the ratio built on it
-- falls through to NULL. (A CROSS JOIN of filtered subqueries would instead return zero
-- rows, and the final SELECT's CROSS JOIN on this CTE would then return no countries.)
-- NOTE(review): assumes each source holds at most one DE row, making MAX an identity —
-- latest_wb enforces this itself; confirm for latest_income and pli_pivoted.
de_calibration AS (
    SELECT
        (SELECT MAX(median_income_pps) FROM latest_income WHERE country_code = 'DE') AS de_eurostat_pps,
        (SELECT MAX(gni_ppp)           FROM latest_wb     WHERE country_code = 'DE') AS de_gni_ppp,
        (SELECT MAX(price_level_ratio) FROM latest_wb     WHERE country_code = 'DE') AS de_price_level_ratio,
        (SELECT MAX(construction)      FROM pli_pivoted   WHERE country_code = 'DE') AS de_pli_construction
),
|
||||
-- All distinct country codes from any source
|
||||
all_countries AS (
|
||||
SELECT country_code FROM latest_income
|
||||
@@ -93,6 +117,8 @@ all_countries AS (
|
||||
SELECT country_code FROM latest_labour
|
||||
UNION
|
||||
SELECT country_code FROM pli_pivoted
|
||||
UNION
|
||||
SELECT country_code FROM latest_wb
|
||||
-- Ensure known padel markets appear even if Eurostat doesn't cover them yet
|
||||
UNION ALL
|
||||
SELECT unnest(['DE','ES','GB','FR','IT','PT','AT','CH','NL','BE','SE','NO','DK','FI',
|
||||
@@ -149,15 +175,21 @@ SELECT
|
||||
ELSE ac.country_code
|
||||
END, '[^a-zA-Z0-9]+', '-'
|
||||
)) AS country_slug,
|
||||
-- Income data
|
||||
i.median_income_pps,
|
||||
i.income_year,
|
||||
-- Income: Eurostat PPS preferred, World Bank GNI PPP scaled to PPS as fallback
|
||||
COALESCE(
|
||||
i.median_income_pps,
|
||||
ROUND(wb.gni_ppp * (de_cal.de_eurostat_pps / NULLIF(de_cal.de_gni_ppp, 0)), 0)
|
||||
) AS median_income_pps,
|
||||
COALESCE(i.income_year, wb.wb_year) AS income_year,
|
||||
-- Raw energy and labour data (for reference / future staffed-scenario use)
|
||||
e.electricity_eur_kwh,
|
||||
g.gas_eur_gj,
|
||||
la.labour_cost_eur_hour,
|
||||
-- PLI indices per category (EU27=100)
|
||||
p.construction AS pli_construction,
|
||||
-- PLI construction: Eurostat preferred, World Bank price level ratio scaled to PLI as fallback
|
||||
COALESCE(
|
||||
p.construction,
|
||||
ROUND(wb.price_level_ratio / NULLIF(de_cal.de_price_level_ratio, 0) * de_cal.de_pli_construction, 1)
|
||||
) AS pli_construction,
|
||||
p.housing AS pli_housing,
|
||||
p.services AS pli_services,
|
||||
p.misc AS pli_misc,
|
||||
@@ -278,8 +310,10 @@ LEFT JOIN latest_electricity e ON ac.country_code = e.country_code
|
||||
LEFT JOIN latest_gas g ON ac.country_code = g.country_code
|
||||
LEFT JOIN latest_labour la ON ac.country_code = la.country_code
|
||||
LEFT JOIN pli_pivoted p ON ac.country_code = p.country_code
|
||||
LEFT JOIN latest_wb wb ON ac.country_code = wb.country_code
|
||||
CROSS JOIN de_pli de_p
|
||||
CROSS JOIN de_elec de_e
|
||||
CROSS JOIN de_gas de_g
|
||||
CROSS JOIN de_calibration de_cal
|
||||
-- Enforce grain
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY ac.country_code ORDER BY ac.country_code) = 1
|
||||
|
||||
Reference in New Issue
Block a user