feat(transform): individualise article costs with per-country Eurostat data
Add real per-country cost data to ~30 calculator fields so pSEO articles show country-specific CAPEX/OPEX instead of hardcoded DE defaults.

Extractor:
- eurostat.py: add 8 new datasets (nrg_pc_205, nrg_pc_203, lc_lci_lev, 5×prc_ppp_ind variants); add optional `dataset_code` field so multiple dict entries can share one Eurostat API endpoint

Staging (4 new models):
- stg_electricity_prices — EUR/kWh by country, semi-annual
- stg_gas_prices — EUR/GJ by country, semi-annual
- stg_labour_costs — EUR/hour by country, annual (future staffed scenario)
- stg_price_levels — price level indices (PLI, EU27=100) for 5 categories, annual

Foundation:
- dim_countries (new) — conformed country dimension; eliminates ~50-line CASE blocks duplicated in dim_cities/dim_locations; computes ~29 calculator cost override columns from PLI ratios and energy price ratios vs DE baseline; NULL for DE so calculator falls through to DEFAULTS unchanged
- dim_cities — replace country_name/slug CASE blocks + country_income CTE with JOIN dim_countries
- dim_locations — same refactor as dim_cities

Serving:
- pseo_city_costs_de — JOIN dim_countries; add 29 camelCase override columns auto-applied by calculator (electricity, heating, rentSqm, hallCostSqm, …)
- planner_defaults — JOIN dim_countries; same 29 cost columns flow through to /api/market-data endpoint

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -6,9 +6,9 @@
|
||||
-- covers all locations with population ≥ 1K so zero-court Gemeinden score fully.
|
||||
--
|
||||
-- Enriched with:
|
||||
-- foundation.dim_countries → country_name_en, country_slug, median_income_pps
|
||||
-- stg_nuts2_boundaries + stg_regional_income → EU NUTS-2/NUTS-1 income (spatial join)
|
||||
-- stg_income_usa → US state-level income (PPS-normalised)
|
||||
-- stg_income → country-level income (fallback for all countries)
|
||||
-- stg_padel_courts → padel venue count + nearest court distance (km)
|
||||
-- stg_tennis_courts → tennis court count within 25km radius
|
||||
--
|
||||
@@ -16,7 +16,7 @@
|
||||
-- 1. EU NUTS-2 regional income (finest; spatial join via ST_Contains)
|
||||
-- 2. EU NUTS-1 regional income (fallback when NUTS-2 income missing from dataset)
|
||||
-- 3. US state income (ratio-normalised to PPS scale; see us_income CTE)
|
||||
-- 4. Country-level income (global fallback from stg_income / ilc_di03)
|
||||
-- 4. Country-level income (global fallback from dim_countries / ilc_di03)
|
||||
--
|
||||
-- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension).
|
||||
-- Spatial joins use BETWEEN predicates (not ABS()) to enable DuckDB's IEJoin
|
||||
@@ -49,12 +49,6 @@ locations AS (
|
||||
FROM staging.stg_population_geonames
|
||||
WHERE lat IS NOT NULL AND lon IS NOT NULL
|
||||
),
|
||||
-- Country income (ilc_di03) — global fallback for all countries
|
||||
country_income AS (
|
||||
SELECT country_code, median_income_pps, ref_year AS income_year
|
||||
FROM staging.stg_income
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
|
||||
),
|
||||
-- ── EU NUTS-2 income via spatial join ──────────────────────────────────────
|
||||
-- Each EU location's (lon, lat) is matched against NUTS-2 boundary polygons.
|
||||
-- The bounding box pre-filter (bbox_lat/lon_min/max) eliminates most candidates
|
||||
@@ -214,56 +208,9 @@ tennis_nearby AS (
|
||||
SELECT
|
||||
l.geoname_id,
|
||||
l.country_code,
|
||||
-- Human-readable country name (consistent with dim_cities)
|
||||
CASE l.country_code
|
||||
WHEN 'DE' THEN 'Germany'
|
||||
WHEN 'ES' THEN 'Spain'
|
||||
WHEN 'GB' THEN 'United Kingdom'
|
||||
WHEN 'FR' THEN 'France'
|
||||
WHEN 'IT' THEN 'Italy'
|
||||
WHEN 'PT' THEN 'Portugal'
|
||||
WHEN 'AT' THEN 'Austria'
|
||||
WHEN 'CH' THEN 'Switzerland'
|
||||
WHEN 'NL' THEN 'Netherlands'
|
||||
WHEN 'BE' THEN 'Belgium'
|
||||
WHEN 'SE' THEN 'Sweden'
|
||||
WHEN 'NO' THEN 'Norway'
|
||||
WHEN 'DK' THEN 'Denmark'
|
||||
WHEN 'FI' THEN 'Finland'
|
||||
WHEN 'US' THEN 'United States'
|
||||
WHEN 'AR' THEN 'Argentina'
|
||||
WHEN 'MX' THEN 'Mexico'
|
||||
WHEN 'AE' THEN 'UAE'
|
||||
WHEN 'AU' THEN 'Australia'
|
||||
WHEN 'IE' THEN 'Ireland'
|
||||
ELSE l.country_code
|
||||
END AS country_name_en,
|
||||
-- URL-safe country slug
|
||||
LOWER(REGEXP_REPLACE(
|
||||
CASE l.country_code
|
||||
WHEN 'DE' THEN 'Germany'
|
||||
WHEN 'ES' THEN 'Spain'
|
||||
WHEN 'GB' THEN 'United Kingdom'
|
||||
WHEN 'FR' THEN 'France'
|
||||
WHEN 'IT' THEN 'Italy'
|
||||
WHEN 'PT' THEN 'Portugal'
|
||||
WHEN 'AT' THEN 'Austria'
|
||||
WHEN 'CH' THEN 'Switzerland'
|
||||
WHEN 'NL' THEN 'Netherlands'
|
||||
WHEN 'BE' THEN 'Belgium'
|
||||
WHEN 'SE' THEN 'Sweden'
|
||||
WHEN 'NO' THEN 'Norway'
|
||||
WHEN 'DK' THEN 'Denmark'
|
||||
WHEN 'FI' THEN 'Finland'
|
||||
WHEN 'US' THEN 'United States'
|
||||
WHEN 'AR' THEN 'Argentina'
|
||||
WHEN 'MX' THEN 'Mexico'
|
||||
WHEN 'AE' THEN 'UAE'
|
||||
WHEN 'AU' THEN 'Australia'
|
||||
WHEN 'IE' THEN 'Ireland'
|
||||
ELSE l.country_code
|
||||
END, '[^a-zA-Z0-9]+', '-'
|
||||
)) AS country_slug,
|
||||
-- Human-readable country name and slug — from dim_countries (single source of truth)
|
||||
c.country_name_en,
|
||||
c.country_slug,
|
||||
l.location_name,
|
||||
l.location_slug,
|
||||
l.lat,
|
||||
@@ -276,12 +223,12 @@ SELECT
|
||||
COALESCE(
|
||||
ri.regional_income_pps, -- EU: NUTS-2 (finest) or NUTS-1 (fallback)
|
||||
us.median_income_pps, -- US: state-level PPS-equivalent
|
||||
ci.median_income_pps -- Global: country-level from ilc_di03
|
||||
c.median_income_pps -- Global: country-level from dim_countries / ilc_di03
|
||||
) AS median_income_pps,
|
||||
COALESCE(
|
||||
ri.regional_income_year,
|
||||
us.income_year,
|
||||
ci.income_year
|
||||
c.income_year
|
||||
) AS income_year,
|
||||
COALESCE(pl.padel_venue_count, 0)::INTEGER AS padel_venue_count,
|
||||
-- Venues per 100K residents (NULL if population = 0)
|
||||
@@ -293,8 +240,8 @@ SELECT
|
||||
COALESCE(tn.tennis_courts_within_25km, 0)::INTEGER AS tennis_courts_within_25km,
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM locations l
|
||||
LEFT JOIN country_income ci ON l.country_code = ci.country_code
|
||||
LEFT JOIN regional_income ri ON l.geoname_id = ri.geoname_id
|
||||
LEFT JOIN foundation.dim_countries c ON l.country_code = c.country_code
|
||||
LEFT JOIN regional_income ri ON l.geoname_id = ri.geoname_id
|
||||
LEFT JOIN us_income us ON l.country_code = 'US'
|
||||
AND l.admin1_code = us.admin1_code
|
||||
LEFT JOIN nearest_padel np ON l.geoname_id = np.geoname_id
|
||||
|
||||
Reference in New Issue
Block a user