feat(transform): individualise article costs with per-country Eurostat data
Add real per-country cost data to ~30 calculator fields so pSEO articles show country-specific CAPEX/OPEX instead of hardcoded DE defaults. Extractor: - eurostat.py: add 8 new datasets (nrg_pc_205, nrg_pc_203, lc_lci_lev, 5×prc_ppp_ind variants); add optional `dataset_code` field so multiple dict entries can share one Eurostat API endpoint Staging (4 new models): - stg_electricity_prices — EUR/kWh by country, semi-annual - stg_gas_prices — EUR/GJ by country, semi-annual - stg_labour_costs — EUR/hour by country, annual (future staffed scenario) - stg_price_levels — PLI indices (EU27=100) for 5 categories, annual Foundation: - dim_countries (new) — conformed country dimension; eliminates ~50-line CASE blocks duplicated in dim_cities/dim_locations; computes ~29 calculator cost override columns from PLI ratios and energy price ratios vs DE baseline; NULL for DE so calculator falls through to DEFAULTS unchanged - dim_cities — replace country_name/slug CASE blocks + country_income CTE with JOIN dim_countries - dim_locations — same refactor as dim_cities Serving: - pseo_city_costs_de — JOIN dim_countries; add 29 camelCase override columns auto-applied by calculator (electricity, heating, rentSqm, hallCostSqm, …) - planner_defaults — JOIN dim_countries; same 29 cost columns flow through to /api/market-data endpoint Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,285 @@
|
||||
-- Conformed country dimension — single authoritative source for all country metadata.
|
||||
--
|
||||
-- Consolidates data previously duplicated across dim_cities and dim_locations:
|
||||
-- - country_name_en / country_slug (was: ~50-line CASE blocks in both models)
|
||||
-- - median_income_pps (was: country_income CTE in both models)
|
||||
-- - energy prices, labour costs, PLI indices (new — from Eurostat datasets)
|
||||
-- - cost override columns for the financial calculator
|
||||
--
|
||||
-- Used by: dim_cities, dim_locations, pseo_city_costs_de, planner_defaults.
|
||||
-- Grain: country_code (one row per ISO 3166-1 alpha-2 country code).
|
||||
-- Kind: FULL — small table (~40 rows), full refresh daily.
|
||||
--
|
||||
-- Cost override columns:
|
||||
-- NULL = fall through to calculator.py DEFAULTS (safe: auto-mapping filters None).
|
||||
-- For DE (the baseline country) all overrides are NULL to preserve exact DEFAULTS.
|
||||
-- For countries missing Eurostat data, NULLs propagate naturally.
|
||||
-- camelCase column aliases match DEFAULTS keys for auto-mapping in content/__init__.py.
|
||||
--
|
||||
-- !! DE baseline values sourced from calculator.py DEFAULTS (web/src/padelnomics/planner/calculator.py).
|
||||
-- !! If DEFAULTS change, the hardcoded baseline values below must be updated to match.
|
||||
-- !! Search "DE baseline" in this file to find all affected lines.
|
||||
|
||||
MODEL (
|
||||
name foundation.dim_countries,
|
||||
kind FULL,
|
||||
cron '@daily',
|
||||
grain country_code
|
||||
);
|
||||
|
||||
WITH
|
||||
-- Latest income per country
|
||||
latest_income AS (
|
||||
SELECT country_code, median_income_pps, ref_year AS income_year
|
||||
FROM staging.stg_income
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
|
||||
),
|
||||
-- Latest electricity price per country (use most recent semi-annual period)
|
||||
latest_electricity AS (
|
||||
SELECT country_code, electricity_eur_kwh, ref_period
|
||||
FROM staging.stg_electricity_prices
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_period DESC) = 1
|
||||
),
|
||||
-- Latest gas price per country
|
||||
latest_gas AS (
|
||||
SELECT country_code, gas_eur_gj, ref_period
|
||||
FROM staging.stg_gas_prices
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_period DESC) = 1
|
||||
),
|
||||
-- Latest labour cost per country
|
||||
latest_labour AS (
|
||||
SELECT country_code, labour_cost_eur_hour, ref_year
|
||||
FROM staging.stg_labour_costs
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
|
||||
),
|
||||
-- Latest PLI per (country, category)
|
||||
latest_pli AS (
|
||||
SELECT country_code, category, pli, ref_year
|
||||
FROM staging.stg_price_levels
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code, category ORDER BY ref_year DESC) = 1
|
||||
),
|
||||
-- Pivot PLI categories into columns per country
|
||||
pli_pivoted AS (
|
||||
SELECT
|
||||
country_code,
|
||||
MAX(pli) FILTER (WHERE category = 'construction') AS construction,
|
||||
MAX(pli) FILTER (WHERE category = 'housing') AS housing,
|
||||
MAX(pli) FILTER (WHERE category = 'services') AS services,
|
||||
MAX(pli) FILTER (WHERE category = 'misc') AS misc,
|
||||
MAX(pli) FILTER (WHERE category = 'government') AS government
|
||||
FROM latest_pli
|
||||
GROUP BY country_code
|
||||
),
|
||||
-- DE baseline rows for ratio computation
|
||||
-- NULL-safe: if DE is missing from a source, ratios produce NULL (safe fallthrough).
|
||||
de_pli AS (
|
||||
SELECT construction, housing, services, misc, government
|
||||
FROM pli_pivoted WHERE country_code = 'DE'
|
||||
),
|
||||
de_elec AS (
|
||||
SELECT electricity_eur_kwh FROM latest_electricity WHERE country_code = 'DE'
|
||||
),
|
||||
de_gas AS (
|
||||
SELECT gas_eur_gj FROM latest_gas WHERE country_code = 'DE'
|
||||
),
|
||||
-- All distinct country codes from any source
|
||||
all_countries AS (
|
||||
SELECT country_code FROM latest_income
|
||||
UNION
|
||||
SELECT country_code FROM latest_electricity
|
||||
UNION
|
||||
SELECT country_code FROM latest_gas
|
||||
UNION
|
||||
SELECT country_code FROM latest_labour
|
||||
UNION
|
||||
SELECT country_code FROM pli_pivoted
|
||||
-- Ensure known padel markets appear even if Eurostat doesn't cover them yet
|
||||
UNION ALL
|
||||
SELECT unnest(['DE','ES','GB','FR','IT','PT','AT','CH','NL','BE','SE','NO','DK','FI',
|
||||
'US','AR','MX','AE','AU','IE']) AS country_code
|
||||
)
|
||||
SELECT
|
||||
ac.country_code,
|
||||
-- Country name and slug (single definition, replacing duplicated CASE blocks)
|
||||
CASE ac.country_code
|
||||
WHEN 'DE' THEN 'Germany'
|
||||
WHEN 'ES' THEN 'Spain'
|
||||
WHEN 'GB' THEN 'United Kingdom'
|
||||
WHEN 'FR' THEN 'France'
|
||||
WHEN 'IT' THEN 'Italy'
|
||||
WHEN 'PT' THEN 'Portugal'
|
||||
WHEN 'AT' THEN 'Austria'
|
||||
WHEN 'CH' THEN 'Switzerland'
|
||||
WHEN 'NL' THEN 'Netherlands'
|
||||
WHEN 'BE' THEN 'Belgium'
|
||||
WHEN 'SE' THEN 'Sweden'
|
||||
WHEN 'NO' THEN 'Norway'
|
||||
WHEN 'DK' THEN 'Denmark'
|
||||
WHEN 'FI' THEN 'Finland'
|
||||
WHEN 'US' THEN 'United States'
|
||||
WHEN 'AR' THEN 'Argentina'
|
||||
WHEN 'MX' THEN 'Mexico'
|
||||
WHEN 'AE' THEN 'UAE'
|
||||
WHEN 'AU' THEN 'Australia'
|
||||
WHEN 'IE' THEN 'Ireland'
|
||||
ELSE ac.country_code
|
||||
END AS country_name_en,
|
||||
LOWER(REGEXP_REPLACE(
|
||||
CASE ac.country_code
|
||||
WHEN 'DE' THEN 'Germany'
|
||||
WHEN 'ES' THEN 'Spain'
|
||||
WHEN 'GB' THEN 'United Kingdom'
|
||||
WHEN 'FR' THEN 'France'
|
||||
WHEN 'IT' THEN 'Italy'
|
||||
WHEN 'PT' THEN 'Portugal'
|
||||
WHEN 'AT' THEN 'Austria'
|
||||
WHEN 'CH' THEN 'Switzerland'
|
||||
WHEN 'NL' THEN 'Netherlands'
|
||||
WHEN 'BE' THEN 'Belgium'
|
||||
WHEN 'SE' THEN 'Sweden'
|
||||
WHEN 'NO' THEN 'Norway'
|
||||
WHEN 'DK' THEN 'Denmark'
|
||||
WHEN 'FI' THEN 'Finland'
|
||||
WHEN 'US' THEN 'United States'
|
||||
WHEN 'AR' THEN 'Argentina'
|
||||
WHEN 'MX' THEN 'Mexico'
|
||||
WHEN 'AE' THEN 'UAE'
|
||||
WHEN 'AU' THEN 'Australia'
|
||||
WHEN 'IE' THEN 'Ireland'
|
||||
ELSE ac.country_code
|
||||
END, '[^a-zA-Z0-9]+', '-'
|
||||
)) AS country_slug,
|
||||
-- Income data
|
||||
i.median_income_pps,
|
||||
i.income_year,
|
||||
-- Raw energy and labour data (for reference / future staffed-scenario use)
|
||||
e.electricity_eur_kwh,
|
||||
g.gas_eur_gj,
|
||||
la.labour_cost_eur_hour,
|
||||
-- PLI indices per category (EU27=100)
|
||||
p.construction AS pli_construction,
|
||||
p.housing AS pli_housing,
|
||||
p.services AS pli_services,
|
||||
p.misc AS pli_misc,
|
||||
p.government AS pli_government,
|
||||
-- ── Calculator cost override columns ────────────────────────────────────
|
||||
-- NULL for DE = fall through to calculator.py DEFAULTS (safe: auto-mapping skips None).
|
||||
-- Formulas: country_value = DE_default × (country_price / DE_price)
|
||||
-- or DE_default × (country_PLI / DE_PLI)
|
||||
--
|
||||
-- OPEX overrides — energy (direct price ratio)
|
||||
-- DE baseline: electricity=600, heating=400 (see calculator.py DEFAULTS)
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(600.0 * (e.electricity_eur_kwh / de_e.electricity_eur_kwh), 0)
|
||||
END AS electricity,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(400.0 * (g.gas_eur_gj / de_g.gas_eur_gj), 0)
|
||||
END AS heating,
|
||||
-- OPEX overrides — PLI-scaled (housing category)
|
||||
-- DE baseline: rentSqm=4, water=125, outdoorRent=400
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(4.0 * (p.housing / de_p.housing), 2)
|
||||
END AS rent_sqm,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(125.0 * (p.housing / de_p.housing), 0)
|
||||
END AS water,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(400.0 * (p.housing / de_p.housing), 0)
|
||||
END AS outdoor_rent,
|
||||
-- OPEX overrides — PLI-scaled (misc category)
|
||||
-- DE baseline: insurance=300
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(300.0 * (p.misc / de_p.misc), 0)
|
||||
END AS insurance,
|
||||
-- OPEX overrides — PLI-scaled (services category)
|
||||
-- DE baseline: cleaning=300, maintenance=300, marketing=350
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(300.0 * (p.services / de_p.services), 0)
|
||||
END AS cleaning,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(300.0 * (p.services / de_p.services), 0)
|
||||
END AS maintenance,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(350.0 * (p.services / de_p.services), 0)
|
||||
END AS marketing,
|
||||
-- OPEX overrides — PLI-scaled (government category)
|
||||
-- DE baseline: propertyTax=250, permitsCompliance=12000
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(250.0 * (p.government / de_p.government), 0)
|
||||
END AS property_tax,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(12000.0 * (p.government / de_p.government), 0)
|
||||
END AS permits_compliance,
|
||||
-- CAPEX overrides — PLI-scaled (construction category)
|
||||
-- DE baseline: hallCostSqm=500, foundationSqm=150, hvac=100000, electrical=60000,
|
||||
-- sanitary=80000, parking=50000, fitout=40000, planning=100000,
|
||||
-- fireProtection=80000, floorPrep=12000, hvacUpgrade=20000,
|
||||
-- lightingUpgrade=10000, outdoorFoundation=35, outdoorSiteWork=8000,
|
||||
-- outdoorLighting=4000, outdoorFencing=6000, workingCapital=15000
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(500.0 * (p.construction / de_p.construction), 0)
|
||||
END AS hall_cost_sqm,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(150.0 * (p.construction / de_p.construction), 0)
|
||||
END AS foundation_sqm,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(100000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS hvac,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(60000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS electrical,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(80000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS sanitary,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(50000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS parking,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(40000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS fitout,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(100000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS planning,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(80000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS fire_protection,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(12000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS floor_prep,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(20000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS hvac_upgrade,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(10000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS lighting_upgrade,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(35.0 * (p.construction / de_p.construction), 0)
|
||||
END AS outdoor_foundation,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(8000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS outdoor_site_work,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(4000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS outdoor_lighting,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(6000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS outdoor_fencing,
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(15000.0 * (p.construction / de_p.construction), 0)
|
||||
END AS working_capital,
|
||||
-- CAPEX overrides — PLI-scaled (housing category)
|
||||
-- DE baseline: landPriceSqm=60
|
||||
CASE WHEN ac.country_code = 'DE' THEN NULL
|
||||
ELSE ROUND(60.0 * (p.housing / de_p.housing), 0)
|
||||
END AS land_price_sqm
|
||||
FROM (SELECT DISTINCT country_code FROM all_countries WHERE LENGTH(country_code) = 2) ac
|
||||
LEFT JOIN latest_income i ON ac.country_code = i.country_code
|
||||
LEFT JOIN latest_electricity e ON ac.country_code = e.country_code
|
||||
LEFT JOIN latest_gas g ON ac.country_code = g.country_code
|
||||
LEFT JOIN latest_labour la ON ac.country_code = la.country_code
|
||||
LEFT JOIN pli_pivoted p ON ac.country_code = p.country_code
|
||||
CROSS JOIN de_pli de_p
|
||||
CROSS JOIN de_elec de_e
|
||||
CROSS JOIN de_gas de_g
|
||||
-- Enforce grain
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY ac.country_code ORDER BY ac.country_code) = 1
|
||||
Reference in New Issue
Block a user