Files
padelnomics/transform/sqlmesh_padelnomics/models/foundation/dim_countries.sql
Deeman 2e68cfbe4f feat(transform): individualise article costs with per-country Eurostat data
Add real per-country cost data to ~30 calculator fields so pSEO articles
show country-specific CAPEX/OPEX instead of hardcoded DE defaults.

Extractor:
- eurostat.py: add 8 new datasets (nrg_pc_205, nrg_pc_203, lc_lci_lev,
  5×prc_ppp_ind variants); add optional `dataset_code` field so multiple
  dict entries can share one Eurostat API endpoint

Staging (4 new models):
- stg_electricity_prices — EUR/kWh by country, semi-annual
- stg_gas_prices         — EUR/GJ by country, semi-annual
- stg_labour_costs       — EUR/hour by country, annual (future staffed scenario)
- stg_price_levels       — PLI indices (EU27=100) for 5 categories, annual

Foundation:
- dim_countries (new) — conformed country dimension; eliminates ~50-line CASE
  blocks duplicated in dim_cities/dim_locations; computes ~29 calculator cost
  override columns from PLI ratios and energy price ratios vs DE baseline;
  NULL for DE so calculator falls through to DEFAULTS unchanged
- dim_cities — replace country_name/slug CASE blocks + country_income CTE
  with JOIN dim_countries
- dim_locations — same refactor as dim_cities

Serving:
- pseo_city_costs_de — JOIN dim_countries; add 29 camelCase override columns
  auto-applied by calculator (electricity, heating, rentSqm, hallCostSqm, …)
- planner_defaults — JOIN dim_countries; same 29 cost columns flow through
  to /api/market-data endpoint

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-04 10:09:48 +01:00

286 lines
14 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- Conformed country dimension — single authoritative source for all country metadata.
--
-- Consolidates data previously duplicated across dim_cities and dim_locations:
-- - country_name_en / country_slug (was: ~50-line CASE blocks in both models)
-- - median_income_pps (was: country_income CTE in both models)
-- - energy prices, labour costs, PLI indices (new — from Eurostat datasets)
-- - cost override columns for the financial calculator
--
-- Used by: dim_cities, dim_locations, pseo_city_costs_de, planner_defaults.
-- Grain: country_code (one row per ISO 3166-1 alpha-2 country code).
-- Kind: FULL — small table (~40 rows), full refresh daily.
--
-- Cost override columns:
-- NULL = fall through to calculator.py DEFAULTS (safe: auto-mapping filters None).
-- For DE (the baseline country) all overrides are NULL to preserve exact DEFAULTS.
-- For countries missing Eurostat data, NULLs propagate naturally.
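-- Columns in this model are snake_case (rent_sqm, hall_cost_sqm, …); serving models such as
-- pseo_city_costs_de re-alias them to the camelCase DEFAULTS keys used for auto-mapping in content/__init__.py.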
--
-- !! DE baseline values sourced from calculator.py DEFAULTS (web/src/padelnomics/planner/calculator.py).
-- !! If DEFAULTS change, the hardcoded baseline values below must be updated to match.
-- !! Search "DE baseline" in this file to find all affected lines.
MODEL (
    name foundation.dim_countries,
    kind FULL,             -- rebuilt in full on every run
    cron '@daily',
    grain country_code     -- one row per country code
);
WITH
-- Most recent income observation per country: the row with the highest ref_year wins.
-- ref_year is exposed as income_year so consumers can see how fresh the figure is.
latest_income AS (
    SELECT
        inc.country_code,
        inc.median_income_pps,
        inc.ref_year AS income_year
    FROM staging.stg_income AS inc
    QUALIFY ROW_NUMBER() OVER (
        PARTITION BY inc.country_code
        ORDER BY inc.ref_year DESC
    ) = 1
),
-- Most recent electricity price per country: the row with the highest ref_period wins
-- (the source is semi-annual, so this is the latest half-year available).
latest_electricity AS (
    SELECT
        el.country_code,
        el.electricity_eur_kwh,
        el.ref_period
    FROM staging.stg_electricity_prices AS el
    QUALIFY ROW_NUMBER() OVER (
        PARTITION BY el.country_code
        ORDER BY el.ref_period DESC
    ) = 1
),
-- Most recent gas price per country: the row with the highest ref_period wins.
latest_gas AS (
    SELECT
        gs.country_code,
        gs.gas_eur_gj,
        gs.ref_period
    FROM staging.stg_gas_prices AS gs
    QUALIFY ROW_NUMBER() OVER (
        PARTITION BY gs.country_code
        ORDER BY gs.ref_period DESC
    ) = 1
),
-- Most recent labour cost per country: the row with the highest ref_year wins.
latest_labour AS (
    SELECT
        lc.country_code,
        lc.labour_cost_eur_hour,
        lc.ref_year
    FROM staging.stg_labour_costs AS lc
    QUALIFY ROW_NUMBER() OVER (
        PARTITION BY lc.country_code
        ORDER BY lc.ref_year DESC
    ) = 1
),
-- Most recent PLI value per (country, category) pair: the row with the highest ref_year wins.
-- Each category is ranked independently, so categories may come from different years.
latest_pli AS (
    SELECT
        pl.country_code,
        pl.category,
        pl.pli,
        pl.ref_year
    FROM staging.stg_price_levels AS pl
    QUALIFY ROW_NUMBER() OVER (
        PARTITION BY pl.country_code, pl.category
        ORDER BY pl.ref_year DESC
    ) = 1
),
-- Long-to-wide pivot of latest_pli: one row per country, one column per PLI category.
-- A category with no row for a given country comes out as NULL in that column.
pli_pivoted AS (
    SELECT
        lp.country_code,
        MAX(CASE WHEN lp.category = 'construction' THEN lp.pli END) AS construction,
        MAX(CASE WHEN lp.category = 'housing'      THEN lp.pli END) AS housing,
        MAX(CASE WHEN lp.category = 'services'     THEN lp.pli END) AS services,
        MAX(CASE WHEN lp.category = 'misc'         THEN lp.pli END) AS misc,
        MAX(CASE WHEN lp.category = 'government'   THEN lp.pli END) AS government
    FROM latest_pli AS lp
    GROUP BY lp.country_code
),
-- DE baseline rows used as denominators when computing per-country cost ratios.
-- Each CTE yields at most one row (the DE row of its source) and is EMPTY when DE is
-- absent from that source — consumers must join them in a way that tolerates zero rows.
de_pli AS (
    SELECT
        pp.construction,
        pp.housing,
        pp.services,
        pp.misc,
        pp.government
    FROM pli_pivoted AS pp
    WHERE pp.country_code = 'DE'
),
de_elec AS (
    SELECT le.electricity_eur_kwh
    FROM latest_electricity AS le
    WHERE le.country_code = 'DE'
),
de_gas AS (
    SELECT lg.gas_eur_gj
    FROM latest_gas AS lg
    WHERE lg.country_code = 'DE'
),
-- Spine of country codes: every code seen in any source, plus a fixed seed list.
-- The seed list is appended with UNION ALL, so seeded codes can appear twice here;
-- the consumer is expected to de-duplicate.
all_countries AS (
    SELECT country_code FROM latest_income
    UNION
    SELECT country_code FROM latest_electricity
    UNION
    SELECT country_code FROM latest_gas
    UNION
    SELECT country_code FROM latest_labour
    UNION
    SELECT country_code FROM pli_pivoted
    -- Known padel markets must appear even if Eurostat doesn't cover them yet
    UNION ALL
    SELECT
        unnest([
            'DE', 'ES', 'GB', 'FR', 'IT', 'PT', 'AT', 'CH', 'NL', 'BE',
            'SE', 'NO', 'DK', 'FI', 'US', 'AR', 'MX', 'AE', 'AU', 'IE'
        ]) AS country_code
)
-- Final projection: one row per two-letter country code carrying names, raw Eurostat
-- inputs, and the calculator cost override columns.
--
-- Override formulas: country_value = DE_default × (country_price / DE_price)
--                or                  DE_default × (country_PLI   / DE_PLI)
-- Every override is NULL for DE itself, and NULL whenever the country value or the DE
-- baseline is missing (or the baseline is 0), so the calculator falls through to DEFAULTS.
SELECT
    base.country_code,
    base.country_name_en,
    -- Slug = lower-cased name with each run of non-alphanumerics collapsed to '-'.
    -- The 'g' option makes the replacement global; without it only the first run is replaced,
    -- which would break any name containing more than one separator.
    LOWER(REGEXP_REPLACE(base.country_name_en, '[^a-zA-Z0-9]+', '-', 'g')) AS country_slug,
    -- Income data
    base.median_income_pps,
    base.income_year,
    -- Raw energy and labour data (for reference / future staffed-scenario use)
    base.electricity_eur_kwh,
    base.gas_eur_gj,
    base.labour_cost_eur_hour,
    -- PLI indices per category (EU27=100)
    base.pli_construction,
    base.pli_housing,
    base.pli_services,
    base.pli_misc,
    base.pli_government,
    -- ── Calculator cost override columns ────────────────────────────────────
    -- OPEX overrides — energy (direct price ratio)
    -- DE baseline: electricity=600, heating=400 (see calculator.py DEFAULTS)
    ROUND(600.0 * base.electricity_ratio, 0) AS electricity,
    ROUND(400.0 * base.gas_ratio, 0) AS heating,
    -- OPEX overrides — PLI-scaled (housing category)
    -- DE baseline: rentSqm=4, water=125, outdoorRent=400
    ROUND(4.0 * base.housing_ratio, 2) AS rent_sqm,
    ROUND(125.0 * base.housing_ratio, 0) AS water,
    ROUND(400.0 * base.housing_ratio, 0) AS outdoor_rent,
    -- OPEX overrides — PLI-scaled (misc category)
    -- DE baseline: insurance=300
    ROUND(300.0 * base.misc_ratio, 0) AS insurance,
    -- OPEX overrides — PLI-scaled (services category)
    -- DE baseline: cleaning=300, maintenance=300, marketing=350
    ROUND(300.0 * base.services_ratio, 0) AS cleaning,
    ROUND(300.0 * base.services_ratio, 0) AS maintenance,
    ROUND(350.0 * base.services_ratio, 0) AS marketing,
    -- OPEX overrides — PLI-scaled (government category)
    -- DE baseline: propertyTax=250, permitsCompliance=12000
    ROUND(250.0 * base.government_ratio, 0) AS property_tax,
    ROUND(12000.0 * base.government_ratio, 0) AS permits_compliance,
    -- CAPEX overrides — PLI-scaled (construction category)
    -- DE baseline: hallCostSqm=500, foundationSqm=150, hvac=100000, electrical=60000,
    --   sanitary=80000, parking=50000, fitout=40000, planning=100000,
    --   fireProtection=80000, floorPrep=12000, hvacUpgrade=20000,
    --   lightingUpgrade=10000, outdoorFoundation=35, outdoorSiteWork=8000,
    --   outdoorLighting=4000, outdoorFencing=6000, workingCapital=15000
    ROUND(500.0 * base.construction_ratio, 0) AS hall_cost_sqm,
    ROUND(150.0 * base.construction_ratio, 0) AS foundation_sqm,
    ROUND(100000.0 * base.construction_ratio, 0) AS hvac,
    ROUND(60000.0 * base.construction_ratio, 0) AS electrical,
    ROUND(80000.0 * base.construction_ratio, 0) AS sanitary,
    ROUND(50000.0 * base.construction_ratio, 0) AS parking,
    ROUND(40000.0 * base.construction_ratio, 0) AS fitout,
    ROUND(100000.0 * base.construction_ratio, 0) AS planning,
    ROUND(80000.0 * base.construction_ratio, 0) AS fire_protection,
    ROUND(12000.0 * base.construction_ratio, 0) AS floor_prep,
    ROUND(20000.0 * base.construction_ratio, 0) AS hvac_upgrade,
    ROUND(10000.0 * base.construction_ratio, 0) AS lighting_upgrade,
    ROUND(35.0 * base.construction_ratio, 0) AS outdoor_foundation,
    ROUND(8000.0 * base.construction_ratio, 0) AS outdoor_site_work,
    ROUND(4000.0 * base.construction_ratio, 0) AS outdoor_lighting,
    ROUND(6000.0 * base.construction_ratio, 0) AS outdoor_fencing,
    ROUND(15000.0 * base.construction_ratio, 0) AS working_capital,
    -- CAPEX overrides — PLI-scaled (housing category)
    -- DE baseline: landPriceSqm=60
    ROUND(60.0 * base.housing_ratio, 0) AS land_price_sqm
FROM (
    -- Per-country inputs plus the seven country/DE ratios that drive the overrides.
    -- Each ratio is forced to NULL for DE so every DE override is NULL, and NULLIF guards
    -- the denominator so a zero baseline yields NULL rather than an error or infinity.
    SELECT
        ac.country_code,
        -- Country name (single definition; the slug is derived from it in the outer query).
        -- Codes without a mapping fall back to the code itself.
        CASE ac.country_code
            WHEN 'DE' THEN 'Germany'
            WHEN 'ES' THEN 'Spain'
            WHEN 'GB' THEN 'United Kingdom'
            WHEN 'FR' THEN 'France'
            WHEN 'IT' THEN 'Italy'
            WHEN 'PT' THEN 'Portugal'
            WHEN 'AT' THEN 'Austria'
            WHEN 'CH' THEN 'Switzerland'
            WHEN 'NL' THEN 'Netherlands'
            WHEN 'BE' THEN 'Belgium'
            WHEN 'SE' THEN 'Sweden'
            WHEN 'NO' THEN 'Norway'
            WHEN 'DK' THEN 'Denmark'
            WHEN 'FI' THEN 'Finland'
            WHEN 'US' THEN 'United States'
            WHEN 'AR' THEN 'Argentina'
            WHEN 'MX' THEN 'Mexico'
            WHEN 'AE' THEN 'UAE'
            WHEN 'AU' THEN 'Australia'
            WHEN 'IE' THEN 'Ireland'
            ELSE ac.country_code
        END AS country_name_en,
        i.median_income_pps,
        i.income_year,
        e.electricity_eur_kwh,
        g.gas_eur_gj,
        la.labour_cost_eur_hour,
        p.construction AS pli_construction,
        p.housing AS pli_housing,
        p.services AS pli_services,
        p.misc AS pli_misc,
        p.government AS pli_government,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE e.electricity_eur_kwh / NULLIF(de_e.electricity_eur_kwh, 0)
        END AS electricity_ratio,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE g.gas_eur_gj / NULLIF(de_g.gas_eur_gj, 0)
        END AS gas_ratio,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE p.housing / NULLIF(de_p.housing, 0)
        END AS housing_ratio,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE p.misc / NULLIF(de_p.misc, 0)
        END AS misc_ratio,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE p.services / NULLIF(de_p.services, 0)
        END AS services_ratio,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE p.government / NULLIF(de_p.government, 0)
        END AS government_ratio,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE p.construction / NULLIF(de_p.construction, 0)
        END AS construction_ratio
    FROM (
        -- De-duplicated spine; the length filter also drops NULL and non-two-letter codes.
        SELECT DISTINCT country_code
        FROM all_countries
        WHERE LENGTH(country_code) = 2
    ) AS ac
    LEFT JOIN latest_income AS i ON ac.country_code = i.country_code
    LEFT JOIN latest_electricity AS e ON ac.country_code = e.country_code
    LEFT JOIN latest_gas AS g ON ac.country_code = g.country_code
    LEFT JOIN latest_labour AS la ON ac.country_code = la.country_code
    LEFT JOIN pli_pivoted AS p ON ac.country_code = p.country_code
    -- LEFT JOIN ... ON TRUE instead of CROSS JOIN: a baseline CTE with zero rows (DE missing
    -- from that source) must only NULL out the dependent ratios, not drop every country row.
    LEFT JOIN de_pli AS de_p ON TRUE
    LEFT JOIN de_elec AS de_e ON TRUE
    LEFT JOIN de_gas AS de_g ON TRUE
) AS base
-- Enforce grain
QUALIFY ROW_NUMBER() OVER (PARTITION BY base.country_code ORDER BY base.country_code) = 1