feat(transform): individualise article costs with per-country Eurostat data

Add real per-country cost data to ~30 calculator fields so pSEO articles
show country-specific CAPEX/OPEX instead of hardcoded DE defaults.

Extractor:
- eurostat.py: add 8 new datasets (nrg_pc_205, nrg_pc_203, lc_lci_lev,
  5×prc_ppp_ind variants); add optional `dataset_code` field so multiple
  dict entries can share one Eurostat API endpoint

Staging (4 new models):
- stg_electricity_prices — EUR/kWh by country, semi-annual
- stg_gas_prices         — EUR/GJ by country, semi-annual
- stg_labour_costs       — EUR/hour by country, annual (future staffed scenario)
- stg_price_levels       — PLI indices (EU27=100) for 5 categories, annual

Foundation:
- dim_countries (new) — conformed country dimension; eliminates ~50-line CASE
  blocks duplicated in dim_cities/dim_locations; computes ~29 calculator cost
  override columns from PLI ratios and energy price ratios vs DE baseline;
  NULL for DE so calculator falls through to DEFAULTS unchanged
- dim_cities — replace country_name/slug CASE blocks + country_income CTE
  with JOIN dim_countries
- dim_locations — same refactor as dim_cities

Serving:
- pseo_city_costs_de — JOIN dim_countries; add 29 camelCase override columns
  auto-applied by calculator (electricity, heating, rentSqm, hallCostSqm, …)
- planner_defaults — JOIN dim_countries; same 29 cost columns flow through
  to /api/market-data endpoint

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-03-04 10:09:48 +01:00
parent ce466e3f7f
commit 2e68cfbe4f
12 changed files with 681 additions and 128 deletions

View File

@@ -54,6 +54,7 @@ Grain must match reality — use `QUALIFY ROW_NUMBER()` to enforce it.
| Dimension | Grain | Used by |
|-----------|-------|---------|
| `foundation.dim_countries` | `country_code` | `dim_cities`, `dim_locations`, `pseo_city_costs_de`, `planner_defaults` — single source for country names, income, PLI/cost overrides |
| `foundation.dim_venues` | `venue_id` | `dim_cities`, `dim_venue_capacity`, `fct_daily_availability` (via capacity join) |
| `foundation.dim_cities` | `(country_code, city_slug)` | `serving.city_market_profile` → all pSEO serving models |
| `foundation.dim_locations` | `(country_code, geoname_id)` | `serving.location_opportunity_profile` — all GeoNames locations (pop ≥1K), incl. zero-court locations |

View File

@@ -5,7 +5,7 @@
-- Conformed dimension: used by city_market_profile and all pSEO serving models.
-- Integrates four sources:
-- dim_venues → city list, venue count, coordinates (Playtomic + OSM)
-- stg_income → country-level median income (Eurostat)
-- foundation.dim_countries → country_name_en, country_slug, median_income_pps
-- stg_city_labels → Eurostat city_code → city_name mapping (EU cities)
-- stg_population → Eurostat city-level population (EU, joined via city code)
-- stg_population_usa → US Census ACS place population
@@ -42,12 +42,6 @@ venue_cities AS (
WHERE city IS NOT NULL AND LENGTH(city) > 0
GROUP BY country_code, city
),
-- Latest country income per country
country_income AS (
SELECT country_code, median_income_pps, ref_year AS income_year
FROM staging.stg_income
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
),
-- Eurostat EU population: join city labels (code→name) with population values.
-- QUALIFY keeps only the most recent year per (country, city name).
eurostat_pop AS (
@@ -109,56 +103,9 @@ SELECT
vc.country_code,
vc.city_slug,
vc.city_name,
-- Human-readable country name for pSEO templates and internal linking
CASE vc.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE vc.country_code
END AS country_name_en,
-- URL-safe country slug
LOWER(REGEXP_REPLACE(
CASE vc.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE vc.country_code
END, '[^a-zA-Z0-9]+', '-'
)) AS country_slug,
-- Human-readable country name and slug — from dim_countries (single source of truth)
c.country_name_en,
c.country_slug,
vc.centroid_lat AS lat,
vc.centroid_lon AS lon,
-- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames string > GeoNames spatial > 0.
@@ -180,13 +127,13 @@ SELECT
0
)::INTEGER AS population_year,
vc.padel_venue_count,
ci.median_income_pps,
ci.income_year,
c.median_income_pps,
c.income_year,
-- GeoNames ID: FK to dim_locations / location_opportunity_profile.
-- String match preferred; spatial fallback used when name doesn't match (Milano→Milan, etc.)
COALESCE(gn.geoname_id, gs.spatial_geoname_id) AS geoname_id
FROM venue_cities vc
LEFT JOIN country_income ci ON vc.country_code = ci.country_code
LEFT JOIN foundation.dim_countries c ON vc.country_code = c.country_code
-- Eurostat EU population (via city code→name lookup)
LEFT JOIN eurostat_pop ep
ON vc.country_code = ep.country_code

View File

@@ -0,0 +1,285 @@
-- Conformed country dimension — single authoritative source for all country metadata.
--
-- Consolidates data previously duplicated across dim_cities and dim_locations:
-- - country_name_en / country_slug (was: ~50-line CASE blocks in both models)
-- - median_income_pps (was: country_income CTE in both models)
-- - energy prices, labour costs, PLI indices (new — from Eurostat datasets)
-- - cost override columns for the financial calculator
--
-- Used by: dim_cities, dim_locations, pseo_city_costs_de, planner_defaults.
-- Grain: country_code (one row per ISO 3166-1 alpha-2 country code).
-- Kind: FULL — small table (~40 rows), full refresh daily.
--
-- Cost override columns:
-- NULL = fall through to calculator.py DEFAULTS (safe: auto-mapping filters None).
-- For DE (the baseline country) all overrides are NULL to preserve exact DEFAULTS.
-- For countries missing Eurostat data, NULLs propagate naturally.
-- camelCase column aliases match DEFAULTS keys for auto-mapping in content/__init__.py.
--
-- !! DE baseline values sourced from calculator.py DEFAULTS (web/src/padelnomics/planner/calculator.py).
-- !! If DEFAULTS change, the hardcoded baseline values below must be updated to match.
-- !! Search "DE baseline" in this file to find all affected lines.
MODEL (
name foundation.dim_countries,
kind FULL,
cron '@daily',
grain country_code
);
WITH
-- Latest income per country
latest_income AS (
SELECT country_code, median_income_pps, ref_year AS income_year
FROM staging.stg_income
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
),
-- Latest electricity price per country (use most recent semi-annual period)
latest_electricity AS (
SELECT country_code, electricity_eur_kwh, ref_period
FROM staging.stg_electricity_prices
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_period DESC) = 1
),
-- Latest gas price per country
latest_gas AS (
SELECT country_code, gas_eur_gj, ref_period
FROM staging.stg_gas_prices
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_period DESC) = 1
),
-- Latest labour cost per country
latest_labour AS (
SELECT country_code, labour_cost_eur_hour, ref_year
FROM staging.stg_labour_costs
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
),
-- Latest PLI per (country, category)
latest_pli AS (
SELECT country_code, category, pli, ref_year
FROM staging.stg_price_levels
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code, category ORDER BY ref_year DESC) = 1
),
-- Pivot PLI categories into columns per country
pli_pivoted AS (
SELECT
country_code,
MAX(pli) FILTER (WHERE category = 'construction') AS construction,
MAX(pli) FILTER (WHERE category = 'housing') AS housing,
MAX(pli) FILTER (WHERE category = 'services') AS services,
MAX(pli) FILTER (WHERE category = 'misc') AS misc,
MAX(pli) FILTER (WHERE category = 'government') AS government
FROM latest_pli
GROUP BY country_code
),
-- DE baseline rows for ratio computation
-- NULL-safe: if DE is missing from a source, ratios produce NULL (safe fallthrough).
de_pli AS (
SELECT construction, housing, services, misc, government
FROM pli_pivoted WHERE country_code = 'DE'
),
de_elec AS (
SELECT electricity_eur_kwh FROM latest_electricity WHERE country_code = 'DE'
),
de_gas AS (
SELECT gas_eur_gj FROM latest_gas WHERE country_code = 'DE'
),
-- All distinct country codes from any source
all_countries AS (
SELECT country_code FROM latest_income
UNION
SELECT country_code FROM latest_electricity
UNION
SELECT country_code FROM latest_gas
UNION
SELECT country_code FROM latest_labour
UNION
SELECT country_code FROM pli_pivoted
-- Ensure known padel markets appear even if Eurostat doesn't cover them yet
UNION ALL
SELECT unnest(['DE','ES','GB','FR','IT','PT','AT','CH','NL','BE','SE','NO','DK','FI',
'US','AR','MX','AE','AU','IE']) AS country_code
)
SELECT
ac.country_code,
-- Country name and slug (single definition, replacing duplicated CASE blocks)
CASE ac.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE ac.country_code
END AS country_name_en,
LOWER(REGEXP_REPLACE(
CASE ac.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE ac.country_code
END, '[^a-zA-Z0-9]+', '-'
)) AS country_slug,
-- Income data
i.median_income_pps,
i.income_year,
-- Raw energy and labour data (for reference / future staffed-scenario use)
e.electricity_eur_kwh,
g.gas_eur_gj,
la.labour_cost_eur_hour,
-- PLI indices per category (EU27=100)
p.construction AS pli_construction,
p.housing AS pli_housing,
p.services AS pli_services,
p.misc AS pli_misc,
p.government AS pli_government,
-- ── Calculator cost override columns ────────────────────────────────────
-- NULL for DE = fall through to calculator.py DEFAULTS (safe: auto-mapping skips None).
-- Formulas: country_value = DE_default × (country_price / DE_price)
-- or DE_default × (country_PLI / DE_PLI)
--
-- OPEX overrides — energy (direct price ratio)
-- DE baseline: electricity=600, heating=400 (see calculator.py DEFAULTS)
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(600.0 * (e.electricity_eur_kwh / de_e.electricity_eur_kwh), 0)
END AS electricity,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(400.0 * (g.gas_eur_gj / de_g.gas_eur_gj), 0)
END AS heating,
-- OPEX overrides — PLI-scaled (housing category)
-- DE baseline: rentSqm=4, water=125, outdoorRent=400
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(4.0 * (p.housing / de_p.housing), 2)
END AS rent_sqm,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(125.0 * (p.housing / de_p.housing), 0)
END AS water,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(400.0 * (p.housing / de_p.housing), 0)
END AS outdoor_rent,
-- OPEX overrides — PLI-scaled (misc category)
-- DE baseline: insurance=300
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(300.0 * (p.misc / de_p.misc), 0)
END AS insurance,
-- OPEX overrides — PLI-scaled (services category)
-- DE baseline: cleaning=300, maintenance=300, marketing=350
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(300.0 * (p.services / de_p.services), 0)
END AS cleaning,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(300.0 * (p.services / de_p.services), 0)
END AS maintenance,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(350.0 * (p.services / de_p.services), 0)
END AS marketing,
-- OPEX overrides — PLI-scaled (government category)
-- DE baseline: propertyTax=250, permitsCompliance=12000
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(250.0 * (p.government / de_p.government), 0)
END AS property_tax,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(12000.0 * (p.government / de_p.government), 0)
END AS permits_compliance,
-- CAPEX overrides — PLI-scaled (construction category)
-- DE baseline: hallCostSqm=500, foundationSqm=150, hvac=100000, electrical=60000,
-- sanitary=80000, parking=50000, fitout=40000, planning=100000,
-- fireProtection=80000, floorPrep=12000, hvacUpgrade=20000,
-- lightingUpgrade=10000, outdoorFoundation=35, outdoorSiteWork=8000,
-- outdoorLighting=4000, outdoorFencing=6000, workingCapital=15000
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(500.0 * (p.construction / de_p.construction), 0)
END AS hall_cost_sqm,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(150.0 * (p.construction / de_p.construction), 0)
END AS foundation_sqm,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(100000.0 * (p.construction / de_p.construction), 0)
END AS hvac,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(60000.0 * (p.construction / de_p.construction), 0)
END AS electrical,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(80000.0 * (p.construction / de_p.construction), 0)
END AS sanitary,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(50000.0 * (p.construction / de_p.construction), 0)
END AS parking,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(40000.0 * (p.construction / de_p.construction), 0)
END AS fitout,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(100000.0 * (p.construction / de_p.construction), 0)
END AS planning,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(80000.0 * (p.construction / de_p.construction), 0)
END AS fire_protection,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(12000.0 * (p.construction / de_p.construction), 0)
END AS floor_prep,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(20000.0 * (p.construction / de_p.construction), 0)
END AS hvac_upgrade,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(10000.0 * (p.construction / de_p.construction), 0)
END AS lighting_upgrade,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(35.0 * (p.construction / de_p.construction), 0)
END AS outdoor_foundation,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(8000.0 * (p.construction / de_p.construction), 0)
END AS outdoor_site_work,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(4000.0 * (p.construction / de_p.construction), 0)
END AS outdoor_lighting,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(6000.0 * (p.construction / de_p.construction), 0)
END AS outdoor_fencing,
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(15000.0 * (p.construction / de_p.construction), 0)
END AS working_capital,
-- CAPEX overrides — PLI-scaled (housing category)
-- DE baseline: landPriceSqm=60
CASE WHEN ac.country_code = 'DE' THEN NULL
ELSE ROUND(60.0 * (p.housing / de_p.housing), 0)
END AS land_price_sqm
FROM (SELECT DISTINCT country_code FROM all_countries WHERE LENGTH(country_code) = 2) ac
LEFT JOIN latest_income i ON ac.country_code = i.country_code
LEFT JOIN latest_electricity e ON ac.country_code = e.country_code
LEFT JOIN latest_gas g ON ac.country_code = g.country_code
LEFT JOIN latest_labour la ON ac.country_code = la.country_code
LEFT JOIN pli_pivoted p ON ac.country_code = p.country_code
CROSS JOIN de_pli de_p
CROSS JOIN de_elec de_e
CROSS JOIN de_gas de_g
-- Enforce grain
QUALIFY ROW_NUMBER() OVER (PARTITION BY ac.country_code ORDER BY ac.country_code) = 1

View File

@@ -6,9 +6,9 @@
-- covers all locations with population ≥ 1K so zero-court Gemeinden score fully.
--
-- Enriched with:
-- foundation.dim_countries → country_name_en, country_slug, median_income_pps
-- stg_nuts2_boundaries + stg_regional_income → EU NUTS-2/NUTS-1 income (spatial join)
-- stg_income_usa → US state-level income (PPS-normalised)
-- stg_income → country-level income (fallback for all countries)
-- stg_padel_courts → padel venue count + nearest court distance (km)
-- stg_tennis_courts → tennis court count within 25km radius
--
@@ -16,7 +16,7 @@
-- 1. EU NUTS-2 regional income (finest; spatial join via ST_Contains)
-- 2. EU NUTS-1 regional income (fallback when NUTS-2 income missing from dataset)
-- 3. US state income (ratio-normalised to PPS scale; see us_income CTE)
-- 4. Country-level income (global fallback from stg_income / ilc_di03)
-- 4. Country-level income (global fallback from dim_countries / ilc_di03)
--
-- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension).
-- Spatial joins use BETWEEN predicates (not ABS()) to enable DuckDB's IEJoin
@@ -49,12 +49,6 @@ locations AS (
FROM staging.stg_population_geonames
WHERE lat IS NOT NULL AND lon IS NOT NULL
),
-- Country income (ilc_di03) — global fallback for all countries
country_income AS (
SELECT country_code, median_income_pps, ref_year AS income_year
FROM staging.stg_income
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
),
-- ── EU NUTS-2 income via spatial join ──────────────────────────────────────
-- Each EU location's (lon, lat) is matched against NUTS-2 boundary polygons.
-- The bounding box pre-filter (bbox_lat/lon_min/max) eliminates most candidates
@@ -214,56 +208,9 @@ tennis_nearby AS (
SELECT
l.geoname_id,
l.country_code,
-- Human-readable country name (consistent with dim_cities)
CASE l.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE l.country_code
END AS country_name_en,
-- URL-safe country slug
LOWER(REGEXP_REPLACE(
CASE l.country_code
WHEN 'DE' THEN 'Germany'
WHEN 'ES' THEN 'Spain'
WHEN 'GB' THEN 'United Kingdom'
WHEN 'FR' THEN 'France'
WHEN 'IT' THEN 'Italy'
WHEN 'PT' THEN 'Portugal'
WHEN 'AT' THEN 'Austria'
WHEN 'CH' THEN 'Switzerland'
WHEN 'NL' THEN 'Netherlands'
WHEN 'BE' THEN 'Belgium'
WHEN 'SE' THEN 'Sweden'
WHEN 'NO' THEN 'Norway'
WHEN 'DK' THEN 'Denmark'
WHEN 'FI' THEN 'Finland'
WHEN 'US' THEN 'United States'
WHEN 'AR' THEN 'Argentina'
WHEN 'MX' THEN 'Mexico'
WHEN 'AE' THEN 'UAE'
WHEN 'AU' THEN 'Australia'
WHEN 'IE' THEN 'Ireland'
ELSE l.country_code
END, '[^a-zA-Z0-9]+', '-'
)) AS country_slug,
-- Human-readable country name and slug — from dim_countries (single source of truth)
c.country_name_en,
c.country_slug,
l.location_name,
l.location_slug,
l.lat,
@@ -276,12 +223,12 @@ SELECT
COALESCE(
ri.regional_income_pps, -- EU: NUTS-2 (finest) or NUTS-1 (fallback)
us.median_income_pps, -- US: state-level PPS-equivalent
ci.median_income_pps -- Global: country-level from ilc_di03
c.median_income_pps -- Global: country-level from dim_countries / ilc_di03
) AS median_income_pps,
COALESCE(
ri.regional_income_year,
us.income_year,
ci.income_year
c.income_year
) AS income_year,
COALESCE(pl.padel_venue_count, 0)::INTEGER AS padel_venue_count,
-- Venues per 100K residents (NULL if population = 0)
@@ -293,8 +240,8 @@ SELECT
COALESCE(tn.tennis_courts_within_25km, 0)::INTEGER AS tennis_courts_within_25km,
CURRENT_DATE AS refreshed_date
FROM locations l
LEFT JOIN country_income ci ON l.country_code = ci.country_code
LEFT JOIN regional_income ri ON l.geoname_id = ri.geoname_id
LEFT JOIN foundation.dim_countries c ON l.country_code = c.country_code
LEFT JOIN regional_income ri ON l.geoname_id = ri.geoname_id
LEFT JOIN us_income us ON l.country_code = 'US'
AND l.admin1_code = us.admin1_code
LEFT JOIN nearest_padel np ON l.geoname_id = np.geoname_id

View File

@@ -7,6 +7,10 @@
-- 2. Country-level: median across cities in same country
-- 3. Hardcoded fallback: market research estimates (only when no Playtomic data)
--
-- Cost override columns from dim_countries (Eurostat PLI + energy price indices) are
-- included so the planner API pre-fills country-adjusted CAPEX/OPEX for all cities.
-- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline preserved).
--
-- Units are explicit in column names. Monetary values in local currency.
MODEL (
@@ -125,6 +129,37 @@ SELECT
ELSE 0.2
END AS data_confidence,
COALESCE(cb.price_currency, ctb.price_currency, hf.currency, 'EUR') AS price_currency,
-- Cost override columns (Eurostat PLI + energy prices via dim_countries).
-- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline).
dc.electricity,
dc.heating,
dc.rent_sqm,
dc.insurance,
dc.cleaning,
dc.maintenance,
dc.marketing,
dc.water,
dc.property_tax,
dc.outdoor_rent,
dc.hall_cost_sqm,
dc.foundation_sqm,
dc.land_price_sqm,
dc.hvac,
dc.electrical,
dc.sanitary,
dc.parking,
dc.fitout,
dc.planning,
dc.fire_protection,
dc.floor_prep,
dc.hvac_upgrade,
dc.lighting_upgrade,
dc.outdoor_foundation,
dc.outdoor_site_work,
dc.outdoor_lighting,
dc.outdoor_fencing,
dc.working_capital,
dc.permits_compliance,
CURRENT_DATE AS refreshed_date
FROM city_profiles cp
LEFT JOIN city_benchmarks cb
@@ -134,3 +169,5 @@ LEFT JOIN country_benchmarks ctb
ON cp.country_code = ctb.country_code
LEFT JOIN hardcoded_fallbacks hf
ON cp.country_code = hf.country_code
LEFT JOIN foundation.dim_countries dc
ON cp.country_code = dc.country_code

View File

@@ -4,6 +4,10 @@
--
-- Calculator override columns use camelCase to match the DEFAULTS keys in
-- planner/calculator.py, so they are auto-applied as calc pre-fills.
--
-- Cost override columns come from foundation.dim_countries (Eurostat PLI and energy
-- price indices). NULL = fall through to calculator.py DEFAULTS (safe: auto-mapping
-- filters None). DE always produces NULL overrides — preserves exact DEFAULTS behaviour.
MODEL (
name serving.pseo_city_costs_de,
@@ -44,6 +48,39 @@ SELECT
FLOOR(p.courts_typical) AS "dblCourts",
-- 'country' drives currency formatting in the calculator
c.country_code AS "country",
-- Cost override columns from dim_countries (Eurostat PLI + energy price indices).
-- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline preserved).
-- OPEX overrides
cc.electricity AS "electricity",
cc.heating AS "heating",
cc.rent_sqm AS "rentSqm",
cc.insurance AS "insurance",
cc.cleaning AS "cleaning",
cc.maintenance AS "maintenance",
cc.marketing AS "marketing",
cc.water AS "water",
cc.property_tax AS "propertyTax",
cc.outdoor_rent AS "outdoorRent",
-- CAPEX overrides
cc.hall_cost_sqm AS "hallCostSqm",
cc.foundation_sqm AS "foundationSqm",
cc.land_price_sqm AS "landPriceSqm",
cc.hvac AS "hvac",
cc.electrical AS "electrical",
cc.sanitary AS "sanitary",
cc.parking AS "parking",
cc.fitout AS "fitout",
cc.planning AS "planning",
cc.fire_protection AS "fireProtection",
cc.floor_prep AS "floorPrep",
cc.hvac_upgrade AS "hvacUpgrade",
cc.lighting_upgrade AS "lightingUpgrade",
cc.outdoor_foundation AS "outdoorFoundation",
cc.outdoor_site_work AS "outdoorSiteWork",
cc.outdoor_lighting AS "outdoorLighting",
cc.outdoor_fencing AS "outdoorFencing",
cc.working_capital AS "workingCapital",
cc.permits_compliance AS "permitsCompliance",
CURRENT_DATE AS refreshed_date
FROM serving.city_market_profile c
LEFT JOIN serving.planner_defaults p
@@ -52,6 +89,8 @@ LEFT JOIN serving.planner_defaults p
LEFT JOIN serving.location_opportunity_profile lop
ON c.country_code = lop.country_code
AND c.geoname_id = lop.geoname_id
LEFT JOIN foundation.dim_countries cc
ON c.country_code = cc.country_code
-- Only cities with actual padel presence and at least some rate data
WHERE c.padel_venue_count > 0
AND (p.rate_peak IS NOT NULL OR c.median_peak_rate IS NOT NULL)

View File

@@ -0,0 +1,42 @@
-- Electricity prices for non-household consumers (Eurostat nrg_pc_205).
-- EUR/kWh excluding taxes, band MWH500-1999 (medium-sized commercial consumer).
-- Semi-annual frequency: ref_period is "YYYY-S1" or "YYYY-S2".
--
-- Source: data/landing/eurostat/{year}/{month}/nrg_pc_205.json.gz
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2024-S1", "value": 0.1523}, ...]}
MODEL (
name staging.stg_electricity_prices,
kind FULL,
cron '@daily',
grain (country_code, ref_period)
);
WITH source AS (
SELECT unnest(rows) AS r
FROM read_json(
@LANDING_DIR || '/eurostat/*/*/nrg_pc_205.json.gz',
auto_detect = true
)
),
parsed AS (
SELECT
UPPER(TRIM(r.geo_code)) AS geo_code,
TRIM(r.ref_year) AS ref_period,
TRY_CAST(r.value AS DOUBLE) AS electricity_eur_kwh
FROM source
WHERE r.value IS NOT NULL
)
SELECT
-- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
CASE geo_code
WHEN 'EL' THEN 'GR'
WHEN 'UK' THEN 'GB'
ELSE geo_code
END AS country_code,
ref_period,
electricity_eur_kwh
FROM parsed
WHERE LENGTH(geo_code) = 2
AND geo_code NOT IN ('EU', 'EA', 'EU27_2020')
AND electricity_eur_kwh > 0

View File

@@ -0,0 +1,42 @@
-- Gas prices for non-household consumers (Eurostat nrg_pc_203).
-- EUR/GJ excluding taxes, band GJ1000-9999 (medium-sized commercial consumer).
-- Semi-annual frequency: ref_period is "YYYY-S1" or "YYYY-S2".
--
-- Source: data/landing/eurostat/{year}/{month}/nrg_pc_203.json.gz
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2024-S1", "value": 14.23}, ...]}
MODEL (
name staging.stg_gas_prices,
kind FULL,
cron '@daily',
grain (country_code, ref_period)
);
WITH source AS (
SELECT unnest(rows) AS r
FROM read_json(
@LANDING_DIR || '/eurostat/*/*/nrg_pc_203.json.gz',
auto_detect = true
)
),
parsed AS (
SELECT
UPPER(TRIM(r.geo_code)) AS geo_code,
TRIM(r.ref_year) AS ref_period,
TRY_CAST(r.value AS DOUBLE) AS gas_eur_gj
FROM source
WHERE r.value IS NOT NULL
)
SELECT
-- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
CASE geo_code
WHEN 'EL' THEN 'GR'
WHEN 'UK' THEN 'GB'
ELSE geo_code
END AS country_code,
ref_period,
gas_eur_gj
FROM parsed
WHERE LENGTH(geo_code) = 2
AND geo_code NOT IN ('EU', 'EA', 'EU27_2020')
AND gas_eur_gj > 0

View File

@@ -0,0 +1,46 @@
-- Labour cost levels EUR/hour (Eurostat lc_lci_lev).
-- NACE R2 sector N (administrative and support service activities).
-- D1_D2_A_HW structure: wages + non-wage costs, actual hours worked.
-- Annual frequency.
--
-- Stored for future "staffed scenario" calculator variant.
-- Not wired into default calculator overrides (staff=0 is a business assumption).
--
-- Source: data/landing/eurostat/{year}/{month}/lc_lci_lev.json.gz
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2022", "value": 28.4}, ...]}
MODEL (
name staging.stg_labour_costs,
kind FULL,
cron '@daily',
grain (country_code, ref_year)
);
WITH source AS (
SELECT unnest(rows) AS r
FROM read_json(
@LANDING_DIR || '/eurostat/*/*/lc_lci_lev.json.gz',
auto_detect = true
)
),
parsed AS (
SELECT
UPPER(TRIM(r.geo_code)) AS geo_code,
TRY_CAST(r.ref_year AS INTEGER) AS ref_year,
TRY_CAST(r.value AS DOUBLE) AS labour_cost_eur_hour
FROM source
WHERE r.value IS NOT NULL
)
SELECT
-- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
CASE geo_code
WHEN 'EL' THEN 'GR'
WHEN 'UK' THEN 'GB'
ELSE geo_code
END AS country_code,
ref_year,
labour_cost_eur_hour
FROM parsed
WHERE LENGTH(geo_code) = 2
AND geo_code NOT IN ('EU', 'EA', 'EU27_2020')
AND labour_cost_eur_hour > 0

View File

@@ -0,0 +1,96 @@
-- Price level indices relative to EU27=100 (Eurostat prc_ppp_ind).
-- Five categories, each from a separate landing file (different ppp_cat filters).
-- Annual frequency.
--
-- Categories and what they scale in the calculator:
-- construction — CAPEX: hallCostSqm, foundationSqm, hvac, electrical, sanitary, etc.
-- housing — rentSqm, landPriceSqm, water, outdoorRent
-- services — cleaning, maintenance, marketing
-- misc — insurance
-- government — permitsCompliance, propertyTax
--
-- Sources:
-- data/landing/eurostat/*/*/prc_ppp_ind_construction.json.gz (ppp_cat: A050202)
-- data/landing/eurostat/*/*/prc_ppp_ind_housing.json.gz (ppp_cat: A0104)
-- data/landing/eurostat/*/*/prc_ppp_ind_services.json.gz (ppp_cat: P0201)
-- data/landing/eurostat/*/*/prc_ppp_ind_misc.json.gz (ppp_cat: A0112)
-- data/landing/eurostat/*/*/prc_ppp_ind_government.json.gz (ppp_cat: P0202)
--
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2022", "value": 107.3}, ...]}
MODEL (
name staging.stg_price_levels,
kind FULL,
cron '@daily',
grain (country_code, category, ref_year)
);
WITH construction_raw AS (
SELECT unnest(rows) AS r, 'construction' AS category
FROM read_json(
@LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_construction.json.gz',
auto_detect = true
)
),
housing_raw AS (
SELECT unnest(rows) AS r, 'housing' AS category
FROM read_json(
@LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_housing.json.gz',
auto_detect = true
)
),
services_raw AS (
SELECT unnest(rows) AS r, 'services' AS category
FROM read_json(
@LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_services.json.gz',
auto_detect = true
)
),
misc_raw AS (
SELECT unnest(rows) AS r, 'misc' AS category
FROM read_json(
@LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_misc.json.gz',
auto_detect = true
)
),
government_raw AS (
SELECT unnest(rows) AS r, 'government' AS category
FROM read_json(
@LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_government.json.gz',
auto_detect = true
)
),
all_raw AS (
SELECT r, category FROM construction_raw
UNION ALL
SELECT r, category FROM housing_raw
UNION ALL
SELECT r, category FROM services_raw
UNION ALL
SELECT r, category FROM misc_raw
UNION ALL
SELECT r, category FROM government_raw
),
parsed AS (
SELECT
UPPER(TRIM(r.geo_code)) AS geo_code,
TRY_CAST(r.ref_year AS INTEGER) AS ref_year,
TRY_CAST(r.value AS DOUBLE) AS pli,
category
FROM all_raw
WHERE r.value IS NOT NULL
)
SELECT
-- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
CASE geo_code
WHEN 'EL' THEN 'GR'
WHEN 'UK' THEN 'GB'
ELSE geo_code
END AS country_code,
category,
ref_year,
pli
FROM parsed
WHERE LENGTH(geo_code) = 2
AND geo_code NOT IN ('EU', 'EA', 'EU27_2020')
AND pli > 0