feat(transform): individualise article costs with per-country Eurostat data
Add real per-country cost data to ~30 calculator fields so pSEO articles show country-specific CAPEX/OPEX instead of hardcoded DE defaults.

Extractor:
- eurostat.py: add 8 new datasets (nrg_pc_205, nrg_pc_203, lc_lci_lev, 5×prc_ppp_ind variants); add optional `dataset_code` field so multiple dict entries can share one Eurostat API endpoint

Staging (4 new models):
- stg_electricity_prices — EUR/kWh by country, semi-annual
- stg_gas_prices — EUR/GJ by country, semi-annual
- stg_labour_costs — EUR/hour by country, annual (future staffed scenario)
- stg_price_levels — PLI indices (EU27=100) for 5 categories, annual

Foundation:
- dim_countries (new) — conformed country dimension; eliminates ~50-line CASE blocks duplicated in dim_cities/dim_locations; computes ~29 calculator cost override columns from PLI ratios and energy price ratios vs DE baseline; NULL for DE so calculator falls through to DEFAULTS unchanged
- dim_cities — replace country_name/slug CASE blocks + country_income CTE with JOIN dim_countries
- dim_locations — same refactor as dim_cities

Serving:
- pseo_city_costs_de — JOIN dim_countries; add 29 camelCase override columns auto-applied by calculator (electricity, heating, rentSqm, hallCostSqm, …)
- planner_defaults — JOIN dim_countries; same 29 cost columns flow through to /api/market-data endpoint

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
-- Conformed dimension: used by city_market_profile and all pSEO serving models.
|
||||
-- Integrates four sources:
|
||||
-- dim_venues → city list, venue count, coordinates (Playtomic + OSM)
|
||||
-- stg_income → country-level median income (Eurostat)
|
||||
-- foundation.dim_countries → country_name_en, country_slug, median_income_pps
|
||||
-- stg_city_labels → Eurostat city_code → city_name mapping (EU cities)
|
||||
-- stg_population → Eurostat city-level population (EU, joined via city code)
|
||||
-- stg_population_usa → US Census ACS place population
|
||||
@@ -42,12 +42,6 @@ venue_cities AS (
|
||||
WHERE city IS NOT NULL AND LENGTH(city) > 0
|
||||
GROUP BY country_code, city
|
||||
),
|
||||
-- Latest country income per country
|
||||
country_income AS (
|
||||
SELECT country_code, median_income_pps, ref_year AS income_year
|
||||
FROM staging.stg_income
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
|
||||
),
|
||||
-- Eurostat EU population: join city labels (code→name) with population values.
|
||||
-- QUALIFY keeps only the most recent year per (country, city name).
|
||||
eurostat_pop AS (
|
||||
@@ -109,56 +103,9 @@ SELECT
|
||||
vc.country_code,
|
||||
vc.city_slug,
|
||||
vc.city_name,
|
||||
-- Human-readable country name for pSEO templates and internal linking
|
||||
CASE vc.country_code
|
||||
WHEN 'DE' THEN 'Germany'
|
||||
WHEN 'ES' THEN 'Spain'
|
||||
WHEN 'GB' THEN 'United Kingdom'
|
||||
WHEN 'FR' THEN 'France'
|
||||
WHEN 'IT' THEN 'Italy'
|
||||
WHEN 'PT' THEN 'Portugal'
|
||||
WHEN 'AT' THEN 'Austria'
|
||||
WHEN 'CH' THEN 'Switzerland'
|
||||
WHEN 'NL' THEN 'Netherlands'
|
||||
WHEN 'BE' THEN 'Belgium'
|
||||
WHEN 'SE' THEN 'Sweden'
|
||||
WHEN 'NO' THEN 'Norway'
|
||||
WHEN 'DK' THEN 'Denmark'
|
||||
WHEN 'FI' THEN 'Finland'
|
||||
WHEN 'US' THEN 'United States'
|
||||
WHEN 'AR' THEN 'Argentina'
|
||||
WHEN 'MX' THEN 'Mexico'
|
||||
WHEN 'AE' THEN 'UAE'
|
||||
WHEN 'AU' THEN 'Australia'
|
||||
WHEN 'IE' THEN 'Ireland'
|
||||
ELSE vc.country_code
|
||||
END AS country_name_en,
|
||||
-- URL-safe country slug
|
||||
LOWER(REGEXP_REPLACE(
|
||||
CASE vc.country_code
|
||||
WHEN 'DE' THEN 'Germany'
|
||||
WHEN 'ES' THEN 'Spain'
|
||||
WHEN 'GB' THEN 'United Kingdom'
|
||||
WHEN 'FR' THEN 'France'
|
||||
WHEN 'IT' THEN 'Italy'
|
||||
WHEN 'PT' THEN 'Portugal'
|
||||
WHEN 'AT' THEN 'Austria'
|
||||
WHEN 'CH' THEN 'Switzerland'
|
||||
WHEN 'NL' THEN 'Netherlands'
|
||||
WHEN 'BE' THEN 'Belgium'
|
||||
WHEN 'SE' THEN 'Sweden'
|
||||
WHEN 'NO' THEN 'Norway'
|
||||
WHEN 'DK' THEN 'Denmark'
|
||||
WHEN 'FI' THEN 'Finland'
|
||||
WHEN 'US' THEN 'United States'
|
||||
WHEN 'AR' THEN 'Argentina'
|
||||
WHEN 'MX' THEN 'Mexico'
|
||||
WHEN 'AE' THEN 'UAE'
|
||||
WHEN 'AU' THEN 'Australia'
|
||||
WHEN 'IE' THEN 'Ireland'
|
||||
ELSE vc.country_code
|
||||
END, '[^a-zA-Z0-9]+', '-'
|
||||
)) AS country_slug,
|
||||
-- Human-readable country name and slug — from dim_countries (single source of truth)
|
||||
c.country_name_en,
|
||||
c.country_slug,
|
||||
vc.centroid_lat AS lat,
|
||||
vc.centroid_lon AS lon,
|
||||
-- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames string > GeoNames spatial > 0.
|
||||
@@ -180,13 +127,13 @@ SELECT
|
||||
0
|
||||
)::INTEGER AS population_year,
|
||||
vc.padel_venue_count,
|
||||
ci.median_income_pps,
|
||||
ci.income_year,
|
||||
c.median_income_pps,
|
||||
c.income_year,
|
||||
-- GeoNames ID: FK to dim_locations / location_opportunity_profile.
|
||||
-- String match preferred; spatial fallback used when name doesn't match (Milano→Milan, etc.)
|
||||
COALESCE(gn.geoname_id, gs.spatial_geoname_id) AS geoname_id
|
||||
FROM venue_cities vc
|
||||
LEFT JOIN country_income ci ON vc.country_code = ci.country_code
|
||||
LEFT JOIN foundation.dim_countries c ON vc.country_code = c.country_code
|
||||
-- Eurostat EU population (via city code→name lookup)
|
||||
LEFT JOIN eurostat_pop ep
|
||||
ON vc.country_code = ep.country_code
|
||||
|
||||
@@ -0,0 +1,285 @@
|
||||
-- Conformed country dimension — single authoritative source for all country metadata.
|
||||
--
|
||||
-- Consolidates data previously duplicated across dim_cities and dim_locations:
|
||||
-- - country_name_en / country_slug (was: ~50-line CASE blocks in both models)
|
||||
-- - median_income_pps (was: country_income CTE in both models)
|
||||
-- - energy prices, labour costs, PLI indices (new — from Eurostat datasets)
|
||||
-- - cost override columns for the financial calculator
|
||||
--
|
||||
-- Used by: dim_cities, dim_locations, pseo_city_costs_de, planner_defaults.
|
||||
-- Grain: country_code (one row per ISO 3166-1 alpha-2 country code).
|
||||
-- Kind: FULL — small table (~40 rows), full refresh daily.
|
||||
--
|
||||
-- Cost override columns:
|
||||
-- NULL = fall through to calculator.py DEFAULTS (safe: auto-mapping filters None).
|
||||
-- For DE (the baseline country) all overrides are NULL to preserve exact DEFAULTS.
|
||||
-- For countries missing Eurostat data, NULLs propagate naturally.
|
||||
-- camelCase column aliases match DEFAULTS keys for auto-mapping in content/__init__.py.
|
||||
--
|
||||
-- !! DE baseline values sourced from calculator.py DEFAULTS (web/src/padelnomics/planner/calculator.py).
|
||||
-- !! If DEFAULTS change, the hardcoded baseline values below must be updated to match.
|
||||
-- !! Search "DE baseline" in this file to find all affected lines.
|
||||
|
||||
MODEL (
  name foundation.dim_countries,
  kind FULL,
  cron '@daily',
  grain country_code
);

WITH
-- Latest income per country
latest_income AS (
    SELECT country_code, median_income_pps, ref_year AS income_year
    FROM staging.stg_income
    QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
),
-- Latest electricity price per country (use most recent semi-annual period;
-- "YYYY-S1"/"YYYY-S2" strings sort chronologically)
latest_electricity AS (
    SELECT country_code, electricity_eur_kwh, ref_period
    FROM staging.stg_electricity_prices
    QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_period DESC) = 1
),
-- Latest gas price per country
latest_gas AS (
    SELECT country_code, gas_eur_gj, ref_period
    FROM staging.stg_gas_prices
    QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_period DESC) = 1
),
-- Latest labour cost per country
latest_labour AS (
    SELECT country_code, labour_cost_eur_hour, ref_year
    FROM staging.stg_labour_costs
    QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
),
-- Latest PLI per (country, category)
latest_pli AS (
    SELECT country_code, category, pli, ref_year
    FROM staging.stg_price_levels
    QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code, category ORDER BY ref_year DESC) = 1
),
-- Pivot PLI categories into columns per country (one row per country_code)
pli_pivoted AS (
    SELECT
        country_code,
        MAX(pli) FILTER (WHERE category = 'construction') AS construction,
        MAX(pli) FILTER (WHERE category = 'housing')      AS housing,
        MAX(pli) FILTER (WHERE category = 'services')     AS services,
        MAX(pli) FILTER (WHERE category = 'misc')         AS misc,
        MAX(pli) FILTER (WHERE category = 'government')   AS government
    FROM latest_pli
    GROUP BY country_code
),
-- DE baseline rows for ratio computation (zero or one row each).
-- These are attached below with LEFT JOIN ... ON TRUE, NOT CROSS JOIN: a CROSS JOIN
-- against an empty baseline would drop every country from the dimension, whereas the
-- LEFT JOIN yields NULL ratios (safe fallthrough to calculator DEFAULTS).
de_pli AS (
    SELECT construction, housing, services, misc, government
    FROM pli_pivoted
    WHERE country_code = 'DE'
),
de_elec AS (
    SELECT electricity_eur_kwh
    FROM latest_electricity
    WHERE country_code = 'DE'
),
de_gas AS (
    SELECT gas_eur_gj
    FROM latest_gas
    WHERE country_code = 'DE'
),
-- Known padel markets: single definition of code → English name.
-- Feeds country_name_en, country_slug, and guarantees these markets get a row even
-- if no staging source covers them yet.
country_names AS (
    SELECT country_code, country_name_en
    FROM (VALUES
        ('DE', 'Germany'),
        ('ES', 'Spain'),
        ('GB', 'United Kingdom'),
        ('FR', 'France'),
        ('IT', 'Italy'),
        ('PT', 'Portugal'),
        ('AT', 'Austria'),
        ('CH', 'Switzerland'),
        ('NL', 'Netherlands'),
        ('BE', 'Belgium'),
        ('SE', 'Sweden'),
        ('NO', 'Norway'),
        ('DK', 'Denmark'),
        ('FI', 'Finland'),
        ('US', 'United States'),
        ('AR', 'Argentina'),
        ('MX', 'Mexico'),
        ('AE', 'UAE'),
        ('AU', 'Australia'),
        ('IE', 'Ireland')
    ) AS t (country_code, country_name_en)
),
-- All distinct country codes from any source (UNION deduplicates)
all_countries AS (
    SELECT country_code FROM latest_income
    UNION
    SELECT country_code FROM latest_electricity
    UNION
    SELECT country_code FROM latest_gas
    UNION
    SELECT country_code FROM latest_labour
    UNION
    SELECT country_code FROM pli_pivoted
    UNION
    SELECT country_code FROM country_names
),
-- Keep only two-character codes (drops aggregates and NULLs)
countries AS (
    SELECT country_code
    FROM all_countries
    WHERE LENGTH(country_code) = 2
),
-- One row per country with raw metrics plus price/PLI ratios versus DE.
-- Every joined CTE is unique on its key, so the grain (country_code) is preserved.
-- Ratios are NULL for DE itself (baseline keeps exact calculator DEFAULTS), NULL when
-- either side is missing, and NULL when the DE denominator is 0.
enriched AS (
    SELECT
        ac.country_code,
        -- Unknown codes fall back to the code itself
        COALESCE(cn.country_name_en, ac.country_code) AS country_name_en,
        i.median_income_pps,
        i.income_year,
        e.electricity_eur_kwh,
        g.gas_eur_gj,
        la.labour_cost_eur_hour,
        p.construction AS pli_construction,
        p.housing      AS pli_housing,
        p.services     AS pli_services,
        p.misc         AS pli_misc,
        p.government   AS pli_government,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE e.electricity_eur_kwh / NULLIF(de_e.electricity_eur_kwh, 0)
        END AS electricity_ratio,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE g.gas_eur_gj / NULLIF(de_g.gas_eur_gj, 0)
        END AS gas_ratio,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE p.construction / NULLIF(de_p.construction, 0)
        END AS construction_ratio,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE p.housing / NULLIF(de_p.housing, 0)
        END AS housing_ratio,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE p.services / NULLIF(de_p.services, 0)
        END AS services_ratio,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE p.misc / NULLIF(de_p.misc, 0)
        END AS misc_ratio,
        CASE WHEN ac.country_code = 'DE' THEN NULL
             ELSE p.government / NULLIF(de_p.government, 0)
        END AS government_ratio
    FROM countries ac
    LEFT JOIN country_names      cn ON ac.country_code = cn.country_code
    LEFT JOIN latest_income      i  ON ac.country_code = i.country_code
    LEFT JOIN latest_electricity e  ON ac.country_code = e.country_code
    LEFT JOIN latest_gas         g  ON ac.country_code = g.country_code
    LEFT JOIN latest_labour      la ON ac.country_code = la.country_code
    LEFT JOIN pli_pivoted        p  ON ac.country_code = p.country_code
    LEFT JOIN de_pli  de_p ON TRUE
    LEFT JOIN de_elec de_e ON TRUE
    LEFT JOIN de_gas  de_g ON TRUE
)
SELECT
    country_code,
    -- Country name and slug (single definition, replacing duplicated CASE blocks)
    country_name_en,
    -- 'g' replaces every non-alphanumeric run, not only the first one
    LOWER(REGEXP_REPLACE(country_name_en, '[^a-zA-Z0-9]+', '-', 'g')) AS country_slug,
    -- Income data
    median_income_pps,
    income_year,
    -- Raw energy and labour data (for reference / future staffed-scenario use)
    electricity_eur_kwh,
    gas_eur_gj,
    labour_cost_eur_hour,
    -- PLI indices per category (EU27=100)
    pli_construction,
    pli_housing,
    pli_services,
    pli_misc,
    pli_government,
    -- ── Calculator cost override columns ────────────────────────────────────
    -- NULL for DE = fall through to calculator.py DEFAULTS (safe: auto-mapping skips None).
    -- Formulas: country_value = DE_default × (country_price / DE_price)
    --                        or DE_default × (country_PLI / DE_PLI)
    --
    -- OPEX overrides — energy (direct price ratio)
    -- DE baseline: electricity=600, heating=400 (see calculator.py DEFAULTS)
    ROUND(600.0 * electricity_ratio, 0) AS electricity,
    ROUND(400.0 * gas_ratio, 0)         AS heating,
    -- OPEX overrides — PLI-scaled (housing category)
    -- DE baseline: rentSqm=4, water=125, outdoorRent=400
    ROUND(4.0   * housing_ratio, 2) AS rent_sqm,
    ROUND(125.0 * housing_ratio, 0) AS water,
    ROUND(400.0 * housing_ratio, 0) AS outdoor_rent,
    -- OPEX overrides — PLI-scaled (misc category)
    -- DE baseline: insurance=300
    ROUND(300.0 * misc_ratio, 0) AS insurance,
    -- OPEX overrides — PLI-scaled (services category)
    -- DE baseline: cleaning=300, maintenance=300, marketing=350
    ROUND(300.0 * services_ratio, 0) AS cleaning,
    ROUND(300.0 * services_ratio, 0) AS maintenance,
    ROUND(350.0 * services_ratio, 0) AS marketing,
    -- OPEX overrides — PLI-scaled (government category)
    -- DE baseline: propertyTax=250, permitsCompliance=12000
    ROUND(250.0   * government_ratio, 0) AS property_tax,
    ROUND(12000.0 * government_ratio, 0) AS permits_compliance,
    -- CAPEX overrides — PLI-scaled (construction category)
    -- DE baseline: hallCostSqm=500, foundationSqm=150, hvac=100000, electrical=60000,
    --   sanitary=80000, parking=50000, fitout=40000, planning=100000,
    --   fireProtection=80000, floorPrep=12000, hvacUpgrade=20000,
    --   lightingUpgrade=10000, outdoorFoundation=35, outdoorSiteWork=8000,
    --   outdoorLighting=4000, outdoorFencing=6000, workingCapital=15000
    ROUND(500.0    * construction_ratio, 0) AS hall_cost_sqm,
    ROUND(150.0    * construction_ratio, 0) AS foundation_sqm,
    ROUND(100000.0 * construction_ratio, 0) AS hvac,
    ROUND(60000.0  * construction_ratio, 0) AS electrical,
    ROUND(80000.0  * construction_ratio, 0) AS sanitary,
    ROUND(50000.0  * construction_ratio, 0) AS parking,
    ROUND(40000.0  * construction_ratio, 0) AS fitout,
    ROUND(100000.0 * construction_ratio, 0) AS planning,
    ROUND(80000.0  * construction_ratio, 0) AS fire_protection,
    ROUND(12000.0  * construction_ratio, 0) AS floor_prep,
    ROUND(20000.0  * construction_ratio, 0) AS hvac_upgrade,
    ROUND(10000.0  * construction_ratio, 0) AS lighting_upgrade,
    ROUND(35.0     * construction_ratio, 0) AS outdoor_foundation,
    ROUND(8000.0   * construction_ratio, 0) AS outdoor_site_work,
    ROUND(4000.0   * construction_ratio, 0) AS outdoor_lighting,
    ROUND(6000.0   * construction_ratio, 0) AS outdoor_fencing,
    ROUND(15000.0  * construction_ratio, 0) AS working_capital,
    -- CAPEX overrides — PLI-scaled (housing category)
    -- DE baseline: landPriceSqm=60
    ROUND(60.0 * housing_ratio, 0) AS land_price_sqm
FROM enriched
|
||||
@@ -6,9 +6,9 @@
|
||||
-- covers all locations with population ≥ 1K so zero-court Gemeinden score fully.
|
||||
--
|
||||
-- Enriched with:
|
||||
-- foundation.dim_countries → country_name_en, country_slug, median_income_pps
|
||||
-- stg_nuts2_boundaries + stg_regional_income → EU NUTS-2/NUTS-1 income (spatial join)
|
||||
-- stg_income_usa → US state-level income (PPS-normalised)
|
||||
-- stg_income → country-level income (fallback for all countries)
|
||||
-- stg_padel_courts → padel venue count + nearest court distance (km)
|
||||
-- stg_tennis_courts → tennis court count within 25km radius
|
||||
--
|
||||
@@ -16,7 +16,7 @@
|
||||
-- 1. EU NUTS-2 regional income (finest; spatial join via ST_Contains)
|
||||
-- 2. EU NUTS-1 regional income (fallback when NUTS-2 income missing from dataset)
|
||||
-- 3. US state income (ratio-normalised to PPS scale; see us_income CTE)
|
||||
-- 4. Country-level income (global fallback from stg_income / ilc_di03)
|
||||
-- 4. Country-level income (global fallback from dim_countries / ilc_di03)
|
||||
--
|
||||
-- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension).
|
||||
-- Spatial joins use BETWEEN predicates (not ABS()) to enable DuckDB's IEJoin
|
||||
@@ -49,12 +49,6 @@ locations AS (
|
||||
FROM staging.stg_population_geonames
|
||||
WHERE lat IS NOT NULL AND lon IS NOT NULL
|
||||
),
|
||||
-- Country income (ilc_di03) — global fallback for all countries
|
||||
country_income AS (
|
||||
SELECT country_code, median_income_pps, ref_year AS income_year
|
||||
FROM staging.stg_income
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
|
||||
),
|
||||
-- ── EU NUTS-2 income via spatial join ──────────────────────────────────────
|
||||
-- Each EU location's (lon, lat) is matched against NUTS-2 boundary polygons.
|
||||
-- The bounding box pre-filter (bbox_lat/lon_min/max) eliminates most candidates
|
||||
@@ -214,56 +208,9 @@ tennis_nearby AS (
|
||||
SELECT
|
||||
l.geoname_id,
|
||||
l.country_code,
|
||||
-- Human-readable country name (consistent with dim_cities)
|
||||
CASE l.country_code
|
||||
WHEN 'DE' THEN 'Germany'
|
||||
WHEN 'ES' THEN 'Spain'
|
||||
WHEN 'GB' THEN 'United Kingdom'
|
||||
WHEN 'FR' THEN 'France'
|
||||
WHEN 'IT' THEN 'Italy'
|
||||
WHEN 'PT' THEN 'Portugal'
|
||||
WHEN 'AT' THEN 'Austria'
|
||||
WHEN 'CH' THEN 'Switzerland'
|
||||
WHEN 'NL' THEN 'Netherlands'
|
||||
WHEN 'BE' THEN 'Belgium'
|
||||
WHEN 'SE' THEN 'Sweden'
|
||||
WHEN 'NO' THEN 'Norway'
|
||||
WHEN 'DK' THEN 'Denmark'
|
||||
WHEN 'FI' THEN 'Finland'
|
||||
WHEN 'US' THEN 'United States'
|
||||
WHEN 'AR' THEN 'Argentina'
|
||||
WHEN 'MX' THEN 'Mexico'
|
||||
WHEN 'AE' THEN 'UAE'
|
||||
WHEN 'AU' THEN 'Australia'
|
||||
WHEN 'IE' THEN 'Ireland'
|
||||
ELSE l.country_code
|
||||
END AS country_name_en,
|
||||
-- URL-safe country slug
|
||||
LOWER(REGEXP_REPLACE(
|
||||
CASE l.country_code
|
||||
WHEN 'DE' THEN 'Germany'
|
||||
WHEN 'ES' THEN 'Spain'
|
||||
WHEN 'GB' THEN 'United Kingdom'
|
||||
WHEN 'FR' THEN 'France'
|
||||
WHEN 'IT' THEN 'Italy'
|
||||
WHEN 'PT' THEN 'Portugal'
|
||||
WHEN 'AT' THEN 'Austria'
|
||||
WHEN 'CH' THEN 'Switzerland'
|
||||
WHEN 'NL' THEN 'Netherlands'
|
||||
WHEN 'BE' THEN 'Belgium'
|
||||
WHEN 'SE' THEN 'Sweden'
|
||||
WHEN 'NO' THEN 'Norway'
|
||||
WHEN 'DK' THEN 'Denmark'
|
||||
WHEN 'FI' THEN 'Finland'
|
||||
WHEN 'US' THEN 'United States'
|
||||
WHEN 'AR' THEN 'Argentina'
|
||||
WHEN 'MX' THEN 'Mexico'
|
||||
WHEN 'AE' THEN 'UAE'
|
||||
WHEN 'AU' THEN 'Australia'
|
||||
WHEN 'IE' THEN 'Ireland'
|
||||
ELSE l.country_code
|
||||
END, '[^a-zA-Z0-9]+', '-'
|
||||
)) AS country_slug,
|
||||
-- Human-readable country name and slug — from dim_countries (single source of truth)
|
||||
c.country_name_en,
|
||||
c.country_slug,
|
||||
l.location_name,
|
||||
l.location_slug,
|
||||
l.lat,
|
||||
@@ -276,12 +223,12 @@ SELECT
|
||||
COALESCE(
|
||||
ri.regional_income_pps, -- EU: NUTS-2 (finest) or NUTS-1 (fallback)
|
||||
us.median_income_pps, -- US: state-level PPS-equivalent
|
||||
ci.median_income_pps -- Global: country-level from ilc_di03
|
||||
c.median_income_pps -- Global: country-level from dim_countries / ilc_di03
|
||||
) AS median_income_pps,
|
||||
COALESCE(
|
||||
ri.regional_income_year,
|
||||
us.income_year,
|
||||
ci.income_year
|
||||
c.income_year
|
||||
) AS income_year,
|
||||
COALESCE(pl.padel_venue_count, 0)::INTEGER AS padel_venue_count,
|
||||
-- Venues per 100K residents (NULL if population = 0)
|
||||
@@ -293,8 +240,8 @@ SELECT
|
||||
COALESCE(tn.tennis_courts_within_25km, 0)::INTEGER AS tennis_courts_within_25km,
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM locations l
|
||||
LEFT JOIN country_income ci ON l.country_code = ci.country_code
|
||||
LEFT JOIN regional_income ri ON l.geoname_id = ri.geoname_id
|
||||
LEFT JOIN foundation.dim_countries c ON l.country_code = c.country_code
|
||||
LEFT JOIN regional_income ri ON l.geoname_id = ri.geoname_id
|
||||
LEFT JOIN us_income us ON l.country_code = 'US'
|
||||
AND l.admin1_code = us.admin1_code
|
||||
LEFT JOIN nearest_padel np ON l.geoname_id = np.geoname_id
|
||||
|
||||
@@ -7,6 +7,10 @@
|
||||
-- 2. Country-level: median across cities in same country
|
||||
-- 3. Hardcoded fallback: market research estimates (only when no Playtomic data)
|
||||
--
|
||||
-- Cost override columns from dim_countries (Eurostat PLI + energy price indices) are
|
||||
-- included so the planner API pre-fills country-adjusted CAPEX/OPEX for all cities.
|
||||
-- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline preserved).
|
||||
--
|
||||
-- Units are explicit in column names. Monetary values in local currency.
|
||||
|
||||
MODEL (
|
||||
@@ -125,6 +129,37 @@ SELECT
|
||||
ELSE 0.2
|
||||
END AS data_confidence,
|
||||
COALESCE(cb.price_currency, ctb.price_currency, hf.currency, 'EUR') AS price_currency,
|
||||
-- Cost override columns (Eurostat PLI + energy prices via dim_countries).
|
||||
-- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline).
|
||||
dc.electricity,
|
||||
dc.heating,
|
||||
dc.rent_sqm,
|
||||
dc.insurance,
|
||||
dc.cleaning,
|
||||
dc.maintenance,
|
||||
dc.marketing,
|
||||
dc.water,
|
||||
dc.property_tax,
|
||||
dc.outdoor_rent,
|
||||
dc.hall_cost_sqm,
|
||||
dc.foundation_sqm,
|
||||
dc.land_price_sqm,
|
||||
dc.hvac,
|
||||
dc.electrical,
|
||||
dc.sanitary,
|
||||
dc.parking,
|
||||
dc.fitout,
|
||||
dc.planning,
|
||||
dc.fire_protection,
|
||||
dc.floor_prep,
|
||||
dc.hvac_upgrade,
|
||||
dc.lighting_upgrade,
|
||||
dc.outdoor_foundation,
|
||||
dc.outdoor_site_work,
|
||||
dc.outdoor_lighting,
|
||||
dc.outdoor_fencing,
|
||||
dc.working_capital,
|
||||
dc.permits_compliance,
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM city_profiles cp
|
||||
LEFT JOIN city_benchmarks cb
|
||||
@@ -134,3 +169,5 @@ LEFT JOIN country_benchmarks ctb
|
||||
ON cp.country_code = ctb.country_code
|
||||
LEFT JOIN hardcoded_fallbacks hf
|
||||
ON cp.country_code = hf.country_code
|
||||
LEFT JOIN foundation.dim_countries dc
|
||||
ON cp.country_code = dc.country_code
|
||||
|
||||
@@ -4,6 +4,10 @@
|
||||
--
|
||||
-- Calculator override columns use camelCase to match the DEFAULTS keys in
|
||||
-- planner/calculator.py, so they are auto-applied as calc pre-fills.
|
||||
--
|
||||
-- Cost override columns come from foundation.dim_countries (Eurostat PLI and energy
|
||||
-- price indices). NULL = fall through to calculator.py DEFAULTS (safe: auto-mapping
|
||||
-- filters None). DE always produces NULL overrides — preserves exact DEFAULTS behaviour.
|
||||
|
||||
MODEL (
|
||||
name serving.pseo_city_costs_de,
|
||||
@@ -44,6 +48,39 @@ SELECT
|
||||
FLOOR(p.courts_typical) AS "dblCourts",
|
||||
-- 'country' drives currency formatting in the calculator
|
||||
c.country_code AS "country",
|
||||
-- Cost override columns from dim_countries (Eurostat PLI + energy price indices).
|
||||
-- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline preserved).
|
||||
-- OPEX overrides
|
||||
cc.electricity AS "electricity",
|
||||
cc.heating AS "heating",
|
||||
cc.rent_sqm AS "rentSqm",
|
||||
cc.insurance AS "insurance",
|
||||
cc.cleaning AS "cleaning",
|
||||
cc.maintenance AS "maintenance",
|
||||
cc.marketing AS "marketing",
|
||||
cc.water AS "water",
|
||||
cc.property_tax AS "propertyTax",
|
||||
cc.outdoor_rent AS "outdoorRent",
|
||||
-- CAPEX overrides
|
||||
cc.hall_cost_sqm AS "hallCostSqm",
|
||||
cc.foundation_sqm AS "foundationSqm",
|
||||
cc.land_price_sqm AS "landPriceSqm",
|
||||
cc.hvac AS "hvac",
|
||||
cc.electrical AS "electrical",
|
||||
cc.sanitary AS "sanitary",
|
||||
cc.parking AS "parking",
|
||||
cc.fitout AS "fitout",
|
||||
cc.planning AS "planning",
|
||||
cc.fire_protection AS "fireProtection",
|
||||
cc.floor_prep AS "floorPrep",
|
||||
cc.hvac_upgrade AS "hvacUpgrade",
|
||||
cc.lighting_upgrade AS "lightingUpgrade",
|
||||
cc.outdoor_foundation AS "outdoorFoundation",
|
||||
cc.outdoor_site_work AS "outdoorSiteWork",
|
||||
cc.outdoor_lighting AS "outdoorLighting",
|
||||
cc.outdoor_fencing AS "outdoorFencing",
|
||||
cc.working_capital AS "workingCapital",
|
||||
cc.permits_compliance AS "permitsCompliance",
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM serving.city_market_profile c
|
||||
LEFT JOIN serving.planner_defaults p
|
||||
@@ -52,6 +89,8 @@ LEFT JOIN serving.planner_defaults p
|
||||
LEFT JOIN serving.location_opportunity_profile lop
|
||||
ON c.country_code = lop.country_code
|
||||
AND c.geoname_id = lop.geoname_id
|
||||
LEFT JOIN foundation.dim_countries cc
|
||||
ON c.country_code = cc.country_code
|
||||
-- Only cities with actual padel presence and at least some rate data
|
||||
WHERE c.padel_venue_count > 0
|
||||
AND (p.rate_peak IS NOT NULL OR c.median_peak_rate IS NOT NULL)
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
-- Electricity prices for non-household consumers (Eurostat nrg_pc_205).
|
||||
-- EUR/kWh excluding taxes, band MWH500-1999 (medium-sized commercial consumer).
|
||||
-- Semi-annual frequency: ref_period is "YYYY-S1" or "YYYY-S2".
|
||||
--
|
||||
-- Source: data/landing/eurostat/{year}/{month}/nrg_pc_205.json.gz
|
||||
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2024-S1", "value": 0.1523}, ...]}
|
||||
|
||||
MODEL (
  name staging.stg_electricity_prices,
  kind FULL,
  cron '@daily',
  grain (country_code, ref_period)
);

-- The glob below reads EVERY landing snapshot. If snapshots overlap, the same
-- (country, period) would otherwise be emitted once per file and break the declared
-- grain, so only the row from the latest landing file is kept.
WITH source AS (
    SELECT
        unnest(rows) AS r,
        filename     AS landing_file  -- path of the snapshot the row came from
    FROM read_json(
        @LANDING_DIR || '/eurostat/*/*/nrg_pc_205.json.gz',
        auto_detect = true,
        filename = true
    )
),
parsed AS (
    SELECT
        UPPER(TRIM(r.geo_code))     AS geo_code,
        TRIM(r.ref_year)            AS ref_period,
        TRY_CAST(r.value AS DOUBLE) AS electricity_eur_kwh,
        landing_file
    FROM source
    WHERE r.value IS NOT NULL
),
normalised AS (
    SELECT
        -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
        CASE geo_code
            WHEN 'EL' THEN 'GR'
            WHEN 'UK' THEN 'GB'
            ELSE geo_code
        END AS country_code,
        ref_period,
        electricity_eur_kwh,
        landing_file
    FROM parsed
    -- Two-character codes only; drop EU/EA aggregates and non-positive or unparseable values
    WHERE LENGTH(geo_code) = 2
      AND geo_code NOT IN ('EU', 'EA', 'EU27_2020')
      AND electricity_eur_kwh > 0
)
SELECT
    country_code,
    ref_period,
    electricity_eur_kwh
FROM normalised
-- Enforce grain: latest snapshot wins.
-- NOTE(review): assumes landing paths sort chronologically ({year}/{month} zero-padded) — confirm.
QUALIFY ROW_NUMBER() OVER (
    PARTITION BY country_code, ref_period
    ORDER BY landing_file DESC
) = 1
|
||||
@@ -0,0 +1,42 @@
|
||||
-- Gas prices for non-household consumers (Eurostat nrg_pc_203).
|
||||
-- EUR/GJ excluding taxes, band GJ1000-9999 (medium-sized commercial consumer).
|
||||
-- Semi-annual frequency: ref_period is "YYYY-S1" or "YYYY-S2".
|
||||
--
|
||||
-- Source: data/landing/eurostat/{year}/{month}/nrg_pc_203.json.gz
|
||||
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2024-S1", "value": 14.23}, ...]}
|
||||
|
||||
MODEL (
  name staging.stg_gas_prices,
  kind FULL,
  cron '@daily',
  grain (country_code, ref_period)
);

-- The glob below reads EVERY landing snapshot. If snapshots overlap, the same
-- (country, period) would otherwise be emitted once per file and break the declared
-- grain, so only the row from the latest landing file is kept.
WITH source AS (
    SELECT
        unnest(rows) AS r,
        filename     AS landing_file  -- path of the snapshot the row came from
    FROM read_json(
        @LANDING_DIR || '/eurostat/*/*/nrg_pc_203.json.gz',
        auto_detect = true,
        filename = true
    )
),
parsed AS (
    SELECT
        UPPER(TRIM(r.geo_code))     AS geo_code,
        TRIM(r.ref_year)            AS ref_period,
        TRY_CAST(r.value AS DOUBLE) AS gas_eur_gj,
        landing_file
    FROM source
    WHERE r.value IS NOT NULL
),
normalised AS (
    SELECT
        -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
        CASE geo_code
            WHEN 'EL' THEN 'GR'
            WHEN 'UK' THEN 'GB'
            ELSE geo_code
        END AS country_code,
        ref_period,
        gas_eur_gj,
        landing_file
    FROM parsed
    -- Two-character codes only; drop EU/EA aggregates and non-positive or unparseable values
    WHERE LENGTH(geo_code) = 2
      AND geo_code NOT IN ('EU', 'EA', 'EU27_2020')
      AND gas_eur_gj > 0
)
SELECT
    country_code,
    ref_period,
    gas_eur_gj
FROM normalised
-- Enforce grain: latest snapshot wins.
-- NOTE(review): assumes landing paths sort chronologically ({year}/{month} zero-padded) — confirm.
QUALIFY ROW_NUMBER() OVER (
    PARTITION BY country_code, ref_period
    ORDER BY landing_file DESC
) = 1
|
||||
@@ -0,0 +1,46 @@
|
||||
-- Labour cost levels EUR/hour (Eurostat lc_lci_lev).
|
||||
-- NACE R2 sector N (administrative and support service activities).
|
||||
-- D1_D2_A_HW structure: wages + non-wage costs, actual hours worked.
|
||||
-- Annual frequency.
|
||||
--
|
||||
-- Stored for future "staffed scenario" calculator variant.
|
||||
-- Not wired into default calculator overrides (staff=0 is a business assumption).
|
||||
--
|
||||
-- Source: data/landing/eurostat/{year}/{month}/lc_lci_lev.json.gz
|
||||
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2022", "value": 28.4}, ...]}
|
||||
|
||||
MODEL (
  name staging.stg_labour_costs,
  kind FULL,
  cron '@daily',
  grain (country_code, ref_year)
);

-- The glob below reads EVERY landing snapshot. If snapshots overlap, the same
-- (country, year) would otherwise be emitted once per file and break the declared
-- grain, so only the row from the latest landing file is kept.
WITH source AS (
    SELECT
        unnest(rows) AS r,
        filename     AS landing_file  -- path of the snapshot the row came from
    FROM read_json(
        @LANDING_DIR || '/eurostat/*/*/lc_lci_lev.json.gz',
        auto_detect = true,
        filename = true
    )
),
parsed AS (
    SELECT
        UPPER(TRIM(r.geo_code))          AS geo_code,
        TRY_CAST(r.ref_year AS INTEGER)  AS ref_year,
        TRY_CAST(r.value AS DOUBLE)      AS labour_cost_eur_hour,
        landing_file
    FROM source
    WHERE r.value IS NOT NULL
),
normalised AS (
    SELECT
        -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
        CASE geo_code
            WHEN 'EL' THEN 'GR'
            WHEN 'UK' THEN 'GB'
            ELSE geo_code
        END AS country_code,
        ref_year,
        labour_cost_eur_hour,
        landing_file
    FROM parsed
    -- Two-character codes only; drop EU/EA aggregates and non-positive or unparseable values
    WHERE LENGTH(geo_code) = 2
      AND geo_code NOT IN ('EU', 'EA', 'EU27_2020')
      AND labour_cost_eur_hour > 0
)
SELECT
    country_code,
    ref_year,
    labour_cost_eur_hour
FROM normalised
-- Enforce grain: latest snapshot wins.
-- NOTE(review): assumes landing paths sort chronologically ({year}/{month} zero-padded) — confirm.
QUALIFY ROW_NUMBER() OVER (
    PARTITION BY country_code, ref_year
    ORDER BY landing_file DESC
) = 1
|
||||
@@ -0,0 +1,96 @@
|
||||
-- Price level indices relative to EU27=100 (Eurostat prc_ppp_ind).
-- Five categories, each from a separate landing file (different ppp_cat filters).
-- Annual frequency.
--
-- Categories and what they scale in the calculator:
--   construction — CAPEX: hallCostSqm, foundationSqm, hvac, electrical, sanitary, etc.
--   housing      — rentSqm, landPriceSqm, water, outdoorRent
--   services     — cleaning, maintenance, marketing
--   misc         — insurance
--   government   — permitsCompliance, propertyTax
--
-- Sources:
--   data/landing/eurostat/*/*/prc_ppp_ind_construction.json.gz  (ppp_cat: A050202)
--   data/landing/eurostat/*/*/prc_ppp_ind_housing.json.gz       (ppp_cat: A0104)
--   data/landing/eurostat/*/*/prc_ppp_ind_services.json.gz      (ppp_cat: P0201)
--   data/landing/eurostat/*/*/prc_ppp_ind_misc.json.gz          (ppp_cat: A0112)
--   data/landing/eurostat/*/*/prc_ppp_ind_government.json.gz    (ppp_cat: P0202)
--
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2022", "value": 107.3}, ...]}
--
-- All landing snapshots matching each glob are read. Rows repeated across
-- snapshots are collapsed to one per (country_code, category, ref_year),
-- keeping the row from the latest snapshot, so the declared grain holds.

MODEL (
    name staging.stg_price_levels,
    kind FULL,
    cron '@daily',
    grain (country_code, category, ref_year)
);

-- Each *_raw CTE explodes one category's "rows" array and tags every element
-- with its category label and the file path it was read from.
WITH construction_raw AS (
    SELECT
        unnest(rows) AS r,
        'construction' AS category,
        filename AS source_file
    FROM read_json(
        @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_construction.json.gz',
        auto_detect = true,
        filename = true
    )
),

housing_raw AS (
    SELECT
        unnest(rows) AS r,
        'housing' AS category,
        filename AS source_file
    FROM read_json(
        @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_housing.json.gz',
        auto_detect = true,
        filename = true
    )
),

services_raw AS (
    SELECT
        unnest(rows) AS r,
        'services' AS category,
        filename AS source_file
    FROM read_json(
        @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_services.json.gz',
        auto_detect = true,
        filename = true
    )
),

misc_raw AS (
    SELECT
        unnest(rows) AS r,
        'misc' AS category,
        filename AS source_file
    FROM read_json(
        @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_misc.json.gz',
        auto_detect = true,
        filename = true
    )
),

government_raw AS (
    SELECT
        unnest(rows) AS r,
        'government' AS category,
        filename AS source_file
    FROM read_json(
        @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_government.json.gz',
        auto_detect = true,
        filename = true
    )
),

-- Stack the five categories; UNION ALL because the category label already
-- keeps the inputs disjoint.
all_raw AS (
    SELECT r, category, source_file FROM construction_raw
    UNION ALL
    SELECT r, category, source_file FROM housing_raw
    UNION ALL
    SELECT r, category, source_file FROM services_raw
    UNION ALL
    SELECT r, category, source_file FROM misc_raw
    UNION ALL
    SELECT r, category, source_file FROM government_raw
),

parsed AS (
    SELECT
        UPPER(TRIM(r.geo_code)) AS geo_code,
        -- TRY_CAST yields NULL for unparseable input rather than failing the run.
        TRY_CAST(r.ref_year AS INTEGER) AS ref_year,
        TRY_CAST(r.value AS DOUBLE) AS pli,
        category,
        source_file
    FROM all_raw
    WHERE r.value IS NOT NULL
),

normalised AS (
    SELECT
        -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
        CASE geo_code
            WHEN 'EL' THEN 'GR'
            WHEN 'UK' THEN 'GB'
            ELSE geo_code
        END AS country_code,
        category,
        ref_year,
        pli,
        source_file
    FROM parsed
    WHERE LENGTH(geo_code) = 2
        -- Drop aggregate geo codes; only country-level rows are wanted.
        AND geo_code NOT IN ('EU', 'EA', 'EU27_2020')
        AND pli > 0
        -- A year that failed to parse cannot be part of the grain key.
        AND ref_year IS NOT NULL
)

SELECT
    country_code,
    category,
    ref_year,
    pli
FROM normalised
-- Keep the observation from the newest snapshot per grain key.
-- NOTE(review): assumes landing paths sort chronologically (zero-padded
-- {month} directories) — TODO confirm against the extractor.
QUALIFY ROW_NUMBER() OVER (
    PARTITION BY country_code, category, ref_year
    ORDER BY source_file DESC
) = 1
|
||||
Reference in New Issue
Block a user