merge(worktree): individualise article costs with per-country Eurostat data + tiered proxy tenant work

# Conflicts:
#	CHANGELOG.md
#	transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql
#	transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql
This commit is contained in:
Deeman
2026-03-04 12:44:56 +01:00
12 changed files with 679 additions and 36 deletions

View File

@@ -7,6 +7,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
## [Unreleased] ## [Unreleased]
### Added ### Added
- **Individualised article financial calculations with real per-country cost data** — ~30 CAPEX/OPEX calculator fields now scale to each country's actual cost level via Eurostat data, eliminating the identical DE-hardcoded numbers shown for every city globally.
- **New Eurostat datasets extracted** (8 new landing files): electricity prices (`nrg_pc_205`), gas prices (`nrg_pc_203`), labour costs (`lc_lci_lev`), and 5 price level index categories from `prc_ppp_ind` (construction, housing, services, misc, government).
- `extract/padelnomics_extract/src/padelnomics_extract/eurostat.py`: added 8 dataset entries; added `dataset_code` field support so multiple dict entries can share one Eurostat API endpoint (needed for 5 prc_ppp_ind variants).
- **4 new staging models**: `stg_electricity_prices`, `stg_gas_prices`, `stg_labour_costs`, `stg_price_levels` — all read from landing zone with ISO code normalisation (EL→GR, UK→GB).
- **New `foundation.dim_countries`** — conformed country dimension (grain: `country_code`). Consolidates country names/slugs and income data previously duplicated in `dim_cities` and `dim_locations` as ~50-line CASE blocks. Computes ~29 calculator cost override columns from Eurostat PLI indices and energy prices relative to DE baseline.
- **Refactored `dim_cities`** — removed ~50-line CASE blocks and `country_income` CTE; JOIN `dim_countries` for `country_name_en`, `country_slug`, `median_income_pps`, `income_year`.
- **Refactored `dim_locations`** — same refactor as `dim_cities`; the income cascade is unchanged (EU NUTS-2 → EU NUTS-1 → US state → country-level), with the country-level fallback now read from `dim_countries`.
- **Updated `serving.pseo_city_costs_de`** — JOIN `dim_countries`; 29 new camelCase override columns (`electricity`, `heating`, `rentSqm`, `hallCostSqm`, …, `permitsCompliance`) auto-applied by calculator.
- **Updated `serving.planner_defaults`** — JOIN `dim_countries`; same 29 cost columns flow through to the planner API `/api/market-data` endpoint.
- **Bulk actions for articles and leads** — checkbox selection + floating action bar on admin articles and leads pages (same pattern as suppliers). Articles: publish, unpublish, toggle noindex, rebuild, delete. Leads: set status, set heat. Re-renders results via HTMX after each action. - **Bulk actions for articles and leads** — checkbox selection + floating action bar on admin articles and leads pages (same pattern as suppliers). Articles: publish, unpublish, toggle noindex, rebuild, delete. Leads: set status, set heat. Re-renders results via HTMX after each action.
- **Stripe payment provider** — second payment provider alongside Paddle, switchable via `PAYMENT_PROVIDER=stripe` env var. Existing Paddle subscribers keep working regardless of toggle — both webhook endpoints stay active. - **Stripe payment provider** — second payment provider alongside Paddle, switchable via `PAYMENT_PROVIDER=stripe` env var. Existing Paddle subscribers keep working regardless of toggle — both webhook endpoints stay active.
- `billing/stripe.py`: full Stripe implementation (Checkout Sessions, Billing Portal, subscription cancel, webhook verification + parsing) - `billing/stripe.py`: full Stripe implementation (Checkout Sessions, Billing Portal, subscription cancel, webhook verification + parsing)

View File

@@ -26,6 +26,10 @@ EUROSTAT_BASE_URL = "https://ec.europa.eu/eurostat/api/dissemination/statistics/
# Dataset configs: filters fix dimension values, geo_dim/time_dim are iterated. # Dataset configs: filters fix dimension values, geo_dim/time_dim are iterated.
# All other dimensions must either be in filters or have size=1. # All other dimensions must either be in filters or have size=1.
#
# Optional `dataset_code` field: when present, used for the API URL instead of the dict key.
# This allows multiple entries to share the same Eurostat dataset with different filters
# (e.g. five prc_ppp_ind entries with different ppp_cat values).
DATASETS: dict[str, dict] = { DATASETS: dict[str, dict] = {
"urb_cpop1": { "urb_cpop1": {
"filters": {"indic_ur": "DE1001V"}, # Population on 1 January, total "filters": {"indic_ur": "DE1001V"}, # Population on 1 January, total
@@ -51,6 +55,59 @@ DATASETS: dict[str, dict] = {
"geo_dim": "geo", "geo_dim": "geo",
"time_dim": "time", "time_dim": "time",
}, },
# ── Direct-value datasets (actual EUR figures) ───────────────────────────
"nrg_pc_205": {
# Electricity prices for non-household consumers, EUR/kWh, incl. all taxes and levies (tax=I_TAX)
"filters": {"freq": "S", "nrg_cons": "MWH500-1999", "currency": "EUR", "tax": "I_TAX"},
"geo_dim": "geo",
"time_dim": "time",
},
"nrg_pc_203": {
# Gas prices for non-household consumers, EUR/GJ, incl. all taxes and levies (tax=I_TAX)
"filters": {"freq": "S", "nrg_cons": "GJ1000-9999", "currency": "EUR", "tax": "I_TAX"},
"geo_dim": "geo",
"time_dim": "time",
},
"lc_lci_lev": {
# Labour cost levels EUR/hour — NACE N (administrative/support services)
# Stored in dim_countries for future staffed-scenario calculations.
"filters": {"lcstruct": "D1_D2_A_HW", "nace_r2": "N", "currency": "EUR"},
"geo_dim": "geo",
"time_dim": "time",
},
# ── Price level indices (relative scaling, EU27=100) ─────────────────────
# Five entries share the prc_ppp_ind dataset with different ppp_cat filters.
# dataset_code points to the real API endpoint; the dict key is the landing filename.
"prc_ppp_ind_construction": {
"dataset_code": "prc_ppp_ind",
"filters": {"ppp_cat": "A050202", "na_item": "PLI_EU27_2020"},
"geo_dim": "geo",
"time_dim": "time",
},
"prc_ppp_ind_housing": {
"dataset_code": "prc_ppp_ind",
"filters": {"ppp_cat": "A0104", "na_item": "PLI_EU27_2020"},
"geo_dim": "geo",
"time_dim": "time",
},
"prc_ppp_ind_services": {
"dataset_code": "prc_ppp_ind",
"filters": {"ppp_cat": "P0201", "na_item": "PLI_EU27_2020"},
"geo_dim": "geo",
"time_dim": "time",
},
"prc_ppp_ind_misc": {
"dataset_code": "prc_ppp_ind",
"filters": {"ppp_cat": "A0112", "na_item": "PLI_EU27_2020"},
"geo_dim": "geo",
"time_dim": "time",
},
"prc_ppp_ind_government": {
"dataset_code": "prc_ppp_ind",
"filters": {"ppp_cat": "P0202", "na_item": "PLI_EU27_2020"},
"geo_dim": "geo",
"time_dim": "time",
},
} }
@@ -196,22 +253,25 @@ def extract(
files_skipped = 0 files_skipped = 0
bytes_written_total = 0 bytes_written_total = 0
for dataset_code, config in DATASETS.items(): for dataset_key, config in DATASETS.items():
url = f"{EUROSTAT_BASE_URL}/{dataset_code}?format=JSON&lang=EN" # Use dataset_code (if set) for the API URL; fall back to the dict key.
# This lets multiple entries share one Eurostat dataset with different filters.
api_code = config.get("dataset_code", dataset_key)
url = f"{EUROSTAT_BASE_URL}/{api_code}?format=JSON&lang=EN"
for key, val in config.get("filters", {}).items(): for key, val in config.get("filters", {}).items():
url += f"&{key}={val}" url += f"&{key}={val}"
dest_dir = landing_path(landing_dir, "eurostat", year, month) dest_dir = landing_path(landing_dir, "eurostat", year, month)
dest = dest_dir / f"{dataset_code}.json.gz" dest = dest_dir / f"{dataset_key}.json.gz"
logger.info("GET %s", dataset_code) logger.info("GET %s", dataset_key)
bytes_written = _fetch_with_etag(url, dest, session, config) bytes_written = _fetch_with_etag(url, dest, session, config)
if bytes_written > 0: if bytes_written > 0:
logger.info("%s updated — %s bytes compressed", dataset_code, f"{bytes_written:,}") logger.info("%s updated — %s bytes compressed", dataset_key, f"{bytes_written:,}")
files_written += 1 files_written += 1
bytes_written_total += bytes_written bytes_written_total += bytes_written
else: else:
logger.info("%s not modified (304)", dataset_code) logger.info("%s not modified (304)", dataset_key)
files_skipped += 1 files_skipped += 1
return { return {

View File

@@ -54,6 +54,7 @@ Grain must match reality — use `QUALIFY ROW_NUMBER()` to enforce it.
| Dimension | Grain | Used by | | Dimension | Grain | Used by |
|-----------|-------|---------| |-----------|-------|---------|
| `foundation.dim_countries` | `country_code` | `dim_cities`, `dim_locations`, `pseo_city_costs_de`, `planner_defaults` — single source for country names, income, PLI/cost overrides |
| `foundation.dim_venues` | `venue_id` | `dim_cities`, `dim_venue_capacity`, `fct_daily_availability` (via capacity join) | | `foundation.dim_venues` | `venue_id` | `dim_cities`, `dim_venue_capacity`, `fct_daily_availability` (via capacity join) |
| `foundation.dim_cities` | `(country_code, city_slug)` | `serving.city_market_profile` → all pSEO serving models | | `foundation.dim_cities` | `(country_code, city_slug)` | `serving.city_market_profile` → all pSEO serving models |
| `foundation.dim_locations` | `(country_code, geoname_id)` | `serving.location_opportunity_profile` — all GeoNames locations (pop ≥1K), incl. zero-court locations | | `foundation.dim_locations` | `(country_code, geoname_id)` | `serving.location_opportunity_profile` — all GeoNames locations (pop ≥1K), incl. zero-court locations |

View File

@@ -5,7 +5,7 @@
-- Conformed dimension: used by city_market_profile and all pSEO serving models. -- Conformed dimension: used by city_market_profile and all pSEO serving models.
-- Integrates four sources: -- Integrates four sources:
-- dim_venues → city list, venue count, coordinates (Playtomic + OSM) -- dim_venues → city list, venue count, coordinates (Playtomic + OSM)
-- stg_income → country-level median income (Eurostat) -- foundation.dim_countries → country_name_en, country_slug, median_income_pps
-- stg_city_labels → Eurostat city_code → city_name mapping (EU cities) -- stg_city_labels → Eurostat city_code → city_name mapping (EU cities)
-- stg_population → Eurostat city-level population (EU, joined via city code) -- stg_population → Eurostat city-level population (EU, joined via city code)
-- stg_population_usa → US Census ACS place population -- stg_population_usa → US Census ACS place population
@@ -41,12 +41,6 @@ venue_cities AS (
WHERE city IS NOT NULL AND LENGTH(city) > 0 WHERE city IS NOT NULL AND LENGTH(city) > 0
GROUP BY country_code, city GROUP BY country_code, city
), ),
-- Latest country income per country
country_income AS (
SELECT country_code, median_income_pps, ref_year AS income_year
FROM staging.stg_income
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
),
-- Eurostat EU population: join city labels (code→name) with population values. -- Eurostat EU population: join city labels (code→name) with population values.
-- QUALIFY keeps only the most recent year per (country, city name). -- QUALIFY keeps only the most recent year per (country, city name).
eurostat_pop AS ( eurostat_pop AS (
@@ -108,10 +102,9 @@ SELECT
vc.country_code, vc.country_code,
vc.city_slug, vc.city_slug,
vc.city_name, vc.city_name,
-- Human-readable country name for pSEO templates and internal linking -- Human-readable country name and slug — from dim_countries (single source of truth)
@country_name(vc.country_code) AS country_name_en, c.country_name_en,
-- URL-safe country slug c.country_slug,
@country_slug(vc.country_code) AS country_slug,
vc.centroid_lat AS lat, vc.centroid_lat AS lat,
vc.centroid_lon AS lon, vc.centroid_lon AS lon,
-- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames string > GeoNames spatial > 0. -- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames string > GeoNames spatial > 0.
@@ -133,13 +126,13 @@ SELECT
0 0
)::INTEGER AS population_year, )::INTEGER AS population_year,
vc.padel_venue_count, vc.padel_venue_count,
ci.median_income_pps, c.median_income_pps,
ci.income_year, c.income_year,
-- GeoNames ID: FK to dim_locations / location_opportunity_profile. -- GeoNames ID: FK to dim_locations / location_opportunity_profile.
-- String match preferred; spatial fallback used when name doesn't match (Milano→Milan, etc.) -- String match preferred; spatial fallback used when name doesn't match (Milano→Milan, etc.)
COALESCE(gn.geoname_id, gs.spatial_geoname_id) AS geoname_id COALESCE(gn.geoname_id, gs.spatial_geoname_id) AS geoname_id
FROM venue_cities vc FROM venue_cities vc
LEFT JOIN country_income ci ON vc.country_code = ci.country_code LEFT JOIN foundation.dim_countries c ON vc.country_code = c.country_code
-- Eurostat EU population (via city code→name lookup) -- Eurostat EU population (via city code→name lookup)
LEFT JOIN eurostat_pop ep LEFT JOIN eurostat_pop ep
ON vc.country_code = ep.country_code ON vc.country_code = ep.country_code

View File

@@ -0,0 +1,285 @@
-- Conformed country dimension — single authoritative source for all country metadata.
--
-- Consolidates data previously duplicated across dim_cities and dim_locations:
-- - country_name_en / country_slug (was: ~50-line CASE blocks in both models)
-- - median_income_pps (was: country_income CTE in both models)
-- - energy prices, labour costs, PLI indices (new — from Eurostat datasets)
-- - cost override columns for the financial calculator
--
-- Used by: dim_cities, dim_locations, pseo_city_costs_de, planner_defaults.
-- Grain: country_code (one row per ISO 3166-1 alpha-2 country code).
-- Kind: FULL — small table (~40 rows), full refresh daily.
--
-- Cost override columns:
-- NULL = fall through to calculator.py DEFAULTS (safe: auto-mapping filters None).
-- For DE (the baseline country) all overrides are NULL to preserve exact DEFAULTS.
-- For countries missing Eurostat data, NULLs propagate naturally.
-- camelCase column aliases match DEFAULTS keys for auto-mapping in content/__init__.py.
--
-- !! DE baseline values sourced from calculator.py DEFAULTS (web/src/padelnomics/planner/calculator.py).
-- !! If DEFAULTS change, the hardcoded baseline values below must be updated to match.
-- !! Search "DE baseline" in this file to find all affected lines.
MODEL (
  name foundation.dim_countries,
  kind FULL,
  cron '@daily',
  grain country_code
);

-- One row per two-letter country code. Each staging source is reduced to its
-- latest observation per country, then LEFT JOINed onto the union of all
-- country codes so a country missing from one source keeps its row with NULLs.
--
-- Cost override columns are computed as  DE_default * (country_value / DE_value).
-- They are NULL when:
--   * the country is DE (baseline — calculator DEFAULTS apply unchanged),
--   * the country has no value in the relevant source, or
--   * DE itself has no (or a zero) value in the relevant source.
WITH
-- Latest income per country
latest_income AS (
  SELECT country_code, median_income_pps, ref_year AS income_year
  FROM staging.stg_income
  QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
),
-- Latest electricity price per country (most recent ref_period)
latest_electricity AS (
  SELECT country_code, electricity_eur_kwh, ref_period
  FROM staging.stg_electricity_prices
  QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_period DESC) = 1
),
-- Latest gas price per country
latest_gas AS (
  SELECT country_code, gas_eur_gj, ref_period
  FROM staging.stg_gas_prices
  QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_period DESC) = 1
),
-- Latest labour cost per country
latest_labour AS (
  SELECT country_code, labour_cost_eur_hour, ref_year
  FROM staging.stg_labour_costs
  QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
),
-- Latest PLI per (country, category)
latest_pli AS (
  SELECT country_code, category, pli, ref_year
  FROM staging.stg_price_levels
  QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code, category ORDER BY ref_year DESC) = 1
),
-- Pivot PLI categories into one column each per country.
-- latest_pli holds at most one row per (country, category), so MAX just picks it.
pli_pivoted AS (
  SELECT
    country_code,
    MAX(pli) FILTER (WHERE category = 'construction') AS construction,
    MAX(pli) FILTER (WHERE category = 'housing')      AS housing,
    MAX(pli) FILTER (WHERE category = 'services')     AS services,
    MAX(pli) FILTER (WHERE category = 'misc')         AS misc,
    MAX(pli) FILTER (WHERE category = 'government')   AS government
  FROM latest_pli
  GROUP BY country_code
),
-- DE baseline rows for ratio computation (zero or one row each).
-- These are attached with LEFT JOIN ... ON TRUE below, NOT with CROSS JOIN:
-- a CROSS JOIN against an empty baseline would drop every country from the
-- dimension, whereas the LEFT JOIN keeps all rows and yields NULL ratios.
de_pli AS (
  SELECT construction, housing, services, misc, government
  FROM pli_pivoted
  WHERE country_code = 'DE'
),
de_elec AS (
  SELECT electricity_eur_kwh FROM latest_electricity WHERE country_code = 'DE'
),
de_gas AS (
  SELECT gas_eur_gj FROM latest_gas WHERE country_code = 'DE'
),
-- All distinct country codes from any source, plus known padel markets so they
-- appear even if no source covers them yet. UNION de-duplicates.
all_countries AS (
  SELECT country_code FROM latest_income
  UNION
  SELECT country_code FROM latest_electricity
  UNION
  SELECT country_code FROM latest_gas
  UNION
  SELECT country_code FROM latest_labour
  UNION
  SELECT country_code FROM pli_pivoted
  UNION
  SELECT unnest(['DE','ES','GB','FR','IT','PT','AT','CH','NL','BE','SE','NO','DK','FI',
                 'US','AR','MX','AE','AU','IE']) AS country_code
),
-- Country spine with the English display name defined exactly once.
-- Only two-character codes are kept; NULL codes are dropped by the same filter.
-- Unmapped codes fall back to the code itself.
countries AS (
  SELECT
    country_code,
    CASE country_code
      WHEN 'DE' THEN 'Germany'
      WHEN 'ES' THEN 'Spain'
      WHEN 'GB' THEN 'United Kingdom'
      WHEN 'FR' THEN 'France'
      WHEN 'IT' THEN 'Italy'
      WHEN 'PT' THEN 'Portugal'
      WHEN 'AT' THEN 'Austria'
      WHEN 'CH' THEN 'Switzerland'
      WHEN 'NL' THEN 'Netherlands'
      WHEN 'BE' THEN 'Belgium'
      WHEN 'SE' THEN 'Sweden'
      WHEN 'NO' THEN 'Norway'
      WHEN 'DK' THEN 'Denmark'
      WHEN 'FI' THEN 'Finland'
      WHEN 'US' THEN 'United States'
      WHEN 'AR' THEN 'Argentina'
      WHEN 'MX' THEN 'Mexico'
      WHEN 'AE' THEN 'UAE'
      WHEN 'AU' THEN 'Australia'
      WHEN 'IE' THEN 'Ireland'
      ELSE country_code
    END AS country_name_en
  FROM all_countries
  WHERE LENGTH(country_code) = 2
),
-- Per-country scaling ratios relative to DE.
-- NULLIF guards against a zero DE baseline (NULL instead of a division by zero).
-- DE is forced to NULL so that every override derived from a ratio is NULL for DE.
country_ratios AS (
  SELECT
    c.country_code,
    CASE WHEN c.country_code = 'DE' THEN NULL
         ELSE e.electricity_eur_kwh / NULLIF(de_e.electricity_eur_kwh, 0)
    END AS electricity_ratio,
    CASE WHEN c.country_code = 'DE' THEN NULL
         ELSE g.gas_eur_gj / NULLIF(de_g.gas_eur_gj, 0)
    END AS gas_ratio,
    CASE WHEN c.country_code = 'DE' THEN NULL
         ELSE p.housing / NULLIF(de_p.housing, 0)
    END AS housing_ratio,
    CASE WHEN c.country_code = 'DE' THEN NULL
         ELSE p.misc / NULLIF(de_p.misc, 0)
    END AS misc_ratio,
    CASE WHEN c.country_code = 'DE' THEN NULL
         ELSE p.services / NULLIF(de_p.services, 0)
    END AS services_ratio,
    CASE WHEN c.country_code = 'DE' THEN NULL
         ELSE p.government / NULLIF(de_p.government, 0)
    END AS government_ratio,
    CASE WHEN c.country_code = 'DE' THEN NULL
         ELSE p.construction / NULLIF(de_p.construction, 0)
    END AS construction_ratio
  FROM countries c
  LEFT JOIN latest_electricity e ON c.country_code = e.country_code
  LEFT JOIN latest_gas g         ON c.country_code = g.country_code
  LEFT JOIN pli_pivoted p        ON c.country_code = p.country_code
  LEFT JOIN de_pli de_p          ON TRUE
  LEFT JOIN de_elec de_e         ON TRUE
  LEFT JOIN de_gas de_g          ON TRUE
)
SELECT
  c.country_code,
  -- Country name and slug (single definition in the countries CTE)
  c.country_name_en,
  -- 'g' = replace every run of non-alphanumerics, not just the first one
  LOWER(REGEXP_REPLACE(c.country_name_en, '[^a-zA-Z0-9]+', '-', 'g')) AS country_slug,
  -- Income data
  i.median_income_pps,
  i.income_year,
  -- Raw energy and labour data (for reference / future staffed-scenario use)
  e.electricity_eur_kwh,
  g.gas_eur_gj,
  la.labour_cost_eur_hour,
  -- PLI indices per category (EU27=100)
  p.construction AS pli_construction,
  p.housing      AS pli_housing,
  p.services     AS pli_services,
  p.misc         AS pli_misc,
  p.government   AS pli_government,
  -- ── Calculator cost override columns ────────────────────────────────────
  -- override = DE_default × ratio; a NULL ratio (DE, or missing data) gives NULL,
  -- which falls through to calculator.py DEFAULTS.
  --
  -- OPEX overrides — energy (direct price ratio)
  -- DE baseline: electricity=600, heating=400 (see calculator.py DEFAULTS)
  ROUND(600.0 * r.electricity_ratio, 0) AS electricity,
  ROUND(400.0 * r.gas_ratio, 0)         AS heating,
  -- OPEX overrides — PLI-scaled (housing category)
  -- DE baseline: rentSqm=4, water=125, outdoorRent=400
  ROUND(4.0 * r.housing_ratio, 2)   AS rent_sqm,
  ROUND(125.0 * r.housing_ratio, 0) AS water,
  ROUND(400.0 * r.housing_ratio, 0) AS outdoor_rent,
  -- OPEX overrides — PLI-scaled (misc category)
  -- DE baseline: insurance=300
  ROUND(300.0 * r.misc_ratio, 0) AS insurance,
  -- OPEX overrides — PLI-scaled (services category)
  -- DE baseline: cleaning=300, maintenance=300, marketing=350
  ROUND(300.0 * r.services_ratio, 0) AS cleaning,
  ROUND(300.0 * r.services_ratio, 0) AS maintenance,
  ROUND(350.0 * r.services_ratio, 0) AS marketing,
  -- OPEX overrides — PLI-scaled (government category)
  -- DE baseline: propertyTax=250, permitsCompliance=12000
  ROUND(250.0 * r.government_ratio, 0)   AS property_tax,
  ROUND(12000.0 * r.government_ratio, 0) AS permits_compliance,
  -- CAPEX overrides — PLI-scaled (construction category)
  -- DE baseline: hallCostSqm=500, foundationSqm=150, hvac=100000, electrical=60000,
  --   sanitary=80000, parking=50000, fitout=40000, planning=100000,
  --   fireProtection=80000, floorPrep=12000, hvacUpgrade=20000,
  --   lightingUpgrade=10000, outdoorFoundation=35, outdoorSiteWork=8000,
  --   outdoorLighting=4000, outdoorFencing=6000, workingCapital=15000
  ROUND(500.0 * r.construction_ratio, 0)    AS hall_cost_sqm,
  ROUND(150.0 * r.construction_ratio, 0)    AS foundation_sqm,
  ROUND(100000.0 * r.construction_ratio, 0) AS hvac,
  ROUND(60000.0 * r.construction_ratio, 0)  AS electrical,
  ROUND(80000.0 * r.construction_ratio, 0)  AS sanitary,
  ROUND(50000.0 * r.construction_ratio, 0)  AS parking,
  ROUND(40000.0 * r.construction_ratio, 0)  AS fitout,
  ROUND(100000.0 * r.construction_ratio, 0) AS planning,
  ROUND(80000.0 * r.construction_ratio, 0)  AS fire_protection,
  ROUND(12000.0 * r.construction_ratio, 0)  AS floor_prep,
  ROUND(20000.0 * r.construction_ratio, 0)  AS hvac_upgrade,
  ROUND(10000.0 * r.construction_ratio, 0)  AS lighting_upgrade,
  ROUND(35.0 * r.construction_ratio, 0)     AS outdoor_foundation,
  ROUND(8000.0 * r.construction_ratio, 0)   AS outdoor_site_work,
  ROUND(4000.0 * r.construction_ratio, 0)   AS outdoor_lighting,
  ROUND(6000.0 * r.construction_ratio, 0)   AS outdoor_fencing,
  ROUND(15000.0 * r.construction_ratio, 0)  AS working_capital,
  -- CAPEX overrides — PLI-scaled (housing category)
  -- DE baseline: landPriceSqm=60
  ROUND(60.0 * r.housing_ratio, 0) AS land_price_sqm
FROM countries c
LEFT JOIN latest_income i      ON c.country_code = i.country_code
LEFT JOIN latest_electricity e ON c.country_code = e.country_code
LEFT JOIN latest_gas g         ON c.country_code = g.country_code
LEFT JOIN latest_labour la     ON c.country_code = la.country_code
LEFT JOIN pli_pivoted p        ON c.country_code = p.country_code
LEFT JOIN country_ratios r     ON c.country_code = r.country_code
-- Enforce grain (defensive: every joined CTE is already one row per country)
QUALIFY ROW_NUMBER() OVER (PARTITION BY c.country_code ORDER BY c.country_code) = 1

View File

@@ -6,9 +6,9 @@
-- covers all locations with population ≥ 1K so zero-court Gemeinden score fully. -- covers all locations with population ≥ 1K so zero-court Gemeinden score fully.
-- --
-- Enriched with: -- Enriched with:
-- foundation.dim_countries → country_name_en, country_slug, median_income_pps
-- stg_nuts2_boundaries + stg_regional_income → EU NUTS-2/NUTS-1 income (spatial join) -- stg_nuts2_boundaries + stg_regional_income → EU NUTS-2/NUTS-1 income (spatial join)
-- stg_income_usa → US state-level income (PPS-normalised) -- stg_income_usa → US state-level income (PPS-normalised)
-- stg_income → country-level income (fallback for all countries)
-- stg_padel_courts → padel venue count + nearest court distance (km) -- stg_padel_courts → padel venue count + nearest court distance (km)
-- stg_tennis_courts → tennis court count within 25km radius -- stg_tennis_courts → tennis court count within 25km radius
-- --
@@ -16,7 +16,7 @@
-- 1. EU NUTS-2 regional income (finest; spatial join via ST_Contains) -- 1. EU NUTS-2 regional income (finest; spatial join via ST_Contains)
-- 2. EU NUTS-1 regional income (fallback when NUTS-2 income missing from dataset) -- 2. EU NUTS-1 regional income (fallback when NUTS-2 income missing from dataset)
-- 3. US state income (ratio-normalised to PPS scale; see us_income CTE) -- 3. US state income (ratio-normalised to PPS scale; see us_income CTE)
-- 4. Country-level income (global fallback from stg_income / ilc_di03) -- 4. Country-level income (global fallback from dim_countries / ilc_di03)
-- --
-- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension). -- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension).
-- Spatial joins use BETWEEN predicates (not ABS()) to enable DuckDB's IEJoin -- Spatial joins use BETWEEN predicates (not ABS()) to enable DuckDB's IEJoin
@@ -49,12 +49,6 @@ locations AS (
FROM staging.stg_population_geonames FROM staging.stg_population_geonames
WHERE lat IS NOT NULL AND lon IS NOT NULL WHERE lat IS NOT NULL AND lon IS NOT NULL
), ),
-- Country income (ilc_di03) — global fallback for all countries
country_income AS (
SELECT country_code, median_income_pps, ref_year AS income_year
FROM staging.stg_income
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
),
-- ── EU NUTS-2 income via spatial join ────────────────────────────────────── -- ── EU NUTS-2 income via spatial join ──────────────────────────────────────
-- Each EU location's (lon, lat) is matched against NUTS-2 boundary polygons. -- Each EU location's (lon, lat) is matched against NUTS-2 boundary polygons.
-- The bounding box pre-filter (bbox_lat/lon_min/max) eliminates most candidates -- The bounding box pre-filter (bbox_lat/lon_min/max) eliminates most candidates
@@ -214,10 +208,9 @@ tennis_nearby AS (
SELECT SELECT
l.geoname_id, l.geoname_id,
l.country_code, l.country_code,
-- Human-readable country name (consistent with dim_cities) -- Human-readable country name and slug — from dim_countries (single source of truth)
@country_name(l.country_code) AS country_name_en, c.country_name_en,
-- URL-safe country slug c.country_slug,
@country_slug(l.country_code) AS country_slug,
l.location_name, l.location_name,
l.location_slug, l.location_slug,
l.lat, l.lat,
@@ -230,12 +223,12 @@ SELECT
COALESCE( COALESCE(
ri.regional_income_pps, -- EU: NUTS-2 (finest) or NUTS-1 (fallback) ri.regional_income_pps, -- EU: NUTS-2 (finest) or NUTS-1 (fallback)
us.median_income_pps, -- US: state-level PPS-equivalent us.median_income_pps, -- US: state-level PPS-equivalent
ci.median_income_pps -- Global: country-level from ilc_di03 c.median_income_pps -- Global: country-level from dim_countries / ilc_di03
) AS median_income_pps, ) AS median_income_pps,
COALESCE( COALESCE(
ri.regional_income_year, ri.regional_income_year,
us.income_year, us.income_year,
ci.income_year c.income_year
) AS income_year, ) AS income_year,
COALESCE(pl.padel_venue_count, 0)::INTEGER AS padel_venue_count, COALESCE(pl.padel_venue_count, 0)::INTEGER AS padel_venue_count,
-- Venues per 100K residents (NULL if population = 0) -- Venues per 100K residents (NULL if population = 0)
@@ -247,8 +240,8 @@ SELECT
COALESCE(tn.tennis_courts_within_25km, 0)::INTEGER AS tennis_courts_within_25km, COALESCE(tn.tennis_courts_within_25km, 0)::INTEGER AS tennis_courts_within_25km,
CURRENT_DATE AS refreshed_date CURRENT_DATE AS refreshed_date
FROM locations l FROM locations l
LEFT JOIN country_income ci ON l.country_code = ci.country_code LEFT JOIN foundation.dim_countries c ON l.country_code = c.country_code
LEFT JOIN regional_income ri ON l.geoname_id = ri.geoname_id LEFT JOIN regional_income ri ON l.geoname_id = ri.geoname_id
LEFT JOIN us_income us ON l.country_code = 'US' LEFT JOIN us_income us ON l.country_code = 'US'
AND l.admin1_code = us.admin1_code AND l.admin1_code = us.admin1_code
LEFT JOIN nearest_padel np ON l.geoname_id = np.geoname_id LEFT JOIN nearest_padel np ON l.geoname_id = np.geoname_id

View File

@@ -7,6 +7,10 @@
-- 2. Country-level: median across cities in same country -- 2. Country-level: median across cities in same country
-- 3. Hardcoded fallback: market research estimates (only when no Playtomic data) -- 3. Hardcoded fallback: market research estimates (only when no Playtomic data)
-- --
-- Cost override columns from dim_countries (Eurostat PLI + energy price indices) are
-- included so the planner API pre-fills country-adjusted CAPEX/OPEX for all cities.
-- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline preserved).
--
-- Units are explicit in column names. Monetary values in local currency. -- Units are explicit in column names. Monetary values in local currency.
MODEL ( MODEL (
@@ -125,6 +129,37 @@ SELECT
ELSE 0.2 ELSE 0.2
END AS data_confidence, END AS data_confidence,
COALESCE(cb.price_currency, ctb.price_currency, hf.currency, 'EUR') AS price_currency, COALESCE(cb.price_currency, ctb.price_currency, hf.currency, 'EUR') AS price_currency,
-- Cost override columns (Eurostat PLI + energy prices via dim_countries).
-- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline).
dc.electricity,
dc.heating,
dc.rent_sqm,
dc.insurance,
dc.cleaning,
dc.maintenance,
dc.marketing,
dc.water,
dc.property_tax,
dc.outdoor_rent,
dc.hall_cost_sqm,
dc.foundation_sqm,
dc.land_price_sqm,
dc.hvac,
dc.electrical,
dc.sanitary,
dc.parking,
dc.fitout,
dc.planning,
dc.fire_protection,
dc.floor_prep,
dc.hvac_upgrade,
dc.lighting_upgrade,
dc.outdoor_foundation,
dc.outdoor_site_work,
dc.outdoor_lighting,
dc.outdoor_fencing,
dc.working_capital,
dc.permits_compliance,
CURRENT_DATE AS refreshed_date CURRENT_DATE AS refreshed_date
FROM city_profiles cp FROM city_profiles cp
LEFT JOIN city_benchmarks cb LEFT JOIN city_benchmarks cb
@@ -134,3 +169,5 @@ LEFT JOIN country_benchmarks ctb
ON cp.country_code = ctb.country_code ON cp.country_code = ctb.country_code
LEFT JOIN hardcoded_fallbacks hf LEFT JOIN hardcoded_fallbacks hf
ON cp.country_code = hf.country_code ON cp.country_code = hf.country_code
LEFT JOIN foundation.dim_countries dc
ON cp.country_code = dc.country_code

View File

@@ -4,6 +4,10 @@
-- --
-- Calculator override columns use camelCase to match the DEFAULTS keys in -- Calculator override columns use camelCase to match the DEFAULTS keys in
-- planner/calculator.py, so they are auto-applied as calc pre-fills. -- planner/calculator.py, so they are auto-applied as calc pre-fills.
--
-- Cost override columns come from foundation.dim_countries (Eurostat PLI and energy
-- price indices). NULL = fall through to calculator.py DEFAULTS (safe: auto-mapping
-- filters None). DE always produces NULL overrides — preserves exact DEFAULTS behaviour.
MODEL ( MODEL (
name serving.pseo_city_costs_de, name serving.pseo_city_costs_de,
@@ -44,6 +48,39 @@ SELECT
FLOOR(p.courts_typical) AS "dblCourts", FLOOR(p.courts_typical) AS "dblCourts",
-- 'country' drives currency formatting in the calculator -- 'country' drives currency formatting in the calculator
c.country_code AS "country", c.country_code AS "country",
-- Cost override columns from dim_countries (Eurostat PLI + energy price indices).
-- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline preserved).
-- OPEX overrides
cc.electricity AS "electricity",
cc.heating AS "heating",
cc.rent_sqm AS "rentSqm",
cc.insurance AS "insurance",
cc.cleaning AS "cleaning",
cc.maintenance AS "maintenance",
cc.marketing AS "marketing",
cc.water AS "water",
cc.property_tax AS "propertyTax",
cc.outdoor_rent AS "outdoorRent",
-- CAPEX overrides
cc.hall_cost_sqm AS "hallCostSqm",
cc.foundation_sqm AS "foundationSqm",
cc.land_price_sqm AS "landPriceSqm",
cc.hvac AS "hvac",
cc.electrical AS "electrical",
cc.sanitary AS "sanitary",
cc.parking AS "parking",
cc.fitout AS "fitout",
cc.planning AS "planning",
cc.fire_protection AS "fireProtection",
cc.floor_prep AS "floorPrep",
cc.hvac_upgrade AS "hvacUpgrade",
cc.lighting_upgrade AS "lightingUpgrade",
cc.outdoor_foundation AS "outdoorFoundation",
cc.outdoor_site_work AS "outdoorSiteWork",
cc.outdoor_lighting AS "outdoorLighting",
cc.outdoor_fencing AS "outdoorFencing",
cc.working_capital AS "workingCapital",
cc.permits_compliance AS "permitsCompliance",
CURRENT_DATE AS refreshed_date CURRENT_DATE AS refreshed_date
FROM serving.city_market_profile c FROM serving.city_market_profile c
LEFT JOIN serving.planner_defaults p LEFT JOIN serving.planner_defaults p
@@ -52,6 +89,8 @@ LEFT JOIN serving.planner_defaults p
LEFT JOIN serving.location_opportunity_profile lop LEFT JOIN serving.location_opportunity_profile lop
ON c.country_code = lop.country_code ON c.country_code = lop.country_code
AND c.geoname_id = lop.geoname_id AND c.geoname_id = lop.geoname_id
LEFT JOIN foundation.dim_countries cc
ON c.country_code = cc.country_code
-- Only cities with actual padel presence and at least some rate data -- Only cities with actual padel presence and at least some rate data
WHERE c.padel_venue_count > 0 WHERE c.padel_venue_count > 0
AND (p.rate_peak IS NOT NULL OR c.median_peak_rate IS NOT NULL) AND (p.rate_peak IS NOT NULL OR c.median_peak_rate IS NOT NULL)

View File

@@ -0,0 +1,42 @@
-- Electricity prices for non-household consumers (Eurostat nrg_pc_205).
-- EUR/kWh excluding taxes, band MWH500-1999 (medium-sized commercial consumer).
-- Semi-annual frequency: ref_period is "YYYY-S1" or "YYYY-S2".
--
-- Source: data/landing/eurostat/{year}/{month}/nrg_pc_205.json.gz
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2024-S1", "value": 0.1523}, ...]}
--
-- The glob below reads every {year}/{month} snapshot in the landing zone, so the
-- same (country, period) pair can arrive from more than one file. The final SELECT
-- keeps exactly one row per grain key so the declared grain actually holds.
MODEL (
  name staging.stg_electricity_prices,
  kind FULL,
  cron '@daily',
  grain (country_code, ref_period)
);

WITH source AS (
  -- filename = true exposes the landing path, used only to rank snapshots.
  SELECT
    unnest(rows) AS r,
    filename AS source_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/nrg_pc_205.json.gz',
    auto_detect = true,
    filename = true
  )
),
parsed AS (
  SELECT
    UPPER(TRIM(r.geo_code)) AS geo_code,
    TRIM(r.ref_year) AS ref_period,
    TRY_CAST(r.value AS DOUBLE) AS electricity_eur_kwh,
    source_file
  FROM source
  WHERE r.value IS NOT NULL
),
normalised AS (
  SELECT
    -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
    CASE geo_code
      WHEN 'EL' THEN 'GR'
      WHEN 'UK' THEN 'GB'
      ELSE geo_code
    END AS country_code,
    ref_period,
    electricity_eur_kwh,
    source_file
  FROM parsed
  -- Two-letter codes only; longer aggregate codes (e.g. EU27_2020) are already
  -- excluded by the length check, so only the two-letter aggregates are listed.
  WHERE LENGTH(geo_code) = 2
    AND geo_code NOT IN ('EU', 'EA')
    -- ref_period is part of the grain and must not be NULL.
    AND ref_period IS NOT NULL
    AND electricity_eur_kwh > 0
)
SELECT
  country_code,
  ref_period,
  electricity_eur_kwh
FROM normalised
-- One row per grain key. The greatest landing path is treated as the newest
-- snapshot — assumes {year}/{month} path segments sort lexicographically
-- (zero-padded months); TODO confirm against the extractor.
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY country_code, ref_period
  ORDER BY source_file DESC
) = 1

View File

@@ -0,0 +1,42 @@
-- Gas prices for non-household consumers (Eurostat nrg_pc_203).
-- EUR/GJ excluding taxes, band GJ1000-9999 (medium-sized commercial consumer).
-- Semi-annual frequency: ref_period is "YYYY-S1" or "YYYY-S2".
--
-- Source: data/landing/eurostat/{year}/{month}/nrg_pc_203.json.gz
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2024-S1", "value": 14.23}, ...]}
--
-- The glob below reads every {year}/{month} snapshot in the landing zone, so the
-- same (country, period) pair can arrive from more than one file. The final SELECT
-- keeps exactly one row per grain key so the declared grain actually holds.
MODEL (
  name staging.stg_gas_prices,
  kind FULL,
  cron '@daily',
  grain (country_code, ref_period)
);

WITH source AS (
  -- filename = true exposes the landing path, used only to rank snapshots.
  SELECT
    unnest(rows) AS r,
    filename AS source_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/nrg_pc_203.json.gz',
    auto_detect = true,
    filename = true
  )
),
parsed AS (
  SELECT
    UPPER(TRIM(r.geo_code)) AS geo_code,
    TRIM(r.ref_year) AS ref_period,
    TRY_CAST(r.value AS DOUBLE) AS gas_eur_gj,
    source_file
  FROM source
  WHERE r.value IS NOT NULL
),
normalised AS (
  SELECT
    -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
    CASE geo_code
      WHEN 'EL' THEN 'GR'
      WHEN 'UK' THEN 'GB'
      ELSE geo_code
    END AS country_code,
    ref_period,
    gas_eur_gj,
    source_file
  FROM parsed
  -- Two-letter codes only; longer aggregate codes (e.g. EU27_2020) are already
  -- excluded by the length check, so only the two-letter aggregates are listed.
  WHERE LENGTH(geo_code) = 2
    AND geo_code NOT IN ('EU', 'EA')
    -- ref_period is part of the grain and must not be NULL.
    AND ref_period IS NOT NULL
    AND gas_eur_gj > 0
)
SELECT
  country_code,
  ref_period,
  gas_eur_gj
FROM normalised
-- One row per grain key. The greatest landing path is treated as the newest
-- snapshot — assumes {year}/{month} path segments sort lexicographically
-- (zero-padded months); TODO confirm against the extractor.
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY country_code, ref_period
  ORDER BY source_file DESC
) = 1

View File

@@ -0,0 +1,46 @@
-- Labour cost levels EUR/hour (Eurostat lc_lci_lev).
-- NACE R2 sector N (administrative and support service activities).
-- D1_D2_A_HW structure: wages + non-wage costs, actual hours worked.
-- Annual frequency.
--
-- Stored for future "staffed scenario" calculator variant.
-- Not wired into default calculator overrides (staff=0 is a business assumption).
--
-- Source: data/landing/eurostat/{year}/{month}/lc_lci_lev.json.gz
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2022", "value": 28.4}, ...]}
--
-- The glob below reads every {year}/{month} snapshot in the landing zone, so the
-- same (country, year) pair can arrive from more than one file. The final SELECT
-- keeps exactly one row per grain key so the declared grain actually holds.
MODEL (
  name staging.stg_labour_costs,
  kind FULL,
  cron '@daily',
  grain (country_code, ref_year)
);

WITH source AS (
  -- filename = true exposes the landing path, used only to rank snapshots.
  SELECT
    unnest(rows) AS r,
    filename AS source_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/lc_lci_lev.json.gz',
    auto_detect = true,
    filename = true
  )
),
parsed AS (
  SELECT
    UPPER(TRIM(r.geo_code)) AS geo_code,
    -- TRY_CAST yields NULL for a non-numeric year; filtered out below.
    TRY_CAST(r.ref_year AS INTEGER) AS ref_year,
    TRY_CAST(r.value AS DOUBLE) AS labour_cost_eur_hour,
    source_file
  FROM source
  WHERE r.value IS NOT NULL
),
normalised AS (
  SELECT
    -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
    CASE geo_code
      WHEN 'EL' THEN 'GR'
      WHEN 'UK' THEN 'GB'
      ELSE geo_code
    END AS country_code,
    ref_year,
    labour_cost_eur_hour,
    source_file
  FROM parsed
  -- Two-letter codes only; longer aggregate codes (e.g. EU27_2020) are already
  -- excluded by the length check, so only the two-letter aggregates are listed.
  WHERE LENGTH(geo_code) = 2
    AND geo_code NOT IN ('EU', 'EA')
    -- ref_year is part of the grain and must not be NULL.
    AND ref_year IS NOT NULL
    AND labour_cost_eur_hour > 0
)
SELECT
  country_code,
  ref_year,
  labour_cost_eur_hour
FROM normalised
-- One row per grain key. The greatest landing path is treated as the newest
-- snapshot — assumes {year}/{month} path segments sort lexicographically
-- (zero-padded months); TODO confirm against the extractor.
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY country_code, ref_year
  ORDER BY source_file DESC
) = 1

View File

@@ -0,0 +1,96 @@
-- Price level indices relative to EU27=100 (Eurostat prc_ppp_ind).
-- Five categories, each from a separate landing file (different ppp_cat filters).
-- Annual frequency.
--
-- Categories and what they scale in the calculator:
-- construction — CAPEX: hallCostSqm, foundationSqm, hvac, electrical, sanitary, etc.
-- housing — rentSqm, landPriceSqm, water, outdoorRent
-- services — cleaning, maintenance, marketing
-- misc — insurance
-- government — permitsCompliance, propertyTax
--
-- Sources:
-- data/landing/eurostat/*/*/prc_ppp_ind_construction.json.gz (ppp_cat: A050202)
-- data/landing/eurostat/*/*/prc_ppp_ind_housing.json.gz (ppp_cat: A0104)
-- data/landing/eurostat/*/*/prc_ppp_ind_services.json.gz (ppp_cat: P0201)
-- data/landing/eurostat/*/*/prc_ppp_ind_misc.json.gz (ppp_cat: A0112)
-- data/landing/eurostat/*/*/prc_ppp_ind_government.json.gz (ppp_cat: P0202)
--
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2022", "value": 107.3}, ...]}
--
-- Each glob reads every {year}/{month} snapshot in the landing zone, so the same
-- (country, category, year) can arrive from more than one file. The final SELECT
-- keeps exactly one row per grain key so the declared grain actually holds.
MODEL (
  name staging.stg_price_levels,
  kind FULL,
  cron '@daily',
  grain (country_code, category, ref_year)
);

-- One CTE per category file; filename = true exposes the landing path, which is
-- used only to rank snapshots during de-duplication.
WITH construction_raw AS (
  SELECT unnest(rows) AS r, 'construction' AS category, filename AS source_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_construction.json.gz',
    auto_detect = true,
    filename = true
  )
),
housing_raw AS (
  SELECT unnest(rows) AS r, 'housing' AS category, filename AS source_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_housing.json.gz',
    auto_detect = true,
    filename = true
  )
),
services_raw AS (
  SELECT unnest(rows) AS r, 'services' AS category, filename AS source_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_services.json.gz',
    auto_detect = true,
    filename = true
  )
),
misc_raw AS (
  SELECT unnest(rows) AS r, 'misc' AS category, filename AS source_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_misc.json.gz',
    auto_detect = true,
    filename = true
  )
),
government_raw AS (
  SELECT unnest(rows) AS r, 'government' AS category, filename AS source_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_government.json.gz',
    auto_detect = true,
    filename = true
  )
),
all_raw AS (
  SELECT r, category, source_file FROM construction_raw
  UNION ALL
  SELECT r, category, source_file FROM housing_raw
  UNION ALL
  SELECT r, category, source_file FROM services_raw
  UNION ALL
  SELECT r, category, source_file FROM misc_raw
  UNION ALL
  SELECT r, category, source_file FROM government_raw
),
parsed AS (
  SELECT
    UPPER(TRIM(r.geo_code)) AS geo_code,
    -- TRY_CAST yields NULL for a non-numeric year; filtered out below.
    TRY_CAST(r.ref_year AS INTEGER) AS ref_year,
    TRY_CAST(r.value AS DOUBLE) AS pli,
    category,
    source_file
  FROM all_raw
  WHERE r.value IS NOT NULL
),
normalised AS (
  SELECT
    -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
    CASE geo_code
      WHEN 'EL' THEN 'GR'
      WHEN 'UK' THEN 'GB'
      ELSE geo_code
    END AS country_code,
    category,
    ref_year,
    pli,
    source_file
  FROM parsed
  -- Two-letter codes only; longer aggregate codes (e.g. EU27_2020) are already
  -- excluded by the length check, so only the two-letter aggregates are listed.
  WHERE LENGTH(geo_code) = 2
    AND geo_code NOT IN ('EU', 'EA')
    -- ref_year is part of the grain and must not be NULL.
    AND ref_year IS NOT NULL
    AND pli > 0
)
SELECT
  country_code,
  category,
  ref_year,
  pli
FROM normalised
-- One row per grain key. The greatest landing path is treated as the newest
-- snapshot — assumes {year}/{month} path segments sort lexicographically
-- (zero-padded months); TODO confirm against the extractor.
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY country_code, category, ref_year
  ORDER BY source_file DESC
) = 1