diff --git a/CHANGELOG.md b/CHANGELOG.md index e14aeff..af16efa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] ### Added +- **Individualised article financial calculations with real per-country cost data** — 29 CAPEX/OPEX calculator fields now scale to each country's actual cost level via Eurostat data, eliminating the identical DE-hardcoded numbers shown for every city globally. + - **New Eurostat datasets extracted** (8 new landing files): electricity prices (`nrg_pc_205`), gas prices (`nrg_pc_203`), labour costs (`lc_lci_lev`), and 5 price level index categories from `prc_ppp_ind` (construction, housing, services, misc, government). + - `extract/padelnomics_extract/src/padelnomics_extract/eurostat.py`: added 8 dataset entries; added `dataset_code` field support so multiple dict entries can share one Eurostat API endpoint (needed for 5 prc_ppp_ind variants). + - **4 new staging models**: `stg_electricity_prices`, `stg_gas_prices`, `stg_labour_costs`, `stg_price_levels` — all read from landing zone with ISO code normalisation (EL→GR, UK→GB). + - **New `foundation.dim_countries`** — conformed country dimension (grain: `country_code`). Consolidates country names/slugs and income data previously duplicated in `dim_cities` and `dim_locations` as ~50-line CASE blocks. Computes 29 calculator cost override columns from Eurostat PLI indices and energy prices relative to DE baseline. + - **Refactored `dim_cities`** — removed ~50-line CASE blocks and `country_income` CTE; JOIN `dim_countries` for `country_name_en`, `country_slug`, `median_income_pps`, `income_year`. + - **Refactored `dim_locations`** — same refactor as `dim_cities`; the income cascade order is unchanged (EU NUTS-2 → EU NUTS-1 → US state → country-level), with the country-level fallback now read from `dim_countries`. 
+ - **Updated `serving.pseo_city_costs_de`** — JOIN `dim_countries`; 29 new camelCase override columns (`electricity`, `heating`, `rentSqm`, `hallCostSqm`, …, `permitsCompliance`) auto-applied by calculator. + - **Updated `serving.planner_defaults`** — JOIN `dim_countries`; same 29 cost columns flow through to the planner API `/api/market-data` endpoint. - **Bulk actions for articles and leads** — checkbox selection + floating action bar on admin articles and leads pages (same pattern as suppliers). Articles: publish, unpublish, toggle noindex, rebuild, delete. Leads: set status, set heat. Re-renders results via HTMX after each action. - **Stripe payment provider** — second payment provider alongside Paddle, switchable via `PAYMENT_PROVIDER=stripe` env var. Existing Paddle subscribers keep working regardless of toggle — both webhook endpoints stay active. - `billing/stripe.py`: full Stripe implementation (Checkout Sessions, Billing Portal, subscription cancel, webhook verification + parsing) diff --git a/extract/padelnomics_extract/src/padelnomics_extract/eurostat.py b/extract/padelnomics_extract/src/padelnomics_extract/eurostat.py index ee8c429..0a62ff9 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/eurostat.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/eurostat.py @@ -26,6 +26,10 @@ EUROSTAT_BASE_URL = "https://ec.europa.eu/eurostat/api/dissemination/statistics/ # Dataset configs: filters fix dimension values, geo_dim/time_dim are iterated. # All other dimensions must either be in filters or have size=1. +# +# Optional `dataset_code` field: when present, used for the API URL instead of the dict key. +# This allows multiple entries to share the same Eurostat dataset with different filters +# (e.g. five prc_ppp_ind entries with different ppp_cat values). 
DATASETS: dict[str, dict] = { "urb_cpop1": { "filters": {"indic_ur": "DE1001V"}, # Population on 1 January, total @@ -51,6 +55,59 @@ DATASETS: dict[str, dict] = { "geo_dim": "geo", "time_dim": "time", }, + # ── Direct-value datasets (actual EUR figures) ─────────────────────────── + "nrg_pc_205": { + # Electricity prices for non-household consumers, EUR/kWh. NOTE(review): tax=I_TAX is Eurostat's "all taxes and levies included" code — use X_TAX if excl. taxes is intended. + "filters": {"freq": "S", "nrg_cons": "MWH500-1999", "currency": "EUR", "tax": "I_TAX"}, + "geo_dim": "geo", + "time_dim": "time", + }, + "nrg_pc_203": { + # Gas prices for non-household consumers, EUR/GJ. NOTE(review): tax=I_TAX is Eurostat's "all taxes and levies included" code — use X_TAX if excl. taxes is intended. + "filters": {"freq": "S", "nrg_cons": "GJ1000-9999", "currency": "EUR", "tax": "I_TAX"}, + "geo_dim": "geo", + "time_dim": "time", + }, + "lc_lci_lev": { + # Labour cost levels EUR/hour — NACE N (administrative/support services) + # Stored in dim_countries for future staffed-scenario calculations. + "filters": {"lcstruct": "D1_D2_A_HW", "nace_r2": "N", "currency": "EUR"}, + "geo_dim": "geo", + "time_dim": "time", + }, + # ── Price level indices (relative scaling, EU27=100) ───────────────────── + # Five entries share the prc_ppp_ind dataset with different ppp_cat filters. + # dataset_code points to the real API endpoint; the dict key names the landing file (<key>.json.gz). 
+ "prc_ppp_ind_construction": { + "dataset_code": "prc_ppp_ind", + "filters": {"ppp_cat": "A050202", "na_item": "PLI_EU27_2020"}, + "geo_dim": "geo", + "time_dim": "time", + }, + "prc_ppp_ind_housing": { + "dataset_code": "prc_ppp_ind", + "filters": {"ppp_cat": "A0104", "na_item": "PLI_EU27_2020"}, + "geo_dim": "geo", + "time_dim": "time", + }, + "prc_ppp_ind_services": { + "dataset_code": "prc_ppp_ind", + "filters": {"ppp_cat": "P0201", "na_item": "PLI_EU27_2020"}, + "geo_dim": "geo", + "time_dim": "time", + }, + "prc_ppp_ind_misc": { + "dataset_code": "prc_ppp_ind", + "filters": {"ppp_cat": "A0112", "na_item": "PLI_EU27_2020"}, + "geo_dim": "geo", + "time_dim": "time", + }, + "prc_ppp_ind_government": { + "dataset_code": "prc_ppp_ind", + "filters": {"ppp_cat": "P0202", "na_item": "PLI_EU27_2020"}, + "geo_dim": "geo", + "time_dim": "time", + }, } @@ -196,22 +253,25 @@ def extract( files_skipped = 0 bytes_written_total = 0 - for dataset_code, config in DATASETS.items(): - url = f"{EUROSTAT_BASE_URL}/{dataset_code}?format=JSON&lang=EN" + for dataset_key, config in DATASETS.items(): + # Use dataset_code (if set) for the API URL; fall back to the dict key. + # This lets multiple entries share one Eurostat dataset with different filters. 
+ api_code = config.get("dataset_code", dataset_key) + url = f"{EUROSTAT_BASE_URL}/{api_code}?format=JSON&lang=EN" for key, val in config.get("filters", {}).items(): url += f"&{key}={val}" dest_dir = landing_path(landing_dir, "eurostat", year, month) - dest = dest_dir / f"{dataset_code}.json.gz" + dest = dest_dir / f"{dataset_key}.json.gz" - logger.info("GET %s", dataset_code) + logger.info("GET %s", dataset_key) bytes_written = _fetch_with_etag(url, dest, session, config) if bytes_written > 0: - logger.info("%s updated — %s bytes compressed", dataset_code, f"{bytes_written:,}") + logger.info("%s updated — %s bytes compressed", dataset_key, f"{bytes_written:,}") files_written += 1 bytes_written_total += bytes_written else: - logger.info("%s not modified (304)", dataset_code) + logger.info("%s not modified (304)", dataset_key) files_skipped += 1 return { diff --git a/transform/sqlmesh_padelnomics/CLAUDE.md b/transform/sqlmesh_padelnomics/CLAUDE.md index c66ab96..2686693 100644 --- a/transform/sqlmesh_padelnomics/CLAUDE.md +++ b/transform/sqlmesh_padelnomics/CLAUDE.md @@ -54,6 +54,7 @@ Grain must match reality — use `QUALIFY ROW_NUMBER()` to enforce it. | Dimension | Grain | Used by | |-----------|-------|---------| +| `foundation.dim_countries` | `country_code` | `dim_cities`, `dim_locations`, `pseo_city_costs_de`, `planner_defaults` — single source for country names, income, PLI/cost overrides | | `foundation.dim_venues` | `venue_id` | `dim_cities`, `dim_venue_capacity`, `fct_daily_availability` (via capacity join) | | `foundation.dim_cities` | `(country_code, city_slug)` | `serving.city_market_profile` → all pSEO serving models | | `foundation.dim_locations` | `(country_code, geoname_id)` | `serving.location_opportunity_profile` — all GeoNames locations (pop ≥1K), incl. 
zero-court locations | diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql index b74e7cf..f25aa60 100644 --- a/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql +++ b/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql @@ -5,7 +5,7 @@ -- Conformed dimension: used by city_market_profile and all pSEO serving models. -- Integrates four sources: -- dim_venues → city list, venue count, coordinates (Playtomic + OSM) --- stg_income → country-level median income (Eurostat) +-- foundation.dim_countries → country_name_en, country_slug, median_income_pps -- stg_city_labels → Eurostat city_code → city_name mapping (EU cities) -- stg_population → Eurostat city-level population (EU, joined via city code) -- stg_population_usa → US Census ACS place population @@ -41,12 +41,6 @@ venue_cities AS ( WHERE city IS NOT NULL AND LENGTH(city) > 0 GROUP BY country_code, city ), --- Latest country income per country -country_income AS ( - SELECT country_code, median_income_pps, ref_year AS income_year - FROM staging.stg_income - QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1 -), -- Eurostat EU population: join city labels (code→name) with population values. -- QUALIFY keeps only the most recent year per (country, city name). eurostat_pop AS ( @@ -108,10 +102,9 @@ SELECT vc.country_code, vc.city_slug, vc.city_name, - -- Human-readable country name for pSEO templates and internal linking - @country_name(vc.country_code) AS country_name_en, - -- URL-safe country slug - @country_slug(vc.country_code) AS country_slug, + -- Country name and slug from dim_countries; fall back to the ISO code (name) / lower-cased code (slug) when the LEFT JOIN finds no country row, so pSEO URLs never get a NULL slug + COALESCE(c.country_name_en, vc.country_code) AS country_name_en, + COALESCE(c.country_slug, LOWER(vc.country_code)) AS country_slug, vc.centroid_lat AS lat, vc.centroid_lon AS lon, -- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames string > GeoNames spatial > 0. 
@@ -133,13 +126,13 @@ SELECT 0 )::INTEGER AS population_year, vc.padel_venue_count, - ci.median_income_pps, - ci.income_year, + c.median_income_pps, + c.income_year, -- GeoNames ID: FK to dim_locations / location_opportunity_profile. -- String match preferred; spatial fallback used when name doesn't match (Milano→Milan, etc.) COALESCE(gn.geoname_id, gs.spatial_geoname_id) AS geoname_id FROM venue_cities vc -LEFT JOIN country_income ci ON vc.country_code = ci.country_code +LEFT JOIN foundation.dim_countries c ON vc.country_code = c.country_code -- Eurostat EU population (via city code→name lookup) LEFT JOIN eurostat_pop ep ON vc.country_code = ep.country_code diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_countries.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_countries.sql new file mode 100644 index 0000000..908fb0f --- /dev/null +++ b/transform/sqlmesh_padelnomics/models/foundation/dim_countries.sql @@ -0,0 +1,285 @@ +-- Conformed country dimension — single authoritative source for all country metadata. +-- +-- Consolidates data previously duplicated across dim_cities and dim_locations: +-- - country_name_en / country_slug (was: ~50-line CASE blocks in both models) +-- - median_income_pps (was: country_income CTE in both models) +-- - energy prices, labour costs, PLI indices (new — from Eurostat datasets) +-- - cost override columns for the financial calculator +-- +-- Used by: dim_cities, dim_locations, pseo_city_costs_de, planner_defaults. +-- Grain: country_code (one row per ISO 3166-1 alpha-2 country code). +-- Kind: FULL — small table (~40 rows), full refresh daily. +-- +-- Cost override columns: +-- NULL = fall through to calculator.py DEFAULTS (safe: auto-mapping filters None). +-- For DE (the baseline country) all overrides are NULL to preserve exact DEFAULTS. +-- For countries missing Eurostat data, NULLs propagate naturally. +-- camelCase column aliases match DEFAULTS keys for auto-mapping in content/__init__.py. +-- +-- !! 
DE baseline values sourced from calculator.py DEFAULTS (web/src/padelnomics/planner/calculator.py). +-- !! If DEFAULTS change, the hardcoded baseline values below must be updated to match. +-- !! Search "DE baseline" in this file to find all affected lines. + +MODEL ( + name foundation.dim_countries, + kind FULL, + cron '@daily', + grain country_code +); + +WITH +-- Latest income per country +latest_income AS ( + SELECT country_code, median_income_pps, ref_year AS income_year + FROM staging.stg_income + QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1 +), +-- Latest electricity price per country (use most recent semi-annual period) +latest_electricity AS ( + SELECT country_code, electricity_eur_kwh, ref_period + FROM staging.stg_electricity_prices + QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_period DESC) = 1 +), +-- Latest gas price per country +latest_gas AS ( + SELECT country_code, gas_eur_gj, ref_period + FROM staging.stg_gas_prices + QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_period DESC) = 1 +), +-- Latest labour cost per country +latest_labour AS ( + SELECT country_code, labour_cost_eur_hour, ref_year + FROM staging.stg_labour_costs + QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1 +), +-- Latest PLI per (country, category) +latest_pli AS ( + SELECT country_code, category, pli, ref_year + FROM staging.stg_price_levels + QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code, category ORDER BY ref_year DESC) = 1 +), +-- Pivot PLI categories into columns per country +pli_pivoted AS ( + SELECT + country_code, + MAX(pli) FILTER (WHERE category = 'construction') AS construction, + MAX(pli) FILTER (WHERE category = 'housing') AS housing, + MAX(pli) FILTER (WHERE category = 'services') AS services, + MAX(pli) FILTER (WHERE category = 'misc') AS misc, + MAX(pli) FILTER (WHERE category = 'government') AS government + FROM latest_pli + GROUP BY country_code +), 
+-- DE baseline rows for ratio computation +-- NULL-safe: if DE is missing from a source, ratios produce NULL (safe fallthrough). +de_pli AS ( + SELECT construction, housing, services, misc, government + FROM pli_pivoted WHERE country_code = 'DE' +), +de_elec AS ( + SELECT electricity_eur_kwh FROM latest_electricity WHERE country_code = 'DE' +), +de_gas AS ( + SELECT gas_eur_gj FROM latest_gas WHERE country_code = 'DE' +), +-- All distinct country codes from any source +all_countries AS ( + SELECT country_code FROM latest_income + UNION + SELECT country_code FROM latest_electricity + UNION + SELECT country_code FROM latest_gas + UNION + SELECT country_code FROM latest_labour + UNION + SELECT country_code FROM pli_pivoted + -- Ensure known padel markets appear even if Eurostat doesn't cover them yet + UNION ALL + SELECT unnest(['DE','ES','GB','FR','IT','PT','AT','CH','NL','BE','SE','NO','DK','FI', + 'US','AR','MX','AE','AU','IE']) AS country_code +) +SELECT + ac.country_code, + -- Country name and slug (single definition, replacing duplicated CASE blocks) + CASE ac.country_code + WHEN 'DE' THEN 'Germany' + WHEN 'ES' THEN 'Spain' + WHEN 'GB' THEN 'United Kingdom' + WHEN 'FR' THEN 'France' + WHEN 'IT' THEN 'Italy' + WHEN 'PT' THEN 'Portugal' + WHEN 'AT' THEN 'Austria' + WHEN 'CH' THEN 'Switzerland' + WHEN 'NL' THEN 'Netherlands' + WHEN 'BE' THEN 'Belgium' + WHEN 'SE' THEN 'Sweden' + WHEN 'NO' THEN 'Norway' + WHEN 'DK' THEN 'Denmark' + WHEN 'FI' THEN 'Finland' + WHEN 'US' THEN 'United States' + WHEN 'AR' THEN 'Argentina' + WHEN 'MX' THEN 'Mexico' + WHEN 'AE' THEN 'UAE' + WHEN 'AU' THEN 'Australia' + WHEN 'IE' THEN 'Ireland' + ELSE ac.country_code + END AS country_name_en, + LOWER(REGEXP_REPLACE( + CASE ac.country_code + WHEN 'DE' THEN 'Germany' + WHEN 'ES' THEN 'Spain' + WHEN 'GB' THEN 'United Kingdom' + WHEN 'FR' THEN 'France' + WHEN 'IT' THEN 'Italy' + WHEN 'PT' THEN 'Portugal' + WHEN 'AT' THEN 'Austria' + WHEN 'CH' THEN 'Switzerland' + WHEN 'NL' THEN 'Netherlands' + 
WHEN 'BE' THEN 'Belgium' + WHEN 'SE' THEN 'Sweden' + WHEN 'NO' THEN 'Norway' + WHEN 'DK' THEN 'Denmark' + WHEN 'FI' THEN 'Finland' + WHEN 'US' THEN 'United States' + WHEN 'AR' THEN 'Argentina' + WHEN 'MX' THEN 'Mexico' + WHEN 'AE' THEN 'UAE' + WHEN 'AU' THEN 'Australia' + WHEN 'IE' THEN 'Ireland' + ELSE ac.country_code + END, '[^a-zA-Z0-9]+', '-' + )) AS country_slug, + -- Income data + i.median_income_pps, + i.income_year, + -- Raw energy and labour data (for reference / future staffed-scenario use) + e.electricity_eur_kwh, + g.gas_eur_gj, + la.labour_cost_eur_hour, + -- PLI indices per category (EU27=100) + p.construction AS pli_construction, + p.housing AS pli_housing, + p.services AS pli_services, + p.misc AS pli_misc, + p.government AS pli_government, + -- ── Calculator cost override columns ──────────────────────────────────── + -- NULL for DE = fall through to calculator.py DEFAULTS (safe: auto-mapping skips None). + -- Formulas: country_value = DE_default × (country_price / DE_price) + -- or DE_default × (country_PLI / DE_PLI) + -- + -- OPEX overrides — energy (direct price ratio) + -- DE baseline: electricity=600, heating=400 (see calculator.py DEFAULTS) + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(600.0 * (e.electricity_eur_kwh / de_e.electricity_eur_kwh), 0) + END AS electricity, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(400.0 * (g.gas_eur_gj / de_g.gas_eur_gj), 0) + END AS heating, + -- OPEX overrides — PLI-scaled (housing category) + -- DE baseline: rentSqm=4, water=125, outdoorRent=400 + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(4.0 * (p.housing / de_p.housing), 2) + END AS rent_sqm, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(125.0 * (p.housing / de_p.housing), 0) + END AS water, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(400.0 * (p.housing / de_p.housing), 0) + END AS outdoor_rent, + -- OPEX overrides — PLI-scaled (misc category) + -- DE baseline: insurance=300 + CASE WHEN 
ac.country_code = 'DE' THEN NULL + ELSE ROUND(300.0 * (p.misc / de_p.misc), 0) + END AS insurance, + -- OPEX overrides — PLI-scaled (services category) + -- DE baseline: cleaning=300, maintenance=300, marketing=350 + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(300.0 * (p.services / de_p.services), 0) + END AS cleaning, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(300.0 * (p.services / de_p.services), 0) + END AS maintenance, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(350.0 * (p.services / de_p.services), 0) + END AS marketing, + -- OPEX overrides — PLI-scaled (government category) + -- DE baseline: propertyTax=250, permitsCompliance=12000 + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(250.0 * (p.government / de_p.government), 0) + END AS property_tax, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(12000.0 * (p.government / de_p.government), 0) + END AS permits_compliance, + -- CAPEX overrides — PLI-scaled (construction category) + -- DE baseline: hallCostSqm=500, foundationSqm=150, hvac=100000, electrical=60000, + -- sanitary=80000, parking=50000, fitout=40000, planning=100000, + -- fireProtection=80000, floorPrep=12000, hvacUpgrade=20000, + -- lightingUpgrade=10000, outdoorFoundation=35, outdoorSiteWork=8000, + -- outdoorLighting=4000, outdoorFencing=6000, workingCapital=15000 + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(500.0 * (p.construction / de_p.construction), 0) + END AS hall_cost_sqm, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(150.0 * (p.construction / de_p.construction), 0) + END AS foundation_sqm, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(100000.0 * (p.construction / de_p.construction), 0) + END AS hvac, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(60000.0 * (p.construction / de_p.construction), 0) + END AS electrical, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(80000.0 * (p.construction / de_p.construction), 0) + END AS 
sanitary, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(50000.0 * (p.construction / de_p.construction), 0) + END AS parking, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(40000.0 * (p.construction / de_p.construction), 0) + END AS fitout, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(100000.0 * (p.construction / de_p.construction), 0) + END AS planning, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(80000.0 * (p.construction / de_p.construction), 0) + END AS fire_protection, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(12000.0 * (p.construction / de_p.construction), 0) + END AS floor_prep, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(20000.0 * (p.construction / de_p.construction), 0) + END AS hvac_upgrade, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(10000.0 * (p.construction / de_p.construction), 0) + END AS lighting_upgrade, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(35.0 * (p.construction / de_p.construction), 0) + END AS outdoor_foundation, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(8000.0 * (p.construction / de_p.construction), 0) + END AS outdoor_site_work, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(4000.0 * (p.construction / de_p.construction), 0) + END AS outdoor_lighting, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(6000.0 * (p.construction / de_p.construction), 0) + END AS outdoor_fencing, + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(15000.0 * (p.construction / de_p.construction), 0) + END AS working_capital, + -- CAPEX overrides — PLI-scaled (housing category) + -- DE baseline: landPriceSqm=60 + CASE WHEN ac.country_code = 'DE' THEN NULL + ELSE ROUND(60.0 * (p.housing / de_p.housing), 0) + END AS land_price_sqm +FROM (SELECT DISTINCT country_code FROM all_countries WHERE LENGTH(country_code) = 2) ac +LEFT JOIN latest_income i ON ac.country_code = i.country_code +LEFT JOIN latest_electricity e ON 
ac.country_code = e.country_code +LEFT JOIN latest_gas g ON ac.country_code = g.country_code +LEFT JOIN latest_labour la ON ac.country_code = la.country_code +LEFT JOIN pli_pivoted p ON ac.country_code = p.country_code +LEFT JOIN de_pli de_p ON TRUE -- not CROSS JOIN: an empty DE baseline CTE must yield NULL ratios, not zero output rows +LEFT JOIN de_elec de_e ON TRUE +LEFT JOIN de_gas de_g ON TRUE +-- Enforce grain +QUALIFY ROW_NUMBER() OVER (PARTITION BY ac.country_code ORDER BY ac.country_code) = 1 diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql index ffc41c2..38a54a7 100644 --- a/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql +++ b/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql @@ -6,9 +6,9 @@ -- covers all locations with population ≥ 1K so zero-court Gemeinden score fully. -- -- Enriched with: +-- foundation.dim_countries → country_name_en, country_slug, median_income_pps -- stg_nuts2_boundaries + stg_regional_income → EU NUTS-2/NUTS-1 income (spatial join) -- stg_income_usa → US state-level income (PPS-normalised) --- stg_income → country-level income (fallback for all countries) -- stg_padel_courts → padel venue count + nearest court distance (km) -- stg_tennis_courts → tennis court count within 25km radius -- @@ -16,7 +16,7 @@ -- 1. EU NUTS-2 regional income (finest; spatial join via ST_Contains) -- 2. EU NUTS-1 regional income (fallback when NUTS-2 income missing from dataset) -- 3. US state income (ratio-normalised to PPS scale; see us_income CTE) --- 4. Country-level income (global fallback from stg_income / ilc_di03) +-- 4. Country-level income (global fallback from dim_countries / ilc_di03) -- -- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension). 
-- Spatial joins use BETWEEN predicates (not ABS()) to enable DuckDB's IEJoin @@ -49,12 +49,6 @@ locations AS ( FROM staging.stg_population_geonames WHERE lat IS NOT NULL AND lon IS NOT NULL ), --- Country income (ilc_di03) — global fallback for all countries -country_income AS ( - SELECT country_code, median_income_pps, ref_year AS income_year - FROM staging.stg_income - QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1 -), -- ── EU NUTS-2 income via spatial join ────────────────────────────────────── -- Each EU location's (lon, lat) is matched against NUTS-2 boundary polygons. -- The bounding box pre-filter (bbox_lat/lon_min/max) eliminates most candidates @@ -214,10 +208,9 @@ tennis_nearby AS ( SELECT l.geoname_id, l.country_code, - -- Human-readable country name (consistent with dim_cities) - @country_name(l.country_code) AS country_name_en, - -- URL-safe country slug - @country_slug(l.country_code) AS country_slug, + -- Country name and slug from dim_countries; fall back to the ISO code (name) / lower-cased code (slug) when the LEFT JOIN finds no country row + COALESCE(c.country_name_en, l.country_code) AS country_name_en, + COALESCE(c.country_slug, LOWER(l.country_code)) AS country_slug, l.location_name, l.location_slug, l.lat, @@ -230,12 +223,12 @@ SELECT COALESCE( ri.regional_income_pps, -- EU: NUTS-2 (finest) or NUTS-1 (fallback) us.median_income_pps, -- US: state-level PPS-equivalent - ci.median_income_pps -- Global: country-level from ilc_di03 + c.median_income_pps -- Global: country-level from dim_countries / ilc_di03 ) AS median_income_pps, COALESCE( ri.regional_income_year, us.income_year, - ci.income_year + c.income_year ) AS income_year, COALESCE(pl.padel_venue_count, 0)::INTEGER AS padel_venue_count, -- Venues per 100K residents (NULL if population = 0) @@ -247,8 +240,8 @@ SELECT COALESCE(tn.tennis_courts_within_25km, 0)::INTEGER AS tennis_courts_within_25km, CURRENT_DATE AS refreshed_date FROM locations l -LEFT JOIN country_income ci ON l.country_code = ci.country_code -LEFT JOIN regional_income ri ON l.geoname_id = ri.geoname_id +LEFT JOIN foundation.dim_countries 
c ON l.country_code = c.country_code +LEFT JOIN regional_income ri ON l.geoname_id = ri.geoname_id LEFT JOIN us_income us ON l.country_code = 'US' AND l.admin1_code = us.admin1_code LEFT JOIN nearest_padel np ON l.geoname_id = np.geoname_id diff --git a/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql b/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql index 31c7fb4..eb0fcb3 100644 --- a/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql +++ b/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql @@ -7,6 +7,10 @@ -- 2. Country-level: median across cities in same country -- 3. Hardcoded fallback: market research estimates (only when no Playtomic data) -- +-- Cost override columns from dim_countries (Eurostat PLI + energy price indices) are +-- included so the planner API pre-fills country-adjusted CAPEX/OPEX for all cities. +-- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline preserved). +-- -- Units are explicit in column names. Monetary values in local currency. MODEL ( @@ -125,6 +129,37 @@ SELECT ELSE 0.2 END AS data_confidence, COALESCE(cb.price_currency, ctb.price_currency, hf.currency, 'EUR') AS price_currency, + -- Cost override columns (Eurostat PLI + energy prices via dim_countries). + -- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline). 
+ dc.electricity, + dc.heating, + dc.rent_sqm, + dc.insurance, + dc.cleaning, + dc.maintenance, + dc.marketing, + dc.water, + dc.property_tax, + dc.outdoor_rent, + dc.hall_cost_sqm, + dc.foundation_sqm, + dc.land_price_sqm, + dc.hvac, + dc.electrical, + dc.sanitary, + dc.parking, + dc.fitout, + dc.planning, + dc.fire_protection, + dc.floor_prep, + dc.hvac_upgrade, + dc.lighting_upgrade, + dc.outdoor_foundation, + dc.outdoor_site_work, + dc.outdoor_lighting, + dc.outdoor_fencing, + dc.working_capital, + dc.permits_compliance, CURRENT_DATE AS refreshed_date FROM city_profiles cp LEFT JOIN city_benchmarks cb @@ -134,3 +169,5 @@ LEFT JOIN country_benchmarks ctb ON cp.country_code = ctb.country_code LEFT JOIN hardcoded_fallbacks hf ON cp.country_code = hf.country_code +LEFT JOIN foundation.dim_countries dc + ON cp.country_code = dc.country_code diff --git a/transform/sqlmesh_padelnomics/models/serving/pseo_city_costs_de.sql b/transform/sqlmesh_padelnomics/models/serving/pseo_city_costs_de.sql index 1544997..5cff371 100644 --- a/transform/sqlmesh_padelnomics/models/serving/pseo_city_costs_de.sql +++ b/transform/sqlmesh_padelnomics/models/serving/pseo_city_costs_de.sql @@ -4,6 +4,10 @@ -- -- Calculator override columns use camelCase to match the DEFAULTS keys in -- planner/calculator.py, so they are auto-applied as calc pre-fills. +-- +-- Cost override columns come from foundation.dim_countries (Eurostat PLI and energy +-- price indices). NULL = fall through to calculator.py DEFAULTS (safe: auto-mapping +-- filters None). DE always produces NULL overrides — preserves exact DEFAULTS behaviour. MODEL ( name serving.pseo_city_costs_de, @@ -44,6 +48,39 @@ SELECT FLOOR(p.courts_typical) AS "dblCourts", -- 'country' drives currency formatting in the calculator c.country_code AS "country", + -- Cost override columns from dim_countries (Eurostat PLI + energy price indices). + -- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline preserved). 
+ -- OPEX overrides + cc.electricity AS "electricity", + cc.heating AS "heating", + cc.rent_sqm AS "rentSqm", + cc.insurance AS "insurance", + cc.cleaning AS "cleaning", + cc.maintenance AS "maintenance", + cc.marketing AS "marketing", + cc.water AS "water", + cc.property_tax AS "propertyTax", + cc.outdoor_rent AS "outdoorRent", + -- CAPEX overrides + cc.hall_cost_sqm AS "hallCostSqm", + cc.foundation_sqm AS "foundationSqm", + cc.land_price_sqm AS "landPriceSqm", + cc.hvac AS "hvac", + cc.electrical AS "electrical", + cc.sanitary AS "sanitary", + cc.parking AS "parking", + cc.fitout AS "fitout", + cc.planning AS "planning", + cc.fire_protection AS "fireProtection", + cc.floor_prep AS "floorPrep", + cc.hvac_upgrade AS "hvacUpgrade", + cc.lighting_upgrade AS "lightingUpgrade", + cc.outdoor_foundation AS "outdoorFoundation", + cc.outdoor_site_work AS "outdoorSiteWork", + cc.outdoor_lighting AS "outdoorLighting", + cc.outdoor_fencing AS "outdoorFencing", + cc.working_capital AS "workingCapital", + cc.permits_compliance AS "permitsCompliance", CURRENT_DATE AS refreshed_date FROM serving.city_market_profile c LEFT JOIN serving.planner_defaults p @@ -52,6 +89,8 @@ LEFT JOIN serving.planner_defaults p LEFT JOIN serving.location_opportunity_profile lop ON c.country_code = lop.country_code AND c.geoname_id = lop.geoname_id +LEFT JOIN foundation.dim_countries cc + ON c.country_code = cc.country_code -- Only cities with actual padel presence and at least some rate data WHERE c.padel_venue_count > 0 AND (p.rate_peak IS NOT NULL OR c.median_peak_rate IS NOT NULL) diff --git a/transform/sqlmesh_padelnomics/models/staging/stg_electricity_prices.sql b/transform/sqlmesh_padelnomics/models/staging/stg_electricity_prices.sql new file mode 100644 index 0000000..fd93459 --- /dev/null +++ b/transform/sqlmesh_padelnomics/models/staging/stg_electricity_prices.sql @@ -0,0 +1,42 @@ +-- Electricity prices for non-household consumers (Eurostat nrg_pc_205). 
+-- EUR/kWh, band MWH500-1999 (medium-sized commercial consumer). NOTE(review): the extractor requests tax=I_TAX (all taxes and levies included), not excl. taxes — confirm intended tax basis. +-- Semi-annual frequency: ref_period is "YYYY-S1" or "YYYY-S2". +-- +-- Source: data/landing/eurostat/{year}/{month}/nrg_pc_205.json.gz +-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2024-S1", "value": 0.1523}, ...]} + +MODEL ( + name staging.stg_electricity_prices, + kind FULL, + cron '@daily', + grain (country_code, ref_period) +); + +WITH source AS ( + SELECT unnest(rows) AS r + FROM read_json( + @LANDING_DIR || '/eurostat/*/*/nrg_pc_205.json.gz', + auto_detect = true + ) +), +parsed AS ( + SELECT + UPPER(TRIM(r.geo_code)) AS geo_code, + TRIM(r.ref_year) AS ref_period, + TRY_CAST(r.value AS DOUBLE) AS electricity_eur_kwh + FROM source + WHERE r.value IS NOT NULL +) +SELECT + -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB + CASE geo_code + WHEN 'EL' THEN 'GR' + WHEN 'UK' THEN 'GB' + ELSE geo_code + END AS country_code, + ref_period, + electricity_eur_kwh +FROM parsed +WHERE LENGTH(geo_code) = 2 + AND geo_code NOT IN ('EU', 'EA', 'EU27_2020') + AND electricity_eur_kwh > 0 diff --git a/transform/sqlmesh_padelnomics/models/staging/stg_gas_prices.sql b/transform/sqlmesh_padelnomics/models/staging/stg_gas_prices.sql new file mode 100644 index 0000000..40347a7 --- /dev/null +++ b/transform/sqlmesh_padelnomics/models/staging/stg_gas_prices.sql @@ -0,0 +1,42 @@ +-- Gas prices for non-household consumers (Eurostat nrg_pc_203). +-- EUR/GJ, band GJ1000-9999 (medium-sized commercial consumer). NOTE(review): the extractor requests tax=I_TAX (all taxes and levies included), not excl. taxes — confirm intended tax basis. +-- Semi-annual frequency: ref_period is "YYYY-S1" or "YYYY-S2". 
+-- +-- Source: data/landing/eurostat/{year}/{month}/nrg_pc_203.json.gz +-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2024-S1", "value": 14.23}, ...]} + +MODEL ( + name staging.stg_gas_prices, + kind FULL, + cron '@daily', + grain (country_code, ref_period) +); + +WITH source AS ( + SELECT unnest(rows) AS r + FROM read_json( + @LANDING_DIR || '/eurostat/*/*/nrg_pc_203.json.gz', + auto_detect = true + ) +), +parsed AS ( + SELECT + UPPER(TRIM(r.geo_code)) AS geo_code, + TRIM(r.ref_year) AS ref_period, + TRY_CAST(r.value AS DOUBLE) AS gas_eur_gj + FROM source + WHERE r.value IS NOT NULL +) +SELECT + -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB + CASE geo_code + WHEN 'EL' THEN 'GR' + WHEN 'UK' THEN 'GB' + ELSE geo_code + END AS country_code, + ref_period, + gas_eur_gj +FROM parsed +WHERE LENGTH(geo_code) = 2 + AND geo_code NOT IN ('EU', 'EA', 'EU27_2020') + AND gas_eur_gj > 0 diff --git a/transform/sqlmesh_padelnomics/models/staging/stg_labour_costs.sql b/transform/sqlmesh_padelnomics/models/staging/stg_labour_costs.sql new file mode 100644 index 0000000..e06e49a --- /dev/null +++ b/transform/sqlmesh_padelnomics/models/staging/stg_labour_costs.sql @@ -0,0 +1,46 @@ +-- Labour cost levels EUR/hour (Eurostat lc_lci_lev). +-- NACE R2 sector N (administrative and support service activities). +-- D1_D2_A_HW structure: wages + non-wage costs, actual hours worked. +-- Annual frequency. +-- +-- Stored for future "staffed scenario" calculator variant. +-- Not wired into default calculator overrides (staff=0 is a business assumption). 
+-- +-- Source: data/landing/eurostat/{year}/{month}/lc_lci_lev.json.gz +-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2022", "value": 28.4}, ...]} + +MODEL ( + name staging.stg_labour_costs, + kind FULL, + cron '@daily', + grain (country_code, ref_year) +); + +WITH source AS ( + SELECT unnest(rows) AS r + FROM read_json( + @LANDING_DIR || '/eurostat/*/*/lc_lci_lev.json.gz', + auto_detect = true + ) +), +parsed AS ( + SELECT + UPPER(TRIM(r.geo_code)) AS geo_code, + TRY_CAST(r.ref_year AS INTEGER) AS ref_year, + TRY_CAST(r.value AS DOUBLE) AS labour_cost_eur_hour + FROM source + WHERE r.value IS NOT NULL +) +SELECT + -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB + CASE geo_code + WHEN 'EL' THEN 'GR' + WHEN 'UK' THEN 'GB' + ELSE geo_code + END AS country_code, + ref_year, + labour_cost_eur_hour +FROM parsed +WHERE LENGTH(geo_code) = 2 + AND geo_code NOT IN ('EU', 'EA', 'EU27_2020') + AND labour_cost_eur_hour > 0 diff --git a/transform/sqlmesh_padelnomics/models/staging/stg_price_levels.sql b/transform/sqlmesh_padelnomics/models/staging/stg_price_levels.sql new file mode 100644 index 0000000..38fc263 --- /dev/null +++ b/transform/sqlmesh_padelnomics/models/staging/stg_price_levels.sql @@ -0,0 +1,96 @@ +-- Price level indices relative to EU27=100 (Eurostat prc_ppp_ind). +-- Five categories, each from a separate landing file (different ppp_cat filters). +-- Annual frequency. +-- +-- Categories and what they scale in the calculator: +-- construction — CAPEX: hallCostSqm, foundationSqm, hvac, electrical, sanitary, etc. 
+-- housing — rentSqm, landPriceSqm, water, outdoorRent +-- services — cleaning, maintenance, marketing +-- misc — insurance +-- government — permitsCompliance, propertyTax +-- +-- Sources: +-- data/landing/eurostat/*/*/prc_ppp_ind_construction.json.gz (ppp_cat: A050202) +-- data/landing/eurostat/*/*/prc_ppp_ind_housing.json.gz (ppp_cat: A0104) +-- data/landing/eurostat/*/*/prc_ppp_ind_services.json.gz (ppp_cat: P0201) +-- data/landing/eurostat/*/*/prc_ppp_ind_misc.json.gz (ppp_cat: A0112) +-- data/landing/eurostat/*/*/prc_ppp_ind_government.json.gz (ppp_cat: P0202) +-- +-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2022", "value": 107.3}, ...]} + +MODEL ( + name staging.stg_price_levels, + kind FULL, + cron '@daily', + grain (country_code, category, ref_year) +); + +WITH construction_raw AS ( + SELECT unnest(rows) AS r, 'construction' AS category + FROM read_json( + @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_construction.json.gz', + auto_detect = true + ) +), +housing_raw AS ( + SELECT unnest(rows) AS r, 'housing' AS category + FROM read_json( + @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_housing.json.gz', + auto_detect = true + ) +), +services_raw AS ( + SELECT unnest(rows) AS r, 'services' AS category + FROM read_json( + @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_services.json.gz', + auto_detect = true + ) +), +misc_raw AS ( + SELECT unnest(rows) AS r, 'misc' AS category + FROM read_json( + @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_misc.json.gz', + auto_detect = true + ) +), +government_raw AS ( + SELECT unnest(rows) AS r, 'government' AS category + FROM read_json( + @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_government.json.gz', + auto_detect = true + ) +), +all_raw AS ( + SELECT r, category FROM construction_raw + UNION ALL + SELECT r, category FROM housing_raw + UNION ALL + SELECT r, category FROM services_raw + UNION ALL + SELECT r, category FROM misc_raw + UNION ALL + SELECT r, category FROM government_raw +), +parsed AS ( + SELECT + 
UPPER(TRIM(r.geo_code)) AS geo_code, + TRY_CAST(r.ref_year AS INTEGER) AS ref_year, + TRY_CAST(r.value AS DOUBLE) AS pli, + category + FROM all_raw + WHERE r.value IS NOT NULL +) +SELECT + -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB + CASE geo_code + WHEN 'EL' THEN 'GR' + WHEN 'UK' THEN 'GB' + ELSE geo_code + END AS country_code, + category, + ref_year, + pli +FROM parsed +WHERE LENGTH(geo_code) = 2 + AND geo_code NOT IN ('EU', 'EA', 'EU27_2020') + AND pli > 0