merge(worktree): individualise article costs with per-country Eurostat data + tiered proxy tenant work
# Conflicts: # CHANGELOG.md # transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql # transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql
This commit is contained in:
@@ -7,6 +7,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
- **Individualised article financial calculations with real per-country cost data** — ~30 CAPEX/OPEX calculator fields now scale to each country's actual cost level via Eurostat data, eliminating the identical DE-hardcoded numbers shown for every city globally.
|
||||
- **New Eurostat datasets extracted** (8 new landing files): electricity prices (`nrg_pc_205`), gas prices (`nrg_pc_203`), labour costs (`lc_lci_lev`), and 5 price level index categories from `prc_ppp_ind` (construction, housing, services, misc, government).
|
||||
- `extract/padelnomics_extract/src/padelnomics_extract/eurostat.py`: added 8 dataset entries; added `dataset_code` field support so multiple dict entries can share one Eurostat API endpoint (needed for 5 prc_ppp_ind variants).
|
||||
- **4 new staging models**: `stg_electricity_prices`, `stg_gas_prices`, `stg_labour_costs`, `stg_price_levels` — all read from landing zone with ISO code normalisation (EL→GR, UK→GB).
|
||||
- **New `foundation.dim_countries`** — conformed country dimension (grain: `country_code`). Consolidates country names/slugs and income data previously duplicated in `dim_cities` and `dim_locations` as ~50-line CASE blocks. Computes ~29 calculator cost override columns from Eurostat PLI indices and energy prices relative to DE baseline.
|
||||
- **Refactored `dim_cities`** — removed ~50-line CASE blocks and `country_income` CTE; JOIN `dim_countries` for `country_name_en`, `country_slug`, `median_income_pps`, `income_year`.
|
||||
- **Refactored `dim_locations`** — same refactor as `dim_cities`; the income cascade is unchanged (EU NUTS-2 → US state → country level), with the country-level fallback now read from `dim_countries`.
|
||||
- **Updated `serving.pseo_city_costs_de`** — JOIN `dim_countries`; 29 new camelCase override columns (`electricity`, `heating`, `rentSqm`, `hallCostSqm`, …, `permitsCompliance`) auto-applied by calculator.
|
||||
- **Updated `serving.planner_defaults`** — JOIN `dim_countries`; same 29 cost columns flow through to the planner API `/api/market-data` endpoint.
|
||||
- **Bulk actions for articles and leads** — checkbox selection + floating action bar on admin articles and leads pages (same pattern as suppliers). Articles: publish, unpublish, toggle noindex, rebuild, delete. Leads: set status, set heat. Re-renders results via HTMX after each action.
|
||||
- **Stripe payment provider** — second payment provider alongside Paddle, switchable via `PAYMENT_PROVIDER=stripe` env var. Existing Paddle subscribers keep working regardless of toggle — both webhook endpoints stay active.
|
||||
- `billing/stripe.py`: full Stripe implementation (Checkout Sessions, Billing Portal, subscription cancel, webhook verification + parsing)
|
||||
|
||||
@@ -26,6 +26,10 @@ EUROSTAT_BASE_URL = "https://ec.europa.eu/eurostat/api/dissemination/statistics/
|
||||
|
||||
# Dataset configs: filters fix dimension values, geo_dim/time_dim are iterated.
|
||||
# All other dimensions must either be in filters or have size=1.
|
||||
#
|
||||
# Optional `dataset_code` field: when present, used for the API URL instead of the dict key.
|
||||
# This allows multiple entries to share the same Eurostat dataset with different filters
|
||||
# (e.g. five prc_ppp_ind entries with different ppp_cat values).
|
||||
DATASETS: dict[str, dict] = {
|
||||
"urb_cpop1": {
|
||||
"filters": {"indic_ur": "DE1001V"}, # Population on 1 January, total
|
||||
@@ -51,6 +55,59 @@ DATASETS: dict[str, dict] = {
|
||||
"geo_dim": "geo",
|
||||
"time_dim": "time",
|
||||
},
|
||||
# ── Direct-value datasets (actual EUR figures) ───────────────────────────
|
||||
"nrg_pc_205": {
|
||||
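# Electricity prices for non-household consumers, EUR/kWh.
# NOTE(review): the filter below requests tax=I_TAX, which in Eurostat's coding means
# "all taxes and levies included"; use X_TAX if tax-exclusive prices are intended.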
|
||||
"filters": {"freq": "S", "nrg_cons": "MWH500-1999", "currency": "EUR", "tax": "I_TAX"},
|
||||
"geo_dim": "geo",
|
||||
"time_dim": "time",
|
||||
},
|
||||
"nrg_pc_203": {
|
||||
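# Gas prices for non-household consumers, EUR/GJ.
# NOTE(review): the filter below requests tax=I_TAX, which in Eurostat's coding means
# "all taxes and levies included"; use X_TAX if tax-exclusive prices are intended.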
|
||||
"filters": {"freq": "S", "nrg_cons": "GJ1000-9999", "currency": "EUR", "tax": "I_TAX"},
|
||||
"geo_dim": "geo",
|
||||
"time_dim": "time",
|
||||
},
|
||||
"lc_lci_lev": {
|
||||
# Labour cost levels EUR/hour — NACE N (administrative/support services)
|
||||
# Stored in dim_countries for future staffed-scenario calculations.
|
||||
"filters": {"lcstruct": "D1_D2_A_HW", "nace_r2": "N", "currency": "EUR"},
|
||||
"geo_dim": "geo",
|
||||
"time_dim": "time",
|
||||
},
|
||||
# ── Price level indices (relative scaling, EU27=100) ─────────────────────
|
||||
# Five entries share the prc_ppp_ind dataset with different ppp_cat filters.
|
||||
# dataset_code points to the real API endpoint; the dict key is the landing filename.
|
||||
"prc_ppp_ind_construction": {
|
||||
"dataset_code": "prc_ppp_ind",
|
||||
"filters": {"ppp_cat": "A050202", "na_item": "PLI_EU27_2020"},
|
||||
"geo_dim": "geo",
|
||||
"time_dim": "time",
|
||||
},
|
||||
"prc_ppp_ind_housing": {
|
||||
"dataset_code": "prc_ppp_ind",
|
||||
"filters": {"ppp_cat": "A0104", "na_item": "PLI_EU27_2020"},
|
||||
"geo_dim": "geo",
|
||||
"time_dim": "time",
|
||||
},
|
||||
"prc_ppp_ind_services": {
|
||||
"dataset_code": "prc_ppp_ind",
|
||||
"filters": {"ppp_cat": "P0201", "na_item": "PLI_EU27_2020"},
|
||||
"geo_dim": "geo",
|
||||
"time_dim": "time",
|
||||
},
|
||||
"prc_ppp_ind_misc": {
|
||||
"dataset_code": "prc_ppp_ind",
|
||||
"filters": {"ppp_cat": "A0112", "na_item": "PLI_EU27_2020"},
|
||||
"geo_dim": "geo",
|
||||
"time_dim": "time",
|
||||
},
|
||||
"prc_ppp_ind_government": {
|
||||
"dataset_code": "prc_ppp_ind",
|
||||
"filters": {"ppp_cat": "P0202", "na_item": "PLI_EU27_2020"},
|
||||
"geo_dim": "geo",
|
||||
"time_dim": "time",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -196,22 +253,25 @@ def extract(
|
||||
files_skipped = 0
|
||||
bytes_written_total = 0
|
||||
|
||||
for dataset_code, config in DATASETS.items():
|
||||
url = f"{EUROSTAT_BASE_URL}/{dataset_code}?format=JSON&lang=EN"
|
||||
for dataset_key, config in DATASETS.items():
|
||||
# Use dataset_code (if set) for the API URL; fall back to the dict key.
|
||||
# This lets multiple entries share one Eurostat dataset with different filters.
|
||||
api_code = config.get("dataset_code", dataset_key)
|
||||
url = f"{EUROSTAT_BASE_URL}/{api_code}?format=JSON&lang=EN"
|
||||
for key, val in config.get("filters", {}).items():
|
||||
url += f"&{key}={val}"
|
||||
dest_dir = landing_path(landing_dir, "eurostat", year, month)
|
||||
dest = dest_dir / f"{dataset_code}.json.gz"
|
||||
dest = dest_dir / f"{dataset_key}.json.gz"
|
||||
|
||||
logger.info("GET %s", dataset_code)
|
||||
logger.info("GET %s", dataset_key)
|
||||
bytes_written = _fetch_with_etag(url, dest, session, config)
|
||||
|
||||
if bytes_written > 0:
|
||||
logger.info("%s updated — %s bytes compressed", dataset_code, f"{bytes_written:,}")
|
||||
logger.info("%s updated — %s bytes compressed", dataset_key, f"{bytes_written:,}")
|
||||
files_written += 1
|
||||
bytes_written_total += bytes_written
|
||||
else:
|
||||
logger.info("%s not modified (304)", dataset_code)
|
||||
logger.info("%s not modified (304)", dataset_key)
|
||||
files_skipped += 1
|
||||
|
||||
return {
|
||||
|
||||
@@ -54,6 +54,7 @@ Grain must match reality — use `QUALIFY ROW_NUMBER()` to enforce it.
|
||||
|
||||
| Dimension | Grain | Used by |
|
||||
|-----------|-------|---------|
|
||||
| `foundation.dim_countries` | `country_code` | `dim_cities`, `dim_locations`, `pseo_city_costs_de`, `planner_defaults` — single source for country names, income, PLI/cost overrides |
|
||||
| `foundation.dim_venues` | `venue_id` | `dim_cities`, `dim_venue_capacity`, `fct_daily_availability` (via capacity join) |
|
||||
| `foundation.dim_cities` | `(country_code, city_slug)` | `serving.city_market_profile` → all pSEO serving models |
|
||||
| `foundation.dim_locations` | `(country_code, geoname_id)` | `serving.location_opportunity_profile` — all GeoNames locations (pop ≥1K), incl. zero-court locations |
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
-- Conformed dimension: used by city_market_profile and all pSEO serving models.
|
||||
-- Integrates four sources:
|
||||
-- dim_venues → city list, venue count, coordinates (Playtomic + OSM)
|
||||
-- stg_income → country-level median income (Eurostat)
|
||||
-- foundation.dim_countries → country_name_en, country_slug, median_income_pps
|
||||
-- stg_city_labels → Eurostat city_code → city_name mapping (EU cities)
|
||||
-- stg_population → Eurostat city-level population (EU, joined via city code)
|
||||
-- stg_population_usa → US Census ACS place population
|
||||
@@ -41,12 +41,6 @@ venue_cities AS (
|
||||
WHERE city IS NOT NULL AND LENGTH(city) > 0
|
||||
GROUP BY country_code, city
|
||||
),
|
||||
-- Latest country income per country
|
||||
country_income AS (
|
||||
SELECT country_code, median_income_pps, ref_year AS income_year
|
||||
FROM staging.stg_income
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
|
||||
),
|
||||
-- Eurostat EU population: join city labels (code→name) with population values.
|
||||
-- QUALIFY keeps only the most recent year per (country, city name).
|
||||
eurostat_pop AS (
|
||||
@@ -108,10 +102,9 @@ SELECT
|
||||
vc.country_code,
|
||||
vc.city_slug,
|
||||
vc.city_name,
|
||||
-- Human-readable country name for pSEO templates and internal linking
|
||||
@country_name(vc.country_code) AS country_name_en,
|
||||
-- URL-safe country slug
|
||||
@country_slug(vc.country_code) AS country_slug,
|
||||
-- Human-readable country name and slug — from dim_countries (single source of truth)
|
||||
c.country_name_en,
|
||||
c.country_slug,
|
||||
vc.centroid_lat AS lat,
|
||||
vc.centroid_lon AS lon,
|
||||
-- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames string > GeoNames spatial > 0.
|
||||
@@ -133,13 +126,13 @@ SELECT
|
||||
0
|
||||
)::INTEGER AS population_year,
|
||||
vc.padel_venue_count,
|
||||
ci.median_income_pps,
|
||||
ci.income_year,
|
||||
c.median_income_pps,
|
||||
c.income_year,
|
||||
-- GeoNames ID: FK to dim_locations / location_opportunity_profile.
|
||||
-- String match preferred; spatial fallback used when name doesn't match (Milano→Milan, etc.)
|
||||
COALESCE(gn.geoname_id, gs.spatial_geoname_id) AS geoname_id
|
||||
FROM venue_cities vc
|
||||
LEFT JOIN country_income ci ON vc.country_code = ci.country_code
|
||||
LEFT JOIN foundation.dim_countries c ON vc.country_code = c.country_code
|
||||
-- Eurostat EU population (via city code→name lookup)
|
||||
LEFT JOIN eurostat_pop ep
|
||||
ON vc.country_code = ep.country_code
|
||||
|
||||
@@ -0,0 +1,285 @@
|
||||
-- Conformed country dimension — single authoritative source for all country metadata.
|
||||
--
|
||||
-- Consolidates data previously duplicated across dim_cities and dim_locations:
|
||||
-- - country_name_en / country_slug (was: ~50-line CASE blocks in both models)
|
||||
-- - median_income_pps (was: country_income CTE in both models)
|
||||
-- - energy prices, labour costs, PLI indices (new — from Eurostat datasets)
|
||||
-- - cost override columns for the financial calculator
|
||||
--
|
||||
-- Used by: dim_cities, dim_locations, pseo_city_costs_de, planner_defaults.
|
||||
-- Grain: country_code (one row per ISO 3166-1 alpha-2 country code).
|
||||
-- Kind: FULL — small table (~40 rows), full refresh daily.
|
||||
--
|
||||
-- Cost override columns:
|
||||
-- NULL = fall through to calculator.py DEFAULTS (safe: auto-mapping filters None).
|
||||
-- For DE (the baseline country) all overrides are NULL to preserve exact DEFAULTS.
|
||||
-- For countries missing Eurostat data, NULLs propagate naturally.
|
||||
-- camelCase column aliases match DEFAULTS keys for auto-mapping in content/__init__.py.
|
||||
--
|
||||
-- !! DE baseline values sourced from calculator.py DEFAULTS (web/src/padelnomics/planner/calculator.py).
|
||||
-- !! If DEFAULTS change, the hardcoded baseline values below must be updated to match.
|
||||
-- !! Search "DE baseline" in this file to find all affected lines.
|
||||
|
||||
MODEL (
  name foundation.dim_countries,
  kind FULL,
  cron '@daily',
  grain country_code
);

WITH
-- Latest income per country
latest_income AS (
    SELECT country_code, median_income_pps, ref_year AS income_year
    FROM staging.stg_income
    QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
),
-- Latest electricity price per country (most recent semi-annual period)
latest_electricity AS (
    SELECT country_code, electricity_eur_kwh, ref_period
    FROM staging.stg_electricity_prices
    QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_period DESC) = 1
),
-- Latest gas price per country
latest_gas AS (
    SELECT country_code, gas_eur_gj, ref_period
    FROM staging.stg_gas_prices
    QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_period DESC) = 1
),
-- Latest labour cost per country
latest_labour AS (
    SELECT country_code, labour_cost_eur_hour, ref_year
    FROM staging.stg_labour_costs
    QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
),
-- Latest PLI per (country, category)
latest_pli AS (
    SELECT country_code, category, pli, ref_year
    FROM staging.stg_price_levels
    QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code, category ORDER BY ref_year DESC) = 1
),
-- Pivot PLI categories into one column each per country
pli_pivoted AS (
    SELECT
        country_code,
        MAX(pli) FILTER (WHERE category = 'construction') AS construction,
        MAX(pli) FILTER (WHERE category = 'housing')      AS housing,
        MAX(pli) FILTER (WHERE category = 'services')     AS services,
        MAX(pli) FILTER (WHERE category = 'misc')         AS misc,
        MAX(pli) FILTER (WHERE category = 'government')   AS government
    FROM latest_pli
    GROUP BY country_code
),
-- DE baseline rows for ratio computation (zero or one row each).
-- They are attached below with LEFT JOIN ... ON TRUE, not CROSS JOIN: a CROSS JOIN
-- against an empty baseline (DE missing from one source) would drop every country
-- from the dimension instead of yielding NULL ratios.
de_pli AS (
    SELECT construction, housing, services, misc, government
    FROM pli_pivoted
    WHERE country_code = 'DE'
),
de_elec AS (
    SELECT electricity_eur_kwh
    FROM latest_electricity
    WHERE country_code = 'DE'
),
de_gas AS (
    SELECT gas_eur_gj
    FROM latest_gas
    WHERE country_code = 'DE'
),
-- All distinct 2-letter country codes from any source, plus known padel markets
-- so they appear even if Eurostat doesn't cover them yet. UNION de-duplicates.
country_codes AS (
    SELECT country_code
    FROM (
        SELECT country_code FROM latest_income
        UNION
        SELECT country_code FROM latest_electricity
        UNION
        SELECT country_code FROM latest_gas
        UNION
        SELECT country_code FROM latest_labour
        UNION
        SELECT country_code FROM pli_pivoted
        UNION
        SELECT unnest(['DE','ES','GB','FR','IT','PT','AT','CH','NL','BE','SE','NO','DK','FI',
                       'US','AR','MX','AE','AU','IE']) AS country_code
    ) AS any_source
    WHERE LENGTH(country_code) = 2
),
-- Display names: single definition shared by country_name_en and country_slug.
-- Codes without an entry fall back to the code itself.
country_names AS (
    SELECT names.country_code, names.country_name_en
    FROM (
        VALUES
            ('DE', 'Germany'),
            ('ES', 'Spain'),
            ('GB', 'United Kingdom'),
            ('FR', 'France'),
            ('IT', 'Italy'),
            ('PT', 'Portugal'),
            ('AT', 'Austria'),
            ('CH', 'Switzerland'),
            ('NL', 'Netherlands'),
            ('BE', 'Belgium'),
            ('SE', 'Sweden'),
            ('NO', 'Norway'),
            ('DK', 'Denmark'),
            ('FI', 'Finland'),
            ('US', 'United States'),
            ('AR', 'Argentina'),
            ('MX', 'Mexico'),
            ('AE', 'UAE'),
            ('AU', 'Australia'),
            ('IE', 'Ireland')
    ) AS names(country_code, country_name_en)
),
-- One row per country with raw inputs and the country/DE scaling ratios.
-- Ratios are NULL for DE itself (baseline: calculator DEFAULTS apply unchanged),
-- NULL when either side is missing, and NULL when the DE denominator is 0.
enriched AS (
    SELECT
        ac.country_code,
        COALESCE(n.country_name_en, ac.country_code) AS country_name_en,
        i.median_income_pps,
        i.income_year,
        e.electricity_eur_kwh,
        g.gas_eur_gj,
        la.labour_cost_eur_hour,
        p.construction AS pli_construction,
        p.housing      AS pli_housing,
        p.services     AS pli_services,
        p.misc         AS pli_misc,
        p.government   AS pli_government,
        CASE WHEN ac.country_code <> 'DE'
             THEN e.electricity_eur_kwh / NULLIF(de_e.electricity_eur_kwh, 0) END AS electricity_ratio,
        CASE WHEN ac.country_code <> 'DE'
             THEN g.gas_eur_gj / NULLIF(de_g.gas_eur_gj, 0) END                   AS gas_ratio,
        CASE WHEN ac.country_code <> 'DE'
             THEN p.housing / NULLIF(de_p.housing, 0) END                         AS housing_ratio,
        CASE WHEN ac.country_code <> 'DE'
             THEN p.misc / NULLIF(de_p.misc, 0) END                               AS misc_ratio,
        CASE WHEN ac.country_code <> 'DE'
             THEN p.services / NULLIF(de_p.services, 0) END                       AS services_ratio,
        CASE WHEN ac.country_code <> 'DE'
             THEN p.government / NULLIF(de_p.government, 0) END                   AS government_ratio,
        CASE WHEN ac.country_code <> 'DE'
             THEN p.construction / NULLIF(de_p.construction, 0) END               AS construction_ratio
    FROM country_codes ac
    LEFT JOIN country_names n       ON ac.country_code = n.country_code
    LEFT JOIN latest_income i       ON ac.country_code = i.country_code
    LEFT JOIN latest_electricity e  ON ac.country_code = e.country_code
    LEFT JOIN latest_gas g          ON ac.country_code = g.country_code
    LEFT JOIN latest_labour la      ON ac.country_code = la.country_code
    LEFT JOIN pli_pivoted p         ON ac.country_code = p.country_code
    LEFT JOIN de_pli de_p           ON TRUE
    LEFT JOIN de_elec de_e          ON TRUE
    LEFT JOIN de_gas de_g           ON TRUE
)
SELECT
    country_code,
    -- Country name and slug (single definition, replacing duplicated CASE blocks)
    country_name_en,
    -- 'g' replaces every non-alphanumeric run, not just the first match
    LOWER(REGEXP_REPLACE(country_name_en, '[^a-zA-Z0-9]+', '-', 'g')) AS country_slug,
    -- Income data
    median_income_pps,
    income_year,
    -- Raw energy and labour data (for reference / future staffed-scenario use)
    electricity_eur_kwh,
    gas_eur_gj,
    labour_cost_eur_hour,
    -- PLI indices per category (EU27=100)
    pli_construction,
    pli_housing,
    pli_services,
    pli_misc,
    pli_government,
    -- ── Calculator cost override columns ────────────────────────────────────
    -- NULL = fall through to calculator.py DEFAULTS (always NULL for DE).
    -- Formula: country_value = DE_default × (country_price_or_PLI / DE_price_or_PLI)
    --
    -- OPEX overrides — energy (direct price ratio)
    -- DE baseline: electricity=600, heating=400 (see calculator.py DEFAULTS)
    ROUND(600.0 * electricity_ratio, 0)      AS electricity,
    ROUND(400.0 * gas_ratio, 0)              AS heating,
    -- OPEX overrides — PLI-scaled (housing category)
    -- DE baseline: rentSqm=4, water=125, outdoorRent=400
    ROUND(4.0 * housing_ratio, 2)            AS rent_sqm,
    ROUND(125.0 * housing_ratio, 0)          AS water,
    ROUND(400.0 * housing_ratio, 0)          AS outdoor_rent,
    -- OPEX overrides — PLI-scaled (misc category)
    -- DE baseline: insurance=300
    ROUND(300.0 * misc_ratio, 0)             AS insurance,
    -- OPEX overrides — PLI-scaled (services category)
    -- DE baseline: cleaning=300, maintenance=300, marketing=350
    ROUND(300.0 * services_ratio, 0)         AS cleaning,
    ROUND(300.0 * services_ratio, 0)         AS maintenance,
    ROUND(350.0 * services_ratio, 0)         AS marketing,
    -- OPEX overrides — PLI-scaled (government category)
    -- DE baseline: propertyTax=250, permitsCompliance=12000
    ROUND(250.0 * government_ratio, 0)       AS property_tax,
    ROUND(12000.0 * government_ratio, 0)     AS permits_compliance,
    -- CAPEX overrides — PLI-scaled (construction category)
    -- DE baseline: hallCostSqm=500, foundationSqm=150, hvac=100000, electrical=60000,
    --   sanitary=80000, parking=50000, fitout=40000, planning=100000,
    --   fireProtection=80000, floorPrep=12000, hvacUpgrade=20000,
    --   lightingUpgrade=10000, outdoorFoundation=35, outdoorSiteWork=8000,
    --   outdoorLighting=4000, outdoorFencing=6000, workingCapital=15000
    ROUND(500.0 * construction_ratio, 0)     AS hall_cost_sqm,
    ROUND(150.0 * construction_ratio, 0)     AS foundation_sqm,
    ROUND(100000.0 * construction_ratio, 0)  AS hvac,
    ROUND(60000.0 * construction_ratio, 0)   AS electrical,
    ROUND(80000.0 * construction_ratio, 0)   AS sanitary,
    ROUND(50000.0 * construction_ratio, 0)   AS parking,
    ROUND(40000.0 * construction_ratio, 0)   AS fitout,
    ROUND(100000.0 * construction_ratio, 0)  AS planning,
    ROUND(80000.0 * construction_ratio, 0)   AS fire_protection,
    ROUND(12000.0 * construction_ratio, 0)   AS floor_prep,
    ROUND(20000.0 * construction_ratio, 0)   AS hvac_upgrade,
    ROUND(10000.0 * construction_ratio, 0)   AS lighting_upgrade,
    ROUND(35.0 * construction_ratio, 0)      AS outdoor_foundation,
    ROUND(8000.0 * construction_ratio, 0)    AS outdoor_site_work,
    ROUND(4000.0 * construction_ratio, 0)    AS outdoor_lighting,
    ROUND(6000.0 * construction_ratio, 0)    AS outdoor_fencing,
    ROUND(15000.0 * construction_ratio, 0)   AS working_capital,
    -- CAPEX overrides — PLI-scaled (housing category)
    -- DE baseline: landPriceSqm=60
    ROUND(60.0 * housing_ratio, 0)           AS land_price_sqm
FROM enriched
-- Enforce grain
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY country_code) = 1
|
||||
@@ -6,9 +6,9 @@
|
||||
-- covers all locations with population ≥ 1K so zero-court Gemeinden score fully.
|
||||
--
|
||||
-- Enriched with:
|
||||
-- foundation.dim_countries → country_name_en, country_slug, median_income_pps
|
||||
-- stg_nuts2_boundaries + stg_regional_income → EU NUTS-2/NUTS-1 income (spatial join)
|
||||
-- stg_income_usa → US state-level income (PPS-normalised)
|
||||
-- stg_income → country-level income (fallback for all countries)
|
||||
-- stg_padel_courts → padel venue count + nearest court distance (km)
|
||||
-- stg_tennis_courts → tennis court count within 25km radius
|
||||
--
|
||||
@@ -16,7 +16,7 @@
|
||||
-- 1. EU NUTS-2 regional income (finest; spatial join via ST_Contains)
|
||||
-- 2. EU NUTS-1 regional income (fallback when NUTS-2 income missing from dataset)
|
||||
-- 3. US state income (ratio-normalised to PPS scale; see us_income CTE)
|
||||
-- 4. Country-level income (global fallback from stg_income / ilc_di03)
|
||||
-- 4. Country-level income (global fallback from dim_countries / ilc_di03)
|
||||
--
|
||||
-- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension).
|
||||
-- Spatial joins use BETWEEN predicates (not ABS()) to enable DuckDB's IEJoin
|
||||
@@ -49,12 +49,6 @@ locations AS (
|
||||
FROM staging.stg_population_geonames
|
||||
WHERE lat IS NOT NULL AND lon IS NOT NULL
|
||||
),
|
||||
-- Country income (ilc_di03) — global fallback for all countries
|
||||
country_income AS (
|
||||
SELECT country_code, median_income_pps, ref_year AS income_year
|
||||
FROM staging.stg_income
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
|
||||
),
|
||||
-- ── EU NUTS-2 income via spatial join ──────────────────────────────────────
|
||||
-- Each EU location's (lon, lat) is matched against NUTS-2 boundary polygons.
|
||||
-- The bounding box pre-filter (bbox_lat/lon_min/max) eliminates most candidates
|
||||
@@ -214,10 +208,9 @@ tennis_nearby AS (
|
||||
SELECT
|
||||
l.geoname_id,
|
||||
l.country_code,
|
||||
-- Human-readable country name (consistent with dim_cities)
|
||||
@country_name(l.country_code) AS country_name_en,
|
||||
-- URL-safe country slug
|
||||
@country_slug(l.country_code) AS country_slug,
|
||||
-- Human-readable country name and slug — from dim_countries (single source of truth)
|
||||
c.country_name_en,
|
||||
c.country_slug,
|
||||
l.location_name,
|
||||
l.location_slug,
|
||||
l.lat,
|
||||
@@ -230,12 +223,12 @@ SELECT
|
||||
COALESCE(
|
||||
ri.regional_income_pps, -- EU: NUTS-2 (finest) or NUTS-1 (fallback)
|
||||
us.median_income_pps, -- US: state-level PPS-equivalent
|
||||
ci.median_income_pps -- Global: country-level from ilc_di03
|
||||
c.median_income_pps -- Global: country-level from dim_countries / ilc_di03
|
||||
) AS median_income_pps,
|
||||
COALESCE(
|
||||
ri.regional_income_year,
|
||||
us.income_year,
|
||||
ci.income_year
|
||||
c.income_year
|
||||
) AS income_year,
|
||||
COALESCE(pl.padel_venue_count, 0)::INTEGER AS padel_venue_count,
|
||||
-- Venues per 100K residents (NULL if population = 0)
|
||||
@@ -247,8 +240,8 @@ SELECT
|
||||
COALESCE(tn.tennis_courts_within_25km, 0)::INTEGER AS tennis_courts_within_25km,
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM locations l
|
||||
LEFT JOIN country_income ci ON l.country_code = ci.country_code
|
||||
LEFT JOIN regional_income ri ON l.geoname_id = ri.geoname_id
|
||||
LEFT JOIN foundation.dim_countries c ON l.country_code = c.country_code
|
||||
LEFT JOIN regional_income ri ON l.geoname_id = ri.geoname_id
|
||||
LEFT JOIN us_income us ON l.country_code = 'US'
|
||||
AND l.admin1_code = us.admin1_code
|
||||
LEFT JOIN nearest_padel np ON l.geoname_id = np.geoname_id
|
||||
|
||||
@@ -7,6 +7,10 @@
|
||||
-- 2. Country-level: median across cities in same country
|
||||
-- 3. Hardcoded fallback: market research estimates (only when no Playtomic data)
|
||||
--
|
||||
-- Cost override columns from dim_countries (Eurostat PLI + energy price indices) are
|
||||
-- included so the planner API pre-fills country-adjusted CAPEX/OPEX for all cities.
|
||||
-- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline preserved).
|
||||
--
|
||||
-- Units are explicit in column names. Monetary values in local currency.
|
||||
|
||||
MODEL (
|
||||
@@ -125,6 +129,37 @@ SELECT
|
||||
ELSE 0.2
|
||||
END AS data_confidence,
|
||||
COALESCE(cb.price_currency, ctb.price_currency, hf.currency, 'EUR') AS price_currency,
|
||||
-- Cost override columns (Eurostat PLI + energy prices via dim_countries).
|
||||
-- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline).
|
||||
dc.electricity,
|
||||
dc.heating,
|
||||
dc.rent_sqm,
|
||||
dc.insurance,
|
||||
dc.cleaning,
|
||||
dc.maintenance,
|
||||
dc.marketing,
|
||||
dc.water,
|
||||
dc.property_tax,
|
||||
dc.outdoor_rent,
|
||||
dc.hall_cost_sqm,
|
||||
dc.foundation_sqm,
|
||||
dc.land_price_sqm,
|
||||
dc.hvac,
|
||||
dc.electrical,
|
||||
dc.sanitary,
|
||||
dc.parking,
|
||||
dc.fitout,
|
||||
dc.planning,
|
||||
dc.fire_protection,
|
||||
dc.floor_prep,
|
||||
dc.hvac_upgrade,
|
||||
dc.lighting_upgrade,
|
||||
dc.outdoor_foundation,
|
||||
dc.outdoor_site_work,
|
||||
dc.outdoor_lighting,
|
||||
dc.outdoor_fencing,
|
||||
dc.working_capital,
|
||||
dc.permits_compliance,
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM city_profiles cp
|
||||
LEFT JOIN city_benchmarks cb
|
||||
@@ -134,3 +169,5 @@ LEFT JOIN country_benchmarks ctb
|
||||
ON cp.country_code = ctb.country_code
|
||||
LEFT JOIN hardcoded_fallbacks hf
|
||||
ON cp.country_code = hf.country_code
|
||||
LEFT JOIN foundation.dim_countries dc
|
||||
ON cp.country_code = dc.country_code
|
||||
|
||||
@@ -4,6 +4,10 @@
|
||||
--
|
||||
-- Calculator override columns use camelCase to match the DEFAULTS keys in
|
||||
-- planner/calculator.py, so they are auto-applied as calc pre-fills.
|
||||
--
|
||||
-- Cost override columns come from foundation.dim_countries (Eurostat PLI and energy
|
||||
-- price indices). NULL = fall through to calculator.py DEFAULTS (safe: auto-mapping
|
||||
-- filters None). DE always produces NULL overrides — preserves exact DEFAULTS behaviour.
|
||||
|
||||
MODEL (
|
||||
name serving.pseo_city_costs_de,
|
||||
@@ -44,6 +48,39 @@ SELECT
|
||||
FLOOR(p.courts_typical) AS "dblCourts",
|
||||
-- 'country' drives currency formatting in the calculator
|
||||
c.country_code AS "country",
|
||||
-- Cost override columns from dim_countries (Eurostat PLI + energy price indices).
|
||||
-- NULL = fall through to calculator.py DEFAULTS. DE always NULL (baseline preserved).
|
||||
-- OPEX overrides
|
||||
cc.electricity AS "electricity",
|
||||
cc.heating AS "heating",
|
||||
cc.rent_sqm AS "rentSqm",
|
||||
cc.insurance AS "insurance",
|
||||
cc.cleaning AS "cleaning",
|
||||
cc.maintenance AS "maintenance",
|
||||
cc.marketing AS "marketing",
|
||||
cc.water AS "water",
|
||||
cc.property_tax AS "propertyTax",
|
||||
cc.outdoor_rent AS "outdoorRent",
|
||||
-- CAPEX overrides
|
||||
cc.hall_cost_sqm AS "hallCostSqm",
|
||||
cc.foundation_sqm AS "foundationSqm",
|
||||
cc.land_price_sqm AS "landPriceSqm",
|
||||
cc.hvac AS "hvac",
|
||||
cc.electrical AS "electrical",
|
||||
cc.sanitary AS "sanitary",
|
||||
cc.parking AS "parking",
|
||||
cc.fitout AS "fitout",
|
||||
cc.planning AS "planning",
|
||||
cc.fire_protection AS "fireProtection",
|
||||
cc.floor_prep AS "floorPrep",
|
||||
cc.hvac_upgrade AS "hvacUpgrade",
|
||||
cc.lighting_upgrade AS "lightingUpgrade",
|
||||
cc.outdoor_foundation AS "outdoorFoundation",
|
||||
cc.outdoor_site_work AS "outdoorSiteWork",
|
||||
cc.outdoor_lighting AS "outdoorLighting",
|
||||
cc.outdoor_fencing AS "outdoorFencing",
|
||||
cc.working_capital AS "workingCapital",
|
||||
cc.permits_compliance AS "permitsCompliance",
|
||||
CURRENT_DATE AS refreshed_date
|
||||
FROM serving.city_market_profile c
|
||||
LEFT JOIN serving.planner_defaults p
|
||||
@@ -52,6 +89,8 @@ LEFT JOIN serving.planner_defaults p
|
||||
LEFT JOIN serving.location_opportunity_profile lop
|
||||
ON c.country_code = lop.country_code
|
||||
AND c.geoname_id = lop.geoname_id
|
||||
LEFT JOIN foundation.dim_countries cc
|
||||
ON c.country_code = cc.country_code
|
||||
-- Only cities with actual padel presence and at least some rate data
|
||||
WHERE c.padel_venue_count > 0
|
||||
AND (p.rate_peak IS NOT NULL OR c.median_peak_rate IS NOT NULL)
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
-- Electricity prices for non-household consumers (Eurostat nrg_pc_205).
-- EUR/kWh excluding taxes, band MWH500-1999 (medium-sized commercial consumer).
-- Semi-annual frequency: ref_period is "YYYY-S1" or "YYYY-S2".
--
-- Source: data/landing/eurostat/{year}/{month}/nrg_pc_205.json.gz
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2024-S1", "value": 0.1523}, ...]}
--
-- The glob below matches every {year}/{month} landing snapshot, so the same
-- (country, period) pair may be delivered by more than one file. To honour the
-- declared grain, only the row from the most recent snapshot is kept.
-- NOTE(review): snapshot recency is derived from the {year}/{month} path
-- segments — confirm the extractor always writes that layout.

MODEL (
  name staging.stg_electricity_prices,
  kind FULL,
  cron '@daily',
  grain (country_code, ref_period)
);

WITH source AS (
  -- One output row per element of the file's "rows" array, tagged with its file path.
  SELECT
    unnest(rows) AS r,
    filename AS landing_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/nrg_pc_205.json.gz',
    auto_detect = true,
    filename = true
  )
),

parsed AS (
  SELECT
    UPPER(TRIM(r.geo_code)) AS geo_code,
    TRIM(r.ref_year) AS ref_period,
    TRY_CAST(r.value AS DOUBLE) AS electricity_eur_kwh,
    -- Numeric snapshot keys so that e.g. month 9 sorts before month 10.
    TRY_CAST(regexp_extract(landing_file, '/eurostat/([0-9]+)/([0-9]+)/', 1) AS INTEGER) AS snapshot_year,
    TRY_CAST(regexp_extract(landing_file, '/eurostat/([0-9]+)/([0-9]+)/', 2) AS INTEGER) AS snapshot_month,
    landing_file
  FROM source
  WHERE r.value IS NOT NULL
),

normalised AS (
  SELECT
    -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
    CASE geo_code
      WHEN 'EL' THEN 'GR'
      WHEN 'UK' THEN 'GB'
      ELSE geo_code
    END AS country_code,
    ref_period,
    electricity_eur_kwh,
    snapshot_year,
    snapshot_month,
    landing_file
  FROM parsed
  -- Two-letter codes only; EU / EA are aggregates, not countries.
  WHERE LENGTH(geo_code) = 2
    AND geo_code NOT IN ('EU', 'EA', 'EU27_2020')
    AND ref_period IS NOT NULL
    AND electricity_eur_kwh > 0
)

SELECT
  country_code,
  ref_period,
  electricity_eur_kwh
FROM normalised
-- Enforce the grain: newest snapshot wins; file path is the final tie-breaker.
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY country_code, ref_period
  ORDER BY snapshot_year DESC NULLS LAST, snapshot_month DESC NULLS LAST, landing_file DESC
) = 1
|
||||
@@ -0,0 +1,42 @@
|
||||
-- Gas prices for non-household consumers (Eurostat nrg_pc_203).
-- EUR/GJ excluding taxes, band GJ1000-9999 (medium-sized commercial consumer).
-- Semi-annual frequency: ref_period is "YYYY-S1" or "YYYY-S2".
--
-- Source: data/landing/eurostat/{year}/{month}/nrg_pc_203.json.gz
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2024-S1", "value": 14.23}, ...]}
--
-- The glob below matches every {year}/{month} landing snapshot, so the same
-- (country, period) pair may be delivered by more than one file. To honour the
-- declared grain, only the row from the most recent snapshot is kept.
-- NOTE(review): snapshot recency is derived from the {year}/{month} path
-- segments — confirm the extractor always writes that layout.

MODEL (
  name staging.stg_gas_prices,
  kind FULL,
  cron '@daily',
  grain (country_code, ref_period)
);

WITH source AS (
  -- One output row per element of the file's "rows" array, tagged with its file path.
  SELECT
    unnest(rows) AS r,
    filename AS landing_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/nrg_pc_203.json.gz',
    auto_detect = true,
    filename = true
  )
),

parsed AS (
  SELECT
    UPPER(TRIM(r.geo_code)) AS geo_code,
    TRIM(r.ref_year) AS ref_period,
    TRY_CAST(r.value AS DOUBLE) AS gas_eur_gj,
    -- Numeric snapshot keys so that e.g. month 9 sorts before month 10.
    TRY_CAST(regexp_extract(landing_file, '/eurostat/([0-9]+)/([0-9]+)/', 1) AS INTEGER) AS snapshot_year,
    TRY_CAST(regexp_extract(landing_file, '/eurostat/([0-9]+)/([0-9]+)/', 2) AS INTEGER) AS snapshot_month,
    landing_file
  FROM source
  WHERE r.value IS NOT NULL
),

normalised AS (
  SELECT
    -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
    CASE geo_code
      WHEN 'EL' THEN 'GR'
      WHEN 'UK' THEN 'GB'
      ELSE geo_code
    END AS country_code,
    ref_period,
    gas_eur_gj,
    snapshot_year,
    snapshot_month,
    landing_file
  FROM parsed
  -- Two-letter codes only; EU / EA are aggregates, not countries.
  WHERE LENGTH(geo_code) = 2
    AND geo_code NOT IN ('EU', 'EA', 'EU27_2020')
    AND ref_period IS NOT NULL
    AND gas_eur_gj > 0
)

SELECT
  country_code,
  ref_period,
  gas_eur_gj
FROM normalised
-- Enforce the grain: newest snapshot wins; file path is the final tie-breaker.
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY country_code, ref_period
  ORDER BY snapshot_year DESC NULLS LAST, snapshot_month DESC NULLS LAST, landing_file DESC
) = 1
|
||||
@@ -0,0 +1,46 @@
|
||||
-- Labour cost levels EUR/hour (Eurostat lc_lci_lev).
-- NACE R2 sector N (administrative and support service activities).
-- D1_D2_A_HW structure: wages + non-wage costs, actual hours worked.
-- Annual frequency.
--
-- Stored for future "staffed scenario" calculator variant.
-- Not wired into default calculator overrides (staff=0 is a business assumption).
--
-- Source: data/landing/eurostat/{year}/{month}/lc_lci_lev.json.gz
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2022", "value": 28.4}, ...]}
--
-- The glob below matches every {year}/{month} landing snapshot, so the same
-- (country, year) pair may be delivered by more than one file. To honour the
-- declared grain, only the row from the most recent snapshot is kept, and rows
-- whose ref_year cannot be parsed as an integer are dropped.
-- NOTE(review): snapshot recency is derived from the {year}/{month} path
-- segments — confirm the extractor always writes that layout.

MODEL (
  name staging.stg_labour_costs,
  kind FULL,
  cron '@daily',
  grain (country_code, ref_year)
);

WITH source AS (
  -- One output row per element of the file's "rows" array, tagged with its file path.
  SELECT
    unnest(rows) AS r,
    filename AS landing_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/lc_lci_lev.json.gz',
    auto_detect = true,
    filename = true
  )
),

parsed AS (
  SELECT
    UPPER(TRIM(r.geo_code)) AS geo_code,
    TRY_CAST(r.ref_year AS INTEGER) AS ref_year,
    TRY_CAST(r.value AS DOUBLE) AS labour_cost_eur_hour,
    -- Numeric snapshot keys so that e.g. month 9 sorts before month 10.
    TRY_CAST(regexp_extract(landing_file, '/eurostat/([0-9]+)/([0-9]+)/', 1) AS INTEGER) AS snapshot_year,
    TRY_CAST(regexp_extract(landing_file, '/eurostat/([0-9]+)/([0-9]+)/', 2) AS INTEGER) AS snapshot_month,
    landing_file
  FROM source
  WHERE r.value IS NOT NULL
),

normalised AS (
  SELECT
    -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
    CASE geo_code
      WHEN 'EL' THEN 'GR'
      WHEN 'UK' THEN 'GB'
      ELSE geo_code
    END AS country_code,
    ref_year,
    labour_cost_eur_hour,
    snapshot_year,
    snapshot_month,
    landing_file
  FROM parsed
  -- Two-letter codes only; EU / EA are aggregates, not countries.
  WHERE LENGTH(geo_code) = 2
    AND geo_code NOT IN ('EU', 'EA', 'EU27_2020')
    -- TRY_CAST yields NULL for unparseable years; a NULL cannot be a grain key.
    AND ref_year IS NOT NULL
    AND labour_cost_eur_hour > 0
)

SELECT
  country_code,
  ref_year,
  labour_cost_eur_hour
FROM normalised
-- Enforce the grain: newest snapshot wins; file path is the final tie-breaker.
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY country_code, ref_year
  ORDER BY snapshot_year DESC NULLS LAST, snapshot_month DESC NULLS LAST, landing_file DESC
) = 1
|
||||
@@ -0,0 +1,96 @@
|
||||
-- Price level indices relative to EU27=100 (Eurostat prc_ppp_ind).
-- Five categories, each from a separate landing file (different ppp_cat filters).
-- Annual frequency.
--
-- Categories and what they scale in the calculator:
--   construction — CAPEX: hallCostSqm, foundationSqm, hvac, electrical, sanitary, etc.
--   housing      — rentSqm, landPriceSqm, water, outdoorRent
--   services     — cleaning, maintenance, marketing
--   misc         — insurance
--   government   — permitsCompliance, propertyTax
--
-- Sources:
--   data/landing/eurostat/*/*/prc_ppp_ind_construction.json.gz  (ppp_cat: A050202)
--   data/landing/eurostat/*/*/prc_ppp_ind_housing.json.gz       (ppp_cat: A0104)
--   data/landing/eurostat/*/*/prc_ppp_ind_services.json.gz      (ppp_cat: P0201)
--   data/landing/eurostat/*/*/prc_ppp_ind_misc.json.gz          (ppp_cat: A0112)
--   data/landing/eurostat/*/*/prc_ppp_ind_government.json.gz    (ppp_cat: P0202)
--
-- Format: {"rows": [{"geo_code": "DE", "ref_year": "2022", "value": 107.3}, ...]}
--
-- Each glob matches every {year}/{month} landing snapshot, so the same
-- (country, category, year) triple may be delivered by more than one file. To
-- honour the declared grain, only the row from the most recent snapshot is kept,
-- and rows whose ref_year cannot be parsed as an integer are dropped.
-- NOTE(review): snapshot recency is derived from the {year}/{month} path
-- segments — confirm the extractor always writes that layout.

MODEL (
  name staging.stg_price_levels,
  kind FULL,
  cron '@daily',
  grain (country_code, category, ref_year)
);

WITH construction_raw AS (
  SELECT unnest(rows) AS r, 'construction' AS category, filename AS landing_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_construction.json.gz',
    auto_detect = true,
    filename = true
  )
),

housing_raw AS (
  SELECT unnest(rows) AS r, 'housing' AS category, filename AS landing_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_housing.json.gz',
    auto_detect = true,
    filename = true
  )
),

services_raw AS (
  SELECT unnest(rows) AS r, 'services' AS category, filename AS landing_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_services.json.gz',
    auto_detect = true,
    filename = true
  )
),

misc_raw AS (
  SELECT unnest(rows) AS r, 'misc' AS category, filename AS landing_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_misc.json.gz',
    auto_detect = true,
    filename = true
  )
),

government_raw AS (
  SELECT unnest(rows) AS r, 'government' AS category, filename AS landing_file
  FROM read_json(
    @LANDING_DIR || '/eurostat/*/*/prc_ppp_ind_government.json.gz',
    auto_detect = true,
    filename = true
  )
),

-- Stack the five category feeds; UNION ALL because categories are disjoint by label.
all_raw AS (
  SELECT r, category, landing_file FROM construction_raw
  UNION ALL
  SELECT r, category, landing_file FROM housing_raw
  UNION ALL
  SELECT r, category, landing_file FROM services_raw
  UNION ALL
  SELECT r, category, landing_file FROM misc_raw
  UNION ALL
  SELECT r, category, landing_file FROM government_raw
),

parsed AS (
  SELECT
    UPPER(TRIM(r.geo_code)) AS geo_code,
    TRY_CAST(r.ref_year AS INTEGER) AS ref_year,
    TRY_CAST(r.value AS DOUBLE) AS pli,
    category,
    -- Numeric snapshot keys so that e.g. month 9 sorts before month 10.
    TRY_CAST(regexp_extract(landing_file, '/eurostat/([0-9]+)/([0-9]+)/', 1) AS INTEGER) AS snapshot_year,
    TRY_CAST(regexp_extract(landing_file, '/eurostat/([0-9]+)/([0-9]+)/', 2) AS INTEGER) AS snapshot_month,
    landing_file
  FROM all_raw
  WHERE r.value IS NOT NULL
),

normalised AS (
  SELECT
    -- Normalise to ISO 3166-1 alpha-2: EL→GR, UK→GB
    CASE geo_code
      WHEN 'EL' THEN 'GR'
      WHEN 'UK' THEN 'GB'
      ELSE geo_code
    END AS country_code,
    category,
    ref_year,
    pli,
    snapshot_year,
    snapshot_month,
    landing_file
  FROM parsed
  -- Two-letter codes only; EU / EA are aggregates, not countries.
  WHERE LENGTH(geo_code) = 2
    AND geo_code NOT IN ('EU', 'EA', 'EU27_2020')
    -- TRY_CAST yields NULL for unparseable years; a NULL cannot be a grain key.
    AND ref_year IS NOT NULL
    AND pli > 0
)

SELECT
  country_code,
  category,
  ref_year,
  pli
FROM normalised
-- Enforce the grain: newest snapshot wins; file path is the final tie-breaker.
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY country_code, category, ref_year
  ORDER BY snapshot_year DESC NULLS LAST, snapshot_month DESC NULLS LAST, landing_file DESC
) = 1
|
||||
Reference in New Issue
Block a user