feat(transform): individualise article costs with per-country Eurostat data
Add real per-country cost data to ~30 calculator fields so pSEO articles show country-specific CAPEX/OPEX instead of hardcoded DE defaults.

Extractor:
- eurostat.py: add 8 new datasets (nrg_pc_205, nrg_pc_203, lc_lci_lev, 5×prc_ppp_ind variants); add optional `dataset_code` field so multiple dict entries can share one Eurostat API endpoint

Staging (4 new models):
- stg_electricity_prices — EUR/kWh by country, semi-annual
- stg_gas_prices — EUR/GJ by country, semi-annual
- stg_labour_costs — EUR/hour by country, annual (future staffed scenario)
- stg_price_levels — PLI indices (EU27=100) for 5 categories, annual

Foundation:
- dim_countries (new) — conformed country dimension; eliminates ~50-line CASE blocks duplicated in dim_cities/dim_locations; computes ~29 calculator cost override columns from PLI ratios and energy price ratios vs DE baseline; NULL for DE so calculator falls through to DEFAULTS unchanged
- dim_cities — replace country_name/slug CASE blocks + country_income CTE with JOIN dim_countries
- dim_locations — same refactor as dim_cities

Serving:
- pseo_city_costs_de — JOIN dim_countries; add 29 camelCase override columns auto-applied by calculator (electricity, heating, rentSqm, hallCostSqm, …)
- planner_defaults — JOIN dim_countries; same 29 cost columns flow through to /api/market-data endpoint

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
-- Conformed dimension: used by city_market_profile and all pSEO serving models.
|
||||
-- Integrates the following sources:
|
||||
-- dim_venues → city list, venue count, coordinates (Playtomic + OSM)
|
||||
-- stg_income → country-level median income (Eurostat)
|
||||
-- foundation.dim_countries → country_name_en, country_slug, median_income_pps
|
||||
-- stg_city_labels → Eurostat city_code → city_name mapping (EU cities)
|
||||
-- stg_population → Eurostat city-level population (EU, joined via city code)
|
||||
-- stg_population_usa → US Census ACS place population
|
||||
@@ -42,12 +42,6 @@ venue_cities AS (
|
||||
WHERE city IS NOT NULL AND LENGTH(city) > 0
|
||||
GROUP BY country_code, city
|
||||
),
|
||||
-- Latest country income per country
|
||||
country_income AS (
|
||||
SELECT country_code, median_income_pps, ref_year AS income_year
|
||||
FROM staging.stg_income
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
|
||||
),
|
||||
-- Eurostat EU population: join city labels (code→name) with population values.
|
||||
-- QUALIFY keeps only the most recent year per (country, city name).
|
||||
eurostat_pop AS (
|
||||
@@ -109,56 +103,9 @@ SELECT
|
||||
vc.country_code,
|
||||
vc.city_slug,
|
||||
vc.city_name,
|
||||
-- Human-readable country name for pSEO templates and internal linking
|
||||
CASE vc.country_code
|
||||
WHEN 'DE' THEN 'Germany'
|
||||
WHEN 'ES' THEN 'Spain'
|
||||
WHEN 'GB' THEN 'United Kingdom'
|
||||
WHEN 'FR' THEN 'France'
|
||||
WHEN 'IT' THEN 'Italy'
|
||||
WHEN 'PT' THEN 'Portugal'
|
||||
WHEN 'AT' THEN 'Austria'
|
||||
WHEN 'CH' THEN 'Switzerland'
|
||||
WHEN 'NL' THEN 'Netherlands'
|
||||
WHEN 'BE' THEN 'Belgium'
|
||||
WHEN 'SE' THEN 'Sweden'
|
||||
WHEN 'NO' THEN 'Norway'
|
||||
WHEN 'DK' THEN 'Denmark'
|
||||
WHEN 'FI' THEN 'Finland'
|
||||
WHEN 'US' THEN 'United States'
|
||||
WHEN 'AR' THEN 'Argentina'
|
||||
WHEN 'MX' THEN 'Mexico'
|
||||
WHEN 'AE' THEN 'UAE'
|
||||
WHEN 'AU' THEN 'Australia'
|
||||
WHEN 'IE' THEN 'Ireland'
|
||||
ELSE vc.country_code
|
||||
END AS country_name_en,
|
||||
-- URL-safe country slug
|
||||
LOWER(REGEXP_REPLACE(
|
||||
CASE vc.country_code
|
||||
WHEN 'DE' THEN 'Germany'
|
||||
WHEN 'ES' THEN 'Spain'
|
||||
WHEN 'GB' THEN 'United Kingdom'
|
||||
WHEN 'FR' THEN 'France'
|
||||
WHEN 'IT' THEN 'Italy'
|
||||
WHEN 'PT' THEN 'Portugal'
|
||||
WHEN 'AT' THEN 'Austria'
|
||||
WHEN 'CH' THEN 'Switzerland'
|
||||
WHEN 'NL' THEN 'Netherlands'
|
||||
WHEN 'BE' THEN 'Belgium'
|
||||
WHEN 'SE' THEN 'Sweden'
|
||||
WHEN 'NO' THEN 'Norway'
|
||||
WHEN 'DK' THEN 'Denmark'
|
||||
WHEN 'FI' THEN 'Finland'
|
||||
WHEN 'US' THEN 'United States'
|
||||
WHEN 'AR' THEN 'Argentina'
|
||||
WHEN 'MX' THEN 'Mexico'
|
||||
WHEN 'AE' THEN 'UAE'
|
||||
WHEN 'AU' THEN 'Australia'
|
||||
WHEN 'IE' THEN 'Ireland'
|
||||
ELSE vc.country_code
|
||||
END, '[^a-zA-Z0-9]+', '-'
|
||||
)) AS country_slug,
|
||||
-- Human-readable country name and slug — from dim_countries (single source of truth)
|
||||
c.country_name_en,
|
||||
c.country_slug,
|
||||
vc.centroid_lat AS lat,
|
||||
vc.centroid_lon AS lon,
|
||||
-- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames string > GeoNames spatial > 0.
|
||||
@@ -180,13 +127,13 @@ SELECT
|
||||
0
|
||||
)::INTEGER AS population_year,
|
||||
vc.padel_venue_count,
|
||||
ci.median_income_pps,
|
||||
ci.income_year,
|
||||
c.median_income_pps,
|
||||
c.income_year,
|
||||
-- GeoNames ID: FK to dim_locations / location_opportunity_profile.
|
||||
-- String match preferred; spatial fallback used when name doesn't match (Milano→Milan, etc.)
|
||||
COALESCE(gn.geoname_id, gs.spatial_geoname_id) AS geoname_id
|
||||
FROM venue_cities vc
|
||||
LEFT JOIN country_income ci ON vc.country_code = ci.country_code
|
||||
LEFT JOIN foundation.dim_countries c ON vc.country_code = c.country_code
|
||||
-- Eurostat EU population (via city code→name lookup)
|
||||
LEFT JOIN eurostat_pop ep
|
||||
ON vc.country_code = ep.country_code
|
||||
|
||||
Reference in New Issue
Block a user