diff --git a/extract/padelnomics_extract/pyproject.toml b/extract/padelnomics_extract/pyproject.toml index da52449..2b52b8e 100644 --- a/extract/padelnomics_extract/pyproject.toml +++ b/extract/padelnomics_extract/pyproject.toml @@ -15,6 +15,10 @@ extract-eurostat = "padelnomics_extract.eurostat:main" extract-playtomic-tenants = "padelnomics_extract.playtomic_tenants:main" extract-playtomic-availability = "padelnomics_extract.playtomic_availability:main" extract-playtomic-recheck = "padelnomics_extract.playtomic_availability:main_recheck" +extract-eurostat-city-labels = "padelnomics_extract.eurostat_city_labels:main" +extract-census-usa = "padelnomics_extract.census_usa:main" +extract-ons-uk = "padelnomics_extract.ons_uk:main" +extract-geonames = "padelnomics_extract.geonames:main" [build-system] requires = ["hatchling"] diff --git a/extract/padelnomics_extract/src/padelnomics_extract/all.py b/extract/padelnomics_extract/src/padelnomics_extract/all.py index 3004753..94db5ab 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/all.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/all.py @@ -5,8 +5,16 @@ Each extractor gets its own state tracking row in .state.sqlite. 
""" from ._shared import run_extractor, setup_logging +from .census_usa import EXTRACTOR_NAME as CENSUS_USA_NAME +from .census_usa import extract as extract_census_usa from .eurostat import EXTRACTOR_NAME as EUROSTAT_NAME from .eurostat import extract as extract_eurostat +from .eurostat_city_labels import EXTRACTOR_NAME as EUROSTAT_CITY_LABELS_NAME +from .eurostat_city_labels import extract as extract_eurostat_city_labels +from .geonames import EXTRACTOR_NAME as GEONAMES_NAME +from .geonames import extract as extract_geonames +from .ons_uk import EXTRACTOR_NAME as ONS_UK_NAME +from .ons_uk import extract as extract_ons_uk from .overpass import EXTRACTOR_NAME as OVERPASS_NAME from .overpass import extract as extract_overpass from .playtomic_availability import EXTRACTOR_NAME as AVAILABILITY_NAME @@ -19,6 +27,10 @@ logger = setup_logging("padelnomics.extract") EXTRACTORS = [ (OVERPASS_NAME, extract_overpass), (EUROSTAT_NAME, extract_eurostat), + (EUROSTAT_CITY_LABELS_NAME, extract_eurostat_city_labels), + (CENSUS_USA_NAME, extract_census_usa), + (ONS_UK_NAME, extract_ons_uk), + (GEONAMES_NAME, extract_geonames), (TENANTS_NAME, extract_tenants), (AVAILABILITY_NAME, extract_availability), ] diff --git a/extract/padelnomics_extract/src/padelnomics_extract/census_usa.py b/extract/padelnomics_extract/src/padelnomics_extract/census_usa.py new file mode 100644 index 0000000..1083c38 --- /dev/null +++ b/extract/padelnomics_extract/src/padelnomics_extract/census_usa.py @@ -0,0 +1,139 @@ +"""US Census Bureau ACS 5-year population extractor. + +Fetches city-level (Census place) population from the American Community Survey +5-year estimates. Requires a free API key from api.census.gov. 
+ +Env var: CENSUS_API_KEY (register free at https://api.census.gov/data/key_signup.html) + +Landing: {LANDING_DIR}/census_usa/{year}/{month}/acs5_places.json.gz +Output: {"rows": [{"city_name": "Los Angeles", "state_fips": "06", + "place_fips": "0644000", "population": 3990456, + "ref_year": 2023, "country_code": "US"}], "count": N} +""" + +import json +import os +import sqlite3 +from pathlib import Path + +import niquests + +from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging +from .utils import get_last_cursor, landing_path, write_gzip_atomic + +logger = setup_logging("padelnomics.extract.census_usa") + +EXTRACTOR_NAME = "census_usa" + +# ACS 5-year estimates, 2023 vintage — refreshed annually by Census Bureau. +# B01003_001E = total population; NAME = place name + state. +ACS_URL = ( + "https://api.census.gov/data/2023/acs/acs5" + "?get=B01003_001E,NAME&for=place:*&in=state:*" +) + +REF_YEAR = 2023 +MIN_POPULATION = 50_000 +MAX_RETRIES = 2 + + +def _parse_city_name(full_name: str) -> str: + """Extract city name from Census place name. + + Examples: + 'Los Angeles city, California' → 'Los Angeles' + 'New York city, New York' → 'New York' + 'Miami city, Florida' → 'Miami' + """ + # Take everything before the first comma + before_comma = full_name.split(",")[0].strip() + # Strip common suffixes: ' city', ' town', ' CDP', ' borough', ' village' + for suffix in (" city", " town", " CDP", " borough", " village", " municipality"): + if before_comma.lower().endswith(suffix): + before_comma = before_comma[: -len(suffix)].strip() + break + return before_comma + + +def extract( + landing_dir: Path, + year_month: str, + conn: sqlite3.Connection, + session: niquests.Session, +) -> dict: + """Fetch ACS 5-year place population. 
Skips if already run this month.""" + api_key = os.environ.get("CENSUS_API_KEY", "").strip() + if not api_key: + logger.warning("CENSUS_API_KEY not set — skipping US Census extract") + return {"files_written": 0, "files_skipped": 1, "bytes_written": 0} + + # Skip if we already have data for this month (annual data, monthly cursor) + last_cursor = get_last_cursor(conn, EXTRACTOR_NAME) + if last_cursor == year_month: + logger.info("already have data for %s — skipping", year_month) + return {"files_written": 0, "files_skipped": 1, "bytes_written": 0} + + year, month = year_month.split("/") + url = f"{ACS_URL}&key={api_key}" + + logger.info("GET ACS 5-year places (vintage %d)", REF_YEAR) + resp = session.get(url, timeout=HTTP_TIMEOUT_SECONDS * 2) + resp.raise_for_status() + + raw = resp.json() + assert isinstance(raw, list) and len(raw) > 1, "ACS response must be a non-empty list" + + # First row is headers: ["B01003_001E", "NAME", "state", "place"] + headers = raw[0] + assert "B01003_001E" in headers, f"Population column missing from ACS response: {headers}" + pop_idx = headers.index("B01003_001E") + name_idx = headers.index("NAME") + state_idx = headers.index("state") + place_idx = headers.index("place") + + rows: list[dict] = [] + for row in raw[1:]: + try: + population = int(row[pop_idx]) + except (ValueError, TypeError): + continue + if population < MIN_POPULATION: + continue + full_name = row[name_idx] + city_name = _parse_city_name(full_name) + if not city_name: + continue + state_fips = row[state_idx] + place_fips = state_fips + row[place_idx] + rows.append({ + "city_name": city_name, + "state_fips": state_fips, + "place_fips": place_fips, + "population": population, + "ref_year": REF_YEAR, + "country_code": "US", + }) + + assert len(rows) > 500, f"Expected >500 US cities ≥50K pop, got {len(rows)} — parse may have failed" + logger.info("parsed %d US cities with population ≥%d", len(rows), MIN_POPULATION) + + dest_dir = landing_path(landing_dir, "census_usa", 
year, month) + dest = dest_dir / "acs5_places.json.gz" + payload = json.dumps({"rows": rows, "count": len(rows)}).encode() + bytes_written = write_gzip_atomic(dest, payload) + logger.info("written %s bytes compressed", f"{bytes_written:,}") + + return { + "files_written": 1, + "files_skipped": 0, + "bytes_written": bytes_written, + "cursor_value": year_month, + } + + +def main() -> None: + run_extractor(EXTRACTOR_NAME, extract) + + +if __name__ == "__main__": + main() diff --git a/extract/padelnomics_extract/src/padelnomics_extract/eurostat_city_labels.py b/extract/padelnomics_extract/src/padelnomics_extract/eurostat_city_labels.py new file mode 100644 index 0000000..02e4c73 --- /dev/null +++ b/extract/padelnomics_extract/src/padelnomics_extract/eurostat_city_labels.py @@ -0,0 +1,123 @@ +"""Eurostat SDMX city codelist extractor — city_code → city_name mapping. + +The Eurostat Urban Audit population dataset (urb_cpop1) uses coded city identifiers +(e.g. DE001C = Berlin) with no city name column. This extractor fetches the SDMX +codelist that maps those codes to human-readable names, enabling stg_city_labels to +join population data to dim_cities (which has names, not codes). + +The codelist changes very rarely so ETag dedup means most runs produce a 304 skip. 
+ +Landing: {LANDING_DIR}/eurostat_city_labels/{year}/{month}/cities_codelist.json.gz +Output: {"rows": [{"city_code": "DE001C", "city_name": "Berlin"}, ...], "count": N} +""" + +import json +import sqlite3 +from pathlib import Path + +import niquests + +from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging +from .utils import landing_path, write_gzip_atomic + +logger = setup_logging("padelnomics.extract.eurostat_city_labels") + +EXTRACTOR_NAME = "eurostat_city_labels" + +# SDMX codelist endpoint — returns the full CITIES dimension codes with labels +# format=JSON gives a compact JSON-stat-like structure for the codelist +CODELIST_URL = ( + "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/codelist/ESTAT/CITIES" + "?format=JSON&lang=EN" +) + + +def _parse_sdmx_codelist(data: dict) -> list[dict]: + """Extract city_code → city_name pairs from SDMX codelist JSON response. + + The SDMX 2.1 JSON structure varies by endpoint. This endpoint returns a + structure.codelists[0].codes list where each code has id and name[0].name. 
+ """ + try: + codelists = data["structure"]["codelists"] + except (KeyError, TypeError) as e: + raise ValueError(f"Unexpected SDMX structure — missing codelists: {e}") from e + + assert len(codelists) > 0, "SDMX response has empty codelists array" + + codes = codelists[0].get("codes", []) + assert len(codes) > 0, "SDMX codelist has no codes — API response may have changed" + + rows: list[dict] = [] + for code in codes: + city_code = code.get("id", "").strip() + if not city_code: + continue + # Name is a list of {lang, name} objects; pick the first (EN requested above) + names = code.get("name", []) + if isinstance(names, list) and names: + city_name = names[0].get("name", "").strip() + elif isinstance(names, str): + city_name = names.strip() + else: + continue + if city_name: + rows.append({"city_code": city_code, "city_name": city_name}) + + return rows + + +def _etag_path(dest: Path) -> Path: + return dest.parent / (dest.name + ".etag") + + +def extract( + landing_dir: Path, + year_month: str, + conn: sqlite3.Connection, + session: niquests.Session, +) -> dict: + """Fetch Eurostat CITIES codelist with ETag dedup. 
Returns run metrics.""" + year, month = year_month.split("/") + dest_dir = landing_path(landing_dir, "eurostat_city_labels", year, month) + dest = dest_dir / "cities_codelist.json.gz" + etag_file = _etag_path(dest) + + headers: dict[str, str] = {} + if etag_file.exists(): + headers["If-None-Match"] = etag_file.read_text().strip() + + logger.info("GET CITIES codelist") + resp = session.get(CODELIST_URL, headers=headers, timeout=HTTP_TIMEOUT_SECONDS) + + if resp.status_code == 304: + logger.info("CITIES codelist not modified (304)") + return {"files_written": 0, "files_skipped": 1, "bytes_written": 0} + + resp.raise_for_status() + + rows = _parse_sdmx_codelist(resp.json()) + assert len(rows) > 100, f"Expected >100 city codes, got {len(rows)} — parse may have failed" + + payload = json.dumps({"rows": rows, "count": len(rows)}).encode() + bytes_written = write_gzip_atomic(dest, payload) + logger.info("written %d city codes (%s bytes compressed)", len(rows), f"{bytes_written:,}") + + if etag := resp.headers.get("etag"): + etag_file.parent.mkdir(parents=True, exist_ok=True) + etag_file.write_text(etag) + + return { + "files_written": 1, + "files_skipped": 0, + "bytes_written": bytes_written, + "cursor_value": year_month, + } + + +def main() -> None: + run_extractor(EXTRACTOR_NAME, extract) + + +if __name__ == "__main__": + main() diff --git a/extract/padelnomics_extract/src/padelnomics_extract/geonames.py b/extract/padelnomics_extract/src/padelnomics_extract/geonames.py new file mode 100644 index 0000000..e648053 --- /dev/null +++ b/extract/padelnomics_extract/src/padelnomics_extract/geonames.py @@ -0,0 +1,157 @@ +"""GeoNames global city population extractor. + +Downloads the cities15000.zip bulk file (~1.5MB compressed, ~26K entries) from +GeoNames and filters to cities with population ≥ 50,000 and feature codes in +{PPLA, PPLA2, PPLC, PPL} (populated places, avoiding parks, airports, etc.). 
+ +Used as the global fallback for population when Eurostat/Census/ONS don't cover +a country. Padel is expanding globally so this catches UAE, Australia, Argentina, etc. + +Requires: GEONAMES_USERNAME env var (free registration at geonames.org) + +Landing: {LANDING_DIR}/geonames/{year}/{month}/cities_global.json.gz +Output: {"rows": [{"geoname_id": 2950159, "city_name": "Berlin", + "country_code": "DE", "population": 3644826, + "ref_year": 2024}], "count": N} +""" + +import io +import json +import os +import sqlite3 +import zipfile +from pathlib import Path + +import niquests + +from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging +from .utils import get_last_cursor, landing_path, write_gzip_atomic + +logger = setup_logging("padelnomics.extract.geonames") + +EXTRACTOR_NAME = "geonames" + +DOWNLOAD_URL = "https://download.geonames.org/export/dump/cities15000.zip" + +# Only populated place feature codes — excludes airports, parks, admin areas, etc. +# PPLC = capital of a political entity +# PPLA = seat of a first-order administrative division +# PPLA2 = seat of a second-order admin division +# PPL = populated place +VALID_FEATURE_CODES = {"PPLC", "PPLA", "PPLA2", "PPL"} + +MIN_POPULATION = 50_000 + +# GeoNames tab-separated column layout for cities15000.txt +# https://download.geonames.org/export/dump/readme.txt +COL_GEONAME_ID = 0 +COL_NAME = 1 +COL_ASCIINAME = 2 +COL_COUNTRY_CODE = 8 +COL_FEATURE_CODE = 7 +COL_POPULATION = 14 +COL_MODIFICATION_DATE = 18 + +# Approximate year of last data update (GeoNames doesn't provide a precise vintage) +REF_YEAR = 2024 + + +def _parse_cities_txt(content: bytes) -> list[dict]: + """Parse GeoNames cities TSV into filtered rows.""" + rows: list[dict] = [] + for line in content.decode("utf-8").splitlines(): + if not line.strip(): + continue + parts = line.split("\t") + if len(parts) < 15: + continue + feature_code = parts[COL_FEATURE_CODE].strip() + if feature_code not in VALID_FEATURE_CODES: + continue + try: + 
population = int(parts[COL_POPULATION]) + except (ValueError, IndexError): + continue + if population < MIN_POPULATION: + continue + geoname_id_str = parts[COL_GEONAME_ID].strip() + try: + geoname_id = int(geoname_id_str) + except ValueError: + continue + # Prefer ASCII name for matching (avoids diacritic mismatch); fall back to name + ascii_name = parts[COL_ASCIINAME].strip() + name = parts[COL_NAME].strip() + city_name = ascii_name if ascii_name else name + country_code = parts[COL_COUNTRY_CODE].strip().upper() + if not city_name or not country_code: + continue + rows.append({ + "geoname_id": geoname_id, + "city_name": city_name, + "country_code": country_code, + "population": population, + "ref_year": REF_YEAR, + }) + return rows + + +def extract( + landing_dir: Path, + year_month: str, + conn: sqlite3.Connection, + session: niquests.Session, +) -> dict: + """Download GeoNames cities15000.zip. Skips if already run this month.""" + username = os.environ.get("GEONAMES_USERNAME", "").strip() + if not username: + logger.warning("GEONAMES_USERNAME not set — skipping GeoNames extract") + return {"files_written": 0, "files_skipped": 1, "bytes_written": 0} + + last_cursor = get_last_cursor(conn, EXTRACTOR_NAME) + if last_cursor == year_month: + logger.info("already have data for %s — skipping", year_month) + return {"files_written": 0, "files_skipped": 1, "bytes_written": 0} + + year, month = year_month.split("/") + + # GeoNames bulk downloads don't require the username in the URL for cities15000.zip, + # but the username signals acceptance of their terms of use and helps their monitoring. 
+ url = f"{DOWNLOAD_URL}?username={username}" + logger.info("GET cities15000.zip (~1.5MB compressed)") + resp = session.get(url, timeout=HTTP_TIMEOUT_SECONDS * 4) + resp.raise_for_status() + + assert len(resp.content) > 100_000, ( + f"cities15000.zip too small ({len(resp.content)} bytes) — download may have failed" + ) + + with zipfile.ZipFile(io.BytesIO(resp.content)) as zf: + txt_name = next((n for n in zf.namelist() if n.endswith(".txt")), None) + assert txt_name, f"No .txt file in cities15000.zip: {zf.namelist()}" + txt_content = zf.read(txt_name) + + rows = _parse_cities_txt(txt_content) + assert len(rows) > 5_000, f"Expected >5000 global cities ≥50K pop, got {len(rows)}" + logger.info("parsed %d global cities with population ≥%d", len(rows), MIN_POPULATION) + + dest_dir = landing_path(landing_dir, "geonames", year, month) + dest = dest_dir / "cities_global.json.gz" + payload = json.dumps({"rows": rows, "count": len(rows)}).encode() + bytes_written = write_gzip_atomic(dest, payload) + logger.info("written %s bytes compressed", f"{bytes_written:,}") + + return { + "files_written": 1, + "files_skipped": 0, + "bytes_written": bytes_written, + "cursor_value": year_month, + } + + +def main() -> None: + run_extractor(EXTRACTOR_NAME, extract) + + +if __name__ == "__main__": + main() diff --git a/extract/padelnomics_extract/src/padelnomics_extract/ons_uk.py b/extract/padelnomics_extract/src/padelnomics_extract/ons_uk.py new file mode 100644 index 0000000..5e49783 --- /dev/null +++ b/extract/padelnomics_extract/src/padelnomics_extract/ons_uk.py @@ -0,0 +1,153 @@ +"""ONS (Office for National Statistics) UK population extractor. + +Fetches 2021 Census population by Local Authority District (LAD) from the ONS +beta API. No authentication required. 
+ +Landing: {LANDING_DIR}/ons_uk/{year}/{month}/lad_population.json.gz +Output: {"rows": [{"lad_code": "E08000003", "lad_name": "Manchester", + "population": 553230, "ref_year": 2021, + "country_code": "GB"}], "count": N} +""" + +import json +import sqlite3 +from pathlib import Path + +import niquests + +from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging +from .utils import get_last_cursor, landing_path, write_gzip_atomic + +logger = setup_logging("padelnomics.extract.ons_uk") + +EXTRACTOR_NAME = "ons_uk" + +# ONS beta API — 2021 Census population estimates by Local Authority District. +# TS007A = "Age by single year" dataset; aggregate gives total population per LAD. +# We use the observations endpoint which returns flat rows. +# limit=500 covers all ~380 LADs in England, Wales, Scotland, and Northern Ireland. +ONS_BASE_URL = ( + "https://api.beta.ons.gov.uk/v1/datasets/TS007A/editions/2021/versions/1" +) + +REF_YEAR = 2021 +MIN_POPULATION = 50_000 +# ONS rate limit is 120 requests per 10 seconds; a single paginated call is fine. 
+PAGE_SIZE = 500 +MAX_PAGES = 10 # safety bound; all LADs fit in page 1 at limit=500 + + +def _fetch_all_observations(session: niquests.Session) -> list[dict]: + """Fetch all LAD population rows, paginating if needed.""" + rows: list[dict] = [] + offset = 0 + + for page in range(MAX_PAGES): + url = f"{ONS_BASE_URL}/observations?geography=*&age=0&limit={PAGE_SIZE}&offset={offset}" + resp = session.get(url, timeout=HTTP_TIMEOUT_SECONDS) + resp.raise_for_status() + data = resp.json() + + observations = data.get("observations", []) + if not observations: + break + + for obs in observations: + # Each observation: {dimensions: [{id: "geography", option: {id: "E08000003", label: "Manchester"}}...], observation: "553230"} + geo_dim = next( + (d for d in obs.get("dimensions", []) if d.get("dimension_id") == "geography"), + None, + ) + if not geo_dim: + continue + lad_code = geo_dim.get("option", {}).get("id", "").strip() + lad_name = geo_dim.get("option", {}).get("label", "").strip() + if not lad_code or not lad_name: + continue + try: + population = int(obs.get("observation", "0").replace(",", "")) + except (ValueError, TypeError): + continue + rows.append({ + "lad_code": lad_code, + "lad_name": lad_name, + "population": population, + }) + + total = data.get("total_observations", len(rows)) + offset += len(observations) + if offset >= total: + break + + logger.info("fetched page %d (%d rows so far)", page + 1, len(rows)) + + return rows + + +def _aggregate_by_lad(raw_rows: list[dict]) -> list[dict]: + """Sum population across all age groups per LAD. + + TS007A breaks population down by single year of age, so we need to aggregate. 
+ """ + totals: dict[str, dict] = {} + for row in raw_rows: + key = row["lad_code"] + if key not in totals: + totals[key] = {"lad_code": row["lad_code"], "lad_name": row["lad_name"], "population": 0} + totals[key]["population"] += row["population"] + return list(totals.values()) + + +def extract( + landing_dir: Path, + year_month: str, + conn: sqlite3.Connection, + session: niquests.Session, +) -> dict: + """Fetch ONS LAD population. Skips if already run this month.""" + last_cursor = get_last_cursor(conn, EXTRACTOR_NAME) + if last_cursor == year_month: + logger.info("already have data for %s — skipping", year_month) + return {"files_written": 0, "files_skipped": 1, "bytes_written": 0} + + year, month = year_month.split("/") + + logger.info("GET ONS TS007A LAD population (2021 Census)") + raw_rows = _fetch_all_observations(session) + lad_rows = _aggregate_by_lad(raw_rows) + + filtered = [ + { + "lad_code": r["lad_code"], + "lad_name": r["lad_name"], + "population": r["population"], + "ref_year": REF_YEAR, + "country_code": "GB", + } + for r in lad_rows + if r["population"] >= MIN_POPULATION + ] + + assert len(filtered) > 50, f"Expected >50 UK LADs ≥50K pop, got {len(filtered)}" + logger.info("parsed %d UK LADs with population ≥%d", len(filtered), MIN_POPULATION) + + dest_dir = landing_path(landing_dir, "ons_uk", year, month) + dest = dest_dir / "lad_population.json.gz" + payload = json.dumps({"rows": filtered, "count": len(filtered)}).encode() + bytes_written = write_gzip_atomic(dest, payload) + logger.info("written %s bytes compressed", f"{bytes_written:,}") + + return { + "files_written": 1, + "files_skipped": 0, + "bytes_written": bytes_written, + "cursor_value": year_month, + } + + +def main() -> None: + run_extractor(EXTRACTOR_NAME, extract) + + +if __name__ == "__main__": + main() diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql index 8f28aa4..01be95b 100644 --- 
a/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql +++ b/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql @@ -3,14 +3,17 @@ -- tracks cities where padel venues actually exist, not an administrative city list. -- -- Conformed dimension: used by city_market_profile and all pSEO serving models. --- Integrates two sources: --- dim_venues → city list, venue count, coordinates (Playtomic + OSM) --- stg_income → country-level median income (Eurostat) +-- Integrates seven sources: +-- dim_venues → city list, venue count, coordinates (Playtomic + OSM) +-- stg_income → country-level median income (Eurostat) +-- stg_city_labels → Eurostat city_code → city_name mapping (EU cities) +-- stg_population → Eurostat city-level population (EU, joined via city code) +-- stg_population_usa → US Census ACS place population +-- stg_population_uk → ONS LAD population +-- stg_population_geonames → GeoNames global fallback -- --- Population note: Eurostat uses coded identifiers (e.g. DE001C = Berlin) with no --- city name column in the dataset we extract. City-level population requires a --- separate code→name lookup extract (future improvement). Population is set to 0 --- until that source is available; market_score degrades gracefully. +-- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames > 0. +-- City name matching is case/whitespace-insensitive within each country. -- -- Grain: (country_code, city_slug) — two cities in different countries can share a -- city name. QUALIFY enforces no duplicate (country_code, city_slug) pairs. @@ -42,6 +45,39 @@ country_income AS ( SELECT country_code, median_income_pps, ref_year AS income_year FROM staging.stg_income QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1 +), +-- Eurostat EU population: join city labels (code→name) with population values. +-- QUALIFY keeps only the most recent year per (country, city name).
+eurostat_pop AS ( + SELECT + cl.country_code, + cl.city_name, + p.population, + p.ref_year + FROM staging.stg_city_labels cl + JOIN staging.stg_population p ON cl.city_code = p.city_code + QUALIFY ROW_NUMBER() OVER ( + PARTITION BY cl.country_code, cl.city_name + ORDER BY p.ref_year DESC + ) = 1 +), +-- US Census ACS population (place-level, filtered to ≥50K) +us_pop AS ( + SELECT city_name, country_code, population, ref_year + FROM staging.stg_population_usa + QUALIFY ROW_NUMBER() OVER (PARTITION BY place_fips ORDER BY ref_year DESC) = 1 +), +-- ONS UK Local Authority District population +uk_pop AS ( + SELECT lad_name AS city_name, country_code, population, ref_year + FROM staging.stg_population_uk + QUALIFY ROW_NUMBER() OVER (PARTITION BY lad_code ORDER BY ref_year DESC) = 1 +), +-- GeoNames global fallback (all cities ≥50K) +geonames_pop AS ( + SELECT city_name, country_code, population, ref_year + FROM staging.stg_population_geonames + QUALIFY ROW_NUMBER() OVER (PARTITION BY geoname_id ORDER BY ref_year DESC) = 1 ) SELECT vc.country_code, @@ -99,15 +135,43 @@ SELECT )) AS country_slug, vc.centroid_lat AS lat, vc.centroid_lon AS lon, - -- Population: requires code→name Eurostat lookup (not yet extracted); defaults to 0. - -- market_score uses LOG(GREATEST(population, 1)) so 0 degrades score gracefully. - 0::BIGINT AS population, - 0::INTEGER AS population_year, + -- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames > 0. + -- City name match is case/whitespace-insensitive within each country. 
+ COALESCE( + ep.population, + usa.population, + uk.population, + gn.population, + 0 + )::BIGINT AS population, + COALESCE( + ep.ref_year, + usa.ref_year, + uk.ref_year, + gn.ref_year, + 0 + )::INTEGER AS population_year, vc.padel_venue_count, ci.median_income_pps, ci.income_year FROM venue_cities vc LEFT JOIN country_income ci ON vc.country_code = ci.country_code +-- Eurostat EU population (via city code→name lookup) +LEFT JOIN eurostat_pop ep + ON vc.country_code = ep.country_code + AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(ep.city_name)) +-- US Census population +LEFT JOIN us_pop usa + ON vc.country_code = usa.country_code + AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(usa.city_name)) +-- ONS UK population +LEFT JOIN uk_pop uk + ON vc.country_code = uk.country_code + AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(uk.city_name)) +-- GeoNames global fallback +LEFT JOIN geonames_pop gn + ON vc.country_code = gn.country_code + AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(gn.city_name)) -- Enforce grain: if two cities in the same country have the same slug -- (e.g. 'São Paulo' and 'Sao Paulo'), keep the one with more venues QUALIFY ROW_NUMBER() OVER ( diff --git a/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql b/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql index c8bc24d..338d18c 100644 --- a/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql +++ b/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql @@ -1,10 +1,11 @@ -- One Big Table: per-city padel market intelligence. -- Consumed by: SEO article generation, planner city-select pre-fill, API endpoints. 
-- --- Market score (0–100) is a simple composite: --- 40% population (log-scaled, city > 500K = max) --- 40% venue density (courts per 100K residents) --- 20% data confidence (completeness of both population + venue data) +-- Market score v2 (0–100): +-- 30 pts population — log-scaled to 1M+ city ceiling (was 40pts/500K) +-- 25 pts income PPS — normalised to 200 ceiling (covers CH/NO/LU outliers) +-- 30 pts demand — observed occupancy if available, else venue density +-- 15 pts data quality — completeness discount, not a market signal MODEL ( name serving.city_market_profile, @@ -37,19 +38,41 @@ WITH base AS ( WHEN c.population > 0 AND c.padel_venue_count > 0 THEN 1.0 WHEN c.population > 0 OR c.padel_venue_count > 0 THEN 0.5 ELSE 0.0 - END AS data_confidence + END AS data_confidence, + -- Pricing / occupancy from Playtomic (NULL when no availability data) + vpb.median_hourly_rate, + vpb.median_peak_rate, + vpb.median_offpeak_rate, + vpb.median_occupancy_rate, + vpb.median_daily_revenue_per_venue, + vpb.price_currency FROM foundation.dim_cities c + LEFT JOIN serving.venue_pricing_benchmarks vpb + ON c.country_code = vpb.country_code + AND LOWER(TRIM(c.city_name)) = LOWER(TRIM(vpb.city)) WHERE c.padel_venue_count > 0 ), scored AS ( SELECT *, ROUND( - -- Population component (log scale, 500K+ city → 40 pts) - 40.0 * LEAST(1.0, LN(GREATEST(population, 1)) / LN(500000)) - -- Density component (5 courts/100K → 40 pts) - + 40.0 * LEAST(1.0, COALESCE(venues_per_100k, 0) / 5.0) - -- Confidence component - + 20.0 * data_confidence + -- Population (30 pts): log-scale, 1M+ city = full marks. + -- LN(1) = 0 so unpopulated cities score 0 here — they still score on demand. + 30.0 * LEAST(1.0, LN(GREATEST(population, 1)) / LN(1000000)) + -- Economic power (25 pts): income PPS normalised to 200 ceiling. + -- 200 covers high-income outliers (CH ~190, NO ~180, LU ~200+). + -- Drives pricing power and willingness-to-pay directly. 
+ + 25.0 * LEAST(1.0, COALESCE(median_income_pps, 100) / 200.0) + -- Demand evidence (30 pts): observed occupancy is the best signal + -- (proves real demand). If unavailable, venue density is the proxy + -- (proves market exists; caps at 4/100K to avoid penalising dense cities). + + 30.0 * CASE + WHEN median_occupancy_rate IS NOT NULL + THEN LEAST(1.0, median_occupancy_rate / 0.65) + ELSE LEAST(1.0, COALESCE(venues_per_100k, 0) / 4.0) + END + -- Data quality (15 pts): measures completeness, not market quality. + -- Reduced from 20pts — kept as confidence discount, not market signal. + + 15.0 * data_confidence , 1) AS market_score FROM base ) @@ -69,16 +92,12 @@ SELECT s.market_score, s.median_income_pps, s.income_year, - -- Playtomic pricing/occupancy (NULL when no availability data) - vpb.median_hourly_rate, - vpb.median_peak_rate, - vpb.median_offpeak_rate, - vpb.median_occupancy_rate, - vpb.median_daily_revenue_per_venue, - vpb.price_currency, + s.median_hourly_rate, + s.median_peak_rate, + s.median_offpeak_rate, + s.median_occupancy_rate, + s.median_daily_revenue_per_venue, + s.price_currency, CURRENT_DATE AS refreshed_date FROM scored s -LEFT JOIN serving.venue_pricing_benchmarks vpb - ON s.country_code = vpb.country_code - AND LOWER(TRIM(s.city_name)) = LOWER(TRIM(vpb.city)) ORDER BY s.market_score DESC diff --git a/transform/sqlmesh_padelnomics/models/staging/stg_city_labels.sql b/transform/sqlmesh_padelnomics/models/staging/stg_city_labels.sql new file mode 100644 index 0000000..98d7c26 --- /dev/null +++ b/transform/sqlmesh_padelnomics/models/staging/stg_city_labels.sql @@ -0,0 +1,31 @@ +-- Eurostat SDMX city codelist: city_code → city_name mapping. +-- Maps coded identifiers (e.g. DE001C) to human-readable names (e.g. Berlin). +-- This is the bridge table that lets stg_population join to dim_cities. 
+-- +-- Source: data/landing/eurostat_city_labels/{year}/{month}/cities_codelist.json.gz + +MODEL ( + name staging.stg_city_labels, + kind FULL, + cron '@daily', + grain city_code +); + +WITH raw AS ( + SELECT unnest(rows) AS r + FROM read_json( + @LANDING_DIR || '/eurostat_city_labels/*/*/cities_codelist.json.gz', + auto_detect = true + ) +) +SELECT + UPPER(TRIM(r ->> 'city_code')) AS city_code, + TRIM(r ->> 'city_name') AS city_name, + -- Country code is always the first two letters of the city code (e.g. DE001C → DE) + UPPER(LEFT(TRIM(r ->> 'city_code'), 2)) AS country_code, + CURRENT_DATE AS extracted_date +FROM raw +WHERE (r ->> 'city_code') IS NOT NULL + AND (r ->> 'city_name') IS NOT NULL + AND LENGTH(TRIM(r ->> 'city_code')) > 0 + AND LENGTH(TRIM(r ->> 'city_name')) > 0 diff --git a/transform/sqlmesh_padelnomics/models/staging/stg_population_geonames.sql b/transform/sqlmesh_padelnomics/models/staging/stg_population_geonames.sql new file mode 100644 index 0000000..cad9eca --- /dev/null +++ b/transform/sqlmesh_padelnomics/models/staging/stg_population_geonames.sql @@ -0,0 +1,42 @@ +-- GeoNames global city population (cities15000 bulk dataset, filtered to ≥50K). +-- Global fallback for countries not covered by Eurostat, Census, or ONS. +-- One row per geoname_id (GeoNames stable numeric identifier). 
+-- +-- Source: data/landing/geonames/{year}/{month}/cities_global.json.gz + +MODEL ( + name staging.stg_population_geonames, + kind FULL, + cron '@daily', + grain geoname_id +); + +WITH parsed AS ( + SELECT + TRY_CAST(row ->> 'geoname_id' AS INTEGER) AS geoname_id, + row ->> 'city_name' AS city_name, + row ->> 'country_code' AS country_code, + TRY_CAST(row ->> 'population' AS BIGINT) AS population, + TRY_CAST(row ->> 'ref_year' AS INTEGER) AS ref_year, + CURRENT_DATE AS extracted_date + FROM ( + SELECT UNNEST(rows) AS row + FROM read_json( + @LANDING_DIR || '/geonames/*/*/cities_global.json.gz', + auto_detect = true + ) + ) + WHERE (row ->> 'geoname_id') IS NOT NULL +) +SELECT + geoname_id, + TRIM(city_name) AS city_name, + UPPER(country_code) AS country_code, + population, + ref_year, + extracted_date +FROM parsed +WHERE population IS NOT NULL + AND population > 0 + AND geoname_id IS NOT NULL + AND city_name IS NOT NULL diff --git a/transform/sqlmesh_padelnomics/models/staging/stg_population_uk.sql b/transform/sqlmesh_padelnomics/models/staging/stg_population_uk.sql new file mode 100644 index 0000000..2d8aad6 --- /dev/null +++ b/transform/sqlmesh_padelnomics/models/staging/stg_population_uk.sql @@ -0,0 +1,41 @@ +-- ONS 2021 Census population by Local Authority District (LAD). +-- Reads pre-processed landing zone JSON from ons_uk extractor. +-- One row per (lad_code, ref_year) — LAD code is the ONS area identifier. 
+-- +-- Source: data/landing/ons_uk/{year}/{month}/lad_population.json.gz + +MODEL ( + name staging.stg_population_uk, + kind FULL, + cron '@daily', + grain (lad_code, ref_year) +); + +WITH parsed AS ( + SELECT + row ->> 'lad_code' AS lad_code, + row ->> 'lad_name' AS lad_name, + TRY_CAST(row ->> 'population' AS BIGINT) AS population, + TRY_CAST(row ->> 'ref_year' AS INTEGER) AS ref_year, + row ->> 'country_code' AS country_code, + CURRENT_DATE AS extracted_date + FROM ( + SELECT UNNEST(rows) AS row + FROM read_json( + @LANDING_DIR || '/ons_uk/*/*/lad_population.json.gz', + auto_detect = true + ) + ) + WHERE (row ->> 'lad_code') IS NOT NULL +) +SELECT + UPPER(TRIM(lad_code)) AS lad_code, + TRIM(lad_name) AS lad_name, + population, + ref_year, + UPPER(country_code) AS country_code, + extracted_date +FROM parsed +WHERE population IS NOT NULL + AND population > 0 + AND lad_code IS NOT NULL diff --git a/transform/sqlmesh_padelnomics/models/staging/stg_population_usa.sql b/transform/sqlmesh_padelnomics/models/staging/stg_population_usa.sql new file mode 100644 index 0000000..c97c9a7 --- /dev/null +++ b/transform/sqlmesh_padelnomics/models/staging/stg_population_usa.sql @@ -0,0 +1,43 @@ +-- US Census ACS 5-year place-level population. +-- Reads pre-processed landing zone JSON from census_usa extractor. +-- One row per (place_fips, ref_year) — surrogate key is the Census FIPS code. 
+-- +-- Source: data/landing/census_usa/{year}/{month}/acs5_places.json.gz + +MODEL ( + name staging.stg_population_usa, + kind FULL, + cron '@daily', + grain (place_fips, ref_year) +); + +WITH parsed AS ( + SELECT + row ->> 'city_name' AS city_name, + row ->> 'state_fips' AS state_fips, + row ->> 'place_fips' AS place_fips, + TRY_CAST(row ->> 'population' AS BIGINT) AS population, + TRY_CAST(row ->> 'ref_year' AS INTEGER) AS ref_year, + row ->> 'country_code' AS country_code, + CURRENT_DATE AS extracted_date + FROM ( + SELECT UNNEST(rows) AS row + FROM read_json( + @LANDING_DIR || '/census_usa/*/*/acs5_places.json.gz', + auto_detect = true + ) + ) + WHERE (row ->> 'place_fips') IS NOT NULL +) +SELECT + TRIM(city_name) AS city_name, + state_fips, + place_fips, + population, + ref_year, + UPPER(country_code) AS country_code, + extracted_date +FROM parsed +WHERE population IS NOT NULL + AND population > 0 + AND place_fips IS NOT NULL diff --git a/web/src/padelnomics/admin/templates/admin/template_detail.html b/web/src/padelnomics/admin/templates/admin/template_detail.html index 1d46268..e053c34 100644 --- a/web/src/padelnomics/admin/templates/admin/template_detail.html +++ b/web/src/padelnomics/admin/templates/admin/template_detail.html @@ -81,24 +81,24 @@ + {% for col in columns %} {% endfor %} - {% for row in sample_rows %} + {% for col in columns %} {% endfor %} - {% endfor %} diff --git a/web/src/padelnomics/content/__init__.py b/web/src/padelnomics/content/__init__.py index b975302..548c17e 100644 --- a/web/src/padelnomics/content/__init__.py +++ b/web/src/padelnomics/content/__init__.py @@ -133,10 +133,22 @@ def _validate_table_name(data_table: str) -> None: # ── Rendering helpers ──────────────────────────────────────────────────────── +def _datetimeformat(value: str, fmt: str = "%Y-%m-%d") -> str: + """Jinja2 filter: format a date string (or 'now') with strftime.""" + from datetime import datetime, UTC + + if value == "now": + dt = datetime.now(UTC) + else: + 
dt = datetime.fromisoformat(value) + return dt.strftime(fmt) + + def _render_pattern(pattern: str, context: dict) -> str: """Render a Jinja2 pattern string with context variables.""" env = Environment() env.filters["slugify"] = slugify + env.filters["datetimeformat"] = _datetimeformat return env.from_string(pattern).render(**context) diff --git a/web/src/padelnomics/planner/calculator.py b/web/src/padelnomics/planner/calculator.py index 96e75c2..3608268 100644 --- a/web/src/padelnomics/planner/calculator.py +++ b/web/src/padelnomics/planner/calculator.py @@ -85,6 +85,9 @@ DEFAULTS = { "holdYears": 5, "exitMultiple": 6, "annualRevGrowth": 2, + "annualOpexGrowth": 2, + "hurdleRate": 12, + "interestOnlyMonths": 0, "budgetTarget": 0, "country": "DE", "permitsCompliance": 12000, @@ -336,6 +339,9 @@ def calc(s: dict, lang: str = "en") -> dict: d["netCFMonth"] = d["ebitdaMonth"] - d["monthlyPayment"] # -- 60-month cash flow projection -- + # Fix 1: annualRevGrowth applied to all revenue streams. + # Fix 8: annualOpexGrowth applied to all operating costs (utilities, staff, insurance inflate). + # Fix 10: interest-only period — first N months pay only interest, not P+I. 
months: list[dict] = [] for m in range(1, 61): cm = (m - 1) % 12 @@ -345,19 +351,32 @@ def calc(s: dict, lang: str = "en") -> dict: eff_util = (s["utilTarget"] / 100) * ramp * seas avail = s["hoursPerDay"] * dpm * total_courts if seas > 0 else 0 booked = avail * eff_util - court_rev = booked * w_rate + + # Revenue growth compounds from Year 2 onwards (Year 1 = base) + rev_growth = math.pow(1 + s["annualRevGrowth"] / 100, max(0, yr - 1)) + court_rev = booked * w_rate * rev_growth fees = -court_rev * (s["bookingFee"] / 100) ancillary = booked * ( (s["racketRentalRate"] / 100) * s["racketQty"] * s["racketPrice"] + (s["ballRate"] / 100) * (s["ballPrice"] - s["ballCost"]) - ) - membership = total_courts * s["membershipRevPerCourt"] * (ramp if seas > 0 else 0) - fb = total_courts * s["fbRevPerCourt"] * (ramp if seas > 0 else 0) - coaching = total_courts * s["coachingRevPerCourt"] * (ramp if seas > 0 else 0) - retail = total_courts * s["retailRevPerCourt"] * (ramp if seas > 0 else 0) + ) * rev_growth + membership = total_courts * s["membershipRevPerCourt"] * (ramp if seas > 0 else 0) * rev_growth + fb = total_courts * s["fbRevPerCourt"] * (ramp if seas > 0 else 0) * rev_growth + coaching = total_courts * s["coachingRevPerCourt"] * (ramp if seas > 0 else 0) * rev_growth + retail = total_courts * s["retailRevPerCourt"] * (ramp if seas > 0 else 0) * rev_growth total_rev = court_rev + fees + ancillary + membership + fb + coaching + retail - opex_val = -d["opex"] - loan = -d["monthlyPayment"] + + # OPEX inflates from Year 2 onwards (utilities, staff, insurance) + opex_growth = math.pow(1 + s["annualOpexGrowth"] / 100, max(0, yr - 1)) + opex_val = -(d["opex"] * opex_growth) + + # Fix 10: interest-only period — lower debt service during construction/ramp + if m <= s["interestOnlyMonths"] and d["loanAmount"] > 0: + # Interest-only payment: loan balance × monthly rate + loan = -(d["loanAmount"] * s["interestRate"] / 100 / 12) + else: + loan = -d["monthlyPayment"] + ebitda = 
total_rev + opex_val ncf = ebitda + loan prev = months[-1] if months else None @@ -387,29 +406,95 @@ def calc(s: dict, lang: str = "en") -> dict: d["annuals"] = annuals # -- Returns & exit -- - y3_ebitda = annuals[2]["ebitda"] if len(annuals) >= 3 else 0 - d["stabEbitda"] = y3_ebitda - d["exitValue"] = y3_ebitda * s["exitMultiple"] - d["remainingLoan"] = d["loanAmount"] * max(0, 1 - s["holdYears"] / (max(s["loanTerm"], 1) * 1.5)) + # Fix 5: use terminal year EBITDA (exit year), not hardcoded Year 3 + exit_yr_idx = min(s["holdYears"] - 1, len(annuals) - 1) + d["stabEbitda"] = annuals[exit_yr_idx]["ebitda"] + d["exitValue"] = d["stabEbitda"] * s["exitMultiple"] + + # Fix 4: remaining loan via amortization math (PV of remaining payments), + # replacing the heuristic loanAmount * max(0, 1 - holdYears / (loanTerm * 1.5)) + k = s["holdYears"] * 12 # number of P+I payments made (after interest-only period) + n = max(s["loanTerm"], 1) * 12 + r_monthly_loan = s["interestRate"] / 100 / 12 + if r_monthly_loan > 0 and d["loanAmount"] > 0 and n > k: + d["remainingLoan"] = _round( + d["monthlyPayment"] * (1 - math.pow(1 + r_monthly_loan, -(n - k))) / r_monthly_loan + ) + elif d["loanAmount"] > 0 and n > k: + # Zero-interest loan: straight-line amortization + d["remainingLoan"] = _round(d["loanAmount"] * (n - k) / n) + else: + d["remainingLoan"] = 0 + d["netExit"] = d["exitValue"] - d["remainingLoan"] - irr_cfs = [-d["capex"]] + # Fix 2: equity IRR — use equity invested as initial outflow (not full capex). + # NCFs are already post-debt-service (levered), so the denominator must match. + # Using capex here would produce a hybrid metric that's neither equity IRR + # nor project IRR — it systematically understates returns for leveraged deals. 
+ irr_cfs = [-d["equity"]] for y in range(s["holdYears"]): ycf = annuals[y]["ncf"] if y < len(annuals) else (annuals[-1]["ncf"] if annuals else 0) if y == s["holdYears"] - 1: irr_cfs.append(ycf + d["netExit"]) else: irr_cfs.append(ycf) - d["irr"] = calc_irr(irr_cfs) + + # Project IRR (unlevered): uses full capex as outflow and EBITDA as cash flows. + # Useful for lender analysis and comparing across capital structures. + unlevered_cfs = [-d["capex"]] + for y in range(s["holdYears"]): + ya = annuals[y] if y < len(annuals) else annuals[-1] + if y == s["holdYears"] - 1: + unlevered_cfs.append(ya["ebitda"] + d["netExit"]) + else: + unlevered_cfs.append(ya["ebitda"]) + d["projectIrr"] = calc_irr(unlevered_cfs) + + # Fix 3: NPV at hurdle rate (discounts equity NCFs + exit at hurdleRate) + r_hurdle_monthly = math.pow(1 + s["hurdleRate"] / 100, 1 / 12) - 1 + pv_ncf = sum(m["ncf"] / math.pow(1 + r_hurdle_monthly, m["m"]) for m in months) + pv_exit = d["netExit"] / math.pow(1 + s["hurdleRate"] / 100, s["holdYears"]) + d["npv"] = _round(-d["equity"] + pv_ncf + pv_exit) + d["npvPositive"] = d["npv"] >= 0 + d["totalReturned"] = sum(irr_cfs[1:]) - d["moic"] = d["totalReturned"] / d["capex"] if d["capex"] > 0 else 0 + + # Fix 6: leveraged MOIC (equity cash flows / equity invested — what the investor earns). + # Also keep project MOIC (total returns / capex) for reference. + equity_cfs = irr_cfs[1:] + d["moic"] = sum(equity_cfs) / d["equity"] if d["equity"] > 0 else 0 + d["projectMoic"] = d["totalReturned"] / d["capex"] if d["capex"] > 0 else 0 + + # Fix 7: return decomposition / value bridge (PE-style attribution). + # Shows what drove equity returns: operational improvement vs. financial leverage. 
+ entry_ebitda = annuals[0]["ebitda"] if annuals else 0 + ebitda_growth_value = (d["stabEbitda"] - entry_ebitda) * s["exitMultiple"] + deleverage_value = d["loanAmount"] - d["remainingLoan"] + d["valueDrivers"] = { + "ebitda_growth": _round(ebitda_growth_value), + "deleverage": _round(deleverage_value), + "entry_equity": d["equity"], + "exit_equity": _round(d["netExit"]), + } d["dscr"] = [ {"year": a["year"], "dscr": a["ebitda"] / a["ds"] if a["ds"] > 0 else 999} for a in annuals ] + # Fix 9: LTV and DSCR warnings for lender compliance thresholds + d["ltvWarning"] = d["ltv"] > 0.75 # above typical commercial RE lending limit + d["dscrWarning"] = any(row["dscr"] < 1.25 for row in d["dscr"] if row["dscr"] < 999) + d["dscrMinYear"] = None + if d["dscrWarning"]: + d["dscrMinYear"] = min( + (row["year"] for row in d["dscr"] if row["dscr"] < 999), + key=lambda yr: next(r["dscr"] for r in d["dscr"] if r["year"] == yr), + default=None, + ) + payback_idx = -1 for i, m in enumerate(months): if m["cum"] >= 0: diff --git a/web/src/padelnomics/planner/templates/partials/tab_metrics.html b/web/src/padelnomics/planner/templates/partials/tab_metrics.html index ef3749e..6d7f995 100644 --- a/web/src/padelnomics/planner/templates/partials/tab_metrics.html +++ b/web/src/padelnomics/planner/templates/partials/tab_metrics.html @@ -61,11 +61,17 @@
DSCR (Y3) i{{ t.tip_result_dscr }}
{{ '∞' if y3_dscr > 99 else y3_dscr | fmt_x }}
Min 1.2x for banks
+ {% if d.dscrWarning %} +
⚠ DSCR < 1.25x in Y{{ d.dscrMinYear }} — bank covenant breach risk
+ {% endif %}
LTV
-
{{ d.ltv | fmt_pct }}
+
{{ d.ltv | fmt_pct }}
Loan ÷ Total Investment
+ {% if d.ltvWarning %} +
⚠ LTV > 75% — above typical commercial lending limit
+ {% endif %}
Debt Yield i{{ t.tip_result_debt_yield }}
@@ -101,7 +107,7 @@
Exit Value
{{ d.exitValue | fmt_k }}
-
{{ s.exitMultiple }}x Y3 EBITDA
+
{{ s.exitMultiple }}x Y{{ s.holdYears }} EBITDA
diff --git a/web/src/padelnomics/planner/templates/partials/tab_returns.html b/web/src/padelnomics/planner/templates/partials/tab_returns.html index 409ca30..f5c4e39 100644 --- a/web/src/padelnomics/planner/templates/partials/tab_returns.html +++ b/web/src/padelnomics/planner/templates/partials/tab_returns.html @@ -1,24 +1,24 @@
-
{{ t.card_irr }} i{{ t.tip_result_irr }}
+
{{ t.card_irr }} (Equity) i{{ t.tip_result_irr }}
{{ d.irr | fmt_pct if d.irr_ok else 'N/A' }}
{{ '✓ Above 20%' if d.irr_ok and d.irr > 0.2 else '✗ Below target' }}
-
{{ t.card_moic }} i{{ t.tip_result_moic }}
+
{{ t.card_moic }} (Equity) i{{ t.tip_result_moic }}
{{ d.moic | fmt_x }}
{{ '✓ Above 2.0x' if d.moic > 2 else '✗ Below 2.0x' }}
+
+
NPV iAt {{ s.hurdleRate }}% hurdle rate
+
{{ d.npv | fmt_k }}
+
{{ '✓ Value-creating' if d.npvPositive else '✗ Destroys value' }} at {{ s.hurdleRate }}%
+
{{ t.card_break_even }} i{{ t.tip_result_break_even }}
{{ d.breakEvenUtil | fmt_pct }}
{{ d.breakEvenHrsPerCourt | round(1) }} hrs/court/day
-
-
{{ t.card_cash_on_cash }} i{{ t.tip_result_coc }}
-
{{ d.cashOnCash | fmt_pct }}
-
Year 3 NCF ÷ Equity
-
@@ -33,8 +33,9 @@ (t.wf_net_exit, d.netExit | int | fmt_currency, 'c-green' if d.netExit > 0 else 'c-red'), (t.wf_cum_cf, (d.totalReturned - d.netExit) | int | fmt_currency, 'c-head'), (t.wf_total_returns, d.totalReturned | int | fmt_currency, 'c-green' if d.totalReturned > 0 else 'c-red'), - (t.wf_investment, d.capex | fmt_currency, 'c-head'), - (t.wf_moic, d.moic | fmt_x, 'c-green' if d.moic > 2 else 'c-red'), + ('Equity invested', d.equity | fmt_currency, 'c-head'), + ('Equity MOIC', d.moic | fmt_x, 'c-green' if d.moic > 2 else 'c-red'), + ('Project MOIC (on CAPEX)', d.projectMoic | fmt_x, 'c-head'), ] %} {% for label, value, cls in wf_rows %}
@@ -44,12 +45,50 @@ {% endfor %}
+
+
Value Bridge (Equity Attribution)
+
+ {% set vd = d.valueDrivers %} +
+ Equity invested + {{ vd.entry_equity | fmt_currency }} +
+
+ + EBITDA growth value iImprovement in EBITDA × exit multiple + {{ vd.ebitda_growth | fmt_currency }} +
+
+ + Debt paydown iLoan balance reduction over hold period + {{ vd.deleverage | fmt_currency }} +
+
+ Net exit proceeds + {{ vd.exit_equity | fmt_currency }} +
+
+ Project IRR (unlevered) + {{ d.projectIrr | fmt_pct }} +
+
+ Equity IRR (levered) + {{ d.irr | fmt_pct if d.irr_ok else 'N/A' }} +
+
+
+ + +
{{ t.planner_chart_dscr }}
+
+
Cash Flow Cumulative
+
+
+

{{ t.planner_section_util_sensitivity }}

diff --git a/web/src/padelnomics/planner/templates/planner.html b/web/src/padelnomics/planner/templates/planner.html index 88340a0..3f0cdc6 100644 --- a/web/src/padelnomics/planner/templates/planner.html +++ b/web/src/padelnomics/planner/templates/planner.html @@ -329,6 +329,7 @@ {{ slider('interestRate', t.sl_interest_rate, 0, 15, 0.1, s.interestRate, t.tip_interest_rate) }} {{ slider('loanTerm', t.sl_loan_term, 0, 30, 1, s.loanTerm, t.tip_loan_term) }} {{ slider('constructionMonths', t.sl_construction_months, 0, 24, 1, s.constructionMonths, t.tip_construction_months) }} + {{ slider('interestOnlyMonths', t.sl_interest_only_months|default('Interest-Only Period (mo)'), 0, 24, 1, s.interestOnlyMonths, t.tip_interest_only_months|default('Months of interest-only payments before P+I amortization begins. Reduces early cash flow drag during ramp.')) }}
@@ -338,6 +339,8 @@ {{ slider('holdYears', t.sl_hold_years, 1, 20, 1, s.holdYears, t.tip_hold_years) }} {{ slider('exitMultiple', t.sl_exit_multiple, 0, 20, 0.5, s.exitMultiple, t.tip_exit_multiple) }} {{ slider('annualRevGrowth', t.sl_annual_rev_growth, 0, 15, 0.5, s.annualRevGrowth, t.tip_annual_rev_growth) }} + {{ slider('annualOpexGrowth', t.sl_annual_opex_growth|default('Annual OpEx Growth (%)'), 0, 10, 0.5, s.annualOpexGrowth, t.tip_annual_opex_growth|default('Annual cost inflation for utilities, staff, and insurance. 2% matches Western European CPI. Without this, Year 4–5 EBITDA is overstated.')) }} + {{ slider('hurdleRate', t.sl_hurdle_rate|default('Hurdle Rate (%)'), 5, 35, 1, s.hurdleRate, t.tip_hurdle_rate|default('Minimum equity return required. NPV is positive when equity IRR exceeds this rate. 12% is typical for mid-market sports venues in Western Europe.')) }} diff --git a/web/tests/test_calculator.py b/web/tests/test_calculator.py index 33a89c1..9c536d9 100644 --- a/web/tests/test_calculator.py +++ b/web/tests/test_calculator.py @@ -298,8 +298,11 @@ class TestCalcDefaultScenario: month_rev = sum(m["totalRev"] for m in d["months"] if m["yr"] == y) assert annual["revenue"] == approx(month_rev) - def test_stab_ebitda_is_year3(self, d): - assert d["stabEbitda"] == d["annuals"][2]["ebitda"] + def test_stab_ebitda_is_exit_year(self, d): + # stabEbitda uses the terminal year (holdYears - 1), not hardcoded Year 3. + s = default_state() + exit_idx = min(s["holdYears"] - 1, len(d["annuals"]) - 1) + assert d["stabEbitda"] == d["annuals"][exit_idx]["ebitda"] def test_exit_value(self, d): assert d["exitValue"] == approx(d["stabEbitda"] * 6)
{{ col.name }}Preview
+ Preview + {{ row[col.name] }} - Preview -