merge: data foundation + calculator v2
Part A — Population pipeline (Sprints 1–5): - Eurostat SDMX city labels extractor (city_code → city_name) - US Census ACS, ONS UK, GeoNames extractors - 4 new staging models + stg_city_labels - dim_cities: 5-source population cascade (Eurostat > Census > ONS > GeoNames > 0) - city_market_profile: market score formula v2 (30/25/30/15 weights) Part B — Calculator fixes 1–10: - Fix 2 (HIGH): equity IRR uses -equity outflow, adds projectIrr (unlevered) - Fix 8 (HIGH): OPEX inflates at annualOpexGrowth% from Y2 - Fix 1: annualRevGrowth now applied to all revenue streams - Fix 3: NPV at hurdle rate (hurdleRate slider, npv/npvPositive) - Fix 4: remaining loan via amortization math (not heuristic) - Fix 5: exit EBITDA uses holdYears terminal year (not hardcoded Y3) - Fix 6: leveraged MOIC + projectMoic - Fix 7: value bridge (EBITDA growth vs debt paydown attribution) - Fix 9: LTV/DSCR warnings in tab_metrics.html - Fix 10: interest-only period slider 1229 tests pass. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -15,6 +15,10 @@ extract-eurostat = "padelnomics_extract.eurostat:main"
|
|||||||
extract-playtomic-tenants = "padelnomics_extract.playtomic_tenants:main"
|
extract-playtomic-tenants = "padelnomics_extract.playtomic_tenants:main"
|
||||||
extract-playtomic-availability = "padelnomics_extract.playtomic_availability:main"
|
extract-playtomic-availability = "padelnomics_extract.playtomic_availability:main"
|
||||||
extract-playtomic-recheck = "padelnomics_extract.playtomic_availability:main_recheck"
|
extract-playtomic-recheck = "padelnomics_extract.playtomic_availability:main_recheck"
|
||||||
|
extract-eurostat-city-labels = "padelnomics_extract.eurostat_city_labels:main"
|
||||||
|
extract-census-usa = "padelnomics_extract.census_usa:main"
|
||||||
|
extract-ons-uk = "padelnomics_extract.ons_uk:main"
|
||||||
|
extract-geonames = "padelnomics_extract.geonames:main"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["hatchling"]
|
requires = ["hatchling"]
|
||||||
|
|||||||
@@ -5,8 +5,16 @@ Each extractor gets its own state tracking row in .state.sqlite.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from ._shared import run_extractor, setup_logging
|
from ._shared import run_extractor, setup_logging
|
||||||
|
from .census_usa import EXTRACTOR_NAME as CENSUS_USA_NAME
|
||||||
|
from .census_usa import extract as extract_census_usa
|
||||||
from .eurostat import EXTRACTOR_NAME as EUROSTAT_NAME
|
from .eurostat import EXTRACTOR_NAME as EUROSTAT_NAME
|
||||||
from .eurostat import extract as extract_eurostat
|
from .eurostat import extract as extract_eurostat
|
||||||
|
from .eurostat_city_labels import EXTRACTOR_NAME as EUROSTAT_CITY_LABELS_NAME
|
||||||
|
from .eurostat_city_labels import extract as extract_eurostat_city_labels
|
||||||
|
from .geonames import EXTRACTOR_NAME as GEONAMES_NAME
|
||||||
|
from .geonames import extract as extract_geonames
|
||||||
|
from .ons_uk import EXTRACTOR_NAME as ONS_UK_NAME
|
||||||
|
from .ons_uk import extract as extract_ons_uk
|
||||||
from .overpass import EXTRACTOR_NAME as OVERPASS_NAME
|
from .overpass import EXTRACTOR_NAME as OVERPASS_NAME
|
||||||
from .overpass import extract as extract_overpass
|
from .overpass import extract as extract_overpass
|
||||||
from .playtomic_availability import EXTRACTOR_NAME as AVAILABILITY_NAME
|
from .playtomic_availability import EXTRACTOR_NAME as AVAILABILITY_NAME
|
||||||
@@ -19,6 +27,10 @@ logger = setup_logging("padelnomics.extract")
|
|||||||
EXTRACTORS = [
|
EXTRACTORS = [
|
||||||
(OVERPASS_NAME, extract_overpass),
|
(OVERPASS_NAME, extract_overpass),
|
||||||
(EUROSTAT_NAME, extract_eurostat),
|
(EUROSTAT_NAME, extract_eurostat),
|
||||||
|
(EUROSTAT_CITY_LABELS_NAME, extract_eurostat_city_labels),
|
||||||
|
(CENSUS_USA_NAME, extract_census_usa),
|
||||||
|
(ONS_UK_NAME, extract_ons_uk),
|
||||||
|
(GEONAMES_NAME, extract_geonames),
|
||||||
(TENANTS_NAME, extract_tenants),
|
(TENANTS_NAME, extract_tenants),
|
||||||
(AVAILABILITY_NAME, extract_availability),
|
(AVAILABILITY_NAME, extract_availability),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -0,0 +1,139 @@
|
|||||||
|
"""US Census Bureau ACS 5-year population extractor.
|
||||||
|
|
||||||
|
Fetches city-level (Census place) population from the American Community Survey
|
||||||
|
5-year estimates. Requires a free API key from api.census.gov.
|
||||||
|
|
||||||
|
Env var: CENSUS_API_KEY (register free at https://api.census.gov/data/key_signup.html)
|
||||||
|
|
||||||
|
Landing: {LANDING_DIR}/census_usa/{year}/{month}/acs5_places.json.gz
|
||||||
|
Output: {"rows": [{"city_name": "Los Angeles", "state_fips": "06",
|
||||||
|
"place_fips": "0644000", "population": 3990456,
|
||||||
|
"ref_year": 2023, "country_code": "US"}], "count": N}
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import niquests
|
||||||
|
|
||||||
|
from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging
|
||||||
|
from .utils import get_last_cursor, landing_path, write_gzip_atomic
|
||||||
|
|
||||||
|
logger = setup_logging("padelnomics.extract.census_usa")

# Identifier passed to run_extractor() and used as the cursor key in get_last_cursor().
EXTRACTOR_NAME = "census_usa"

# ACS 5-year estimates, 2023 vintage — refreshed annually by Census Bureau.
# B01003_001E = total population; NAME = place name + state.
# The API key is appended as an extra "&key=..." query parameter in extract().
ACS_URL = (
    "https://api.census.gov/data/2023/acs/acs5"
    "?get=B01003_001E,NAME&for=place:*&in=state:*"
)

# Reference year stamped on every output row; must match the vintage embedded in ACS_URL.
REF_YEAR = 2023
# Places with a population below this threshold are dropped while parsing.
MIN_POPULATION = 50_000
# NOTE(review): not referenced anywhere in this module — presumably intended for a
# retry loop around the HTTP request; confirm or remove.
MAX_RETRIES = 2
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_city_name(full_name: str) -> str:
|
||||||
|
"""Extract city name from Census place name.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
'Los Angeles city, California' → 'Los Angeles'
|
||||||
|
'New York city, New York' → 'New York'
|
||||||
|
'Miami city, Florida' → 'Miami'
|
||||||
|
"""
|
||||||
|
# Take everything before the first comma
|
||||||
|
before_comma = full_name.split(",")[0].strip()
|
||||||
|
# Strip common suffixes: ' city', ' town', ' CDP', ' borough', ' village'
|
||||||
|
for suffix in (" city", " town", " CDP", " borough", " village", " municipality"):
|
||||||
|
if before_comma.lower().endswith(suffix):
|
||||||
|
before_comma = before_comma[: -len(suffix)].strip()
|
||||||
|
break
|
||||||
|
return before_comma
|
||||||
|
|
||||||
|
|
||||||
|
def extract(
    landing_dir: Path,
    year_month: str,
    conn: sqlite3.Connection,
    session: niquests.Session,
) -> dict:
    """Download ACS 5-year place populations and land them as gzipped JSON.

    The run is skipped when CENSUS_API_KEY is unset or when the stored cursor
    for this extractor already equals ``year_month``.

    Args:
        landing_dir: Root of the landing zone.
        year_month: Run period formatted as "YYYY/MM".
        conn: State database connection used to read the last cursor.
        session: HTTP session used for the API request.

    Returns:
        Run metrics (files_written, files_skipped, bytes_written) plus
        ``cursor_value`` when a file was written.
    """
    skipped = {"files_written": 0, "files_skipped": 1, "bytes_written": 0}

    api_key = os.environ.get("CENSUS_API_KEY", "").strip()
    if not api_key:
        logger.warning("CENSUS_API_KEY not set — skipping US Census extract")
        return skipped

    # Annual data, monthly cursor: one successful run per month is enough.
    if get_last_cursor(conn, EXTRACTOR_NAME) == year_month:
        logger.info("already have data for %s — skipping", year_month)
        return skipped

    year, month = year_month.split("/")

    logger.info("GET ACS 5-year places (vintage %d)", REF_YEAR)
    response = session.get(f"{ACS_URL}&key={api_key}", timeout=HTTP_TIMEOUT_SECONDS * 2)
    response.raise_for_status()

    table = response.json()
    assert isinstance(table, list) and len(table) > 1, "ACS response must be a non-empty list"

    # The first row carries the column names, e.g. ["B01003_001E", "NAME", "state", "place"].
    header_row = table[0]
    assert "B01003_001E" in header_row, f"Population column missing from ACS response: {header_row}"
    pop_idx, name_idx, state_idx, place_idx = (
        header_row.index(column) for column in ("B01003_001E", "NAME", "state", "place")
    )

    rows: list[dict] = []
    for record in table[1:]:
        try:
            population = int(record[pop_idx])
        except (ValueError, TypeError):
            # Non-numeric or null population cell: ignore the place.
            continue
        if population < MIN_POPULATION:
            continue

        city_name = _parse_city_name(record[name_idx])
        if not city_name:
            continue

        state_fips = record[state_idx]
        rows.append({
            "city_name": city_name,
            "state_fips": state_fips,
            # Full place identifier = state FIPS followed by the place code.
            "place_fips": state_fips + record[place_idx],
            "population": population,
            "ref_year": REF_YEAR,
            "country_code": "US",
        })

    # Sanity floor guarding against a silently changed response format.
    assert len(rows) > 500, f"Expected >500 US cities ≥50K pop, got {len(rows)} — parse may have failed"
    logger.info("parsed %d US cities with population ≥%d", len(rows), MIN_POPULATION)

    dest = landing_path(landing_dir, "census_usa", year, month) / "acs5_places.json.gz"
    encoded = json.dumps({"rows": rows, "count": len(rows)}).encode()
    bytes_written = write_gzip_atomic(dest, encoded)
    logger.info("written %s bytes compressed", f"{bytes_written:,}")

    return {
        "files_written": 1,
        "files_skipped": 0,
        "bytes_written": bytes_written,
        "cursor_value": year_month,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Console-script entry point: run the US Census extract via run_extractor()."""
    run_extractor(EXTRACTOR_NAME, extract)


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,123 @@
|
|||||||
|
"""Eurostat SDMX city codelist extractor — city_code → city_name mapping.
|
||||||
|
|
||||||
|
The Eurostat Urban Audit population dataset (urb_cpop1) uses coded city identifiers
|
||||||
|
(e.g. DE001C = Berlin) with no city name column. This extractor fetches the SDMX
|
||||||
|
codelist that maps those codes to human-readable names, enabling stg_city_labels to
|
||||||
|
join population data to dim_cities (which has names, not codes).
|
||||||
|
|
||||||
|
The codelist changes very rarely so ETag dedup means most runs produce a 304 skip.
|
||||||
|
|
||||||
|
Landing: {LANDING_DIR}/eurostat_city_labels/{year}/{month}/cities_codelist.json.gz
|
||||||
|
Output: {"rows": [{"city_code": "DE001C", "city_name": "Berlin"}, ...], "count": N}
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import niquests
|
||||||
|
|
||||||
|
from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging
|
||||||
|
from .utils import landing_path, write_gzip_atomic
|
||||||
|
|
||||||
|
logger = setup_logging("padelnomics.extract.eurostat_city_labels")

# Identifier passed to run_extractor() for this extractor.
EXTRACTOR_NAME = "eurostat_city_labels"

# SDMX codelist endpoint — returns the full CITIES dimension codes with labels
# format=JSON gives a compact JSON-stat-like structure for the codelist
# lang=EN requests English labels; the parser takes the first name entry it finds.
CODELIST_URL = (
    "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/codelist/ESTAT/CITIES"
    "?format=JSON&lang=EN"
)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_sdmx_codelist(data: dict) -> list[dict]:
|
||||||
|
"""Extract city_code → city_name pairs from SDMX codelist JSON response.
|
||||||
|
|
||||||
|
The SDMX 2.1 JSON structure varies by endpoint. This endpoint returns a
|
||||||
|
structure.codelists[0].codes list where each code has id and name[0].name.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
codelists = data["structure"]["codelists"]
|
||||||
|
except (KeyError, TypeError) as e:
|
||||||
|
raise ValueError(f"Unexpected SDMX structure — missing codelists: {e}") from e
|
||||||
|
|
||||||
|
assert len(codelists) > 0, "SDMX response has empty codelists array"
|
||||||
|
|
||||||
|
codes = codelists[0].get("codes", [])
|
||||||
|
assert len(codes) > 0, "SDMX codelist has no codes — API response may have changed"
|
||||||
|
|
||||||
|
rows: list[dict] = []
|
||||||
|
for code in codes:
|
||||||
|
city_code = code.get("id", "").strip()
|
||||||
|
if not city_code:
|
||||||
|
continue
|
||||||
|
# Name is a list of {lang, name} objects; pick the first (EN requested above)
|
||||||
|
names = code.get("name", [])
|
||||||
|
if isinstance(names, list) and names:
|
||||||
|
city_name = names[0].get("name", "").strip()
|
||||||
|
elif isinstance(names, str):
|
||||||
|
city_name = names.strip()
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
if city_name:
|
||||||
|
rows.append({"city_code": city_code, "city_name": city_name})
|
||||||
|
|
||||||
|
return rows
|
||||||
|
|
||||||
|
|
||||||
|
def _etag_path(dest: Path) -> Path:
|
||||||
|
return dest.parent / (dest.name + ".etag")
|
||||||
|
|
||||||
|
|
||||||
|
def extract(
    landing_dir: Path,
    year_month: str,
    conn: sqlite3.Connection,
    session: niquests.Session,
) -> dict:
    """Fetch the Eurostat CITIES codelist with ETag dedup and return run metrics.

    A conditional request (If-None-Match) is only sent when both the stored
    ETag and the previously landed payload exist for this month; otherwise a
    304 response would leave the month without any data file.

    Args:
        landing_dir: Root of the landing zone.
        year_month: Run period formatted as "YYYY/MM".
        conn: State database connection (not used by this extractor).
        session: HTTP session used for the request.

    Returns:
        Run metrics (files_written, files_skipped, bytes_written) plus
        ``cursor_value`` when a file was written.
    """
    year, month = year_month.split("/")
    dest_dir = landing_path(landing_dir, "eurostat_city_labels", year, month)
    dest = dest_dir / "cities_codelist.json.gz"
    etag_file = _etag_path(dest)

    headers: dict[str, str] = {}
    # Only make the request conditional while the cached payload is still on
    # disk, and never send an empty validator.
    if etag_file.exists() and dest.exists():
        stored_etag = etag_file.read_text().strip()
        if stored_etag:
            headers["If-None-Match"] = stored_etag

    logger.info("GET CITIES codelist")
    resp = session.get(CODELIST_URL, headers=headers, timeout=HTTP_TIMEOUT_SECONDS)

    if resp.status_code == 304:
        logger.info("CITIES codelist not modified (304)")
        return {"files_written": 0, "files_skipped": 1, "bytes_written": 0}

    resp.raise_for_status()

    rows = _parse_sdmx_codelist(resp.json())
    # Sanity floor guarding against a silently changed response format.
    assert len(rows) > 100, f"Expected >100 city codes, got {len(rows)} — parse may have failed"

    payload = json.dumps({"rows": rows, "count": len(rows)}).encode()
    bytes_written = write_gzip_atomic(dest, payload)
    logger.info("written %d city codes (%s bytes compressed)", len(rows), f"{bytes_written:,}")

    # Persist the validator only after the payload landed successfully.
    if etag := resp.headers.get("etag"):
        etag_file.parent.mkdir(parents=True, exist_ok=True)
        etag_file.write_text(etag)

    return {
        "files_written": 1,
        "files_skipped": 0,
        "bytes_written": bytes_written,
        "cursor_value": year_month,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Console-script entry point: run the Eurostat city-labels extract via run_extractor()."""
    run_extractor(EXTRACTOR_NAME, extract)


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
|
||||||
157
extract/padelnomics_extract/src/padelnomics_extract/geonames.py
Normal file
157
extract/padelnomics_extract/src/padelnomics_extract/geonames.py
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
"""GeoNames global city population extractor.
|
||||||
|
|
||||||
|
Downloads the cities15000.zip bulk file (~1.5MB compressed, ~26K entries) from
|
||||||
|
GeoNames and filters to cities with population ≥ 50,000 and feature codes in
|
||||||
|
{PPLA, PPLA2, PPLC, PPL} (populated places, avoiding parks, airports, etc.).
|
||||||
|
|
||||||
|
Used as the global fallback for population when Eurostat/Census/ONS don't cover
|
||||||
|
a country. Padel is expanding globally so this catches UAE, Australia, Argentina, etc.
|
||||||
|
|
||||||
|
Requires: GEONAMES_USERNAME env var (free registration at geonames.org)
|
||||||
|
|
||||||
|
Landing: {LANDING_DIR}/geonames/{year}/{month}/cities_global.json.gz
|
||||||
|
Output: {"rows": [{"geoname_id": 2950159, "city_name": "Berlin",
|
||||||
|
"country_code": "DE", "population": 3644826,
|
||||||
|
"ref_year": 2024}], "count": N}
|
||||||
|
"""
|
||||||
|
|
||||||
|
import io
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import zipfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import niquests
|
||||||
|
|
||||||
|
from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging
|
||||||
|
from .utils import get_last_cursor, landing_path, write_gzip_atomic
|
||||||
|
|
||||||
|
logger = setup_logging("padelnomics.extract.geonames")

# Identifier passed to run_extractor() and used as the cursor key in get_last_cursor().
EXTRACTOR_NAME = "geonames"

# Bulk dump of GeoNames cities; extract() appends "?username=..." to this URL.
DOWNLOAD_URL = "https://download.geonames.org/export/dump/cities15000.zip"

# Only populated place feature codes — excludes airports, parks, admin areas, etc.
# PPLC = capital of a political entity
# PPLA = seat of a first-order administrative division
# PPLA2 = seat of a second-order admin division
# PPL = populated place
VALID_FEATURE_CODES = {"PPLC", "PPLA", "PPLA2", "PPL"}

# Rows with a population below this threshold are dropped while parsing.
MIN_POPULATION = 50_000

# GeoNames tab-separated column layout for cities15000.txt
# https://download.geonames.org/export/dump/readme.txt
# Values are zero-based indices into the tab-split line.
COL_GEONAME_ID = 0
COL_NAME = 1
COL_ASCIINAME = 2
COL_COUNTRY_CODE = 8
COL_FEATURE_CODE = 7
COL_POPULATION = 14
# NOTE(review): not referenced anywhere in this module — confirm whether it is still needed.
COL_MODIFICATION_DATE = 18

# Approximate year of last data update (GeoNames doesn't provide a precise vintage)
REF_YEAR = 2024
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_cities_txt(content: bytes) -> list[dict]:
    """Turn the raw GeoNames cities TSV into filtered city rows.

    A line is kept only if it has at least 15 tab-separated fields, its
    feature code is in VALID_FEATURE_CODES, its population and geoname id
    parse as integers, the population reaches MIN_POPULATION, and both a city
    name and a country code are present.

    Args:
        content: UTF-8 encoded bytes of the cities text file.

    Returns:
        A list of dicts with geoname_id, city_name, country_code, population
        and ref_year, in file order.
    """
    result: list[dict] = []
    for raw_line in content.decode("utf-8").splitlines():
        if not raw_line.strip():
            continue

        fields = raw_line.split("\t")
        if len(fields) < 15:
            continue
        if fields[COL_FEATURE_CODE].strip() not in VALID_FEATURE_CODES:
            continue

        try:
            population = int(fields[COL_POPULATION])
            geoname_id = int(fields[COL_GEONAME_ID].strip())
        except (ValueError, IndexError):
            # Either numeric column is unparseable: drop the line.
            continue
        if population < MIN_POPULATION:
            continue

        # Prefer the ASCII name for matching (avoids diacritic mismatch); fall back to name.
        city_name = fields[COL_ASCIINAME].strip() or fields[COL_NAME].strip()
        country_code = fields[COL_COUNTRY_CODE].strip().upper()
        if not (city_name and country_code):
            continue

        result.append({
            "geoname_id": geoname_id,
            "city_name": city_name,
            "country_code": country_code,
            "population": population,
            "ref_year": REF_YEAR,
        })
    return result
|
||||||
|
|
||||||
|
|
||||||
|
def extract(
    landing_dir: Path,
    year_month: str,
    conn: sqlite3.Connection,
    session: niquests.Session,
) -> dict:
    """Download cities15000.zip from GeoNames and land the filtered cities as gzipped JSON.

    The run is skipped when GEONAMES_USERNAME is unset or when the stored
    cursor for this extractor already equals ``year_month``.

    Args:
        landing_dir: Root of the landing zone.
        year_month: Run period formatted as "YYYY/MM".
        conn: State database connection used to read the last cursor.
        session: HTTP session used for the download.

    Returns:
        Run metrics (files_written, files_skipped, bytes_written) plus
        ``cursor_value`` when a file was written.
    """
    skipped = {"files_written": 0, "files_skipped": 1, "bytes_written": 0}

    username = os.environ.get("GEONAMES_USERNAME", "").strip()
    if not username:
        logger.warning("GEONAMES_USERNAME not set — skipping GeoNames extract")
        return skipped

    if get_last_cursor(conn, EXTRACTOR_NAME) == year_month:
        logger.info("already have data for %s — skipping", year_month)
        return skipped

    year, month = year_month.split("/")

    # GeoNames bulk downloads don't require the username in the URL for cities15000.zip,
    # but the username signals acceptance of their terms of use and helps their monitoring.
    logger.info("GET cities15000.zip (~1.5MB compressed)")
    response = session.get(f"{DOWNLOAD_URL}?username={username}", timeout=HTTP_TIMEOUT_SECONDS * 4)
    response.raise_for_status()

    archive_bytes = response.content
    # A tiny body indicates an error page rather than the real archive.
    assert len(archive_bytes) > 100_000, (
        f"cities15000.zip too small ({len(archive_bytes)} bytes) — download may have failed"
    )

    with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
        txt_members = [member for member in archive.namelist() if member.endswith(".txt")]
        assert txt_members, f"No .txt file in cities15000.zip: {archive.namelist()}"
        # The first .txt member is taken as the cities table.
        txt_content = archive.read(txt_members[0])

    rows = _parse_cities_txt(txt_content)
    # Sanity floor guarding against a silently changed file layout.
    assert len(rows) > 5_000, f"Expected >5000 global cities ≥50K pop, got {len(rows)}"
    logger.info("parsed %d global cities with population ≥%d", len(rows), MIN_POPULATION)

    dest = landing_path(landing_dir, "geonames", year, month) / "cities_global.json.gz"
    encoded = json.dumps({"rows": rows, "count": len(rows)}).encode()
    bytes_written = write_gzip_atomic(dest, encoded)
    logger.info("written %s bytes compressed", f"{bytes_written:,}")

    return {
        "files_written": 1,
        "files_skipped": 0,
        "bytes_written": bytes_written,
        "cursor_value": year_month,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Console-script entry point: run the GeoNames extract via run_extractor()."""
    run_extractor(EXTRACTOR_NAME, extract)


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
|
||||||
153
extract/padelnomics_extract/src/padelnomics_extract/ons_uk.py
Normal file
153
extract/padelnomics_extract/src/padelnomics_extract/ons_uk.py
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
"""ONS (Office for National Statistics) UK population extractor.
|
||||||
|
|
||||||
|
Fetches 2021 Census population by Local Authority District (LAD) from the ONS
|
||||||
|
beta API. No authentication required.
|
||||||
|
|
||||||
|
Landing: {LANDING_DIR}/ons_uk/{year}/{month}/lad_population.json.gz
|
||||||
|
Output: {"rows": [{"lad_code": "E08000003", "lad_name": "Manchester",
|
||||||
|
"population": 553230, "ref_year": 2021,
|
||||||
|
"country_code": "GB"}], "count": N}
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import niquests
|
||||||
|
|
||||||
|
from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging
|
||||||
|
from .utils import get_last_cursor, landing_path, write_gzip_atomic
|
||||||
|
|
||||||
|
logger = setup_logging("padelnomics.extract.ons_uk")

# Identifier passed to run_extractor() and used as the cursor key in get_last_cursor().
EXTRACTOR_NAME = "ons_uk"

# ONS beta API — 2021 Census population estimates by Local Authority District.
# TS007A = "Age by single year" dataset; aggregate gives total population per LAD.
# We use the observations endpoint which returns flat rows.
# limit=500 covers all ~380 LADs in England, Wales, Scotland, and Northern Ireland.
# NOTE(review): the ONS-run 2021 Census presumably covers England and Wales only —
# confirm the geographic coverage and LAD count stated above.
ONS_BASE_URL = (
    "https://api.beta.ons.gov.uk/v1/datasets/TS007A/editions/2021/versions/1"
)

# Reference year stamped on every output row.
REF_YEAR = 2021
# LADs with an aggregated population below this threshold are dropped in extract().
MIN_POPULATION = 50_000
# ONS rate limit is 120 requests per 10 seconds; a single paginated call is fine.
PAGE_SIZE = 500
MAX_PAGES = 10  # safety bound on the pagination loop; all LADs are expected to fit in page 1 at limit=500
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_lad_observation(obs: dict) -> "dict | None":
    """Convert one raw observation into a lad_code/lad_name/population row, or None if unusable."""
    # The geography dimension is located via its "dimension_id" key.
    # NOTE(review): the original inline comment described this key as "id" —
    # confirm against a live response which key the API actually emits.
    geo_dim = next(
        (d for d in obs.get("dimensions", []) if d.get("dimension_id") == "geography"),
        None,
    )
    if not geo_dim:
        return None

    option = geo_dim.get("option", {})
    lad_code = option.get("id", "").strip()
    lad_name = option.get("label", "").strip()
    if not lad_code or not lad_name:
        return None

    try:
        # str() first so a JSON number or null is handled like a "553,230"-style
        # string instead of raising AttributeError on .replace().
        population = int(str(obs.get("observation", "0")).replace(",", ""))
    except (ValueError, TypeError):
        return None

    return {
        "lad_code": lad_code,
        "lad_name": lad_name,
        "population": population,
    }


def _fetch_all_observations(session: niquests.Session) -> list[dict]:
    """Fetch all LAD population rows, paginating if needed.

    Pages of PAGE_SIZE observations are requested until the API reports that
    all ``total_observations`` were delivered, an empty page is returned, or
    MAX_PAGES is reached. When the response carries no ``total_observations``
    field, pagination continues until a page shorter than PAGE_SIZE arrives
    (previously the fallback always stopped after the first page).

    NOTE(review): the request filters on ``age=0`` while the caller sums rows
    per LAD "across all age groups" — confirm that this filter returns the
    intended observations.

    Args:
        session: HTTP session used for the requests.

    Returns:
        One dict per usable observation with lad_code, lad_name and population.
    """
    rows: list[dict] = []
    offset = 0

    for page in range(MAX_PAGES):
        url = f"{ONS_BASE_URL}/observations?geography=*&age=0&limit={PAGE_SIZE}&offset={offset}"
        resp = session.get(url, timeout=HTTP_TIMEOUT_SECONDS)
        resp.raise_for_status()
        data = resp.json()

        observations = data.get("observations", [])
        if not observations:
            break

        for obs in observations:
            row = _parse_lad_observation(obs)
            if row is not None:
                rows.append(row)

        # Advance by the number of observations received, not rows kept, so
        # skipped observations do not cause the same window to be re-fetched.
        offset += len(observations)
        logger.info("fetched page %d (%d rows so far)", page + 1, len(rows))

        total = data.get("total_observations")
        if total is not None:
            if offset >= total:
                break
        elif len(observations) < PAGE_SIZE:
            # No total reported: a short page means there is nothing left.
            break

    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def _aggregate_by_lad(raw_rows: list[dict]) -> list[dict]:
|
||||||
|
"""Sum population across all age groups per LAD.
|
||||||
|
|
||||||
|
TS007A breaks population down by single year of age, so we need to aggregate.
|
||||||
|
"""
|
||||||
|
totals: dict[str, dict] = {}
|
||||||
|
for row in raw_rows:
|
||||||
|
key = row["lad_code"]
|
||||||
|
if key not in totals:
|
||||||
|
totals[key] = {"lad_code": row["lad_code"], "lad_name": row["lad_name"], "population": 0}
|
||||||
|
totals[key]["population"] += row["population"]
|
||||||
|
return list(totals.values())
|
||||||
|
|
||||||
|
|
||||||
|
def extract(
    landing_dir: Path,
    year_month: str,
    conn: sqlite3.Connection,
    session: niquests.Session,
) -> dict:
    """Fetch ONS LAD populations and land them as gzipped JSON.

    The run is skipped when the stored cursor for this extractor already
    equals ``year_month``.

    Args:
        landing_dir: Root of the landing zone.
        year_month: Run period formatted as "YYYY/MM".
        conn: State database connection used to read the last cursor.
        session: HTTP session used for the API requests.

    Returns:
        Run metrics (files_written, files_skipped, bytes_written) plus
        ``cursor_value`` when a file was written.
    """
    if get_last_cursor(conn, EXTRACTOR_NAME) == year_month:
        logger.info("already have data for %s — skipping", year_month)
        return {"files_written": 0, "files_skipped": 1, "bytes_written": 0}

    year, month = year_month.split("/")

    logger.info("GET ONS TS007A LAD population (2021 Census)")
    per_lad = _aggregate_by_lad(_fetch_all_observations(session))

    # Keep only sufficiently large LADs and stamp year/country on each row.
    filtered: list[dict] = []
    for lad in per_lad:
        if lad["population"] < MIN_POPULATION:
            continue
        filtered.append({
            "lad_code": lad["lad_code"],
            "lad_name": lad["lad_name"],
            "population": lad["population"],
            "ref_year": REF_YEAR,
            "country_code": "GB",
        })

    # Sanity floor guarding against a silently changed response format.
    assert len(filtered) > 50, f"Expected >50 UK LADs ≥50K pop, got {len(filtered)}"
    logger.info("parsed %d UK LADs with population ≥%d", len(filtered), MIN_POPULATION)

    dest = landing_path(landing_dir, "ons_uk", year, month) / "lad_population.json.gz"
    encoded = json.dumps({"rows": filtered, "count": len(filtered)}).encode()
    bytes_written = write_gzip_atomic(dest, encoded)
    logger.info("written %s bytes compressed", f"{bytes_written:,}")

    return {
        "files_written": 1,
        "files_skipped": 0,
        "bytes_written": bytes_written,
        "cursor_value": year_month,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Console-script entry point: run the ONS UK extract via run_extractor()."""
    run_extractor(EXTRACTOR_NAME, extract)


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
|
||||||
@@ -3,14 +3,17 @@
|
|||||||
-- tracks cities where padel venues actually exist, not an administrative city list.
|
-- tracks cities where padel venues actually exist, not an administrative city list.
|
||||||
--
|
--
|
||||||
-- Conformed dimension: used by city_market_profile and all pSEO serving models.
|
-- Conformed dimension: used by city_market_profile and all pSEO serving models.
|
||||||
-- Integrates two sources:
|
-- Integrates the following sources:
|
||||||
-- dim_venues → city list, venue count, coordinates (Playtomic + OSM)
|
-- dim_venues → city list, venue count, coordinates (Playtomic + OSM)
|
||||||
-- stg_income → country-level median income (Eurostat)
|
-- stg_income → country-level median income (Eurostat)
|
||||||
|
-- stg_city_labels → Eurostat city_code → city_name mapping (EU cities)
|
||||||
|
-- stg_population → Eurostat city-level population (EU, joined via city code)
|
||||||
|
-- stg_population_usa → US Census ACS place population
|
||||||
|
-- stg_population_uk → ONS LAD population
|
||||||
|
-- stg_population_geonames → GeoNames global fallback
|
||||||
--
|
--
|
||||||
-- Population note: Eurostat uses coded identifiers (e.g. DE001C = Berlin) with no
|
-- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames > 0.
|
||||||
-- city name column in the dataset we extract. City-level population requires a
|
-- City name matching is case/whitespace-insensitive within each country.
|
||||||
-- separate code→name lookup extract (future improvement). Population is set to 0
|
|
||||||
-- until that source is available; market_score degrades gracefully.
|
|
||||||
--
|
--
|
||||||
-- Grain: (country_code, city_slug) — two cities in different countries can share a
|
-- Grain: (country_code, city_slug) — two cities in different countries can share a
|
||||||
-- city name. QUALIFY enforces no duplicate (country_code, city_slug) pairs.
|
-- city name. QUALIFY enforces no duplicate (country_code, city_slug) pairs.
|
||||||
@@ -42,6 +45,39 @@ country_income AS (
|
|||||||
SELECT country_code, median_income_pps, ref_year AS income_year
|
SELECT country_code, median_income_pps, ref_year AS income_year
|
||||||
FROM staging.stg_income
|
FROM staging.stg_income
|
||||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
|
QUALIFY ROW_NUMBER() OVER (PARTITION BY country_code ORDER BY ref_year DESC) = 1
|
||||||
|
),
|
||||||
|
-- Eurostat EU population: join city labels (code→name) with population values.
|
||||||
|
-- QUALIFY keeps only the most recent year per (country, city name).
|
||||||
|
eurostat_pop AS (
|
||||||
|
SELECT
|
||||||
|
cl.country_code,
|
||||||
|
cl.city_name,
|
||||||
|
p.population,
|
||||||
|
p.ref_year
|
||||||
|
FROM staging.stg_city_labels cl
|
||||||
|
JOIN staging.stg_population p ON cl.city_code = p.city_code
|
||||||
|
QUALIFY ROW_NUMBER() OVER (
|
||||||
|
PARTITION BY cl.country_code, cl.city_name
|
||||||
|
ORDER BY p.ref_year DESC
|
||||||
|
) = 1
|
||||||
|
),
|
||||||
|
-- US Census ACS population (place-level, filtered to ≥50K)
|
||||||
|
us_pop AS (
|
||||||
|
SELECT city_name, country_code, population, ref_year
|
||||||
|
FROM staging.stg_population_usa
|
||||||
|
QUALIFY ROW_NUMBER() OVER (PARTITION BY place_fips ORDER BY ref_year DESC) = 1
|
||||||
|
),
|
||||||
|
-- ONS UK Local Authority District population
|
||||||
|
uk_pop AS (
|
||||||
|
SELECT lad_name AS city_name, country_code, population, ref_year
|
||||||
|
FROM staging.stg_population_uk
|
||||||
|
QUALIFY ROW_NUMBER() OVER (PARTITION BY lad_code ORDER BY ref_year DESC) = 1
|
||||||
|
),
|
||||||
|
-- GeoNames global fallback (all cities ≥50K)
|
||||||
|
geonames_pop AS (
|
||||||
|
SELECT city_name, country_code, population, ref_year
|
||||||
|
FROM staging.stg_population_geonames
|
||||||
|
QUALIFY ROW_NUMBER() OVER (PARTITION BY geoname_id ORDER BY ref_year DESC) = 1
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
vc.country_code,
|
vc.country_code,
|
||||||
@@ -99,15 +135,43 @@ SELECT
|
|||||||
)) AS country_slug,
|
)) AS country_slug,
|
||||||
vc.centroid_lat AS lat,
|
vc.centroid_lat AS lat,
|
||||||
vc.centroid_lon AS lon,
|
vc.centroid_lon AS lon,
|
||||||
-- Population: requires code→name Eurostat lookup (not yet extracted); defaults to 0.
|
-- Population cascade: Eurostat EU > US Census > ONS UK > GeoNames > 0.
|
||||||
-- market_score uses LOG(GREATEST(population, 1)) so 0 degrades score gracefully.
|
-- City name match is case/whitespace-insensitive within each country.
|
||||||
0::BIGINT AS population,
|
COALESCE(
|
||||||
0::INTEGER AS population_year,
|
ep.population,
|
||||||
|
usa.population,
|
||||||
|
uk.population,
|
||||||
|
gn.population,
|
||||||
|
0
|
||||||
|
)::BIGINT AS population,
|
||||||
|
COALESCE(
|
||||||
|
ep.ref_year,
|
||||||
|
usa.ref_year,
|
||||||
|
uk.ref_year,
|
||||||
|
gn.ref_year,
|
||||||
|
0
|
||||||
|
)::INTEGER AS population_year,
|
||||||
vc.padel_venue_count,
|
vc.padel_venue_count,
|
||||||
ci.median_income_pps,
|
ci.median_income_pps,
|
||||||
ci.income_year
|
ci.income_year
|
||||||
FROM venue_cities vc
|
FROM venue_cities vc
|
||||||
LEFT JOIN country_income ci ON vc.country_code = ci.country_code
|
LEFT JOIN country_income ci ON vc.country_code = ci.country_code
|
||||||
|
-- Eurostat EU population (via city code→name lookup)
|
||||||
|
LEFT JOIN eurostat_pop ep
|
||||||
|
ON vc.country_code = ep.country_code
|
||||||
|
AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(ep.city_name))
|
||||||
|
-- US Census population
|
||||||
|
LEFT JOIN us_pop usa
|
||||||
|
ON vc.country_code = usa.country_code
|
||||||
|
AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(usa.city_name))
|
||||||
|
-- ONS UK population
|
||||||
|
LEFT JOIN uk_pop uk
|
||||||
|
ON vc.country_code = uk.country_code
|
||||||
|
AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(uk.city_name))
|
||||||
|
-- GeoNames global fallback
|
||||||
|
LEFT JOIN geonames_pop gn
|
||||||
|
ON vc.country_code = gn.country_code
|
||||||
|
AND LOWER(TRIM(vc.city_name)) = LOWER(TRIM(gn.city_name))
|
||||||
-- Enforce grain: if two cities in the same country have the same slug
|
-- Enforce grain: if two cities in the same country have the same slug
|
||||||
-- (e.g. 'São Paulo' and 'Sao Paulo'), keep the one with more venues
|
-- (e.g. 'São Paulo' and 'Sao Paulo'), keep the one with more venues
|
||||||
QUALIFY ROW_NUMBER() OVER (
|
QUALIFY ROW_NUMBER() OVER (
|
||||||
|
|||||||
@@ -1,10 +1,11 @@
|
|||||||
-- One Big Table: per-city padel market intelligence.
|
-- One Big Table: per-city padel market intelligence.
|
||||||
-- Consumed by: SEO article generation, planner city-select pre-fill, API endpoints.
|
-- Consumed by: SEO article generation, planner city-select pre-fill, API endpoints.
|
||||||
--
|
--
|
||||||
-- Market score (0–100) is a simple composite:
|
-- Market score v2 (0–100):
|
||||||
-- 40% population (log-scaled, city > 500K = max)
|
-- 30 pts population — log-scaled to 1M+ city ceiling (was 40pts/500K)
|
||||||
-- 40% venue density (courts per 100K residents)
|
-- 25 pts income PPS — normalised to 200 ceiling (covers CH/NO/LU outliers)
|
||||||
-- 20% data confidence (completeness of both population + venue data)
|
-- 30 pts demand — observed occupancy if available, else venue density
|
||||||
|
-- 15 pts data quality — completeness discount, not a market signal
|
||||||
|
|
||||||
MODEL (
|
MODEL (
|
||||||
name serving.city_market_profile,
|
name serving.city_market_profile,
|
||||||
@@ -37,19 +38,41 @@ WITH base AS (
|
|||||||
WHEN c.population > 0 AND c.padel_venue_count > 0 THEN 1.0
|
WHEN c.population > 0 AND c.padel_venue_count > 0 THEN 1.0
|
||||||
WHEN c.population > 0 OR c.padel_venue_count > 0 THEN 0.5
|
WHEN c.population > 0 OR c.padel_venue_count > 0 THEN 0.5
|
||||||
ELSE 0.0
|
ELSE 0.0
|
||||||
END AS data_confidence
|
END AS data_confidence,
|
||||||
|
-- Pricing / occupancy from Playtomic (NULL when no availability data)
|
||||||
|
vpb.median_hourly_rate,
|
||||||
|
vpb.median_peak_rate,
|
||||||
|
vpb.median_offpeak_rate,
|
||||||
|
vpb.median_occupancy_rate,
|
||||||
|
vpb.median_daily_revenue_per_venue,
|
||||||
|
vpb.price_currency
|
||||||
FROM foundation.dim_cities c
|
FROM foundation.dim_cities c
|
||||||
|
LEFT JOIN serving.venue_pricing_benchmarks vpb
|
||||||
|
ON c.country_code = vpb.country_code
|
||||||
|
AND LOWER(TRIM(c.city_name)) = LOWER(TRIM(vpb.city))
|
||||||
WHERE c.padel_venue_count > 0
|
WHERE c.padel_venue_count > 0
|
||||||
),
|
),
|
||||||
scored AS (
|
scored AS (
|
||||||
SELECT *,
|
SELECT *,
|
||||||
ROUND(
|
ROUND(
|
||||||
-- Population component (log scale, 500K+ city → 40 pts)
|
-- Population (30 pts): log-scale, 1M+ city = full marks.
|
||||||
40.0 * LEAST(1.0, LN(GREATEST(population, 1)) / LN(500000))
|
-- LN(1) = 0 so unpopulated cities score 0 here — they still score on demand.
|
||||||
-- Density component (5 courts/100K → 40 pts)
|
30.0 * LEAST(1.0, LN(GREATEST(population, 1)) / LN(1000000))
|
||||||
+ 40.0 * LEAST(1.0, COALESCE(venues_per_100k, 0) / 5.0)
|
-- Economic power (25 pts): income PPS normalised to 200 ceiling.
|
||||||
-- Confidence component
|
-- 200 covers high-income outliers (CH ~190, NO ~180, LU ~200+).
|
||||||
+ 20.0 * data_confidence
|
-- Drives pricing power and willingness-to-pay directly.
|
||||||
|
+ 25.0 * LEAST(1.0, COALESCE(median_income_pps, 100) / 200.0)
|
||||||
|
-- Demand evidence (30 pts): observed occupancy is the best signal
|
||||||
|
-- (proves real demand). If unavailable, venue density is the proxy
|
||||||
|
-- (proves market exists; caps at 4/100K to avoid penalising dense cities).
|
||||||
|
+ 30.0 * CASE
|
||||||
|
WHEN median_occupancy_rate IS NOT NULL
|
||||||
|
THEN LEAST(1.0, median_occupancy_rate / 0.65)
|
||||||
|
ELSE LEAST(1.0, COALESCE(venues_per_100k, 0) / 4.0)
|
||||||
|
END
|
||||||
|
-- Data quality (15 pts): measures completeness, not market quality.
|
||||||
|
-- Reduced from 20pts — kept as confidence discount, not market signal.
|
||||||
|
+ 15.0 * data_confidence
|
||||||
, 1) AS market_score
|
, 1) AS market_score
|
||||||
FROM base
|
FROM base
|
||||||
)
|
)
|
||||||
@@ -69,16 +92,12 @@ SELECT
|
|||||||
s.market_score,
|
s.market_score,
|
||||||
s.median_income_pps,
|
s.median_income_pps,
|
||||||
s.income_year,
|
s.income_year,
|
||||||
-- Playtomic pricing/occupancy (NULL when no availability data)
|
s.median_hourly_rate,
|
||||||
vpb.median_hourly_rate,
|
s.median_peak_rate,
|
||||||
vpb.median_peak_rate,
|
s.median_offpeak_rate,
|
||||||
vpb.median_offpeak_rate,
|
s.median_occupancy_rate,
|
||||||
vpb.median_occupancy_rate,
|
s.median_daily_revenue_per_venue,
|
||||||
vpb.median_daily_revenue_per_venue,
|
s.price_currency,
|
||||||
vpb.price_currency,
|
|
||||||
CURRENT_DATE AS refreshed_date
|
CURRENT_DATE AS refreshed_date
|
||||||
FROM scored s
|
FROM scored s
|
||||||
LEFT JOIN serving.venue_pricing_benchmarks vpb
|
|
||||||
ON s.country_code = vpb.country_code
|
|
||||||
AND LOWER(TRIM(s.city_name)) = LOWER(TRIM(vpb.city))
|
|
||||||
ORDER BY s.market_score DESC
|
ORDER BY s.market_score DESC
|
||||||
|
|||||||
@@ -0,0 +1,31 @@
|
|||||||
|
-- Eurostat SDMX city codelist: city_code → city_name mapping.
-- Maps coded identifiers (e.g. DE001C) to human-readable names (e.g. Berlin).
-- This is the bridge table that lets stg_population join to dim_cities.
--
-- The landing glob matches every {year}/{month} snapshot, so the same city_code
-- is read once per extraction. The final QUALIFY keeps exactly one row per
-- city_code (taken from the newest landing file) so the declared grain holds.
--
-- Source: data/landing/eurostat_city_labels/{year}/{month}/cities_codelist.json.gz

MODEL (
  name staging.stg_city_labels,
  kind FULL,
  cron '@daily',
  grain city_code
);

WITH raw AS (
  SELECT
    unnest(rows) AS r,
    -- Landing path of the snapshot the row came from; used only to rank snapshots.
    filename AS source_file
  FROM read_json(
    @LANDING_DIR || '/eurostat_city_labels/*/*/cities_codelist.json.gz',
    auto_detect = true,
    filename = true
  )
),
cleaned AS (
  SELECT
    UPPER(TRIM(r ->> 'city_code')) AS city_code,
    TRIM(r ->> 'city_name') AS city_name,
    source_file
  FROM raw
  WHERE (r ->> 'city_code') IS NOT NULL
    AND (r ->> 'city_name') IS NOT NULL
    AND LENGTH(TRIM(r ->> 'city_code')) > 0
    AND LENGTH(TRIM(r ->> 'city_name')) > 0
)
SELECT
  c.city_code,
  c.city_name,
  -- Country code is always the first two letters of the city code (e.g. DE001C → DE)
  LEFT(c.city_code, 2) AS country_code,
  CURRENT_DATE AS extracted_date
FROM cleaned AS c
-- One row per city_code: prefer the lexicographically greatest landing path.
-- NOTE(review): this equals "newest snapshot" only if the {month} path segment
-- is zero-padded — confirm against the extractor. city_name is a deterministic
-- tie-breaker.
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY c.city_code
  ORDER BY c.source_file DESC, c.city_name
) = 1
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
-- GeoNames global city population (cities15000 bulk dataset, filtered to ≥50K).
-- Global fallback for countries not covered by Eurostat, Census, or ONS.
-- One row per geoname_id (GeoNames stable numeric identifier).
--
-- The landing glob matches every {year}/{month} snapshot, so each geoname_id is
-- read once per extraction. The final QUALIFY keeps the row with the latest
-- ref_year (ties broken by newest landing file) so the declared grain holds.
--
-- Source: data/landing/geonames/{year}/{month}/cities_global.json.gz

MODEL (
  name staging.stg_population_geonames,
  kind FULL,
  cron '@daily',
  grain geoname_id
);

WITH parsed AS (
  SELECT
    TRY_CAST(row ->> 'geoname_id' AS INTEGER) AS geoname_id,
    row ->> 'city_name' AS city_name,
    row ->> 'country_code' AS country_code,
    TRY_CAST(row ->> 'population' AS BIGINT) AS population,
    TRY_CAST(row ->> 'ref_year' AS INTEGER) AS ref_year,
    source_file,
    CURRENT_DATE AS extracted_date
  FROM (
    SELECT
      UNNEST(rows) AS row,
      -- Landing path of the snapshot the row came from; used only to rank snapshots.
      filename AS source_file
    FROM read_json(
      @LANDING_DIR || '/geonames/*/*/cities_global.json.gz',
      auto_detect = true,
      filename = true
    )
  )
  WHERE (row ->> 'geoname_id') IS NOT NULL
)
SELECT
  p.geoname_id,
  TRIM(p.city_name) AS city_name,
  UPPER(p.country_code) AS country_code,
  p.population,
  p.ref_year,
  p.extracted_date
FROM parsed AS p
WHERE p.population IS NOT NULL
  AND p.population > 0
  -- geoname_id can still be NULL here when TRY_CAST fails on a non-numeric id.
  AND p.geoname_id IS NOT NULL
  AND p.city_name IS NOT NULL
-- Enforce grain: one row per geoname_id across all landed snapshots.
-- NOTE(review): source_file ordering equals "newest snapshot" only if the
-- {month} path segment is zero-padded — confirm against the extractor.
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY p.geoname_id
  ORDER BY p.ref_year DESC NULLS LAST, p.source_file DESC
) = 1
|
||||||
@@ -0,0 +1,41 @@
|
|||||||
|
-- ONS 2021 Census population by Local Authority District (LAD).
-- Reads pre-processed landing zone JSON from ons_uk extractor.
-- One row per (lad_code, ref_year) — LAD code is the ONS area identifier.
--
-- The landing glob matches every {year}/{month} snapshot, so the same
-- (lad_code, ref_year) pair is read once per extraction. The final QUALIFY
-- keeps a single row per pair (from the newest landing file) so the declared
-- grain holds.
--
-- Source: data/landing/ons_uk/{year}/{month}/lad_population.json.gz

MODEL (
  name staging.stg_population_uk,
  kind FULL,
  cron '@daily',
  grain (lad_code, ref_year)
);

WITH parsed AS (
  SELECT
    row ->> 'lad_code' AS lad_code,
    row ->> 'lad_name' AS lad_name,
    TRY_CAST(row ->> 'population' AS BIGINT) AS population,
    TRY_CAST(row ->> 'ref_year' AS INTEGER) AS ref_year,
    row ->> 'country_code' AS country_code,
    source_file,
    CURRENT_DATE AS extracted_date
  FROM (
    SELECT
      UNNEST(rows) AS row,
      -- Landing path of the snapshot the row came from; used only to rank snapshots.
      filename AS source_file
    FROM read_json(
      @LANDING_DIR || '/ons_uk/*/*/lad_population.json.gz',
      auto_detect = true,
      filename = true
    )
  )
  WHERE (row ->> 'lad_code') IS NOT NULL
)
SELECT
  UPPER(TRIM(p.lad_code)) AS lad_code,
  TRIM(p.lad_name) AS lad_name,
  p.population,
  p.ref_year,
  UPPER(p.country_code) AS country_code,
  p.extracted_date
FROM parsed AS p
WHERE p.population IS NOT NULL
  AND p.population > 0
  AND p.lad_code IS NOT NULL
-- Enforce grain on the normalised code, so ' e06000001' and 'E06000001' collapse.
-- NOTE(review): source_file ordering equals "newest snapshot" only if the
-- {month} path segment is zero-padded — confirm against the extractor.
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY UPPER(TRIM(p.lad_code)), p.ref_year
  ORDER BY p.source_file DESC
) = 1
|
||||||
@@ -0,0 +1,43 @@
|
|||||||
|
-- US Census ACS 5-year place-level population.
-- Reads pre-processed landing zone JSON from census_usa extractor.
-- One row per (place_fips, ref_year) — surrogate key is the Census FIPS code.
--
-- The landing glob matches every {year}/{month} snapshot, so the same place and
-- ref_year is read once per extraction. The final QUALIFY removes those
-- repeats, keeping the row from the newest landing file.
--
-- NOTE(review): the dedupe key includes state_fips so that places in different
-- states are never merged. If place_fips is the state-local 5-digit code rather
-- than a full GEOID, the declared grain should also include state_fips — confirm
-- against the extractor.
--
-- Source: data/landing/census_usa/{year}/{month}/acs5_places.json.gz

MODEL (
  name staging.stg_population_usa,
  kind FULL,
  cron '@daily',
  grain (place_fips, ref_year)
);

WITH parsed AS (
  SELECT
    row ->> 'city_name' AS city_name,
    row ->> 'state_fips' AS state_fips,
    row ->> 'place_fips' AS place_fips,
    TRY_CAST(row ->> 'population' AS BIGINT) AS population,
    TRY_CAST(row ->> 'ref_year' AS INTEGER) AS ref_year,
    row ->> 'country_code' AS country_code,
    source_file,
    CURRENT_DATE AS extracted_date
  FROM (
    SELECT
      UNNEST(rows) AS row,
      -- Landing path of the snapshot the row came from; used only to rank snapshots.
      filename AS source_file
    FROM read_json(
      @LANDING_DIR || '/census_usa/*/*/acs5_places.json.gz',
      auto_detect = true,
      filename = true
    )
  )
  WHERE (row ->> 'place_fips') IS NOT NULL
)
SELECT
  TRIM(p.city_name) AS city_name,
  p.state_fips,
  p.place_fips,
  p.population,
  p.ref_year,
  UPPER(p.country_code) AS country_code,
  p.extracted_date
FROM parsed AS p
WHERE p.population IS NOT NULL
  AND p.population > 0
  AND p.place_fips IS NOT NULL
-- Drop cross-snapshot repeats of the same place/year.
-- NOTE(review): source_file ordering equals "newest snapshot" only if the
-- {month} path segment is zero-padded — confirm against the extractor.
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY p.state_fips, p.place_fips, p.ref_year
  ORDER BY p.source_file DESC
) = 1
|
||||||
@@ -81,24 +81,24 @@
|
|||||||
<table class="table text-sm">
|
<table class="table text-sm">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
|
<th></th>
|
||||||
{% for col in columns %}
|
{% for col in columns %}
|
||||||
<th>{{ col.name }}</th>
|
<th>{{ col.name }}</th>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
<th>Preview</th>
|
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{% for row in sample_rows %}
|
{% for row in sample_rows %}
|
||||||
<tr>
|
<tr>
|
||||||
|
<td>
|
||||||
|
<a href="{{ url_for('admin.template_preview', slug=config_data.slug, row_key=row[config_data.natural_key]) }}"
|
||||||
|
class="btn-outline btn-sm">Preview</a>
|
||||||
|
</td>
|
||||||
{% for col in columns %}
|
{% for col in columns %}
|
||||||
<td class="mono" style="max-width:200px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap">
|
<td class="mono" style="max-width:200px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap">
|
||||||
{{ row[col.name] }}
|
{{ row[col.name] }}
|
||||||
</td>
|
</td>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
<td>
|
|
||||||
<a href="{{ url_for('admin.template_preview', slug=config_data.slug, row_key=row[config_data.natural_key]) }}"
|
|
||||||
class="btn-outline btn-sm">Preview</a>
|
|
||||||
</td>
|
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</tbody>
|
</tbody>
|
||||||
|
|||||||
@@ -133,10 +133,22 @@ def _validate_table_name(data_table: str) -> None:
|
|||||||
|
|
||||||
# ── Rendering helpers ────────────────────────────────────────────────────────
|
# ── Rendering helpers ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _datetimeformat(value: str, fmt: str = "%Y-%m-%d") -> str:
|
||||||
|
"""Jinja2 filter: format a date string (or 'now') with strftime."""
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
|
||||||
|
if value == "now":
|
||||||
|
dt = datetime.now(UTC)
|
||||||
|
else:
|
||||||
|
dt = datetime.fromisoformat(value)
|
||||||
|
return dt.strftime(fmt)
|
||||||
|
|
||||||
|
|
||||||
def _render_pattern(pattern: str, context: dict) -> str:
    """Render a Jinja2 pattern string with context variables.

    A fresh ``Environment`` is created on each call and given the ``slugify``
    and ``datetimeformat`` filters; every key of *context* is exposed to the
    pattern as a template variable.
    """
    jinja_env = Environment()
    jinja_env.filters.update(
        {
            "slugify": slugify,
            "datetimeformat": _datetimeformat,
        }
    )
    template = jinja_env.from_string(pattern)
    return template.render(**context)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -85,6 +85,9 @@ DEFAULTS = {
|
|||||||
"holdYears": 5,
|
"holdYears": 5,
|
||||||
"exitMultiple": 6,
|
"exitMultiple": 6,
|
||||||
"annualRevGrowth": 2,
|
"annualRevGrowth": 2,
|
||||||
|
"annualOpexGrowth": 2,
|
||||||
|
"hurdleRate": 12,
|
||||||
|
"interestOnlyMonths": 0,
|
||||||
"budgetTarget": 0,
|
"budgetTarget": 0,
|
||||||
"country": "DE",
|
"country": "DE",
|
||||||
"permitsCompliance": 12000,
|
"permitsCompliance": 12000,
|
||||||
@@ -336,6 +339,9 @@ def calc(s: dict, lang: str = "en") -> dict:
|
|||||||
d["netCFMonth"] = d["ebitdaMonth"] - d["monthlyPayment"]
|
d["netCFMonth"] = d["ebitdaMonth"] - d["monthlyPayment"]
|
||||||
|
|
||||||
# -- 60-month cash flow projection --
|
# -- 60-month cash flow projection --
|
||||||
|
# Fix 1: annualRevGrowth applied to all revenue streams.
|
||||||
|
# Fix 8: annualOpexGrowth applied to all operating costs (utilities, staff, insurance inflate).
|
||||||
|
# Fix 10: interest-only period — first N months pay only interest, not P+I.
|
||||||
months: list[dict] = []
|
months: list[dict] = []
|
||||||
for m in range(1, 61):
|
for m in range(1, 61):
|
||||||
cm = (m - 1) % 12
|
cm = (m - 1) % 12
|
||||||
@@ -345,19 +351,32 @@ def calc(s: dict, lang: str = "en") -> dict:
|
|||||||
eff_util = (s["utilTarget"] / 100) * ramp * seas
|
eff_util = (s["utilTarget"] / 100) * ramp * seas
|
||||||
avail = s["hoursPerDay"] * dpm * total_courts if seas > 0 else 0
|
avail = s["hoursPerDay"] * dpm * total_courts if seas > 0 else 0
|
||||||
booked = avail * eff_util
|
booked = avail * eff_util
|
||||||
court_rev = booked * w_rate
|
|
||||||
|
# Revenue growth compounds from Year 2 onwards (Year 1 = base)
|
||||||
|
rev_growth = math.pow(1 + s["annualRevGrowth"] / 100, max(0, yr - 1))
|
||||||
|
court_rev = booked * w_rate * rev_growth
|
||||||
fees = -court_rev * (s["bookingFee"] / 100)
|
fees = -court_rev * (s["bookingFee"] / 100)
|
||||||
ancillary = booked * (
|
ancillary = booked * (
|
||||||
(s["racketRentalRate"] / 100) * s["racketQty"] * s["racketPrice"]
|
(s["racketRentalRate"] / 100) * s["racketQty"] * s["racketPrice"]
|
||||||
+ (s["ballRate"] / 100) * (s["ballPrice"] - s["ballCost"])
|
+ (s["ballRate"] / 100) * (s["ballPrice"] - s["ballCost"])
|
||||||
)
|
) * rev_growth
|
||||||
membership = total_courts * s["membershipRevPerCourt"] * (ramp if seas > 0 else 0)
|
membership = total_courts * s["membershipRevPerCourt"] * (ramp if seas > 0 else 0) * rev_growth
|
||||||
fb = total_courts * s["fbRevPerCourt"] * (ramp if seas > 0 else 0)
|
fb = total_courts * s["fbRevPerCourt"] * (ramp if seas > 0 else 0) * rev_growth
|
||||||
coaching = total_courts * s["coachingRevPerCourt"] * (ramp if seas > 0 else 0)
|
coaching = total_courts * s["coachingRevPerCourt"] * (ramp if seas > 0 else 0) * rev_growth
|
||||||
retail = total_courts * s["retailRevPerCourt"] * (ramp if seas > 0 else 0)
|
retail = total_courts * s["retailRevPerCourt"] * (ramp if seas > 0 else 0) * rev_growth
|
||||||
total_rev = court_rev + fees + ancillary + membership + fb + coaching + retail
|
total_rev = court_rev + fees + ancillary + membership + fb + coaching + retail
|
||||||
opex_val = -d["opex"]
|
|
||||||
|
# OPEX inflates from Year 2 onwards (utilities, staff, insurance)
|
||||||
|
opex_growth = math.pow(1 + s["annualOpexGrowth"] / 100, max(0, yr - 1))
|
||||||
|
opex_val = -(d["opex"] * opex_growth)
|
||||||
|
|
||||||
|
# Fix 10: interest-only period — lower debt service during construction/ramp
|
||||||
|
if m <= s["interestOnlyMonths"] and d["loanAmount"] > 0:
|
||||||
|
# Interest-only payment: loan balance × monthly rate
|
||||||
|
loan = -(d["loanAmount"] * s["interestRate"] / 100 / 12)
|
||||||
|
else:
|
||||||
loan = -d["monthlyPayment"]
|
loan = -d["monthlyPayment"]
|
||||||
|
|
||||||
ebitda = total_rev + opex_val
|
ebitda = total_rev + opex_val
|
||||||
ncf = ebitda + loan
|
ncf = ebitda + loan
|
||||||
prev = months[-1] if months else None
|
prev = months[-1] if months else None
|
||||||
@@ -387,29 +406,95 @@ def calc(s: dict, lang: str = "en") -> dict:
|
|||||||
d["annuals"] = annuals
|
d["annuals"] = annuals
|
||||||
|
|
||||||
# -- Returns & exit --
|
# -- Returns & exit --
|
||||||
y3_ebitda = annuals[2]["ebitda"] if len(annuals) >= 3 else 0
|
# Fix 5: use terminal year EBITDA (exit year), not hardcoded Year 3
|
||||||
d["stabEbitda"] = y3_ebitda
|
exit_yr_idx = min(s["holdYears"] - 1, len(annuals) - 1)
|
||||||
d["exitValue"] = y3_ebitda * s["exitMultiple"]
|
d["stabEbitda"] = annuals[exit_yr_idx]["ebitda"]
|
||||||
d["remainingLoan"] = d["loanAmount"] * max(0, 1 - s["holdYears"] / (max(s["loanTerm"], 1) * 1.5))
|
d["exitValue"] = d["stabEbitda"] * s["exitMultiple"]
|
||||||
|
|
||||||
|
# Fix 4: remaining loan via amortization math (PV of remaining payments),
|
||||||
|
# replacing the heuristic loanAmount * max(0, 1 - holdYears / (loanTerm * 1.5))
|
||||||
|
k = s["holdYears"] * 12 # number of P+I payments made (after interest-only period)
|
||||||
|
n = max(s["loanTerm"], 1) * 12
|
||||||
|
r_monthly_loan = s["interestRate"] / 100 / 12
|
||||||
|
if r_monthly_loan > 0 and d["loanAmount"] > 0 and n > k:
|
||||||
|
d["remainingLoan"] = _round(
|
||||||
|
d["monthlyPayment"] * (1 - math.pow(1 + r_monthly_loan, -(n - k))) / r_monthly_loan
|
||||||
|
)
|
||||||
|
elif d["loanAmount"] > 0 and n > k:
|
||||||
|
# Zero-interest loan: straight-line amortization
|
||||||
|
d["remainingLoan"] = _round(d["loanAmount"] * (n - k) / n)
|
||||||
|
else:
|
||||||
|
d["remainingLoan"] = 0
|
||||||
|
|
||||||
d["netExit"] = d["exitValue"] - d["remainingLoan"]
|
d["netExit"] = d["exitValue"] - d["remainingLoan"]
|
||||||
|
|
||||||
irr_cfs = [-d["capex"]]
|
# Fix 2: equity IRR — use equity invested as initial outflow (not full capex).
|
||||||
|
# NCFs are already post-debt-service (levered), so the denominator must match.
|
||||||
|
# Using capex here would produce a hybrid metric that's neither equity IRR
|
||||||
|
# nor project IRR — it systematically understates returns for leveraged deals.
|
||||||
|
irr_cfs = [-d["equity"]]
|
||||||
for y in range(s["holdYears"]):
|
for y in range(s["holdYears"]):
|
||||||
ycf = annuals[y]["ncf"] if y < len(annuals) else (annuals[-1]["ncf"] if annuals else 0)
|
ycf = annuals[y]["ncf"] if y < len(annuals) else (annuals[-1]["ncf"] if annuals else 0)
|
||||||
if y == s["holdYears"] - 1:
|
if y == s["holdYears"] - 1:
|
||||||
irr_cfs.append(ycf + d["netExit"])
|
irr_cfs.append(ycf + d["netExit"])
|
||||||
else:
|
else:
|
||||||
irr_cfs.append(ycf)
|
irr_cfs.append(ycf)
|
||||||
|
|
||||||
d["irr"] = calc_irr(irr_cfs)
|
d["irr"] = calc_irr(irr_cfs)
|
||||||
|
|
||||||
|
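# Project IRR (unlevered): full capex as the initial outflow, annual EBITDA as the inflows.
# NOTE(review): the terminal-year flow below adds netExit (exit value minus remaining loan);
# a strictly unlevered IRR would add exitValue instead — confirm which is intended.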
|
||||||
|
# Useful for lender analysis and comparing across capital structures.
|
||||||
|
unlevered_cfs = [-d["capex"]]
|
||||||
|
for y in range(s["holdYears"]):
|
||||||
|
ya = annuals[y] if y < len(annuals) else annuals[-1]
|
||||||
|
if y == s["holdYears"] - 1:
|
||||||
|
unlevered_cfs.append(ya["ebitda"] + d["netExit"])
|
||||||
|
else:
|
||||||
|
unlevered_cfs.append(ya["ebitda"])
|
||||||
|
d["projectIrr"] = calc_irr(unlevered_cfs)
|
||||||
|
|
||||||
|
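# Fix 3: NPV at hurdle rate — discounts the monthly equity NCFs and the net exit at hurdleRate.
# NOTE(review): pv_ncf sums every month of the 60-month projection, while the exit is
# discounted at holdYears; the two horizons only coincide when holdYears is 5 — confirm intent.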
|
||||||
|
r_hurdle_monthly = math.pow(1 + s["hurdleRate"] / 100, 1 / 12) - 1
|
||||||
|
pv_ncf = sum(m["ncf"] / math.pow(1 + r_hurdle_monthly, m["m"]) for m in months)
|
||||||
|
pv_exit = d["netExit"] / math.pow(1 + s["hurdleRate"] / 100, s["holdYears"])
|
||||||
|
d["npv"] = _round(-d["equity"] + pv_ncf + pv_exit)
|
||||||
|
d["npvPositive"] = d["npv"] >= 0
|
||||||
|
|
||||||
d["totalReturned"] = sum(irr_cfs[1:])
|
d["totalReturned"] = sum(irr_cfs[1:])
|
||||||
d["moic"] = d["totalReturned"] / d["capex"] if d["capex"] > 0 else 0
|
|
||||||
|
# Fix 6: leveraged MOIC (equity cash flows / equity invested — what the investor earns).
|
||||||
|
# Also keep project MOIC (total returns / capex) for reference.
|
||||||
|
equity_cfs = irr_cfs[1:]
|
||||||
|
d["moic"] = sum(equity_cfs) / d["equity"] if d["equity"] > 0 else 0
|
||||||
|
d["projectMoic"] = d["totalReturned"] / d["capex"] if d["capex"] > 0 else 0
|
||||||
|
|
||||||
|
# Fix 7: return decomposition / value bridge (PE-style attribution).
|
||||||
|
# Shows what drove equity returns: operational improvement vs. financial leverage.
|
||||||
|
entry_ebitda = annuals[0]["ebitda"] if annuals else 0
|
||||||
|
ebitda_growth_value = (d["stabEbitda"] - entry_ebitda) * s["exitMultiple"]
|
||||||
|
deleverage_value = d["loanAmount"] - d["remainingLoan"]
|
||||||
|
d["valueDrivers"] = {
|
||||||
|
"ebitda_growth": _round(ebitda_growth_value),
|
||||||
|
"deleverage": _round(deleverage_value),
|
||||||
|
"entry_equity": d["equity"],
|
||||||
|
"exit_equity": _round(d["netExit"]),
|
||||||
|
}
|
||||||
|
|
||||||
d["dscr"] = [
|
d["dscr"] = [
|
||||||
{"year": a["year"], "dscr": a["ebitda"] / a["ds"] if a["ds"] > 0 else 999}
|
{"year": a["year"], "dscr": a["ebitda"] / a["ds"] if a["ds"] > 0 else 999}
|
||||||
for a in annuals
|
for a in annuals
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Fix 9: LTV and DSCR warnings for lender compliance thresholds
|
||||||
|
d["ltvWarning"] = d["ltv"] > 0.75 # above typical commercial RE lending limit
|
||||||
|
d["dscrWarning"] = any(row["dscr"] < 1.25 for row in d["dscr"] if row["dscr"] < 999)
|
||||||
|
d["dscrMinYear"] = None
|
||||||
|
if d["dscrWarning"]:
|
||||||
|
d["dscrMinYear"] = min(
|
||||||
|
(row["year"] for row in d["dscr"] if row["dscr"] < 999),
|
||||||
|
key=lambda yr: next(r["dscr"] for r in d["dscr"] if r["year"] == yr),
|
||||||
|
default=None,
|
||||||
|
)
|
||||||
|
|
||||||
payback_idx = -1
|
payback_idx = -1
|
||||||
for i, m in enumerate(months):
|
for i, m in enumerate(months):
|
||||||
if m["cum"] >= 0:
|
if m["cum"] >= 0:
|
||||||
|
|||||||
@@ -61,11 +61,17 @@
|
|||||||
<div class="metric-card__label">DSCR (Y3) <span class="ti">i<span class="tp">{{ t.tip_result_dscr }}</span></span></div>
|
<div class="metric-card__label">DSCR (Y3) <span class="ti">i<span class="tp">{{ t.tip_result_dscr }}</span></span></div>
|
||||||
<div class="metric-card__value {{ 'c-green' if y3_dscr >= 1.2 else 'c-red' }}">{{ '∞' if y3_dscr > 99 else y3_dscr | fmt_x }}</div>
|
<div class="metric-card__value {{ 'c-green' if y3_dscr >= 1.2 else 'c-red' }}">{{ '∞' if y3_dscr > 99 else y3_dscr | fmt_x }}</div>
|
||||||
<div class="metric-card__sub">Min 1.2x for banks</div>
|
<div class="metric-card__sub">Min 1.2x for banks</div>
|
||||||
|
{% if d.dscrWarning %}
|
||||||
|
<div class="metric-card__warn c-red" style="font-size:10px;margin-top:4px">⚠ DSCR < 1.25x in Y{{ d.dscrMinYear }} — bank covenant breach risk</div>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
<div class="metric-card metric-card-sm">
|
<div class="metric-card metric-card-sm">
|
||||||
<div class="metric-card__label">LTV</div>
|
<div class="metric-card__label">LTV</div>
|
||||||
<div class="metric-card__value c-head">{{ d.ltv | fmt_pct }}</div>
|
<div class="metric-card__value {{ 'c-amber' if d.ltvWarning else 'c-head' }}">{{ d.ltv | fmt_pct }}</div>
|
||||||
<div class="metric-card__sub">Loan ÷ Total Investment</div>
|
<div class="metric-card__sub">Loan ÷ Total Investment</div>
|
||||||
|
{% if d.ltvWarning %}
|
||||||
|
<div class="metric-card__warn c-amber" style="font-size:10px;margin-top:4px">⚠ LTV > 75% — above typical commercial lending limit</div>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
<div class="metric-card metric-card-sm">
|
<div class="metric-card metric-card-sm">
|
||||||
<div class="metric-card__label">Debt Yield <span class="ti">i<span class="tp">{{ t.tip_result_debt_yield }}</span></span></div>
|
<div class="metric-card__label">Debt Yield <span class="ti">i<span class="tp">{{ t.tip_result_debt_yield }}</span></span></div>
|
||||||
@@ -101,7 +107,7 @@
|
|||||||
<div class="metric-card metric-card-sm">
|
<div class="metric-card metric-card-sm">
|
||||||
<div class="metric-card__label">Exit Value</div>
|
<div class="metric-card__label">Exit Value</div>
|
||||||
<div class="metric-card__value c-head">{{ d.exitValue | fmt_k }}</div>
|
<div class="metric-card__value c-head">{{ d.exitValue | fmt_k }}</div>
|
||||||
<div class="metric-card__sub">{{ s.exitMultiple }}x Y3 EBITDA</div>
|
<div class="metric-card__sub">{{ s.exitMultiple }}x Y{{ s.holdYears }} EBITDA</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -1,24 +1,24 @@
|
|||||||
<div class="grid-4 mb-4">
|
<div class="grid-4 mb-4">
|
||||||
<div class="metric-card">
|
<div class="metric-card">
|
||||||
<div class="metric-card__label">{{ t.card_irr }} <span class="ti">i<span class="tp">{{ t.tip_result_irr }}</span></span></div>
|
<div class="metric-card__label">{{ t.card_irr }} (Equity) <span class="ti">i<span class="tp">{{ t.tip_result_irr }}</span></span></div>
|
||||||
<div class="metric-card__value {{ 'c-green' if d.irr_ok and d.irr > 0.2 else 'c-red' }}">{{ d.irr | fmt_pct if d.irr_ok else 'N/A' }}</div>
|
<div class="metric-card__value {{ 'c-green' if d.irr_ok and d.irr > 0.2 else 'c-red' }}">{{ d.irr | fmt_pct if d.irr_ok else 'N/A' }}</div>
|
||||||
<div class="metric-card__sub">{{ '✓ Above 20%' if d.irr_ok and d.irr > 0.2 else '✗ Below target' }}</div>
|
<div class="metric-card__sub">{{ '✓ Above 20%' if d.irr_ok and d.irr > 0.2 else '✗ Below target' }}</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="metric-card">
|
<div class="metric-card">
|
||||||
<div class="metric-card__label">{{ t.card_moic }} <span class="ti">i<span class="tp">{{ t.tip_result_moic }}</span></span></div>
|
<div class="metric-card__label">{{ t.card_moic }} (Equity) <span class="ti">i<span class="tp">{{ t.tip_result_moic }}</span></span></div>
|
||||||
<div class="metric-card__value {{ 'c-green' if d.moic > 2 else 'c-red' }}">{{ d.moic | fmt_x }}</div>
|
<div class="metric-card__value {{ 'c-green' if d.moic > 2 else 'c-red' }}">{{ d.moic | fmt_x }}</div>
|
||||||
<div class="metric-card__sub">{{ '✓ Above 2.0x' if d.moic > 2 else '✗ Below 2.0x' }}</div>
|
<div class="metric-card__sub">{{ '✓ Above 2.0x' if d.moic > 2 else '✗ Below 2.0x' }}</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="metric-card">
|
||||||
|
<div class="metric-card__label">NPV <span class="ti">i<span class="tp">At {{ s.hurdleRate }}% hurdle rate</span></span></div>
|
||||||
|
<div class="metric-card__value {{ 'c-green' if d.npvPositive else 'c-red' }}">{{ d.npv | fmt_k }}</div>
|
||||||
|
<div class="metric-card__sub">{{ '✓ Value-creating' if d.npvPositive else '✗ Destroys value' }} at {{ s.hurdleRate }}%</div>
|
||||||
|
</div>
|
||||||
<div class="metric-card">
|
<div class="metric-card">
|
||||||
<div class="metric-card__label">{{ t.card_break_even }} <span class="ti">i<span class="tp">{{ t.tip_result_break_even }}</span></span></div>
|
<div class="metric-card__label">{{ t.card_break_even }} <span class="ti">i<span class="tp">{{ t.tip_result_break_even }}</span></span></div>
|
||||||
<div class="metric-card__value {{ 'c-green' if d.breakEvenUtil < 0.35 else 'c-amber' }}">{{ d.breakEvenUtil | fmt_pct }}</div>
|
<div class="metric-card__value {{ 'c-green' if d.breakEvenUtil < 0.35 else 'c-amber' }}">{{ d.breakEvenUtil | fmt_pct }}</div>
|
||||||
<div class="metric-card__sub">{{ d.breakEvenHrsPerCourt | round(1) }} hrs/court/day</div>
|
<div class="metric-card__sub">{{ d.breakEvenHrsPerCourt | round(1) }} hrs/court/day</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="metric-card">
|
|
||||||
<div class="metric-card__label">{{ t.card_cash_on_cash }} <span class="ti">i<span class="tp">{{ t.tip_result_coc }}</span></span></div>
|
|
||||||
<div class="metric-card__value {{ 'c-green' if d.cashOnCash > 0.15 else 'c-amber' }}">{{ d.cashOnCash | fmt_pct }}</div>
|
|
||||||
<div class="metric-card__sub">Year 3 NCF ÷ Equity</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="grid-2 mb-4">
|
<div class="grid-2 mb-4">
|
||||||
@@ -33,8 +33,9 @@
|
|||||||
(t.wf_net_exit, d.netExit | int | fmt_currency, 'c-green' if d.netExit > 0 else 'c-red'),
|
(t.wf_net_exit, d.netExit | int | fmt_currency, 'c-green' if d.netExit > 0 else 'c-red'),
|
||||||
(t.wf_cum_cf, (d.totalReturned - d.netExit) | int | fmt_currency, 'c-head'),
|
(t.wf_cum_cf, (d.totalReturned - d.netExit) | int | fmt_currency, 'c-head'),
|
||||||
(t.wf_total_returns, d.totalReturned | int | fmt_currency, 'c-green' if d.totalReturned > 0 else 'c-red'),
|
(t.wf_total_returns, d.totalReturned | int | fmt_currency, 'c-green' if d.totalReturned > 0 else 'c-red'),
|
||||||
(t.wf_investment, d.capex | fmt_currency, 'c-head'),
|
('Equity invested', d.equity | fmt_currency, 'c-head'),
|
||||||
(t.wf_moic, d.moic | fmt_x, 'c-green' if d.moic > 2 else 'c-red'),
|
('Equity MOIC', d.moic | fmt_x, 'c-green' if d.moic > 2 else 'c-red'),
|
||||||
|
('Project MOIC (on CAPEX)', d.projectMoic | fmt_x, 'c-head'),
|
||||||
] %}
|
] %}
|
||||||
{% for label, value, cls in wf_rows %}
|
{% for label, value, cls in wf_rows %}
|
||||||
<div class="waterfall-row">
|
<div class="waterfall-row">
|
||||||
@@ -44,12 +45,50 @@
|
|||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="chart-container">
|
||||||
|
<div class="chart-container__label" style="font-size:10px">Value Bridge (Equity Attribution)</div>
|
||||||
|
<div id="valueBridge" style="margin-top:10px">
|
||||||
|
{% set vd = d.valueDrivers %}
|
||||||
|
<div class="waterfall-row">
|
||||||
|
<span class="waterfall-row__label">Equity invested</span>
|
||||||
|
<span class="waterfall-row__value c-head">{{ vd.entry_equity | fmt_currency }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="waterfall-row">
|
||||||
|
<span class="waterfall-row__label">+ EBITDA growth value <span class="ti" style="font-size:10px">i<span class="tp">Improvement in EBITDA × exit multiple</span></span></span>
|
||||||
|
<span class="waterfall-row__value {{ 'c-green' if vd.ebitda_growth >= 0 else 'c-red' }}">{{ vd.ebitda_growth | fmt_currency }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="waterfall-row">
|
||||||
|
<span class="waterfall-row__label">+ Debt paydown <span class="ti" style="font-size:10px">i<span class="tp">Loan balance reduction over hold period</span></span></span>
|
||||||
|
<span class="waterfall-row__value c-green">{{ vd.deleverage | fmt_currency }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="waterfall-row" style="border-top:1px solid var(--c-border);margin-top:4px;padding-top:4px">
|
||||||
|
<span class="waterfall-row__label"><b>Net exit proceeds</b></span>
|
||||||
|
<span class="waterfall-row__value {{ 'c-green' if vd.exit_equity > vd.entry_equity else 'c-red' }}"><b>{{ vd.exit_equity | fmt_currency }}</b></span>
|
||||||
|
</div>
|
||||||
|
<div class="waterfall-row" style="margin-top:8px">
|
||||||
|
<span class="waterfall-row__label">Project IRR (unlevered)</span>
|
||||||
|
<span class="waterfall-row__value c-head">{{ d.projectIrr | fmt_pct }}</span>
|
||||||
|
</div>
|
||||||
|
<div class="waterfall-row">
|
||||||
|
<span class="waterfall-row__label">Equity IRR (levered)</span>
|
||||||
|
<span class="waterfall-row__value {{ 'c-green' if d.irr_ok and d.irr > 0.2 else 'c-red' }}">{{ d.irr | fmt_pct if d.irr_ok else 'N/A' }}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="grid-2 mb-4">
|
||||||
<div class="chart-container">
|
<div class="chart-container">
|
||||||
<div class="chart-container__label">{{ t.planner_chart_dscr }}</div>
|
<div class="chart-container__label">{{ t.planner_chart_dscr }}</div>
|
||||||
<div class="chart-h-44 chart-container__canvas"><canvas id="chartDSCR"></canvas></div>
|
<div class="chart-h-44 chart-container__canvas"><canvas id="chartDSCR"></canvas></div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="chart-container">
|
||||||
|
<div class="chart-container__label">Cash Flow Cumulative</div>
|
||||||
|
<div class="chart-h-44 chart-container__canvas"><canvas id="chartCum"></canvas></div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<script type="application/json" id="chartDSCR-data">{{ d.dscr_chart | tojson }}</script>
|
<script type="application/json" id="chartDSCR-data">{{ d.dscr_chart | tojson }}</script>
|
||||||
|
<script type="application/json" id="chartCum-data">{{ d.cum_chart | tojson }}</script>
|
||||||
|
|
||||||
<div class="mb-section">
|
<div class="mb-section">
|
||||||
<div class="section-header"><h3>{{ t.planner_section_util_sensitivity }}</h3></div>
|
<div class="section-header"><h3>{{ t.planner_section_util_sensitivity }}</h3></div>
|
||||||
|
|||||||
@@ -329,6 +329,7 @@
|
|||||||
{{ slider('interestRate', t.sl_interest_rate, 0, 15, 0.1, s.interestRate, t.tip_interest_rate) }}
|
{{ slider('interestRate', t.sl_interest_rate, 0, 15, 0.1, s.interestRate, t.tip_interest_rate) }}
|
||||||
{{ slider('loanTerm', t.sl_loan_term, 0, 30, 1, s.loanTerm, t.tip_loan_term) }}
|
{{ slider('loanTerm', t.sl_loan_term, 0, 30, 1, s.loanTerm, t.tip_loan_term) }}
|
||||||
{{ slider('constructionMonths', t.sl_construction_months, 0, 24, 1, s.constructionMonths, t.tip_construction_months) }}
|
{{ slider('constructionMonths', t.sl_construction_months, 0, 24, 1, s.constructionMonths, t.tip_construction_months) }}
|
||||||
|
{{ slider('interestOnlyMonths', t.sl_interest_only_months|default('Interest-Only Period (mo)'), 0, 24, 1, s.interestOnlyMonths, t.tip_interest_only_months|default('Months of interest-only payments before P+I amortization begins. Reduces early cash flow drag during ramp.')) }}
|
||||||
</div>
|
</div>
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
@@ -338,6 +339,8 @@
|
|||||||
{{ slider('holdYears', t.sl_hold_years, 1, 20, 1, s.holdYears, t.tip_hold_years) }}
|
{{ slider('holdYears', t.sl_hold_years, 1, 20, 1, s.holdYears, t.tip_hold_years) }}
|
||||||
{{ slider('exitMultiple', t.sl_exit_multiple, 0, 20, 0.5, s.exitMultiple, t.tip_exit_multiple) }}
|
{{ slider('exitMultiple', t.sl_exit_multiple, 0, 20, 0.5, s.exitMultiple, t.tip_exit_multiple) }}
|
||||||
{{ slider('annualRevGrowth', t.sl_annual_rev_growth, 0, 15, 0.5, s.annualRevGrowth, t.tip_annual_rev_growth) }}
|
{{ slider('annualRevGrowth', t.sl_annual_rev_growth, 0, 15, 0.5, s.annualRevGrowth, t.tip_annual_rev_growth) }}
|
||||||
|
{{ slider('annualOpexGrowth', t.sl_annual_opex_growth|default('Annual OpEx Growth (%)'), 0, 10, 0.5, s.annualOpexGrowth, t.tip_annual_opex_growth|default('Annual cost inflation for utilities, staff, and insurance. 2% matches Western European CPI. Without this, Year 4–5 EBITDA is overstated.')) }}
|
||||||
|
{{ slider('hurdleRate', t.sl_hurdle_rate|default('Hurdle Rate (%)'), 5, 35, 1, s.hurdleRate, t.tip_hurdle_rate|default('Minimum equity return required. NPV is positive when equity IRR exceeds this rate. 12% is typical for mid-market sports venues in Western Europe.')) }}
|
||||||
</div>
|
</div>
|
||||||
</details>
|
</details>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -298,8 +298,11 @@ class TestCalcDefaultScenario:
|
|||||||
month_rev = sum(m["totalRev"] for m in d["months"] if m["yr"] == y)
|
month_rev = sum(m["totalRev"] for m in d["months"] if m["yr"] == y)
|
||||||
assert annual["revenue"] == approx(month_rev)
|
assert annual["revenue"] == approx(month_rev)
|
||||||
|
|
||||||
def test_stab_ebitda_is_year3(self, d):
|
def test_stab_ebitda_is_exit_year(self, d):
|
||||||
assert d["stabEbitda"] == d["annuals"][2]["ebitda"]
|
# stabEbitda uses the terminal year (holdYears - 1), not hardcoded Year 3.
|
||||||
|
s = default_state()
|
||||||
|
exit_idx = min(s["holdYears"] - 1, len(d["annuals"]) - 1)
|
||||||
|
assert d["stabEbitda"] == d["annuals"][exit_idx]["ebitda"]
|
||||||
|
|
||||||
def test_exit_value(self, d):
|
def test_exit_value(self, d):
|
||||||
assert d["exitValue"] == approx(d["stabEbitda"] * 6)
|
assert d["exitValue"] == approx(d["stabEbitda"] * 6)
|
||||||
|
|||||||
Reference in New Issue
Block a user