import os

from sqlmesh import macro


@macro()
def padelnomics_glob(evaluator) -> str:
    """Return a quoted glob path for all padelnomics CSV gz files under LANDING_DIR.

    Used in raw models:
        SELECT * FROM read_csv(@padelnomics_glob(), ...)

    The LANDING_DIR variable is read from the SQLMesh config variables block
    first, then falls back to the LANDING_DIR environment variable, then to
    'data/landing'.

    Returns:
        A single-quoted SQL string literal of the form
        '<landing_dir>/padelnomics/**/*.csv.gz'.
    """
    # `or` (rather than an `is None` check) means an empty/falsy config value
    # also falls through to the environment variable / default.
    landing_dir = evaluator.var("LANDING_DIR") or os.environ.get("LANDING_DIR", "data/landing")
    return f"'{landing_dir}/padelnomics/**/*.csv.gz'"


# ── Country code helpers ─────────────────────────────────────────────────────
# Shared lookup used by dim_cities and dim_locations.

_COUNTRY_NAMES = {
    "DE": "Germany",
    "ES": "Spain",
    "GB": "United Kingdom",
    "FR": "France",
    "IT": "Italy",
    "PT": "Portugal",
    "AT": "Austria",
    "CH": "Switzerland",
    "NL": "Netherlands",
    "BE": "Belgium",
    "SE": "Sweden",
    "NO": "Norway",
    "DK": "Denmark",
    "FI": "Finland",
    "US": "United States",
    "AR": "Argentina",
    "MX": "Mexico",
    "AE": "UAE",
    "AU": "Australia",
    "IE": "Ireland",
}


def _country_case(col: str) -> str:
    """Build a CASE expression mapping ISO 3166-1 alpha-2 → English name.

    Args:
        col: SQL text of the column/expression holding the country code.

    Returns:
        SQL text of a simple CASE over ``col`` with one WHEN per entry of
        ``_COUNTRY_NAMES``. Codes that are not in the lookup fall through to
        ``ELSE col``, i.e. the code itself is returned unchanged.
    """
    whens = "\n ".join(
        f"WHEN '{code}' THEN '{name}'" for code, name in _COUNTRY_NAMES.items()
    )
    return f"CASE {col}\n {whens}\n ELSE {col}\n END"


@macro()
def country_name(evaluator, code_col) -> str:
    """CASE expression: country code → English name.

    Usage in SQL:
        @country_name(vc.country_code) AS country_name_en
    """
    return _country_case(str(code_col))


@macro()
def country_slug(evaluator, code_col) -> str:
    """CASE expression: country code → URL-safe slug.

    The English country name is lowercased and every run of non-alphanumeric
    characters is replaced by a single dash.

    Usage in SQL:
        @country_slug(vc.country_code) AS country_slug
    """
    # 'g' = global replace. On DuckDB (the dialect this SQL appears to target)
    # REGEXP_REPLACE rewrites only the FIRST match unless 'g' is passed, so a
    # name with more than one separator would otherwise keep its later spaces.
    return (
        f"LOWER(REGEXP_REPLACE({_country_case(str(code_col))}, "
        f"'[^a-zA-Z0-9]+', '-', 'g'))"
    )


@macro()
def normalize_eurostat_country(evaluator, code_col) -> str:
    """Normalize Eurostat country codes to ISO 3166-1 alpha-2: EL→GR, UK→GB.

    Any other code is passed through unchanged.

    Usage in SQL:
        @normalize_eurostat_country(geo_code) AS country_code
    """
    col = str(code_col)
    return f"CASE {col} WHEN 'EL' THEN 'GR' WHEN 'UK' THEN 'GB' ELSE {col} END"


@macro()
def normalize_eurostat_nuts(evaluator, code_col) -> str:
    """Normalize NUTS code prefix: EL→GR, UK→GB, preserving the suffix.

    Only the two-character prefix is rewritten; SUBSTR(col, 3) keeps everything
    from the third character onward. Codes with any other prefix are passed
    through unchanged.

    Usage in SQL:
        @normalize_eurostat_nuts(geo_code) AS nuts_code
    """
    col = str(code_col)
    return (
        f"CASE"
        f" WHEN {col} LIKE 'EL%' THEN 'GR' || SUBSTR({col}, 3)"
        f" WHEN {col} LIKE 'UK%' THEN 'GB' || SUBSTR({col}, 3)"
        f" ELSE {col}"
        f" END"
    )


@macro()
def slugify(evaluator, col) -> str:
    """URL-safe slug: lowercase → ß→ss → strip accents → non-alnum to dashes → trim.

    Every run of characters outside [a-z0-9] becomes a single dash, and leading
    or trailing dashes are trimmed, e.g. 'Palma de Mallorca' → 'palma-de-mallorca'.

    Usage in SQL:
        @slugify(city) AS city_slug
    """
    c = str(col)
    # 'g' = global replace. On DuckDB (the dialect this SQL appears to target,
    # given STRIP_ACCENTS) REGEXP_REPLACE rewrites only the FIRST match unless
    # 'g' is passed; without it multi-word names kept their remaining
    # spaces/punctuation and the result was not URL-safe.
    return (
        f"TRIM(REGEXP_REPLACE("
        f"LOWER(STRIP_ACCENTS(REPLACE(LOWER({c}), 'ß', 'ss'))), "
        f"'[^a-z0-9]+', '-', 'g'"
        f"), '-')"
    )


@macro()
def infer_country_from_coords(evaluator, lat_col, lon_col) -> str:
    """Infer ISO country code from lat/lon using bounding boxes for 8 European markets.

    Produces a searched CASE that returns the code of the first box containing
    the point (BETWEEN is inclusive on both ends) and NULL when no box matches.

    The boxes overlap and CASE is first-match-wins, so the order of the WHEN
    clauses decides which country an overlapping point is assigned to. For
    example the ES box (lat 35.95..43.79, lon -9.39..4.33) is tested before PT
    and covers the PT box (lat 37.00..42.15, lon -9.50..-6.19) except for the
    strip west of lon -9.39, so almost every Portuguese coordinate resolves
    to 'ES'.
    NOTE(review): confirm this precedence is intended before relying on the
    PT / CH / AT / IT branches; changing the order shifts the misclassified
    border regions rather than removing them.

    Usage in SQL:
        COALESCE(NULLIF(TRIM(UPPER(country_code)), ''),
                 @infer_country_from_coords(lat, lon)) AS country_code
    """
    lat = str(lat_col)
    lon = str(lon_col)
    return (
        f"CASE"
        f" WHEN {lat} BETWEEN 47.27 AND 55.06 AND {lon} BETWEEN 5.87 AND 15.04 THEN 'DE'"
        f" WHEN {lat} BETWEEN 35.95 AND 43.79 AND {lon} BETWEEN -9.39 AND 4.33 THEN 'ES'"
        f" WHEN {lat} BETWEEN 49.90 AND 60.85 AND {lon} BETWEEN -8.62 AND 1.77 THEN 'GB'"
        f" WHEN {lat} BETWEEN 41.36 AND 51.09 AND {lon} BETWEEN -5.14 AND 9.56 THEN 'FR'"
        f" WHEN {lat} BETWEEN 45.46 AND 47.80 AND {lon} BETWEEN 5.96 AND 10.49 THEN 'CH'"
        f" WHEN {lat} BETWEEN 46.37 AND 49.02 AND {lon} BETWEEN 9.53 AND 17.16 THEN 'AT'"
        f" WHEN {lat} BETWEEN 36.35 AND 47.09 AND {lon} BETWEEN 6.62 AND 18.51 THEN 'IT'"
        f" WHEN {lat} BETWEEN 37.00 AND 42.15 AND {lon} BETWEEN -9.50 AND -6.19 THEN 'PT'"
        f" ELSE NULL"
        f" END"
    )