diff --git a/data/content/articles/padel-hall-location-guide-en.md b/data/content/articles/padel-hall-location-guide-en.md
index 0f2f411..3a81402 100644
--- a/data/content/articles/padel-hall-location-guide-en.md
+++ b/data/content/articles/padel-hall-location-guide-en.md
@@ -176,7 +176,7 @@ Before committing to a site search in any city, calibrate where it sits on this
 
 Padelnomics tracks venue density, booking platform utilisation, and demographic fit for cities across Europe. Use the country market overview to read the maturity stage of your target city before evaluating individual sites.
 
-[→ View market data by country](/markets/germany)
+[→ View market data by country](/en/markets/germany)
 
 ---
 
diff --git a/data/content/articles/padel-standort-analyse-de.md b/data/content/articles/padel-standort-analyse-de.md
index c3872ce..40939a6 100644
--- a/data/content/articles/padel-standort-analyse-de.md
+++ b/data/content/articles/padel-standort-analyse-de.md
@@ -166,7 +166,7 @@ Bevor Sie in einer Stadt konkret nach Objekten suchen, sollten Sie deren Marktre
 
 Padelnomics erfasst Anlagendichte, Buchungsplattform-Auslastung und demografische Kennzahlen für Städte europaweit. Den aktuellen Marktüberblick für Ihr Zielland finden Sie hier:
 
-[→ Marktüberblick nach Land](/markets/germany)
+[→ Marktüberblick nach Land](/de/markets/germany)
 
 ---
 
diff --git a/transform/sqlmesh_padelnomics/macros/__init__.py b/transform/sqlmesh_padelnomics/macros/__init__.py
index 860bef5..60819a7 100644
--- a/transform/sqlmesh_padelnomics/macros/__init__.py
+++ b/transform/sqlmesh_padelnomics/macros/__init__.py
@@ -82,6 +82,21 @@ def normalize_eurostat_nuts(evaluator, code_col) -> str:
     )
 
 
+@macro()
+def slugify(evaluator, col) -> str:
+    """URL-safe slug: lowercase → ß→ss → strip accents → non-alnum to dashes → trim.
+
+    Usage in SQL: @slugify(city) AS city_slug
+    """
+    c = str(col)
+    return (
+        f"TRIM(REGEXP_REPLACE("
+        f"LOWER(STRIP_ACCENTS(REPLACE(LOWER({c}), 'ß', 'ss'))), "
+        f"'[^a-z0-9]+', '-'"
+        f"), '-')"
+    )
+
+
 @macro()
 def infer_country_from_coords(evaluator, lat_col, lon_col) -> str:
     """Infer ISO country code from lat/lon using bounding boxes for 8 European markets.
diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql
index ba9a51a..b74e7cf 100644
--- a/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql
+++ b/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql
@@ -33,8 +33,7 @@ venue_cities AS (
     SELECT
         country_code,
         city AS city_name,
-        -- Lowercase before regex so uppercase letters aren't stripped to '-'
-        LOWER(REGEXP_REPLACE(LOWER(city), '[^a-z0-9]+', '-')) AS city_slug,
+        @slugify(city) AS city_slug,
         COUNT(*) AS padel_venue_count,
         AVG(lat) AS centroid_lat,
         AVG(lon) AS centroid_lon
diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql
index f86673a..ffc41c2 100644
--- a/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql
+++ b/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql
@@ -38,7 +38,7 @@ locations AS (
         geoname_id,
         city_name AS location_name,
         -- URL-safe location slug
-        LOWER(REGEXP_REPLACE(LOWER(city_name), '[^a-z0-9]+', '-')) AS location_slug,
+        @slugify(city_name) AS location_slug,
         country_code,
         lat,
         lon,
diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_venues.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_venues.sql
index 484e3b6..b40cc5a 100644
--- a/transform/sqlmesh_padelnomics/models/foundation/dim_venues.sql
+++ b/transform/sqlmesh_padelnomics/models/foundation/dim_venues.sql
@@ -99,7 +99,7 @@ SELECT
     indoor_court_count,
     outdoor_court_count,
     -- Conformed city key: enables deterministic joins to dim_cities / venue_pricing_benchmarks
-    LOWER(REGEXP_REPLACE(LOWER(COALESCE(city, '')), '[^a-z0-9]+', '-')) AS city_slug,
+    @slugify(COALESCE(city, '')) AS city_slug,
     extracted_date
 FROM ranked
 QUALIFY ROW_NUMBER() OVER (
diff --git a/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql b/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql
index b895095..fcff9bd 100644
--- a/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql
+++ b/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql
@@ -20,15 +20,15 @@ SELECT
     SUM(padel_venue_count) AS total_venues,
     ROUND(AVG(market_score), 1) AS avg_market_score,
     MAX(market_score) AS top_city_market_score,
-    -- Top 5 cities by market score for internal linking (DuckDB list slice syntax)
-    LIST(city_slug ORDER BY market_score DESC NULLS LAST)[1:5] AS top_city_slugs,
-    LIST(city_name ORDER BY market_score DESC NULLS LAST)[1:5] AS top_city_names,
+    -- Top 5 cities by venue count (prominence), then score for internal linking
+    LIST(city_slug ORDER BY padel_venue_count DESC NULLS LAST, market_score DESC NULLS LAST)[1:5] AS top_city_slugs,
+    LIST(city_name ORDER BY padel_venue_count DESC NULLS LAST, market_score DESC NULLS LAST)[1:5] AS top_city_names,
     -- Opportunity score aggregates (NULL-safe: cities without geoname_id match excluded from AVG)
     ROUND(AVG(opportunity_score), 1) AS avg_opportunity_score,
     MAX(opportunity_score) AS top_opportunity_score,
-    -- Top 5 cities by opportunity score (may differ from top market score cities)
-    LIST(city_slug ORDER BY opportunity_score DESC NULLS LAST)[1:5] AS top_opportunity_slugs,
-    LIST(city_name ORDER BY opportunity_score DESC NULLS LAST)[1:5] AS top_opportunity_names,
+    -- Top 5 opportunity cities by population (prominence), then opportunity score
+    LIST(city_slug ORDER BY population DESC NULLS LAST, opportunity_score DESC NULLS LAST)[1:5] AS top_opportunity_slugs,
+    LIST(city_name ORDER BY population DESC NULLS LAST, opportunity_score DESC NULLS LAST)[1:5] AS top_opportunity_names,
     -- Pricing medians across cities (NULL when no Playtomic coverage in country)
     ROUND(MEDIAN(median_hourly_rate), 0) AS median_hourly_rate,
     ROUND(MEDIAN(median_peak_rate), 0) AS median_peak_rate,
diff --git a/web/src/padelnomics/core.py b/web/src/padelnomics/core.py
index ed5c905..3bf3d3d 100644
--- a/web/src/padelnomics/core.py
+++ b/web/src/padelnomics/core.py
@@ -740,9 +740,25 @@ async def get_all_paddle_prices() -> dict[str, str]:
 
 
 def slugify(text: str, max_length_chars: int = 80) -> str:
-    """Convert text to URL-safe slug."""
-    text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode()
-    text = re.sub(r"[^\w\s-]", "", text.lower())
-    text = re.sub(r"[-\s]+", "-", text).strip("-")
+    """Convert text to a URL-safe slug, mirroring the SQL @slugify macro.
+
+    Same pipeline as the macro: lowercase → ß→ss → strip accents → every run
+    of characters outside [a-z0-9] becomes one dash → trim dashes. Punctuation
+    therefore separates words ("O'Brien" → "o-brien") instead of vanishing,
+    so slugs built here line up with the city_slug / location_slug columns.
+
+    ß is replaced before NFKD normalization because NFKD leaves it unchanged.
+    The result is cut to max_length_chars; the SQL macro does not truncate.
+    """
+    text = text.lower().replace("ß", "ss")
+    # Drop combining marks only. NOTE(review): assumes DuckDB STRIP_ACCENTS
+    # leaves letters that have no decomposition (ø, ł) in place, so that they
+    # turn into dashes below exactly as in SQL — confirm against DuckDB.
+    text = "".join(
+        ch
+        for ch in unicodedata.normalize("NFKD", text)
+        if not unicodedata.category(ch).startswith("M")
+    )
+    text = re.sub(r"[^a-z0-9]+", "-", text).strip("-")
     return text[:max_length_chars]
 