fix(content): slugify transliteration + article links + country overview ranking

- Add @slugify SQLMesh macro (STRIP_ACCENTS + ß→ss) replacing broken inline REGEXP_REPLACE that dropped non-ASCII chars (Düsseldorf → d-sseldorf)
- Apply @slugify to dim_venues, dim_cities, dim_locations
- Fix Python slugify() to pre-replace ß→ss before NFKD normalization
- Add language prefix to B2B article market links (/markets/germany → /de/markets/germany)
- Change country overview top-5 ranking: venue count (not raw market_score) for top cities, population for top opportunity cities

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-03 10:46:30 +01:00
parent 0fc0ca66b1
commit a00c8727d7
8 changed files with 33 additions and 14 deletions
--- a/data/content/articles/padel-hall-location-guide-en.md
+++ b/data/content/articles/padel-hall-location-guide-en.md
@@ -176,7 +176,7 @@ Before committing to a site search in any city, calibrate where it sits on this

 Padelnomics tracks venue density, booking platform utilisation, and demographic fit for cities across Europe. Use the country market overview to read the maturity stage of your target city before evaluating individual sites.

-[→ View market data by country](/markets/germany)
+[→ View market data by country](/en/markets/germany)

 ---

--- a/data/content/articles/padel-standort-analyse-de.md
+++ b/data/content/articles/padel-standort-analyse-de.md
@@ -166,7 +166,7 @@ Bevor Sie in einer Stadt konkret nach Objekten suchen, sollten Sie deren Marktre

 Padelnomics erfasst Anlagendichte, Buchungsplattform-Auslastung und demografische Kennzahlen für Städte europaweit. Den aktuellen Marktüberblick für Ihr Zielland finden Sie hier:

-[→ Marktüberblick nach Land](/markets/germany)
+[→ Marktüberblick nach Land](/de/markets/germany)

 ---

--- a/transform/sqlmesh_padelnomics/macros/init.py
+++ b/transform/sqlmesh_padelnomics/macros/init.py
@@ -82,6 +82,21 @@ def normalize_eurostat_nuts(evaluator, code_col) -> str:
    )


+@macro()
+def slugify(evaluator, col) -> str:
+    """Return SQL for a URL-safe slug of `col`: lowercase → ß→ss → strip accents → non-alnum runs to '-' → trim '-'.
+
+    Usage in SQL: @slugify(city) AS city_slug  (`evaluator` is unused; `col` is rendered via str())
+    """
+    c = str(col)  # interpolated verbatim into the SQL text below
+    return (
+        f"TRIM(REGEXP_REPLACE("
+        f"LOWER(STRIP_ACCENTS(REPLACE(LOWER({c}), 'ß', 'ss'))), "  # ß is replaced explicitly, before accent stripping
+        f"'[^a-z0-9]+', '-', 'g'"  # 'g' = replace every run; without it DuckDB replaces only the first match
+        f"), '-')"  # TRIM(..., '-') removes leading/trailing dashes
+    )
+
+
@macro()
 def infer_country_from_coords(evaluator, lat_col, lon_col) -> str:
    """Infer ISO country code from lat/lon using bounding boxes for 8 European markets.
--- a/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql
+++ b/transform/sqlmesh_padelnomics/models/foundation/dim_cities.sql
@@ -33,8 +33,7 @@ venue_cities AS (
  SELECT
    country_code,
    city                                                     AS city_name,
-    -- Lowercase before regex so uppercase letters aren't stripped to '-'
-    LOWER(REGEXP_REPLACE(LOWER(city), '[^a-z0-9]+', '-'))    AS city_slug,
+    @slugify(city)                                           AS city_slug,
    COUNT(*)                                                 AS padel_venue_count,
    AVG(lat)                                                 AS centroid_lat,
    AVG(lon)                                                 AS centroid_lon
--- a/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql
+++ b/transform/sqlmesh_padelnomics/models/foundation/dim_locations.sql
@@ -38,7 +38,7 @@ locations AS (
    geoname_id,
    city_name                                                            AS location_name,
    -- URL-safe location slug
-    LOWER(REGEXP_REPLACE(LOWER(city_name), '[^a-z0-9]+', '-'))          AS location_slug,
+    @slugify(city_name)                                                 AS location_slug,
    country_code,
    lat,
    lon,
--- a/transform/sqlmesh_padelnomics/models/foundation/dim_venues.sql
+++ b/transform/sqlmesh_padelnomics/models/foundation/dim_venues.sql
@@ -99,7 +99,7 @@ SELECT
  indoor_court_count,
  outdoor_court_count,
  -- Conformed city key: enables deterministic joins to dim_cities / venue_pricing_benchmarks
-  LOWER(REGEXP_REPLACE(LOWER(COALESCE(city, '')), '[^a-z0-9]+', '-')) AS city_slug,
+  @slugify(COALESCE(city, '')) AS city_slug,
  extracted_date
 FROM ranked
 QUALIFY ROW_NUMBER() OVER (
--- a/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql
+++ b/transform/sqlmesh_padelnomics/models/serving/pseo_country_overview.sql
@@ -20,15 +20,15 @@ SELECT
  SUM(padel_venue_count)                                         AS total_venues,
  ROUND(AVG(market_score), 1)                                    AS avg_market_score,
  MAX(market_score)                                              AS top_city_market_score,
-  -- Top 5 cities by market score for internal linking (DuckDB list slice syntax)
-  LIST(city_slug ORDER BY market_score DESC NULLS LAST)[1:5]     AS top_city_slugs,
-  LIST(city_name ORDER BY market_score DESC NULLS LAST)[1:5]     AS top_city_names,
+  -- Top 5 cities by venue count (prominence), then score for internal linking
+  LIST(city_slug ORDER BY padel_venue_count DESC, market_score DESC NULLS LAST)[1:5]     AS top_city_slugs,
+  LIST(city_name ORDER BY padel_venue_count DESC, market_score DESC NULLS LAST)[1:5]     AS top_city_names,
  -- Opportunity score aggregates (NULL-safe: cities without geoname_id match excluded from AVG)
  ROUND(AVG(opportunity_score), 1)                               AS avg_opportunity_score,
  MAX(opportunity_score)                                         AS top_opportunity_score,
-  -- Top 5 cities by opportunity score (may differ from top market score cities)
-  LIST(city_slug ORDER BY opportunity_score DESC NULLS LAST)[1:5] AS top_opportunity_slugs,
-  LIST(city_name ORDER BY opportunity_score DESC NULLS LAST)[1:5] AS top_opportunity_names,
+  -- Top 5 opportunity cities by population (prominence), then opportunity score
+  LIST(city_slug ORDER BY population DESC, opportunity_score DESC NULLS LAST)[1:5] AS top_opportunity_slugs,
+  LIST(city_name ORDER BY population DESC, opportunity_score DESC NULLS LAST)[1:5] AS top_opportunity_names,
  -- Pricing medians across cities (NULL when no Playtomic coverage in country)
  ROUND(MEDIAN(median_hourly_rate), 0)                           AS median_hourly_rate,
  ROUND(MEDIAN(median_peak_rate), 0)                             AS median_peak_rate,
--- a/web/src/padelnomics/core.py
+++ b/web/src/padelnomics/core.py
@@ -740,9 +740,14 @@ async def get_all_paddle_prices() -> dict[str, str]:


 def slugify(text: str, max_length_chars: int = 80) -> str:
-    """Convert text to URL-safe slug."""
+    """Convert text to URL-safe slug (lowercased, ASCII, at most max_length_chars chars).
+
+    ß→ss runs before NFKD, as in the SQL @slugify macro. NOTE(review): punctuation is
+    deleted here but becomes '-' in the macro, and '_' is kept — outputs can differ.
+    """
+    text = text.lower().replace("ß", "ss")  # ß would otherwise be dropped by the ASCII encode below
     text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode()
-    text = re.sub(r"[^\w\s-]", "", text.lower())
+    text = re.sub(r"[^\w\s-]", "", text)
     text = re.sub(r"[-\s]+", "-", text).strip("-")
     return text[:max_length_chars]