Conformed city_slug key for venue/city joins

dim_venues derives city_slug from the venue's city: the name is lower-cased
and every run of characters outside [a-z0-9] is replaced by a single '-'.
The 'g' option is passed so that all runs are replaced; assuming these models
run on DuckDB (QUALIFY / MEDIAN / TRY_CAST suggest so), regexp_replace
rewrites only the first match without it, which would leave multi-word
cities half-slugged (e.g. 'palma-de mallorca'). A NULL city yields an
empty slug.

dim_venue_capacity now selects from foundation.dim_venues instead of
staging.stg_playtomic_venues and exposes city_slug, which is carried through
fct_daily_availability and venue_pricing_benchmarks. city_market_profile,
planner_defaults and pseo_city_pricing join on country_code + city_slug
instead of LOWER(TRIM(...)) of the city name. The admin pipeline DAG entry
for dim_venue_capacity is updated to list dim_venues as its upstream.

NOTE(review): dim_cities.city_slug is not part of this patch; confirm it is
built with the same expression (including 'g'), otherwise the slug joins
will miss multi-word cities.
NOTE(review): venue_pricing_benchmarks groups by both city and city_slug, so
two spellings of one city produce two rows with the same slug and the slug
joins downstream will fan out; confirm this is acceptable.
NOTE(review): the index line of dim_venues.sql still carries the blob id
recorded for the earlier slug expression; regenerate the patch if exact
blob ids matter (e.g. for 3-way apply).

diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_venue_capacity.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_venue_capacity.sql
index 049b245..484e7e4 100644
--- a/transform/sqlmesh_padelnomics/models/foundation/dim_venue_capacity.sql
+++ b/transform/sqlmesh_padelnomics/models/foundation/dim_venue_capacity.sql
@@ -34,6 +34,7 @@ SELECT
     v.tenant_id,
     v.country_code,
     v.city,
+    v.city_slug,
     cc.active_court_count,
     ROUND(wh.hours_open_per_week, 1) AS hours_open_per_week,
     ROUND(wh.avg_hours_open_per_day, 1) AS avg_hours_open_per_day,
@@ -42,6 +43,6 @@ SELECT
     ROUND(cc.active_court_count * wh.avg_hours_open_per_day, 1) AS capacity_court_hours_per_day,
     -- Total bookable court-hours per week
     ROUND(cc.active_court_count * wh.hours_open_per_week, 1) AS capacity_court_hours_per_week
-FROM staging.stg_playtomic_venues v
+FROM foundation.dim_venues v
 JOIN court_counts cc ON v.tenant_id = cc.tenant_id
 JOIN weekly_hours wh ON v.tenant_id = wh.tenant_id
diff --git a/transform/sqlmesh_padelnomics/models/foundation/dim_venues.sql b/transform/sqlmesh_padelnomics/models/foundation/dim_venues.sql
index 9b69982..484e3b6 100644
--- a/transform/sqlmesh_padelnomics/models/foundation/dim_venues.sql
+++ b/transform/sqlmesh_padelnomics/models/foundation/dim_venues.sql
@@ -98,6 +98,8 @@ SELECT
     court_count,
     indoor_court_count,
     outdoor_court_count,
+    -- Conformed city key for joins to dim_cities / venue_pricing_benchmarks; 'g' replaces every non-alphanumeric run, not only the first
+    REGEXP_REPLACE(LOWER(COALESCE(city, '')), '[^a-z0-9]+', '-', 'g') AS city_slug,
     extracted_date
 FROM ranked
 QUALIFY ROW_NUMBER() OVER (
diff --git a/transform/sqlmesh_padelnomics/models/foundation/fct_daily_availability.sql b/transform/sqlmesh_padelnomics/models/foundation/fct_daily_availability.sql
index 5e908d0..74b8b8a 100644
--- a/transform/sqlmesh_padelnomics/models/foundation/fct_daily_availability.sql
+++ b/transform/sqlmesh_padelnomics/models/foundation/fct_daily_availability.sql
@@ -44,6 +44,7 @@ SELECT
     sa.tenant_id,
     cap.country_code,
     cap.city,
+    cap.city_slug,
     cap.active_court_count,
     cap.capacity_court_hours_per_day,
     sa.available_slot_count,
diff --git a/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql b/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql
index 9956094..963ea8a 100644
--- a/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql
+++ b/transform/sqlmesh_padelnomics/models/serving/city_market_profile.sql
@@ -57,7 +57,7 @@ WITH base AS (
     FROM foundation.dim_cities c
     LEFT JOIN serving.venue_pricing_benchmarks vpb
         ON c.country_code = vpb.country_code
-        AND LOWER(TRIM(c.city_name)) = LOWER(TRIM(vpb.city))
+        AND c.city_slug = vpb.city_slug
     WHERE c.padel_venue_count > 0
 ),
 scored AS (
diff --git a/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql b/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql
index 3a9a41b..31c7fb4 100644
--- a/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql
+++ b/transform/sqlmesh_padelnomics/models/serving/planner_defaults.sql
@@ -21,6 +21,7 @@ city_benchmarks AS (
     SELECT
         country_code,
         city,
+        city_slug,
         median_peak_rate,
         median_offpeak_rate,
         median_occupancy_rate,
@@ -128,7 +129,7 @@ SELECT
 FROM city_profiles cp
 LEFT JOIN city_benchmarks cb
     ON cp.country_code = cb.country_code
-    AND LOWER(TRIM(cp.city_name)) = LOWER(TRIM(cb.city))
+    AND cp.city_slug = cb.city_slug
 LEFT JOIN country_benchmarks ctb
     ON cp.country_code = ctb.country_code
 LEFT JOIN hardcoded_fallbacks hf
diff --git a/transform/sqlmesh_padelnomics/models/serving/pseo_city_pricing.sql b/transform/sqlmesh_padelnomics/models/serving/pseo_city_pricing.sql
index 1d0a8fb..ad306c1 100644
--- a/transform/sqlmesh_padelnomics/models/serving/pseo_city_pricing.sql
+++ b/transform/sqlmesh_padelnomics/models/serving/pseo_city_pricing.sql
@@ -41,6 +41,6 @@ FROM serving.venue_pricing_benchmarks vpb
 -- Join city_market_profile to get the canonical city_slug and country metadata
 INNER JOIN serving.city_market_profile c
     ON vpb.country_code = c.country_code
-    AND LOWER(TRIM(vpb.city)) = LOWER(TRIM(c.city_name))
+    AND vpb.city_slug = c.city_slug
 -- Only cities with enough venues for meaningful pricing statistics
 WHERE vpb.venue_count >= 2
diff --git a/transform/sqlmesh_padelnomics/models/serving/venue_pricing_benchmarks.sql b/transform/sqlmesh_padelnomics/models/serving/venue_pricing_benchmarks.sql
index f1c62cf..a305ad4 100644
--- a/transform/sqlmesh_padelnomics/models/serving/venue_pricing_benchmarks.sql
+++ b/transform/sqlmesh_padelnomics/models/serving/venue_pricing_benchmarks.sql
@@ -17,6 +17,7 @@ WITH venue_stats AS (
         da.tenant_id,
         da.country_code,
         da.city,
+        da.city_slug,
         da.price_currency,
         AVG(da.occupancy_rate) AS avg_occupancy_rate,
         MEDIAN(da.median_price) AS median_hourly_rate,
@@ -29,12 +30,13 @@ WITH venue_stats AS (
     WHERE TRY_CAST(da.snapshot_date AS DATE) >= CURRENT_DATE - INTERVAL '30 days'
       AND da.occupancy_rate IS NOT NULL
       AND da.occupancy_rate BETWEEN 0 AND 1.5
-    GROUP BY da.tenant_id, da.country_code, da.city, da.price_currency
+    GROUP BY da.tenant_id, da.country_code, da.city, da.city_slug, da.price_currency
     HAVING COUNT(DISTINCT da.snapshot_date) >= 3
 )
 SELECT
     country_code,
     city,
+    city_slug,
     price_currency,
     COUNT(*) AS venue_count,
     -- Pricing benchmarks
@@ -54,4 +56,4 @@ SELECT
     SUM(days_observed) AS total_venue_days_observed,
     CURRENT_DATE AS refreshed_date
 FROM venue_stats
-GROUP BY country_code, city, price_currency
+GROUP BY country_code, city, city_slug, price_currency
diff --git a/web/src/padelnomics/admin/pipeline_routes.py b/web/src/padelnomics/admin/pipeline_routes.py
index 3ce3ac8..cc27504 100644
--- a/web/src/padelnomics/admin/pipeline_routes.py
+++ b/web/src/padelnomics/admin/pipeline_routes.py
@@ -100,7 +100,7 @@ _DAG: dict[str, list[str]] = {
         "stg_regional_income", "stg_income_usa", "stg_padel_courts", "stg_tennis_courts",
     ],
     "dim_venue_capacity": [
-        "stg_playtomic_venues", "stg_playtomic_resources", "stg_playtomic_opening_hours",
+        "dim_venues", "stg_playtomic_resources", "stg_playtomic_opening_hours",
     ],
     "fct_availability_slot": ["stg_playtomic_availability"],
     "fct_daily_availability": ["fct_availability_slot", "dim_venue_capacity"],