-- Deduplicated venue dimension combining OSM courts and Playtomic tenants. -- Venues from both sources are unioned; near-duplicates (within ~100m) are -- collapsed to a single record preferring Playtomic data (richer metadata). -- Proximity dedup uses haversine approximation: 1 degree lat ≈ 111 km. -- -- Playtomic venues include court counts, indoor/outdoor split, currency, and timezone. MODEL ( name foundation.dim_venues, kind FULL, cron '@daily', grain venue_id ); WITH playtomic_venues AS ( SELECT 'pt:' || v.tenant_id AS venue_id, v.tenant_id, 'playtomic' AS source, v.lat, v.lon, v.country_code, v.name, v.city, v.postcode, v.tenant_type, v.timezone, v.vat_rate, v.default_currency, -- Court counts from resources COUNT(r.resource_id) AS court_count, COUNT(r.resource_id) FILTER (WHERE r.resource_type = 'indoor') AS indoor_court_count, COUNT(r.resource_id) FILTER (WHERE r.resource_type = 'outdoor') AS outdoor_court_count, v.extracted_date FROM staging.stg_playtomic_venues v LEFT JOIN staging.stg_playtomic_resources r ON v.tenant_id = r.tenant_id AND r.is_active = TRUE WHERE v.country_code IS NOT NULL GROUP BY v.tenant_id, v.lat, v.lon, v.country_code, v.name, v.city, v.postcode, v.tenant_type, v.timezone, v.vat_rate, v.default_currency, v.extracted_date ), osm_venues AS ( SELECT 'osm:' || osm_id::TEXT AS venue_id, NULL AS tenant_id, 'osm' AS source, lat, lon, country_code, name, city, postcode, NULL AS tenant_type, NULL AS timezone, NULL AS vat_rate, NULL AS default_currency, NULL AS court_count, NULL AS indoor_court_count, NULL AS outdoor_court_count, extracted_date FROM staging.stg_padel_courts WHERE country_code IS NOT NULL ), all_venues AS ( SELECT * FROM playtomic_venues UNION ALL SELECT * FROM osm_venues ), ranked AS ( SELECT *, CASE source WHEN 'playtomic' THEN 1 ELSE 2 END AS source_rank FROM all_venues ) -- Deduplicate on ~111m grid cells, preferring Playtomic SELECT MIN(venue_id) OVER ( PARTITION BY country_code, ROUND(lat, 3)::TEXT, ROUND(lon, 3)::TEXT ORDER BY source_rank ) AS venue_id, tenant_id, country_code, lat, lon, COALESCE( MAX(CASE WHEN source = 'playtomic' THEN name END) OVER (PARTITION BY country_code, ROUND(lat,3)::TEXT, ROUND(lon,3)::TEXT), name ) AS name, COALESCE(city, '') AS city, postcode, source, tenant_type, timezone, vat_rate, default_currency, court_count, indoor_court_count, outdoor_court_count, -- Conformed city key: enables deterministic joins to dim_cities / venue_pricing_benchmarks LOWER(REGEXP_REPLACE(LOWER(COALESCE(city, '')), '[^a-z0-9]+', '-')) AS city_slug, extracted_date FROM ranked QUALIFY ROW_NUMBER() OVER ( PARTITION BY country_code, ROUND(lat, 3)::TEXT, ROUND(lon, 3)::TEXT ORDER BY source_rank ) = 1