refactor: flatten padelnomics/padelnomics/ → repo root
git mv all tracked files from the nested padelnomics/ workspace directory to the git repo root. Merged .gitignore files. No code changes — pure path rename. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,59 @@
|
||||
-- City dimension: canonical city records with population and venue count.
|
||||
-- Built from Eurostat Urban Audit codes joined to venue locations.
|
||||
-- Cities without Eurostat coverage (US, non-EU) are derived from venue clusters.
|
||||
|
||||
MODEL (
|
||||
name padelnomics.dim_cities,
|
||||
kind FULL,
|
||||
cron '@daily',
|
||||
grain city_code
|
||||
);
|
||||
|
||||
WITH -- Eurostat cities: latest population per city code
|
||||
eurostat_cities AS (
|
||||
SELECT
|
||||
city_code,
|
||||
country_code,
|
||||
population,
|
||||
ref_year
|
||||
FROM padelnomics.stg_population
|
||||
QUALIFY ROW_NUMBER() OVER (PARTITION BY city_code ORDER BY ref_year DESC) = 1
|
||||
),
|
||||
-- Venue counts per (country_code, city) from dim_venues
|
||||
venue_counts AS (
|
||||
SELECT
|
||||
country_code,
|
||||
city,
|
||||
COUNT(*) AS venue_count,
|
||||
AVG(lat) AS centroid_lat,
|
||||
AVG(lon) AS centroid_lon
|
||||
FROM padelnomics.dim_venues
|
||||
WHERE city IS NOT NULL AND city != ''
|
||||
GROUP BY country_code, city
|
||||
),
|
||||
-- Eurostat city label mapping to canonical city names
|
||||
-- (Eurostat uses codes like DE001C → Berlin; we keep both)
|
||||
eurostat_labels AS (
|
||||
SELECT DISTINCT
|
||||
city_code,
|
||||
country_code,
|
||||
-- Derive a slug-friendly city name from the code as fallback
|
||||
LOWER(REPLACE(city_code, country_code, '')) AS city_slug_raw
|
||||
FROM eurostat_cities
|
||||
)
|
||||
SELECT
|
||||
ec.city_code,
|
||||
ec.country_code,
|
||||
COALESCE(vc.city, ec.city_code) AS city_name,
|
||||
LOWER(REGEXP_REPLACE(
|
||||
COALESCE(vc.city, ec.city_slug_raw), '[^a-z0-9]+', '-'
|
||||
)) AS city_slug,
|
||||
COALESCE(vc.centroid_lat, 0::DOUBLE) AS lat,
|
||||
COALESCE(vc.centroid_lon, 0::DOUBLE) AS lon,
|
||||
ec.population,
|
||||
ec.ref_year AS population_year,
|
||||
COALESCE(vc.venue_count, 0) AS padel_venue_count
|
||||
FROM eurostat_cities ec
|
||||
LEFT JOIN venue_counts vc
|
||||
ON ec.country_code = vc.country_code
|
||||
AND LOWER(TRIM(vc.city)) LIKE '%' || LOWER(LEFT(ec.city_code, 2)) || '%'
|
||||
@@ -0,0 +1,75 @@
|
||||
-- Deduplicated venue dimension combining OSM courts and Playtomic tenants.
|
||||
-- Venues from both sources are unioned; near-duplicates (within ~100m) are
|
||||
-- collapsed to a single record preferring Playtomic data (richer metadata).
|
||||
-- Proximity dedup uses haversine approximation: 1 degree lat ≈ 111 km.
|
||||
|
||||
MODEL (
|
||||
name padelnomics.dim_venues,
|
||||
kind FULL,
|
||||
cron '@daily',
|
||||
grain venue_id
|
||||
);
|
||||
|
||||
WITH all_venues AS (
|
||||
SELECT
|
||||
'osm:' || osm_id::TEXT AS venue_id,
|
||||
source,
|
||||
lat,
|
||||
lon,
|
||||
country_code,
|
||||
name,
|
||||
city,
|
||||
postcode,
|
||||
NULL AS tenant_type,
|
||||
extracted_date
|
||||
FROM padelnomics.stg_padel_courts
|
||||
WHERE country_code IS NOT NULL
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
'pt:' || tenant_id AS venue_id,
|
||||
source,
|
||||
lat,
|
||||
lon,
|
||||
country_code,
|
||||
name,
|
||||
city,
|
||||
postcode,
|
||||
tenant_type,
|
||||
extracted_date
|
||||
FROM padelnomics.stg_playtomic_venues
|
||||
WHERE country_code IS NOT NULL
|
||||
),
|
||||
-- Rank venues so Playtomic records win ties in proximity dedup
|
||||
ranked AS (
|
||||
SELECT *,
|
||||
CASE source WHEN 'playtomic' THEN 1 ELSE 2 END AS source_rank
|
||||
FROM all_venues
|
||||
)
|
||||
-- Note: full proximity dedup (haversine clustering) is expensive in SQL.
|
||||
-- For now, deduplicate on exact (country_code, ROUND(lat,3), ROUND(lon,3))
|
||||
-- — ≈111m grid cells. Refine with spatial index if volumes grow.
|
||||
SELECT
|
||||
MIN(venue_id) OVER (
|
||||
PARTITION BY country_code, ROUND(lat, 3)::TEXT, ROUND(lon, 3)::TEXT
|
||||
ORDER BY source_rank
|
||||
) AS venue_id,
|
||||
country_code,
|
||||
lat,
|
||||
lon,
|
||||
COALESCE(
|
||||
MAX(CASE WHEN source = 'playtomic' THEN name END)
|
||||
OVER (PARTITION BY country_code, ROUND(lat,3)::TEXT, ROUND(lon,3)::TEXT),
|
||||
name
|
||||
) AS name,
|
||||
COALESCE(city, '') AS city,
|
||||
postcode,
|
||||
source,
|
||||
tenant_type,
|
||||
extracted_date
|
||||
FROM ranked
|
||||
QUALIFY ROW_NUMBER() OVER (
|
||||
PARTITION BY country_code, ROUND(lat, 3)::TEXT, ROUND(lon, 3)::TEXT
|
||||
ORDER BY source_rank
|
||||
) = 1
|
||||
Reference in New Issue
Block a user