refactor: flatten padelnomics/padelnomics/ → repo root

git mv all tracked files from the nested padelnomics/ workspace
directory to the git repo root. Merged .gitignore files.
No code changes — pure path rename.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-22 00:44:40 +01:00
parent 5e471567b9
commit 4ae00b35d1
235 changed files with 45 additions and 42 deletions

View File

@@ -0,0 +1,59 @@
-- City dimension: canonical city records with population and venue count.
-- Built from Eurostat Urban Audit codes joined to venue locations.
-- Cities without Eurostat coverage (US, non-EU) are derived from venue clusters.
MODEL (
name padelnomics.dim_cities,
kind FULL,
cron '@daily',
grain city_code
);
WITH -- Eurostat cities: latest population per city code
eurostat_cities AS (
SELECT
city_code,
country_code,
population,
ref_year
FROM padelnomics.stg_population
QUALIFY ROW_NUMBER() OVER (PARTITION BY city_code ORDER BY ref_year DESC) = 1
),
-- Venue counts per (country_code, city) from dim_venues
venue_counts AS (
SELECT
country_code,
city,
COUNT(*) AS venue_count,
AVG(lat) AS centroid_lat,
AVG(lon) AS centroid_lon
FROM padelnomics.dim_venues
WHERE city IS NOT NULL AND city != ''
GROUP BY country_code, city
),
-- Eurostat city label mapping to canonical city names
-- (Eurostat uses codes like DE001C → Berlin; we keep both)
eurostat_labels AS (
SELECT DISTINCT
city_code,
country_code,
-- Derive a slug-friendly city name from the code as fallback
LOWER(REPLACE(city_code, country_code, '')) AS city_slug_raw
FROM eurostat_cities
)
SELECT
ec.city_code,
ec.country_code,
COALESCE(vc.city, ec.city_code) AS city_name,
LOWER(REGEXP_REPLACE(
COALESCE(vc.city, ec.city_slug_raw), '[^a-z0-9]+', '-'
)) AS city_slug,
COALESCE(vc.centroid_lat, 0::DOUBLE) AS lat,
COALESCE(vc.centroid_lon, 0::DOUBLE) AS lon,
ec.population,
ec.ref_year AS population_year,
COALESCE(vc.venue_count, 0) AS padel_venue_count
FROM eurostat_cities ec
LEFT JOIN venue_counts vc
ON ec.country_code = vc.country_code
AND LOWER(TRIM(vc.city)) LIKE '%' || LOWER(LEFT(ec.city_code, 2)) || '%'

View File

@@ -0,0 +1,75 @@
-- Deduplicated venue dimension combining OSM courts and Playtomic tenants.
-- Venues from both sources are unioned; near-duplicates (within ~100m) are
-- collapsed to a single record preferring Playtomic data (richer metadata).
-- Proximity dedup uses haversine approximation: 1 degree lat ≈ 111 km.
MODEL (
name padelnomics.dim_venues,
kind FULL,
cron '@daily',
grain venue_id
);
WITH all_venues AS (
SELECT
'osm:' || osm_id::TEXT AS venue_id,
source,
lat,
lon,
country_code,
name,
city,
postcode,
NULL AS tenant_type,
extracted_date
FROM padelnomics.stg_padel_courts
WHERE country_code IS NOT NULL
UNION ALL
SELECT
'pt:' || tenant_id AS venue_id,
source,
lat,
lon,
country_code,
name,
city,
postcode,
tenant_type,
extracted_date
FROM padelnomics.stg_playtomic_venues
WHERE country_code IS NOT NULL
),
-- Rank venues so Playtomic records win ties in proximity dedup
ranked AS (
SELECT *,
CASE source WHEN 'playtomic' THEN 1 ELSE 2 END AS source_rank
FROM all_venues
)
-- Note: full proximity dedup (haversine clustering) is expensive in SQL.
-- For now, deduplicate on exact (country_code, ROUND(lat,3), ROUND(lon,3))
-- — ≈111m grid cells. Refine with spatial index if volumes grow.
SELECT
MIN(venue_id) OVER (
PARTITION BY country_code, ROUND(lat, 3)::TEXT, ROUND(lon, 3)::TEXT
ORDER BY source_rank
) AS venue_id,
country_code,
lat,
lon,
COALESCE(
MAX(CASE WHEN source = 'playtomic' THEN name END)
OVER (PARTITION BY country_code, ROUND(lat,3)::TEXT, ROUND(lon,3)::TEXT),
name
) AS name,
COALESCE(city, '') AS city,
postcode,
source,
tenant_type,
extracted_date
FROM ranked
QUALIFY ROW_NUMBER() OVER (
PARTITION BY country_code, ROUND(lat, 3)::TEXT, ROUND(lon, 3)::TEXT
ORDER BY source_rank
) = 1

View File

@@ -0,0 +1,64 @@
-- Raw Eurostat Urban Audit city population (dataset: urb_cpop1).
-- Source: data/landing/eurostat/{year}/{month}/urb_cpop1.json.gz
-- Format: Eurostat JSON Statistics API (dimensions + flat value array).
--
-- The Eurostat JSON format encodes dimensions separately from values:
-- dimension.cities.category.index → maps city code to flat array position
-- dimension.time.category.index → maps year to flat array position
-- values → flat object {position_str: value}
--
-- This model stores one row per (city_code, year) by computing positions.
-- Reference: https://wikis.ec.europa.eu/display/EUROSTATHELP/API+Statistics
MODEL (
name padelnomics.raw_eurostat_population,
kind FULL,
cron '@daily',
grain (city_code, ref_year)
);
WITH raw AS (
SELECT
raw_json,
filename
FROM read_json(
@LANDING_DIR || '/eurostat/*/*/urb_cpop1.json.gz',
format = 'auto',
filename = true,
columns = { 'raw_json': 'JSON' }
)
),
-- Unnest city codes with their ordinal positions
cities AS (
SELECT
city_code,
(city_pos)::INTEGER AS city_pos,
filename,
raw_json,
(json_extract(raw_json, '$.size[1]'))::INTEGER AS n_times
FROM raw,
LATERAL (
SELECT key AS city_code, value::INTEGER AS city_pos
FROM json_each(json_extract(raw_json, '$.dimension.cities.category.index'))
)
),
-- Unnest time (year) values with positions
times AS (
SELECT key AS ref_year, value::INTEGER AS time_pos
FROM (SELECT raw_json FROM raw LIMIT 1),
LATERAL (
SELECT key, value
FROM json_each(json_extract(raw_json, '$.dimension.time.category.index'))
)
)
SELECT
c.city_code,
t.ref_year,
TRY_CAST(
json_extract(c.raw_json, '$.' || (c.city_pos * c.n_times + t.time_pos)::TEXT)
AS DOUBLE
) AS population,
c.filename AS source_file,
CURRENT_DATE AS extracted_date
FROM cities c
CROSS JOIN times t

View File

@@ -0,0 +1,42 @@
-- Raw OpenStreetMap padel courts from Overpass API landing files.
-- Source: data/landing/overpass/{year}/{month}/courts.json.gz
-- Format: {"version": ..., "elements": [{type, id, lat, lon, tags}, ...]}
--
-- Only node elements carry direct lat/lon. Way and relation elements need
-- centroid calculation from member nodes (not done here — filter to node only
-- for the initial raw layer; ways/relations retained as-is for future enrichment).
MODEL (
name padelnomics.raw_overpass_courts,
kind FULL,
cron '@daily',
grain (osm_type, osm_id)
);
SELECT
elem ->> 'type' AS osm_type,
(elem ->> 'id')::BIGINT AS osm_id,
TRY_CAST(elem ->> 'lat' AS DOUBLE) AS lat,
TRY_CAST(elem ->> 'lon' AS DOUBLE) AS lon,
elem -> 'tags' ->> 'name' AS name,
elem -> 'tags' ->> 'sport' AS sport,
elem -> 'tags' ->> 'leisure' AS leisure,
elem -> 'tags' ->> 'addr:country' AS country_code,
elem -> 'tags' ->> 'addr:city' AS city_tag,
elem -> 'tags' ->> 'addr:postcode' AS postcode,
elem -> 'tags' ->> 'operator' AS operator_name,
elem -> 'tags' ->> 'opening_hours' AS opening_hours,
elem -> 'tags' ->> 'fee' AS fee,
filename AS source_file,
CURRENT_DATE AS extracted_date
FROM (
SELECT
UNNEST(elements) AS elem,
filename
FROM read_json(
@LANDING_DIR || '/overpass/*/*/courts.json.gz',
format = 'auto',
filename = true
)
)
WHERE (elem ->> 'type') IS NOT NULL

View File

@@ -0,0 +1,35 @@
-- Raw Playtomic venue (tenant) listings from unauthenticated tenant search API.
-- Source: data/landing/playtomic/{year}/{month}/tenants.json.gz
-- Format: {"tenants": [{tenant_id, name, address, sport_ids, ...}], "count": N}
MODEL (
name padelnomics.raw_playtomic_tenants,
kind FULL,
cron '@daily',
grain tenant_id
);
SELECT
tenant ->> 'tenant_id' AS tenant_id,
tenant ->> 'tenant_name' AS tenant_name,
tenant -> 'address' ->> 'street' AS street,
tenant -> 'address' ->> 'city' AS city,
tenant -> 'address' ->> 'postal_code' AS postal_code,
tenant -> 'address' ->> 'country_code' AS country_code,
TRY_CAST(tenant -> 'address' ->> 'coordinate_lat' AS DOUBLE) AS lat,
TRY_CAST(tenant -> 'address' ->> 'coordinate_lon' AS DOUBLE) AS lon,
tenant ->> 'sport_ids' AS sport_ids_raw,
tenant ->> 'tenant_type' AS tenant_type,
filename AS source_file,
CURRENT_DATE AS extracted_date
FROM (
SELECT
UNNEST(tenants) AS tenant,
filename
FROM read_json(
@LANDING_DIR || '/playtomic/*/*/tenants.json.gz',
format = 'auto',
filename = true
)
)
WHERE (tenant ->> 'tenant_id') IS NOT NULL

View File

@@ -0,0 +1,68 @@
-- One Big Table: per-city padel market intelligence.
-- Consumed by: SEO article generation, planner city-select pre-fill, API endpoints.
--
-- Market score (0100) is a simple composite:
-- 40% population (log-scaled, city > 500K = max)
-- 40% venue density (courts per 100K residents)
-- 20% data confidence (completeness of both population + venue data)
MODEL (
name padelnomics.city_market_profile,
kind FULL,
cron '@daily',
grain city_slug
);
WITH base AS (
SELECT
c.city_code,
c.country_code,
c.city_name,
c.city_slug,
c.lat,
c.lon,
c.population,
c.population_year,
c.padel_venue_count,
-- Venue density: padel venues per 100K residents
CASE WHEN c.population > 0
THEN ROUND(c.padel_venue_count::DOUBLE / c.population * 100000, 2)
ELSE NULL
END AS venues_per_100k,
-- Data confidence: 1.0 if both population and venues are present
CASE
WHEN c.population > 0 AND c.padel_venue_count > 0 THEN 1.0
WHEN c.population > 0 OR c.padel_venue_count > 0 THEN 0.5
ELSE 0.0
END AS data_confidence
FROM padelnomics.dim_cities c
WHERE c.padel_venue_count > 0
),
scored AS (
SELECT *,
ROUND(
-- Population component (log scale, 500K+ city → 40 pts)
40.0 * LEAST(1.0, LN(GREATEST(population, 1)) / LN(500000))
-- Density component (5 courts/100K → 40 pts)
+ 40.0 * LEAST(1.0, COALESCE(venues_per_100k, 0) / 5.0)
-- Confidence component
+ 20.0 * data_confidence
, 1) AS market_score
FROM base
)
SELECT
city_code,
country_code,
city_name,
city_slug,
lat,
lon,
population,
population_year,
padel_venue_count,
venues_per_100k,
data_confidence,
market_score,
CURRENT_DATE AS refreshed_date
FROM scored
ORDER BY market_score DESC

View File

@@ -0,0 +1,72 @@
-- Per-city planner defaults for the financial calculator.
-- When a user selects a city in the planner, these values pre-fill the inputs.
-- Consumed by: padelnomics.planner.routes — city_defaults(city_slug) lookup.
--
-- Values are derived from market data where available, otherwise fall back to
-- country-level medians, then to global fallbacks from market research report.
--
-- Units are explicit in column names (EUR, %, h). All monetary values in EUR.
MODEL (
name padelnomics.planner_defaults,
kind FULL,
cron '@daily',
grain city_slug
);
WITH country_medians AS (
-- Country-level fallback values from market research (hardcoded until we
-- have richer pricing data from Playtomic or direct scraping).
SELECT * FROM (VALUES
-- (country_code, hourly_rate_peak_eur, monthly_rent_eur_sqm, capex_court_eur,
-- avg_utilisation_pct, courts_typical)
('DE', 22.0, 14.0, 42000.0, 0.55, 4),
('ES', 16.0, 9.0, 32000.0, 0.62, 6),
('GB', 24.0, 18.0, 48000.0, 0.52, 4),
('FR', 18.0, 12.0, 36000.0, 0.58, 5),
('IT', 15.0, 10.0, 30000.0, 0.60, 6),
('PT', 12.0, 8.0, 28000.0, 0.65, 6),
('AT', 20.0, 13.0, 40000.0, 0.54, 4),
('CH', 28.0, 22.0, 55000.0, 0.50, 4),
('NL', 20.0, 15.0, 40000.0, 0.56, 4),
('BE', 18.0, 13.0, 36000.0, 0.57, 4),
('SE', 22.0, 14.0, 42000.0, 0.50, 4),
('US', 20.0, 12.0, 38000.0, 0.58, 6)
) AS t(country_code, hourly_rate_peak_eur, monthly_rent_eur_sqm, capex_court_eur,
avg_utilisation_pct, courts_typical)
),
city_venue_density AS (
SELECT
city_slug,
country_code,
padel_venue_count,
population,
venues_per_100k,
market_score
FROM padelnomics.city_market_profile
)
SELECT
cvd.city_slug,
cvd.country_code,
cvd.padel_venue_count,
cvd.population,
cvd.market_score,
-- Hourly rate: adjust country median by market maturity
-- (high-density markets → slightly lower rates from competition)
ROUND(
cm.hourly_rate_peak_eur
* CASE
WHEN cvd.venues_per_100k > 4 THEN 0.90 -- very competitive
WHEN cvd.venues_per_100k > 2 THEN 0.95 -- competitive
WHEN cvd.venues_per_100k < 0.5 THEN 1.10 -- underserved premium
ELSE 1.0
END
, 2) AS hourly_rate_peak_eur,
ROUND(cm.hourly_rate_peak_eur * 0.75, 2) AS hourly_rate_offpeak_eur,
cm.monthly_rent_eur_sqm,
cm.capex_court_eur,
cm.avg_utilisation_pct,
cm.courts_typical,
CURRENT_DATE AS refreshed_date
FROM city_venue_density cvd
LEFT JOIN country_medians cm ON cvd.country_code = cm.country_code

View File

@@ -0,0 +1,62 @@
-- Cleaned OSM padel courts — node elements only (direct lat/lon available).
-- Deduplicates on osm_id, keeps most recently extracted record.
-- Country code resolved from addr:country tag or approximated by lat/lon bbox.
MODEL (
name padelnomics.stg_padel_courts,
kind FULL,
cron '@daily',
grain osm_id
);
WITH deduped AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY osm_id ORDER BY extracted_date DESC) AS rn
FROM padelnomics.raw_overpass_courts
WHERE osm_type = 'node'
AND lat IS NOT NULL
AND lon IS NOT NULL
AND lat BETWEEN -90 AND 90
AND lon BETWEEN -180 AND 180
),
-- Approximate country from lat/lon when addr:country tag is absent
with_country AS (
SELECT
osm_id,
lat,
lon,
COALESCE(NULLIF(TRIM(UPPER(country_code)), ''), CASE
WHEN lat BETWEEN 47.27 AND 55.06 AND lon BETWEEN 5.87 AND 15.04 THEN 'DE'
WHEN lat BETWEEN 35.95 AND 43.79 AND lon BETWEEN -9.39 AND 4.33 THEN 'ES'
WHEN lat BETWEEN 49.90 AND 60.85 AND lon BETWEEN -8.62 AND 1.77 THEN 'GB'
WHEN lat BETWEEN 41.36 AND 51.09 AND lon BETWEEN -5.14 AND 9.56 THEN 'FR'
WHEN lat BETWEEN 45.46 AND 47.80 AND lon BETWEEN 5.96 AND 10.49 THEN 'CH'
WHEN lat BETWEEN 46.37 AND 49.02 AND lon BETWEEN 9.53 AND 17.16 THEN 'AT'
WHEN lat BETWEEN 36.35 AND 47.09 AND lon BETWEEN 6.62 AND 18.51 THEN 'IT'
WHEN lat BETWEEN 37.00 AND 42.15 AND lon BETWEEN -9.50 AND -6.19 THEN 'PT'
ELSE NULL
END) AS country_code,
NULLIF(TRIM(name), '') AS name,
NULLIF(TRIM(city_tag), '') AS city_tag,
postcode,
operator_name,
opening_hours,
fee,
extracted_date
FROM deduped
WHERE rn = 1
)
SELECT
osm_id,
'osm' AS source,
lat,
lon,
country_code,
name,
city_tag AS city,
postcode,
operator_name,
opening_hours,
CASE LOWER(fee) WHEN 'yes' THEN TRUE WHEN 'no' THEN FALSE ELSE NULL END AS is_paid,
extracted_date
FROM with_country

View File

@@ -0,0 +1,32 @@
-- Cleaned Playtomic padel venue records. One row per venue, deduped on tenant_id.
MODEL (
name padelnomics.stg_playtomic_venues,
kind FULL,
cron '@daily',
grain tenant_id
);
WITH deduped AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY tenant_id ORDER BY extracted_date DESC) AS rn
FROM padelnomics.raw_playtomic_tenants
WHERE tenant_id IS NOT NULL
AND lat IS NOT NULL
AND lon IS NOT NULL
AND lat BETWEEN -90 AND 90
AND lon BETWEEN -180 AND 180
)
SELECT
tenant_id,
'playtomic' AS source,
lat,
lon,
UPPER(country_code) AS country_code,
NULLIF(TRIM(tenant_name), '') AS name,
NULLIF(TRIM(city), '') AS city,
postal_code AS postcode,
tenant_type,
extracted_date
FROM deduped
WHERE rn = 1

View File

@@ -0,0 +1,21 @@
-- Eurostat Urban Audit city population, cleaned and typed.
-- Eurostat city codes follow the NUTS Urban Audit convention (e.g. DE001C).
-- Country code is the first two characters of the city code.
MODEL (
name padelnomics.stg_population,
kind FULL,
cron '@daily',
grain (city_code, ref_year)
);
SELECT
UPPER(city_code) AS city_code,
UPPER(LEFT(city_code, 2)) AS country_code,
ref_year::INTEGER AS ref_year,
population::BIGINT AS population,
extracted_date
FROM padelnomics.raw_eurostat_population
WHERE population IS NOT NULL
AND population > 0
AND ref_year ~ '^\d{4}$'