feat(extract): expand GeoNames to cities1000 + add tennis court extractor
GeoNames: - cities15000 → cities1000 (~140K global locations, pop ≥ 1K) - Add lat/lon, admin1_code, admin2_code to output (needed for dim_locations) - Expand feature codes to include PPLA3/4/5 (Gemeinden, cantons, etc.) - Remove MIN_POPULATION=50K floor — cities1000 already pre-filters to ≥1K - Update assertions for new scale (~100K+ expected) Tennis courts: - New overpass_tennis.py extractor (sport=tennis, 180s Overpass timeout) - Registered as extract-overpass-tennis, added to EXTRACTORS list - New stg_tennis_courts.sql staging model (grain: osm_id) stg_population_geonames: add lat, lon, admin1_code, admin2_code columns Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -11,6 +11,7 @@ dependencies = [
|
|||||||
[project.scripts]
|
[project.scripts]
|
||||||
extract = "padelnomics_extract.all:main"
|
extract = "padelnomics_extract.all:main"
|
||||||
extract-overpass = "padelnomics_extract.overpass:main"
|
extract-overpass = "padelnomics_extract.overpass:main"
|
||||||
|
extract-overpass-tennis = "padelnomics_extract.overpass_tennis:main"
|
||||||
extract-eurostat = "padelnomics_extract.eurostat:main"
|
extract-eurostat = "padelnomics_extract.eurostat:main"
|
||||||
extract-playtomic-tenants = "padelnomics_extract.playtomic_tenants:main"
|
extract-playtomic-tenants = "padelnomics_extract.playtomic_tenants:main"
|
||||||
extract-playtomic-availability = "padelnomics_extract.playtomic_availability:main"
|
extract-playtomic-availability = "padelnomics_extract.playtomic_availability:main"
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ from .ons_uk import EXTRACTOR_NAME as ONS_UK_NAME
|
|||||||
from .ons_uk import extract as extract_ons_uk
|
from .ons_uk import extract as extract_ons_uk
|
||||||
from .overpass import EXTRACTOR_NAME as OVERPASS_NAME
|
from .overpass import EXTRACTOR_NAME as OVERPASS_NAME
|
||||||
from .overpass import extract as extract_overpass
|
from .overpass import extract as extract_overpass
|
||||||
|
from .overpass_tennis import EXTRACTOR_NAME as OVERPASS_TENNIS_NAME
|
||||||
|
from .overpass_tennis import extract as extract_overpass_tennis
|
||||||
from .playtomic_availability import EXTRACTOR_NAME as AVAILABILITY_NAME
|
from .playtomic_availability import EXTRACTOR_NAME as AVAILABILITY_NAME
|
||||||
from .playtomic_availability import extract as extract_availability
|
from .playtomic_availability import extract as extract_availability
|
||||||
from .playtomic_tenants import EXTRACTOR_NAME as TENANTS_NAME
|
from .playtomic_tenants import EXTRACTOR_NAME as TENANTS_NAME
|
||||||
@@ -26,6 +28,7 @@ logger = setup_logging("padelnomics.extract")
|
|||||||
|
|
||||||
EXTRACTORS = [
|
EXTRACTORS = [
|
||||||
(OVERPASS_NAME, extract_overpass),
|
(OVERPASS_NAME, extract_overpass),
|
||||||
|
(OVERPASS_TENNIS_NAME, extract_overpass_tennis),
|
||||||
(EUROSTAT_NAME, extract_eurostat),
|
(EUROSTAT_NAME, extract_eurostat),
|
||||||
(EUROSTAT_CITY_LABELS_NAME, extract_eurostat_city_labels),
|
(EUROSTAT_CITY_LABELS_NAME, extract_eurostat_city_labels),
|
||||||
(CENSUS_USA_NAME, extract_census_usa),
|
(CENSUS_USA_NAME, extract_census_usa),
|
||||||
|
|||||||
@@ -1,17 +1,20 @@
|
|||||||
"""GeoNames global city population extractor.
|
"""GeoNames global city population extractor.
|
||||||
|
|
||||||
Downloads the cities15000.zip bulk file (~1.5MB compressed, ~26K entries) from
|
Downloads the cities1000.zip bulk file (~30MB compressed, ~140K entries) from
|
||||||
GeoNames and filters to cities with population ≥ 50,000 and feature codes in
|
GeoNames. Includes all populated places with population ≥ 1,000 and feature codes
|
||||||
{PPLA, PPLA2, PPLC, PPL} (populated places, avoiding parks, airports, etc.).
|
in {PPLA, PPLA2, PPLA3, PPLA4, PPLA5, PPLC, PPL}.
|
||||||
|
|
||||||
Used as the global fallback for population when Eurostat/Census/ONS don't cover
|
This broader coverage (vs. the old cities15000 with ≥50K filter) supports
|
||||||
a country. Padel is expanding globally so this catches UAE, Australia, Argentina, etc.
|
Gemeinde-level market intelligence pages — small municipalities often have the
|
||||||
|
highest padel investment opportunity (white space markets).
|
||||||
|
|
||||||
Requires: GEONAMES_USERNAME env var (free registration at geonames.org)
|
Requires: GEONAMES_USERNAME env var (free registration at geonames.org)
|
||||||
|
|
||||||
Landing: {LANDING_DIR}/geonames/{year}/{month}/cities_global.json.gz
|
Landing: {LANDING_DIR}/geonames/{year}/{month}/cities_global.json.gz
|
||||||
Output: {"rows": [{"geoname_id": 2950159, "city_name": "Berlin",
|
Output: {"rows": [{"geoname_id": 2950159, "city_name": "Berlin",
|
||||||
"country_code": "DE", "population": 3644826,
|
"country_code": "DE", "population": 3644826,
|
||||||
|
"lat": 52.524, "lon": 13.411,
|
||||||
|
"admin1_code": "16", "admin2_code": "00",
|
||||||
"ref_year": 2024}], "count": N}
|
"ref_year": 2024}], "count": N}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -31,24 +34,33 @@ logger = setup_logging("padelnomics.extract.geonames")
|
|||||||
|
|
||||||
EXTRACTOR_NAME = "geonames"
|
EXTRACTOR_NAME = "geonames"
|
||||||
|
|
||||||
DOWNLOAD_URL = "https://download.geonames.org/export/dump/cities15000.zip"
|
DOWNLOAD_URL = "https://download.geonames.org/export/dump/cities1000.zip"
|
||||||
|
|
||||||
# Only populated place feature codes — excludes airports, parks, admin areas, etc.
|
# Only populated place feature codes — excludes airports, parks, admin areas, etc.
|
||||||
# PPLC = capital of a political entity
|
# PPLC = capital of a political entity
|
||||||
# PPLA = seat of a first-order administrative division
|
# PPLA = seat of a first-order administrative division
|
||||||
# PPLA2 = seat of a second-order admin division
|
# PPLA2 = seat of a second-order admin division
|
||||||
|
# PPLA3 = seat of a third-order admin division (Gemeinden, cantons, etc.)
|
||||||
|
# PPLA4 = seat of a fourth-order admin division
|
||||||
|
# PPLA5 = seat of a fifth-order admin division
|
||||||
# PPL = populated place
|
# PPL = populated place
|
||||||
VALID_FEATURE_CODES = {"PPLC", "PPLA", "PPLA2", "PPL"}
|
VALID_FEATURE_CODES = {"PPLC", "PPLA", "PPLA2", "PPLA3", "PPLA4", "PPLA5", "PPL"}
|
||||||
|
|
||||||
MIN_POPULATION = 50_000
|
# No population floor — cities1000.zip is pre-filtered to ≥ 1,000.
|
||||||
|
# Accept all to maximise Gemeinde-level coverage.
|
||||||
|
MIN_POPULATION = 0
|
||||||
|
|
||||||
# GeoNames tab-separated column layout for cities15000.txt
|
# GeoNames tab-separated column layout for cities1000.txt
|
||||||
# https://download.geonames.org/export/dump/readme.txt
|
# https://download.geonames.org/export/dump/readme.txt
|
||||||
COL_GEONAME_ID = 0
|
COL_GEONAME_ID = 0
|
||||||
COL_NAME = 1
|
COL_NAME = 1
|
||||||
COL_ASCIINAME = 2
|
COL_ASCIINAME = 2
|
||||||
COL_COUNTRY_CODE = 8
|
COL_LAT = 4
|
||||||
|
COL_LON = 5
|
||||||
COL_FEATURE_CODE = 7
|
COL_FEATURE_CODE = 7
|
||||||
|
COL_COUNTRY_CODE = 8
|
||||||
|
COL_ADMIN1_CODE = 10
|
||||||
|
COL_ADMIN2_CODE = 11
|
||||||
COL_POPULATION = 14
|
COL_POPULATION = 14
|
||||||
COL_MODIFICATION_DATE = 18
|
COL_MODIFICATION_DATE = 18
|
||||||
|
|
||||||
@@ -86,10 +98,21 @@ def _parse_cities_txt(content: bytes) -> list[dict]:
|
|||||||
country_code = parts[COL_COUNTRY_CODE].strip().upper()
|
country_code = parts[COL_COUNTRY_CODE].strip().upper()
|
||||||
if not city_name or not country_code:
|
if not city_name or not country_code:
|
||||||
continue
|
continue
|
||||||
|
try:
|
||||||
|
lat = float(parts[COL_LAT])
|
||||||
|
lon = float(parts[COL_LON])
|
||||||
|
except (ValueError, IndexError):
|
||||||
|
continue
|
||||||
|
admin1_code = parts[COL_ADMIN1_CODE].strip() if len(parts) > COL_ADMIN1_CODE else ""
|
||||||
|
admin2_code = parts[COL_ADMIN2_CODE].strip() if len(parts) > COL_ADMIN2_CODE else ""
|
||||||
rows.append({
|
rows.append({
|
||||||
"geoname_id": geoname_id,
|
"geoname_id": geoname_id,
|
||||||
"city_name": city_name,
|
"city_name": city_name,
|
||||||
"country_code": country_code,
|
"country_code": country_code,
|
||||||
|
"lat": lat,
|
||||||
|
"lon": lon,
|
||||||
|
"admin1_code": admin1_code or None,
|
||||||
|
"admin2_code": admin2_code or None,
|
||||||
"population": population,
|
"population": population,
|
||||||
"ref_year": REF_YEAR,
|
"ref_year": REF_YEAR,
|
||||||
})
|
})
|
||||||
@@ -102,7 +125,7 @@ def extract(
|
|||||||
conn: sqlite3.Connection,
|
conn: sqlite3.Connection,
|
||||||
session: niquests.Session,
|
session: niquests.Session,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Download GeoNames cities15000.zip. Skips if already run this month."""
|
"""Download GeoNames cities1000.zip. Skips if already run this month."""
|
||||||
username = os.environ.get("GEONAMES_USERNAME", "").strip()
|
username = os.environ.get("GEONAMES_USERNAME", "").strip()
|
||||||
if not username:
|
if not username:
|
||||||
logger.warning("GEONAMES_USERNAME not set — writing empty placeholder so SQLMesh models can run")
|
logger.warning("GEONAMES_USERNAME not set — writing empty placeholder so SQLMesh models can run")
|
||||||
@@ -120,25 +143,25 @@ def extract(
|
|||||||
|
|
||||||
year, month = year_month.split("/")
|
year, month = year_month.split("/")
|
||||||
|
|
||||||
# GeoNames bulk downloads don't require the username in the URL for cities15000.zip,
|
# GeoNames bulk downloads don't require the username in the URL for cities1000.zip,
|
||||||
# but the username signals acceptance of their terms of use and helps their monitoring.
|
# but the username signals acceptance of their terms of use and helps their monitoring.
|
||||||
url = f"{DOWNLOAD_URL}?username={username}"
|
url = f"{DOWNLOAD_URL}?username={username}"
|
||||||
logger.info("GET cities15000.zip (~1.5MB compressed)")
|
logger.info("GET cities1000.zip (~30MB compressed, ~140K locations)")
|
||||||
resp = session.get(url, timeout=HTTP_TIMEOUT_SECONDS * 4)
|
resp = session.get(url, timeout=HTTP_TIMEOUT_SECONDS * 10)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
|
||||||
assert len(resp.content) > 100_000, (
|
assert len(resp.content) > 1_000_000, (
|
||||||
f"cities15000.zip too small ({len(resp.content)} bytes) — download may have failed"
|
f"cities1000.zip too small ({len(resp.content)} bytes) — download may have failed"
|
||||||
)
|
)
|
||||||
|
|
||||||
with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
|
with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
|
||||||
txt_name = next((n for n in zf.namelist() if n.endswith(".txt")), None)
|
txt_name = next((n for n in zf.namelist() if n.endswith(".txt")), None)
|
||||||
assert txt_name, f"No .txt file in cities15000.zip: {zf.namelist()}"
|
assert txt_name, f"No .txt file in cities1000.zip: {zf.namelist()}"
|
||||||
txt_content = zf.read(txt_name)
|
txt_content = zf.read(txt_name)
|
||||||
|
|
||||||
rows = _parse_cities_txt(txt_content)
|
rows = _parse_cities_txt(txt_content)
|
||||||
assert len(rows) > 5_000, f"Expected >5000 global cities ≥50K pop, got {len(rows)}"
|
assert len(rows) > 100_000, f"Expected >100K global locations (pop ≥1K), got {len(rows)}"
|
||||||
logger.info("parsed %d global cities with population ≥%d", len(rows), MIN_POPULATION)
|
logger.info("parsed %d global locations (pop ≥1K)", len(rows))
|
||||||
|
|
||||||
dest_dir = landing_path(landing_dir, "geonames", year, month)
|
dest_dir = landing_path(landing_dir, "geonames", year, month)
|
||||||
dest = dest_dir / "cities_global.json.gz"
|
dest = dest_dir / "cities_global.json.gz"
|
||||||
|
|||||||
@@ -0,0 +1,78 @@
|
|||||||
|
"""Overpass API extractor — global tennis court locations from OpenStreetMap.
|
||||||
|
|
||||||
|
Queries the Overpass API for all nodes/ways/relations tagged sport=tennis.
|
||||||
|
Tennis court density near a location is a proxy for racket-sport culture —
|
||||||
|
areas with many tennis clubs are prime candidates for padel adoption.
|
||||||
|
|
||||||
|
The query returns ~150K+ results globally (vs ~5K for padel), so a higher
|
||||||
|
Overpass timeout is used.
|
||||||
|
|
||||||
|
Landing: {LANDING_DIR}/overpass_tennis/{year}/{month}/courts.json.gz
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import niquests
|
||||||
|
|
||||||
|
from ._shared import OVERPASS_TIMEOUT_SECONDS, run_extractor, setup_logging
|
||||||
|
from .utils import landing_path, write_gzip_atomic
|
||||||
|
|
||||||
|
logger = setup_logging("padelnomics.extract.overpass_tennis")
|
||||||
|
|
||||||
|
EXTRACTOR_NAME = "overpass_tennis"
|
||||||
|
OVERPASS_URL = "https://overpass-api.de/api/interpreter"
|
||||||
|
|
||||||
|
# Tennis returns ~150K+ elements globally vs ~5K for padel — use 3× timeout.
|
||||||
|
TENNIS_OVERPASS_TIMEOUT_SECONDS = OVERPASS_TIMEOUT_SECONDS * 3
|
||||||
|
|
||||||
|
OVERPASS_QUERY = (
|
||||||
|
"[out:json][timeout:180];\n"
|
||||||
|
"(\n"
|
||||||
|
' node["sport"="tennis"];\n'
|
||||||
|
' way["sport"="tennis"];\n'
|
||||||
|
' relation["sport"="tennis"];\n'
|
||||||
|
");\n"
|
||||||
|
"out body;"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def extract(
|
||||||
|
landing_dir: Path,
|
||||||
|
year_month: str,
|
||||||
|
conn: sqlite3.Connection,
|
||||||
|
session: niquests.Session,
|
||||||
|
) -> dict:
|
||||||
|
"""POST OverpassQL query for tennis courts and write raw OSM JSON. Returns run metrics."""
|
||||||
|
year, month = year_month.split("/")
|
||||||
|
dest_dir = landing_path(landing_dir, "overpass_tennis", year, month)
|
||||||
|
dest = dest_dir / "courts.json.gz"
|
||||||
|
|
||||||
|
logger.info("POST %s (sport=tennis, ~150K+ results expected)", OVERPASS_URL)
|
||||||
|
resp = session.post(
|
||||||
|
OVERPASS_URL,
|
||||||
|
data={"data": OVERPASS_QUERY},
|
||||||
|
timeout=TENNIS_OVERPASS_TIMEOUT_SECONDS,
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
|
||||||
|
size_bytes = len(resp.content)
|
||||||
|
logger.info("%s bytes received", f"{size_bytes:,}")
|
||||||
|
|
||||||
|
bytes_written = write_gzip_atomic(dest, resp.content)
|
||||||
|
logger.info("wrote %s (%s bytes compressed)", dest, f"{bytes_written:,}")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"files_written": 1,
|
||||||
|
"files_skipped": 0,
|
||||||
|
"bytes_written": bytes_written,
|
||||||
|
"cursor_value": year_month,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
run_extractor(EXTRACTOR_NAME, extract)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
-- GeoNames global city population (cities15000 bulk dataset, filtered to ≥50K).
|
-- GeoNames global city/municipality population (cities1000 bulk dataset, pop ≥ 1K).
|
||||||
-- Global fallback for countries not covered by Eurostat, Census, or ONS.
|
-- Global fallback for countries not covered by Eurostat, Census, or ONS.
|
||||||
|
-- Broad coverage (140K+ locations) enables Gemeinde-level market intelligence.
|
||||||
-- One row per geoname_id (GeoNames stable numeric identifier).
|
-- One row per geoname_id (GeoNames stable numeric identifier).
|
||||||
--
|
--
|
||||||
-- Source: data/landing/geonames/{year}/{month}/cities_global.json.gz
|
-- Source: data/landing/geonames/{year}/{month}/cities_global.json.gz
|
||||||
@@ -16,6 +17,10 @@ WITH parsed AS (
|
|||||||
TRY_CAST(row ->> 'geoname_id' AS INTEGER) AS geoname_id,
|
TRY_CAST(row ->> 'geoname_id' AS INTEGER) AS geoname_id,
|
||||||
row ->> 'city_name' AS city_name,
|
row ->> 'city_name' AS city_name,
|
||||||
row ->> 'country_code' AS country_code,
|
row ->> 'country_code' AS country_code,
|
||||||
|
TRY_CAST(row ->> 'lat' AS DOUBLE) AS lat,
|
||||||
|
TRY_CAST(row ->> 'lon' AS DOUBLE) AS lon,
|
||||||
|
row ->> 'admin1_code' AS admin1_code,
|
||||||
|
row ->> 'admin2_code' AS admin2_code,
|
||||||
TRY_CAST(row ->> 'population' AS BIGINT) AS population,
|
TRY_CAST(row ->> 'population' AS BIGINT) AS population,
|
||||||
TRY_CAST(row ->> 'ref_year' AS INTEGER) AS ref_year,
|
TRY_CAST(row ->> 'ref_year' AS INTEGER) AS ref_year,
|
||||||
CURRENT_DATE AS extracted_date
|
CURRENT_DATE AS extracted_date
|
||||||
@@ -32,6 +37,10 @@ SELECT
|
|||||||
geoname_id,
|
geoname_id,
|
||||||
TRIM(city_name) AS city_name,
|
TRIM(city_name) AS city_name,
|
||||||
UPPER(country_code) AS country_code,
|
UPPER(country_code) AS country_code,
|
||||||
|
lat,
|
||||||
|
lon,
|
||||||
|
NULLIF(TRIM(admin1_code), '') AS admin1_code,
|
||||||
|
NULLIF(TRIM(admin2_code), '') AS admin2_code,
|
||||||
population,
|
population,
|
||||||
ref_year,
|
ref_year,
|
||||||
extracted_date
|
extracted_date
|
||||||
@@ -40,3 +49,5 @@ WHERE population IS NOT NULL
|
|||||||
AND population > 0
|
AND population > 0
|
||||||
AND geoname_id IS NOT NULL
|
AND geoname_id IS NOT NULL
|
||||||
AND city_name IS NOT NULL
|
AND city_name IS NOT NULL
|
||||||
|
AND lat IS NOT NULL
|
||||||
|
AND lon IS NOT NULL
|
||||||
|
|||||||
@@ -0,0 +1,72 @@
|
|||||||
|
-- Tennis court locations from OpenStreetMap via Overpass API (sport=tennis).
|
||||||
|
-- Used as a "racket sport culture" signal in the opportunity score:
|
||||||
|
-- areas with high tennis court density are prime padel adoption markets.
|
||||||
|
--
|
||||||
|
-- Source: data/landing/overpass_tennis/{year}/{month}/courts.json.gz
|
||||||
|
|
||||||
|
MODEL (
|
||||||
|
name staging.stg_tennis_courts,
|
||||||
|
kind FULL,
|
||||||
|
cron '@daily',
|
||||||
|
grain osm_id
|
||||||
|
);
|
||||||
|
|
||||||
|
WITH parsed AS (
|
||||||
|
SELECT
|
||||||
|
elem ->> 'type' AS osm_type,
|
||||||
|
(elem ->> 'id')::BIGINT AS osm_id,
|
||||||
|
TRY_CAST(elem ->> 'lat' AS DOUBLE) AS lat,
|
||||||
|
TRY_CAST(elem ->> 'lon' AS DOUBLE) AS lon,
|
||||||
|
elem -> 'tags' ->> 'name' AS name,
|
||||||
|
elem -> 'tags' ->> 'addr:country' AS country_code,
|
||||||
|
elem -> 'tags' ->> 'addr:city' AS city_tag,
|
||||||
|
filename AS source_file,
|
||||||
|
CURRENT_DATE AS extracted_date
|
||||||
|
FROM (
|
||||||
|
SELECT UNNEST(elements) AS elem, filename
|
||||||
|
FROM read_json(
|
||||||
|
@LANDING_DIR || '/overpass_tennis/*/*/courts.json.gz',
|
||||||
|
format = 'auto',
|
||||||
|
filename = true
|
||||||
|
)
|
||||||
|
)
|
||||||
|
WHERE (elem ->> 'type') IS NOT NULL
|
||||||
|
),
|
||||||
|
deduped AS (
|
||||||
|
SELECT *,
|
||||||
|
ROW_NUMBER() OVER (PARTITION BY osm_id ORDER BY extracted_date DESC) AS rn
|
||||||
|
FROM parsed
|
||||||
|
WHERE osm_type = 'node'
|
||||||
|
AND lat IS NOT NULL AND lon IS NOT NULL
|
||||||
|
AND lat BETWEEN -90 AND 90
|
||||||
|
AND lon BETWEEN -180 AND 180
|
||||||
|
),
|
||||||
|
with_country AS (
|
||||||
|
SELECT
|
||||||
|
osm_id, lat, lon,
|
||||||
|
COALESCE(NULLIF(TRIM(UPPER(country_code)), ''), CASE
|
||||||
|
WHEN lat BETWEEN 47.27 AND 55.06 AND lon BETWEEN 5.87 AND 15.04 THEN 'DE'
|
||||||
|
WHEN lat BETWEEN 35.95 AND 43.79 AND lon BETWEEN -9.39 AND 4.33 THEN 'ES'
|
||||||
|
WHEN lat BETWEEN 49.90 AND 60.85 AND lon BETWEEN -8.62 AND 1.77 THEN 'GB'
|
||||||
|
WHEN lat BETWEEN 41.36 AND 51.09 AND lon BETWEEN -5.14 AND 9.56 THEN 'FR'
|
||||||
|
WHEN lat BETWEEN 45.46 AND 47.80 AND lon BETWEEN 5.96 AND 10.49 THEN 'CH'
|
||||||
|
WHEN lat BETWEEN 46.37 AND 49.02 AND lon BETWEEN 9.53 AND 17.16 THEN 'AT'
|
||||||
|
WHEN lat BETWEEN 36.35 AND 47.09 AND lon BETWEEN 6.62 AND 18.51 THEN 'IT'
|
||||||
|
WHEN lat BETWEEN 37.00 AND 42.15 AND lon BETWEEN -9.50 AND -6.19 THEN 'PT'
|
||||||
|
ELSE NULL
|
||||||
|
END) AS country_code,
|
||||||
|
NULLIF(TRIM(name), '') AS name,
|
||||||
|
NULLIF(TRIM(city_tag), '') AS city,
|
||||||
|
extracted_date
|
||||||
|
FROM deduped
|
||||||
|
WHERE rn = 1
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
osm_id,
|
||||||
|
lat,
|
||||||
|
lon,
|
||||||
|
country_code,
|
||||||
|
name,
|
||||||
|
city,
|
||||||
|
extracted_date
|
||||||
|
FROM with_country
|
||||||
Reference in New Issue
Block a user