Compare commits
9 Commits
v202603011
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
36deaba00e | ||
|
|
9608b7f601 | ||
|
|
0811b30cbd | ||
|
|
7d2950928e | ||
|
|
65e51d2972 | ||
|
|
c5d872ec55 | ||
|
|
75305935bd | ||
|
|
a15c32d398 | ||
|
|
97c5846d51 |
@@ -3,6 +3,8 @@ APP_NAME=ENC[AES256_GCM,data:Vic/MJYoxZo8JAI=,iv:n1SEGQaGeZtYMtLmDRFiljDBbNKFvCz
|
||||
SECRET_KEY=ENC[AES256_GCM,data:a3Bhj3gSQaE3llRWBYzpjoFDhhhSsNee67jXJs7+qn4=,iv:yvrx78X5Ut4DBSlmBnIn09ESVc/tuDiwiV4njmjcvko=,tag:cbFUTAEpX+isQD9FCVllsw==,type:str]
|
||||
BASE_URL=ENC[AES256_GCM,data:LcbPDZf9Pwcuv7RxN9xhNfa9Tufi,iv:cOdjW9nNe+BuDXh+dL4b5LFQL2mKBiKV0FaEsDGMAQc=,tag:3uAn3AIwsztIfGpkQLD5Fg==,type:str]
|
||||
DEBUG=ENC[AES256_GCM,data:qrEGkA==,iv:bCyEDWiEzolHo4vabiyYTsqM0eUaBmNbXYYu4wCsaeE=,tag:80gnDNbdZHRWVEYtuA1M2Q==,type:str]
|
||||
#ENC[AES256_GCM,data:YB5h,iv:2HFpvHNebAB9M/44rtPk/QpFV9hNKOlV/099OSjPnOA=,tag:BVj8vGy6K3LW/wb1vcZ+Ug==,type:comment]
|
||||
GITEA_TOKEN=ENC[AES256_GCM,data:aIM7vQXxFbz7FDdXEdwtelvmXAdLgJfWNCSPeK//NlveQrU5cLDt8w==,iv:9qhjk52ZAs+y5WwP5WebMUwHhu6JNdHzAsEOpznrwBw=,tag:WnCDA4hAccMFs6vXVVKqxw==,type:str]
|
||||
#ENC[AES256_GCM,data:YmlGAWpXxRCqam3oTWtGxHDXC+svEXI4HyUxrm/8OcKTuJsYPcL1WcnYqrP5Mf5lU5qPezEXUrrgZy8vjVW6qAbb0IA2PMM4Kg==,iv:dx6Dn99dJgjwyvUp8NAygXjRQ50yKYFeC73Oqt9WvmY=,tag:6JLF2ixSAv39VkKt6+cecQ==,type:comment]
|
||||
ADMIN_EMAILS=ENC[AES256_GCM,data:hlG8b32WlD4ems3VKQ==,iv:wWO08dmX4oLhHulXg4HUG0PjRnFiX19RUTkTvjqIw5I=,tag:KMjXsBt7aE/KqlCfV+fdMg==,type:str]
|
||||
#ENC[AES256_GCM,data:b2wQxnL8Q2Bp,iv:q8ep3yUPzCumpZpljoVL2jbcPdsI5c2piiZ0x5k10Mw=,tag:IbjkT0Mjgu9n+6FGiPVihg==,type:comment]
|
||||
@@ -71,7 +73,7 @@ GEONAMES_USERNAME=ENC[AES256_GCM,data:aSkVdLNrhiF6tlg=,iv:eemFGwDIv3EG/P3lVHGZj9
|
||||
CENSUS_API_KEY=ENC[AES256_GCM,data:qqG971573aGq9MiHI2xLlanKKFwjfcNNoMXtm8LNbyh0rMbQN2XukQ==,iv:az2i0ldH75nHGah4DeOxaXmDbVYqmC1c77ptZqFA9BI=,tag:zoDdKj9bR7fgIDo1/dEU2g==,type:str]
|
||||
sops_age__list_0__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBxNWNmUzVNUGdWRnE0ZFpF\nM0JQZWZ3UDdEVzlwTmIxakxOZXBkT2x2ZlNrClRtV2M3S2daSGxUZmFDSWQ2Nmh4\neU51QndFcUxlSE00RFovOVJTcDZmUUUKLS0tIDcvL3hRMDRoMWZZSXljNzA3WG5o\nMWFic21MV0krMzlIaldBTVU0ZDdlTE0K7euGQtA+9lHNws+x7TMCArZamm9att96\nL8cXoUDWe5fNI5+M1bXReqVfNwPTwZsV6j/+ZtYKybklIzWz02Ex4A==\n-----END AGE ENCRYPTED FILE-----\n
|
||||
sops_age__list_0__map_recipient=age1f5002gj4s78jju45jd28kuejtcfhn5cdujz885fl7z2p9ym68pnsgky87a
|
||||
sops_lastmodified=2026-03-01T13:26:08Z
|
||||
sops_mac=ENC[AES256_GCM,data:WmbT6tCUEoCDyKu673NQoJNzmCiilpG8yDVGl6ObxTOYleWt+1DVdPS+XUV+0Wd4bfkEhGTEfXAyy+wfoCVfYnenMuDGjXUUdsvqrOX6nnNCJ8nIntL46LfbRsbVrU6eeYGu/TaTyfouWjkk6pqlxffNSS6rrEFNZE4Q+v58+EI=,iv:TuCEmK6YJXsYISbN4mbuVbS6OvUNuhPRLstjjNkkrPk=,tag:hWLS036q7H5lMNpR6gZBVA==,type:str]
|
||||
sops_lastmodified=2026-03-01T13:34:16Z
|
||||
sops_mac=ENC[AES256_GCM,data:JLfGLbNTEcI6M/sUA5Zez6cfEUObgnUBmX52560PzBmeLZt0F5Y5QpeojIBqEDMuNB0hp1nnPI59WClLJtQ12VlHo9TkL3x9uCNUG+KneQrn1bTmJpA3cwNkWTzIm4l+TGbJbd4FpKJ9H0v1w+sqoKOgG8DqbtOeVdUfsVspAso=,iv:UqYxooXkEtx+y7fYzl+GFncpkjz8dcP7o9fp+kFf6w4=,tag:/maSb1aZGo+Ia8eGpB7PYw==,type:str]
|
||||
sops_unencrypted_suffix=_unencrypted
|
||||
sops_version=3.12.1
|
||||
|
||||
@@ -52,13 +52,18 @@ BING_SITE_URL=ENC[AES256_GCM,data:M33VI97DyxH8gRR3ZUXoXg4QrEv5og==,iv:GxZtwfbBVi
|
||||
#ENC[AES256_GCM,data:OTUMKNkRW0zrupNppXthwE1oieILhNjM+cjx5hFn69g=,iv:48ID2qtSe9ggD2X+G/iUqp3v2uwEc7fZw8lxHIvVXmk=,tag:okBn0Npk1K9dDOFWA/AB1A==,type:comment]
|
||||
GEONAMES_USERNAME=ENC[AES256_GCM,data:UXd/S2TzXPiGmLY=,iv:OMURM5E6SFEsaqroUlH76DEnr7C/ujNk9UQnbWT0hK4=,tag:VsjjS12QDbudiEhdAQ/OCQ==,type:str]
|
||||
CENSUS_API_KEY=ENC[AES256_GCM,data:9RbKlxSD17LqIuuNXaOKSgZ8LnFh9Wbze3XHgpctfV/1TqBMZTIedQ==,iv:WwsmR3HLUEcgUpLliGRaUPhGM9vFNPMGXSAQQ6+9UVc=,tag:R4EMNy5MxxvK0UTaCL0umA==,type:str]
|
||||
#ENC[AES256_GCM,data:SL402gYB8ngjqkrG03FmaA==,iv:I326cYnOWdFnaUwnSfP+s2p9oCDCnqDzUJuPOzSFJc0=,tag:MBW5AqAaq4hTMmNXq1tXKw==,type:comment]
|
||||
R2_LANDING_BUCKET=ENC[AES256_GCM,data:yZXLNQb8yN9nQPdxqmqv61fLWbRYCjjOqQ==,iv:fAwBLC/EuU0lgYOxZSkTagWyeQCdEadjssapxpCEGjA=,tag:VUmuVw76WZAaukp71Desag==,type:str]
|
||||
R2_LANDING_ACCESS_KEY_ID=ENC[AES256_GCM,data:Y6y+U1ayhpFDcoaDjl7hyMVjU3gVvtORAH5gbd+HXbM=,iv:ra9kuch1DT+2tfz140bvxQRIXypsdiUrX1QYQ59gNRI=,tag:Wt85qliUMFvgbvoUrOXT7A==,type:str]
|
||||
R2_LANDING_SECRET_ACCESS_KEY=ENC[AES256_GCM,data:99wB9aKSq2GihW9FOwBSMgHYzNKBHlol2Mf2kg4Ma6Fr4Cr21t/blzPxNQ7YRdeKk6ypFgViXlS4BJz9nC+v0g==,iv:/AmbXtj/uSGcMp+NBhN5tiVb2U56tvO5e1UpG2/ijPo=,tag:Qg2Tt11DUJPyeYcq9iSVnQ==,type:str]
|
||||
R2_ENDPOINT=ENC[AES256_GCM,data:PBWTzUfhc/qVZ4n3GqJdZu8W7Ee0+FpsgikWVxgptQ3BJ2rQ4ewDuEB05inB1Agz1sB42VEBAsTtR3c5waPPRNs=,iv:ILZ0999fsPYYzVQYuIgAxpyystcplnykVoT5RpSEW2w=,tag:FxFOjQ+YcZuLf+jJr2OVFQ==,type:str]
|
||||
sops_age__list_0__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBaUVk0UEVqdmtsM3VzQnpZ\nZjJDZ1lsM0VqWFpVVXUvNzdQcCtHbVJLNjFnCmhna01vTkVBaFQ5ZVlXeGhYNXdH\ncWJ5Qi9PdkxLaHBhQnR3cmtoblkxdEUKLS0tIDhHamY4NXhxOG9YN1NpbTN1aVRh\nOHVKcEN1d0QwQldVTDlBWUU4SDVDWlUKRJU+CTfTzIx6LLKin9sTXAHPVAfiUerZ\nCqYVFncsCJE3TbMI424urQj7kragPoGl1z4++yqAXNTRxfZIY4KTkg==\n-----END AGE ENCRYPTED FILE-----\n
|
||||
sops_age__list_0__map_recipient=age1f5002gj4s78jju45jd28kuejtcfhn5cdujz885fl7z2p9ym68pnsgky87a
|
||||
sops_age__list_1__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBmVEticFRVemlzZnlzek4x\nbWJ0d0h5ejJVUk5remo1VkdxNjVpdllqbFhFClc1UXlNd09xVVA5MnltMlN5MWRy\nYUlNRmNybHh1RGdPVC9yWlYrVmRTdkkKLS0tIHBUbU9qSDMrVGVHZDZGSFdpWlBh\nT3NXTGl0SmszaU9hRmU5bXI0cDRoRW8KLvbNYsBEwz+ITKvn7Yn+iNHiRzyyjtQt\no9/HupykJ3WjSdleGz7ZN6UiPGelHp0D/rzSASTYaI1+0i0xZ4PUoQ==\n-----END AGE ENCRYPTED FILE-----\n
|
||||
sops_age__list_1__map_recipient=age1wjepykv3glvsrtegu25tevg7vyn3ngpl607u3yjc9ucay04s045s796msw
|
||||
sops_age__list_2__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBFeHhaOURNZnRVMEwxNThu\nUjF4Q0kwUXhTUE1QSzZJbmpubnh3RnpQTmdvCjRmWWxpNkxFUmVGb3NRbnlydW5O\nWEg3ZXJQTU4vcndzS2pUQXY3Q0ttYjAKLS0tIE9IRFJ1c2ZxbGVHa2xTL0swbGN1\nTzgwMThPUDRFTWhuZHJjZUYxOTZrU00KY62qrNBCUQYxwcLMXFEnLkwncxq3BPJB\nKm4NzeHBU87XmPWVrgrKuf+PH1mxJlBsl7Hev8xBTy7l6feiZjLIvQ==\n-----END AGE ENCRYPTED FILE-----\n
|
||||
sops_age__list_2__map_recipient=age1c783ym2q5x9tv7py5d28uc4k44aguudjn03g97l9nzs00dd9tsrqum8h4d
|
||||
sops_lastmodified=2026-03-01T13:25:41Z
|
||||
sops_mac=ENC[AES256_GCM,data:EL9Bgo0pWWECeHaaM1bHtkvwBgBmS3P2cX+6oahHKmLEJLI7P7fiomP7G8SdrfUyNpZaP9d4LlfwZSuCPqH6rP8jzF67oNkfXfd/xK4OW2U2TqSvouCMzlhqVQgS4HHl5EgvOI488WEIZko7KK2A1rxnpkm8C29WG9d9G64LKvw=,iv:XzsNm3CXnlC6SIef63BdddALjGustp8czHQCWOtjXBQ=,tag:zll0db6K1+M4brOpfVWnhg==,type:str]
|
||||
sops_lastmodified=2026-03-01T17:40:31Z
|
||||
sops_mac=ENC[AES256_GCM,data:xiTAz5BSk9F7GqQHcy0UpU7jCS2wHbfi27hOvpdoxAKtGLxaZ5PISQHVWEStWjHS+8g+3ACrTj/UQfUuCTr/55UVU0Wu6hyAWnuZ3DuaMfYUNer+9XZm5V2jTibQIYH01ZWyt4aeqs/Njn39FMx33s4hRdYVjfN391wgkx2+Hsg=,iv:UbgoSuVPu9H7Gu+HwZ6m60KgfGxZwKITMrkT54nd1yY=,tag:pM0hoz6XDQk6HaSJBkOR1Q==,type:str]
|
||||
sops_unencrypted_suffix=_unencrypted
|
||||
sops_version=3.12.1
|
||||
|
||||
@@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Changed
|
||||
- **Admin: styled confirm dialog for all destructive actions** — replaced all native `window.confirm()` calls with the existing `#confirm-dialog` styled `<dialog>`. A new global `htmx:confirm` handler intercepts HTMX confirmation prompts and shows the dialog; form-submit buttons on affiliate pages were updated to use `confirmAction()`. Affected: pipeline Transform tab (Run Transform, Run Export, Run Full Pipeline), pipeline Overview tab (Run extractor), affiliate product delete, affiliate program delete (both form and list variants).
|
||||
- **Pipeline tabs: no scrollbar** — added `scrollbar-width: none` and `::-webkit-scrollbar { display: none }` to `.pipeline-tabs` to suppress the spurious horizontal scrollbar on narrow viewports.
|
||||
|
||||
### Fixed
|
||||
- **Stale-tier failures no longer exhaust the next proxy tier** — with parallel workers, threads that fetched a proxy just before tier escalation reported failures after the tier changed, immediately blowing through the new tier's circuit breaker before it ever got tried (Rayobyte was skipped entirely). `record_failure(proxy_url)` now checks which tier the proxy belongs to and ignores the circuit breaker when the proxy is from an already-escalated tier.
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ extract-census-usa = "padelnomics_extract.census_usa:main"
|
||||
extract-census-usa-income = "padelnomics_extract.census_usa_income:main"
|
||||
extract-ons-uk = "padelnomics_extract.ons_uk:main"
|
||||
extract-geonames = "padelnomics_extract.geonames:main"
|
||||
extract-gisco = "padelnomics_extract.gisco:main"
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
|
||||
@@ -11,9 +11,12 @@ from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
import niquests
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from .utils import end_run, open_state_db, start_run
|
||||
|
||||
load_dotenv()
|
||||
|
||||
LANDING_DIR = Path(os.environ.get("LANDING_DIR", "data/landing"))
|
||||
|
||||
HTTP_TIMEOUT_SECONDS = 30
|
||||
|
||||
@@ -7,7 +7,7 @@ A graphlib.TopologicalSorter schedules them: tasks with no unmet dependencies
|
||||
run immediately in parallel; each completion may unlock new tasks.
|
||||
|
||||
Current dependency graph:
|
||||
- All 8 non-availability extractors have no dependencies (run in parallel)
|
||||
- All 9 non-availability extractors have no dependencies (run in parallel)
|
||||
- playtomic_availability depends on playtomic_tenants (starts as soon as
|
||||
tenants finishes, even if other extractors are still running)
|
||||
"""
|
||||
@@ -26,6 +26,8 @@ from .eurostat_city_labels import EXTRACTOR_NAME as EUROSTAT_CITY_LABELS_NAME
|
||||
from .eurostat_city_labels import extract as extract_eurostat_city_labels
|
||||
from .geonames import EXTRACTOR_NAME as GEONAMES_NAME
|
||||
from .geonames import extract as extract_geonames
|
||||
from .gisco import EXTRACTOR_NAME as GISCO_NAME
|
||||
from .gisco import extract as extract_gisco
|
||||
from .ons_uk import EXTRACTOR_NAME as ONS_UK_NAME
|
||||
from .ons_uk import extract as extract_ons_uk
|
||||
from .overpass import EXTRACTOR_NAME as OVERPASS_NAME
|
||||
@@ -50,6 +52,7 @@ EXTRACTORS: dict[str, tuple] = {
|
||||
CENSUS_USA_INCOME_NAME: (extract_census_usa_income, []),
|
||||
ONS_UK_NAME: (extract_ons_uk, []),
|
||||
GEONAMES_NAME: (extract_geonames, []),
|
||||
GISCO_NAME: (extract_gisco, []),
|
||||
TENANTS_NAME: (extract_tenants, []),
|
||||
AVAILABILITY_NAME: (extract_availability, [TENANTS_NAME]),
|
||||
}
|
||||
|
||||
95
extract/padelnomics_extract/src/padelnomics_extract/gisco.py
Normal file
95
extract/padelnomics_extract/src/padelnomics_extract/gisco.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""GISCO NUTS-2 boundary GeoJSON extractor.
|
||||
|
||||
Downloads NUTS-2 boundary polygons from Eurostat GISCO. The file is stored
|
||||
uncompressed because DuckDB's ST_Read cannot read gzipped files.
|
||||
|
||||
NUTS classification revises approximately every 7 years (current: 2021).
|
||||
The partition path is fixed (2024/01, the month this NUTS 2021 dataset was
|
||||
first ingested) rather than derived from the run date. Cursor tracking still uses year_month to avoid
|
||||
re-downloading on every monthly run.
|
||||
|
||||
Landing: {LANDING_DIR}/gisco/2024/01/nuts2_boundaries.geojson (~5 MB, uncompressed)
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
import niquests
|
||||
|
||||
from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging
|
||||
from .utils import get_last_cursor
|
||||
|
||||
logger = setup_logging("padelnomics.extract.gisco")
|
||||
|
||||
EXTRACTOR_NAME = "gisco"
|
||||
|
||||
# NUTS 2021 revision, 20M scale (1:20,000,000), WGS84 (EPSG:4326), LEVL_2 only.
|
||||
# 20M resolution gives simplified polygons that are fast for point-in-polygon
|
||||
# matching without sacrificing accuracy at the NUTS-2 boundary level.
|
||||
GISCO_URL = (
|
||||
"https://gisco-services.ec.europa.eu/distribution/v2/nuts/geojson/"
|
||||
"NUTS_RG_20M_2021_4326_LEVL_2.geojson"
|
||||
)
|
||||
|
||||
# Fixed partition: NUTS boundaries are a static reference file, not time-series data.
|
||||
# The 2024/01 partition reflects when this NUTS 2021 dataset was first ingested.
|
||||
DEST_REL = Path("gisco/2024/01/nuts2_boundaries.geojson")
|
||||
|
||||
_GISCO_TIMEOUT_SECONDS = HTTP_TIMEOUT_SECONDS * 4 # ~5 MB; generous for slow upstreams
|
||||
|
||||
|
||||
def extract(
    landing_dir: Path,
    year_month: str,
    conn: sqlite3.Connection,
    session: niquests.Session,
) -> dict:
    """Download the NUTS-2 boundary GeoJSON into the landing directory.

    The download is skipped when the stored cursor already equals
    ``year_month`` or when the destination file is already present.

    Args:
        landing_dir: Root of the landing area; the file is written to
            ``landing_dir / DEST_REL``.
        year_month: Identifier of the current run period. It is compared with
            the stored cursor and returned as ``cursor_value``.
        conn: State database connection handed to ``get_last_cursor``.
        session: HTTP session used to fetch ``GISCO_URL``.

    Returns:
        A dict with ``files_written``, ``files_skipped`` and ``bytes_written``.
        ``cursor_value`` is included in every case except the
        "cursor already matches" skip.

    Raises:
        AssertionError: If the response body is 100,000 bytes or smaller, or
            does not contain a ``"FeatureCollection"`` marker.
        Exception: Whatever ``resp.raise_for_status()`` raises for an HTTP
            error status.
    """
    last_cursor = get_last_cursor(conn, EXTRACTOR_NAME)
    if last_cursor == year_month:
        logger.info("already ran for %s — skipping", year_month)
        return {"files_written": 0, "files_skipped": 1, "bytes_written": 0}

    dest = landing_dir / DEST_REL
    if dest.exists():
        logger.info("file already exists (skipping download): %s", dest)
        return {
            "files_written": 0,
            "files_skipped": 1,
            "bytes_written": 0,
            "cursor_value": year_month,
        }

    dest.parent.mkdir(parents=True, exist_ok=True)
    logger.info("GET %s", GISCO_URL)
    resp = session.get(GISCO_URL, timeout=_GISCO_TIMEOUT_SECONDS)
    resp.raise_for_status()

    content = resp.content
    # Validate with explicit raises rather than `assert` statements: asserts
    # are stripped when Python runs with -O, which would let a truncated or
    # non-GeoJSON body reach the landing area. AssertionError is kept so the
    # exception type seen by callers is unchanged.
    if not len(content) > 100_000:
        raise AssertionError(
            f"GeoJSON too small ({len(content)} bytes) — download may have failed"
        )
    if b'"FeatureCollection"' not in content:
        raise AssertionError("Response does not look like GeoJSON")

    # Write uncompressed — ST_Read requires a plain file, not .gz
    # Write to a sibling temp file first and rename, so a partially written
    # file never appears under the final name.
    tmp = dest.with_suffix(".geojson.tmp")
    tmp.write_bytes(content)
    tmp.rename(dest)

    size_mb = len(content) / 1_000_000
    logger.info("written %s (%.1f MB)", dest, size_mb)

    return {
        "files_written": 1,
        "files_skipped": 0,
        "bytes_written": len(content),
        "cursor_value": year_month,
    }
|
||||
|
||||
|
||||
def main() -> None:
|
||||
run_extractor(EXTRACTOR_NAME, extract)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -434,8 +434,10 @@ def _find_venues_with_upcoming_slots(
|
||||
if not start_time_str:
|
||||
continue
|
||||
try:
|
||||
# Parse "2026-02-24T17:00:00" format
|
||||
slot_start = datetime.fromisoformat(start_time_str).replace(tzinfo=UTC)
|
||||
# start_time is "HH:MM:SS"; combine with resource's start_date
|
||||
start_date = resource.get("start_date", "")
|
||||
full_dt = f"{start_date}T{start_time_str}" if start_date else start_time_str
|
||||
slot_start = datetime.fromisoformat(full_dt).replace(tzinfo=UTC)
|
||||
if window_start <= slot_start < window_end:
|
||||
tenant_ids.add(tid)
|
||||
break # found one upcoming slot, no need to check more
|
||||
|
||||
@@ -54,6 +54,40 @@ chmod 600 "${REPO_DIR}/.env"
|
||||
|
||||
sudo -u "${SERVICE_USER}" bash -c "cd ${REPO_DIR} && ${UV} sync --all-packages"
|
||||
|
||||
# ── rclone config (r2-landing remote) ────────────────────────────────────────

# Print the value of key $1 from ${REPO_DIR}/.env: first matching line, text
# after the first '=', with all single and double quotes removed. Prints
# nothing (and still succeeds) when the key or the file is missing.
_env_get() { grep -E "^${1}=" "${REPO_DIR}/.env" 2>/dev/null | head -1 | cut -d= -f2- | tr -d '"'"'" || true; }

R2_LANDING_KEY=$(_env_get R2_LANDING_ACCESS_KEY_ID)
R2_LANDING_SECRET=$(_env_get R2_LANDING_SECRET_ACCESS_KEY)
R2_ENDPOINT=$(_env_get R2_ENDPOINT)

if [ -n "${R2_LANDING_KEY}" ] && [ -n "${R2_LANDING_SECRET}" ] && [ -n "${R2_ENDPOINT}" ]; then
    RCLONE_CONF_DIR="/home/${SERVICE_USER}/.config/rclone"
    RCLONE_CONF="${RCLONE_CONF_DIR}/rclone.conf"

    sudo -u "${SERVICE_USER}" mkdir -p "${RCLONE_CONF_DIR}"

    # Copy the existing config minus any previous [r2-landing] section.
    # The whole section is dropped (header AND its key lines, up to the next
    # "[...]" header); removing only the header line would leave the old
    # credentials attached to the preceding section and duplicate them on
    # every re-run. A missing config file simply yields an empty temp file.
    # The subshell's umask keeps the temp file private from the moment it is
    # created, since the secret key is appended to it below.
    rm -f "${RCLONE_CONF}.tmp"
    (
        umask 077
        awk '/^\[/ { skip = ($0 ~ /^\[r2-landing\]/) } !skip' \
            "${RCLONE_CONF}" 2>/dev/null > "${RCLONE_CONF}.tmp" || true
    )
    cat >> "${RCLONE_CONF}.tmp" <<EOF

[r2-landing]
type = s3
provider = Cloudflare
access_key_id = ${R2_LANDING_KEY}
secret_access_key = ${R2_LANDING_SECRET}
endpoint = ${R2_ENDPOINT}
acl = private
no_check_bucket = true
EOF
    mv "${RCLONE_CONF}.tmp" "${RCLONE_CONF}"
    chown "${SERVICE_USER}:${SERVICE_USER}" "${RCLONE_CONF}"
    chmod 600 "${RCLONE_CONF}"
    echo "$(date '+%H:%M:%S') ==> rclone [r2-landing] remote configured."
else
    echo "$(date '+%H:%M:%S') ==> R2_LANDING_* not set — skipping rclone config."
fi
|
||||
|
||||
# ── Systemd services ──────────────────────────────────────────────────────────
|
||||
|
||||
cp "${REPO_DIR}/infra/landing-backup/padelnomics-landing-backup.service" /etc/systemd/system/
|
||||
|
||||
@@ -7,15 +7,5 @@ Wants=network-online.target
|
||||
Type=oneshot
|
||||
User=padelnomics_service
|
||||
EnvironmentFile=/opt/padelnomics/.env
|
||||
Environment=LANDING_DIR=/data/padelnomics/landing
|
||||
ExecStart=/usr/bin/rclone sync ${LANDING_DIR} :s3:${LITESTREAM_R2_BUCKET}/padelnomics/landing \
|
||||
--s3-provider Cloudflare \
|
||||
--s3-access-key-id ${LITESTREAM_R2_ACCESS_KEY_ID} \
|
||||
--s3-secret-access-key ${LITESTREAM_R2_SECRET_ACCESS_KEY} \
|
||||
--s3-endpoint https://${LITESTREAM_R2_ENDPOINT} \
|
||||
--s3-no-check-bucket \
|
||||
--exclude ".state.sqlite*"
|
||||
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
SyslogIdentifier=padelnomics-landing-backup
|
||||
ExecStart=/bin/sh -c 'exec /usr/bin/rclone sync /data/padelnomics/landing/ r2-landing:${R2_LANDING_BUCKET}/padelnomics/ --log-level INFO --exclude ".state.sqlite*"'
|
||||
TimeoutStartSec=1800
|
||||
|
||||
@@ -39,3 +39,23 @@ module = "padelnomics_extract.playtomic_availability"
|
||||
entry = "main_recheck"
|
||||
schedule = "0,30 6-23 * * *"
|
||||
depends_on = ["playtomic_availability"]
|
||||
|
||||
[census_usa]
|
||||
module = "padelnomics_extract.census_usa"
|
||||
schedule = "monthly"
|
||||
|
||||
[census_usa_income]
|
||||
module = "padelnomics_extract.census_usa_income"
|
||||
schedule = "monthly"
|
||||
|
||||
[eurostat_city_labels]
|
||||
module = "padelnomics_extract.eurostat_city_labels"
|
||||
schedule = "monthly"
|
||||
|
||||
[ons_uk]
|
||||
module = "padelnomics_extract.ons_uk"
|
||||
schedule = "monthly"
|
||||
|
||||
[gisco]
|
||||
module = "padelnomics_extract.gisco"
|
||||
schedule = "monthly"
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
"""Download NUTS-2 boundary GeoJSON from Eurostat GISCO.
|
||||
|
||||
One-time (or on NUTS revision) download of NUTS-2 boundary polygons used for
|
||||
spatial income resolution in dim_locations. Stored uncompressed because DuckDB's
|
||||
ST_Read function cannot read gzipped files.
|
||||
|
||||
NUTS classification changes approximately every 7 years. Current revision: 2021.
|
||||
|
||||
Output: {LANDING_DIR}/gisco/2024/01/nuts2_boundaries.geojson (~5MB, uncompressed)
|
||||
|
||||
Usage:
|
||||
uv run python scripts/download_gisco_nuts.py [--landing-dir data/landing]
|
||||
|
||||
Idempotent: skips download if the file already exists.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import niquests
|
||||
|
||||
# NUTS 2021 revision, 20M scale (1:20,000,000), WGS84 (EPSG:4326), LEVL_2 only.
|
||||
# 20M resolution gives simplified polygons that are fast for point-in-polygon
|
||||
# matching without sacrificing accuracy at the NUTS-2 boundary level.
|
||||
GISCO_URL = (
|
||||
"https://gisco-services.ec.europa.eu/distribution/v2/nuts/geojson/"
|
||||
"NUTS_RG_20M_2021_4326_LEVL_2.geojson"
|
||||
)
|
||||
|
||||
# Fixed partition: NUTS boundaries are a static reference file, not time-series data.
|
||||
# Use the NUTS revision year as the partition to make the source version explicit.
|
||||
DEST_REL_PATH = "gisco/2024/01/nuts2_boundaries.geojson"
|
||||
|
||||
HTTP_TIMEOUT_SECONDS = 120
|
||||
|
||||
|
||||
def download_nuts_boundaries(landing_dir: Path) -> None:
|
||||
dest = landing_dir / DEST_REL_PATH
|
||||
if dest.exists():
|
||||
print(f"Already exists (skipping): {dest}")
|
||||
return
|
||||
|
||||
dest.parent.mkdir(parents=True, exist_ok=True)
|
||||
print(f"Downloading NUTS-2 boundaries from GISCO...")
|
||||
print(f" URL: {GISCO_URL}")
|
||||
|
||||
with niquests.Session() as session:
|
||||
resp = session.get(GISCO_URL, timeout=HTTP_TIMEOUT_SECONDS)
|
||||
resp.raise_for_status()
|
||||
|
||||
content = resp.content
|
||||
assert len(content) > 100_000, (
|
||||
f"GeoJSON too small ({len(content)} bytes) — download may have failed"
|
||||
)
|
||||
assert b'"FeatureCollection"' in content, "Response does not look like GeoJSON"
|
||||
|
||||
# Write uncompressed — ST_Read requires a plain file
|
||||
tmp = dest.with_suffix(".geojson.tmp")
|
||||
tmp.write_bytes(content)
|
||||
tmp.rename(dest)
|
||||
|
||||
size_mb = len(content) / 1_000_000
|
||||
print(f" Written: {dest} ({size_mb:.1f} MB)")
|
||||
print("Done. Run SQLMesh plan to rebuild stg_nuts2_boundaries.")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--landing-dir", default="data/landing", type=Path)
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.landing_dir.is_dir():
|
||||
print(f"Error: landing dir does not exist: {args.landing_dir}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
download_nuts_boundaries(args.landing_dir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -229,7 +229,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
<form method="post" action="{{ url_for('admin.affiliate_delete', product_id=product_id) }}" style="margin:0">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="btn-outline"
|
||||
onclick="return confirm('Delete this product? This cannot be undone.')">Delete</button>
|
||||
onclick="event.preventDefault(); confirmAction('Delete this product? This cannot be undone.', this.closest('form'))">Delete</button>
|
||||
</form>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
@@ -123,7 +123,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
<form method="post" action="{{ url_for('admin.affiliate_program_delete', program_id=program_id) }}" style="margin:0">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="btn-outline"
|
||||
onclick="return confirm('Delete this program? Blocked if products reference it.')">Delete</button>
|
||||
onclick="event.preventDefault(); confirmAction('Delete this program? Blocked if products reference it.', this.closest('form'))">Delete</button>
|
||||
</form>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
@@ -244,5 +244,19 @@ function confirmAction(message, form) {
|
||||
document.getElementById('confirm-cancel').addEventListener('click', function() { dialog.close(); }, { once: true });
|
||||
dialog.showModal();
|
||||
}
|
||||
|
||||
// Intercept hx-confirm to use the styled dialog instead of window.confirm().
// htmx dispatches htmx:confirm for every request trigger, not only for
// elements that carry hx-confirm, so triggers without a question must pass
// through untouched; otherwise every HTMX request would open an empty dialog.
document.body.addEventListener('htmx:confirm', function(evt) {
  if (!evt.detail.question) return; // no hx-confirm in effect: let the request proceed
  var dialog = document.getElementById('confirm-dialog');
  if (!dialog) return; // fallback: let HTMX use native confirm
  evt.preventDefault();
  document.getElementById('confirm-msg').textContent = evt.detail.question;
  // Swap the OK button for a clone so click handlers left over from an
  // earlier prompt cannot fire for this one.
  var ok = document.getElementById('confirm-ok');
  var newOk = ok.cloneNode(true);
  ok.replaceWith(newOk);
  // issueRequest(true) resumes the request and skips htmx's own native
  // confirm, since the user has already confirmed in the dialog.
  newOk.addEventListener('click', function() { dialog.close(); evt.detail.issueRequest(true); }, { once: true });
  document.getElementById('confirm-cancel').addEventListener('click', function() { dialog.close(); }, { once: true });
  dialog.showModal();
});
|
||||
</script>
|
||||
{% endblock %}
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
<form method="post" action="{{ url_for('admin.affiliate_program_delete', program_id=prog.id) }}" style="display:inline">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="btn-outline btn-sm"
|
||||
onclick="return confirm('Delete {{ prog.name }}? This is blocked if products reference it.')">Delete</button>
|
||||
onclick="event.preventDefault(); confirmAction('Delete {{ prog.name }}? This is blocked if products reference it.', this.closest('form'))">Delete</button>
|
||||
</form>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
<form method="post" action="{{ url_for('admin.affiliate_delete', product_id=product.id) }}" style="display:inline">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||
<button type="submit" class="btn-outline btn-sm"
|
||||
onclick="return confirm('Delete {{ product.name }}?')">Delete</button>
|
||||
onclick="event.preventDefault(); confirmAction('Delete {{ product.name }}?', this.closest('form'))">Delete</button>
|
||||
</form>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
hx-target="#pipeline-overview-content"
|
||||
hx-swap="outerHTML"
|
||||
hx-vals='{"extractor": "{{ wf.name }}", "csrf_token": "{{ csrf_token() }}"}'
|
||||
onclick="if (!confirm('Run {{ wf.name }} extractor?')) return false;">Run</button>
|
||||
hx-confirm="Run {{ wf.name }} extractor?">Run</button>
|
||||
</div>
|
||||
<p class="text-xs text-slate">{{ wf.schedule_label }}</p>
|
||||
{% if run %}
|
||||
|
||||
@@ -53,7 +53,7 @@
|
||||
hx-target="#pipeline-transform-content"
|
||||
hx-swap="outerHTML"
|
||||
hx-vals='{"step": "transform", "csrf_token": "{{ csrf_token() }}"}'
|
||||
onclick="if (!confirm('Run SQLMesh transform (prod --auto-apply)?')) return false;">
|
||||
hx-confirm="Run SQLMesh transform (prod --auto-apply)?">
|
||||
Run Transform
|
||||
</button>
|
||||
</div>
|
||||
@@ -107,7 +107,7 @@
|
||||
hx-target="#pipeline-transform-content"
|
||||
hx-swap="outerHTML"
|
||||
hx-vals='{"step": "export", "csrf_token": "{{ csrf_token() }}"}'
|
||||
onclick="if (!confirm('Export serving tables (lakehouse → analytics.duckdb)?')) return false;">
|
||||
hx-confirm="Export serving tables (lakehouse → analytics.duckdb)?">
|
||||
Run Export
|
||||
</button>
|
||||
</div>
|
||||
@@ -138,7 +138,7 @@
|
||||
hx-target="#pipeline-transform-content"
|
||||
hx-swap="outerHTML"
|
||||
hx-vals='{"step": "pipeline", "csrf_token": "{{ csrf_token() }}"}'
|
||||
onclick="if (!confirm('Run full ELT pipeline (extract → transform → export)?')) return false;">
|
||||
hx-confirm="Run full ELT pipeline (extract → transform → export)?">
|
||||
Run Full Pipeline
|
||||
</button>
|
||||
</div>
|
||||
|
||||
@@ -15,8 +15,9 @@
|
||||
|
||||
.pipeline-tabs {
|
||||
display: flex; gap: 0; border-bottom: 2px solid #E2E8F0; margin-bottom: 1.5rem;
|
||||
overflow-x: auto; -webkit-overflow-scrolling: touch;
|
||||
overflow-x: auto; -webkit-overflow-scrolling: touch; scrollbar-width: none;
|
||||
}
|
||||
.pipeline-tabs::-webkit-scrollbar { display: none; }
|
||||
.pipeline-tabs button {
|
||||
padding: 0.625rem 1.25rem; font-size: 0.8125rem; font-weight: 600;
|
||||
color: #64748B; background: none; border: none; cursor: pointer;
|
||||
|
||||
Reference in New Issue
Block a user