Files
padelnomics/infra/supervisor/workflows.toml
Deeman 97c5846d51 feat(extract): GISCO extractor + wire all unscheduled extractors
- New gisco.py: proper extractor module replacing scripts/download_gisco_nuts.py.
  Writes uncompressed .geojson (ST_Read can't handle .gz). Fixed partition path
  gisco/2024/01/nuts2_boundaries.geojson; cursor tracking skips re-download monthly.
- all.py: import + register gisco in EXTRACTORS (9 independent, 1 dep)
- pyproject.toml: add extract-gisco entry point
- workflows.toml: add census_usa, census_usa_income, eurostat_city_labels,
  ons_uk, gisco — all monthly, no dependencies
- Delete scripts/download_gisco_nuts.py (superseded)

Unblocks: stg_nuts2_boundaries, stg_regional_income, stg_income_usa,
and 4 downstream models (dim_locations, pseo_city_costs_de,
location_opportunity_profile, pseo_country_overview).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-01 15:49:39 +01:00

62 lines
1.6 KiB
TOML

# Workflow registry — the supervisor reads this file on every tick.
# To add a new extractor: add a [section] here and create the Python module.
#
# Fields:
#   module     — Python module path (must have a main() function)
#   schedule   — named preset ("hourly", "daily", "weekly", "monthly")
#                or raw cron expression (e.g. "0 6-23 * * *")
#   entry      — optional: function name if not "main" (default: "main")
#   depends_on — optional: list of workflow names that must run first
#   proxy_mode — optional: "round-robin" (default) or "sticky"
# Standalone monthly extractors: each table sets only a module and the
# "monthly" schedule preset, and declares no depends_on list.
[overpass]
module = "padelnomics_extract.overpass"
schedule = "monthly"

[overpass_tennis]
module = "padelnomics_extract.overpass_tennis"
schedule = "monthly"

[eurostat]
module = "padelnomics_extract.eurostat"
schedule = "monthly"

[geonames]
module = "padelnomics_extract.geonames"
schedule = "monthly"

# Playtomic chain, linked through depends_on:
# playtomic_tenants -> playtomic_availability -> playtomic_recheck.
[playtomic_tenants]
module = "padelnomics_extract.playtomic_tenants"
schedule = "daily"

[playtomic_availability]
module = "padelnomics_extract.playtomic_availability"
schedule = "daily"
depends_on = ["playtomic_tenants"]

# Shares the availability module but uses the main_recheck entry point
# instead of the default main(). Read as a standard 5-field cron expression,
# the schedule fires at minutes 0 and 30 of every hour from 6 through 23.
[playtomic_recheck]
module = "padelnomics_extract.playtomic_availability"
schedule = "0,30 6-23 * * *"
entry = "main_recheck"
depends_on = ["playtomic_availability"]

# Further standalone monthly extractors: module plus the "monthly" preset,
# no depends_on list on any of them.
[census_usa]
module = "padelnomics_extract.census_usa"
schedule = "monthly"

[census_usa_income]
module = "padelnomics_extract.census_usa_income"
schedule = "monthly"

[eurostat_city_labels]
module = "padelnomics_extract.eurostat_city_labels"
schedule = "monthly"

[ons_uk]
module = "padelnomics_extract.ons_uk"
schedule = "monthly"

[gisco]
module = "padelnomics_extract.gisco"
schedule = "monthly"