docs(inventory): document population pipeline implementation findings
- Add section 9 to data-sources-inventory.md covering live API quirks: Eurostat SDMX city labels response shape, ONS CSV download path (observations API 404s), US Census ACS place endpoint, GeoNames cities15000 bulk format
- Add population coverage summary table and DuckDB glob limitation note
- fix(extract): census_usa + geonames write empty placeholder when credentials absent so SQLMesh staging models don't fail with "no files found"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -64,7 +64,12 @@ def extract(
|
||||
"""Fetch ACS 5-year place population. Skips if already run this month."""
|
||||
api_key = os.environ.get("CENSUS_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
logger.warning("CENSUS_API_KEY not set — skipping US Census extract")
|
||||
logger.warning("CENSUS_API_KEY not set — writing empty placeholder so SQLMesh models can run")
|
||||
year, month = year_month.split("/")
|
||||
dest_dir = landing_path(landing_dir, "census_usa", year, month)
|
||||
dest = dest_dir / "acs5_places.json.gz"
|
||||
if not dest.exists():
|
||||
write_gzip_atomic(dest, b'{"rows": [], "count": 0}')
|
||||
return {"files_written": 0, "files_skipped": 1, "bytes_written": 0}
|
||||
|
||||
# Skip if we already have data for this month (annual data, monthly cursor)
|
||||
|
||||
@@ -105,7 +105,12 @@ def extract(
|
||||
"""Download GeoNames cities15000.zip. Skips if already run this month."""
|
||||
username = os.environ.get("GEONAMES_USERNAME", "").strip()
|
||||
if not username:
|
||||
logger.warning("GEONAMES_USERNAME not set — skipping GeoNames extract")
|
||||
logger.warning("GEONAMES_USERNAME not set — writing empty placeholder so SQLMesh models can run")
|
||||
year, month = year_month.split("/")
|
||||
dest_dir = landing_path(landing_dir, "geonames", year, month)
|
||||
dest = dest_dir / "cities_global.json.gz"
|
||||
if not dest.exists():
|
||||
write_gzip_atomic(dest, b'{"rows": [], "count": 0}')
|
||||
return {"files_written": 0, "files_skipped": 1, "bytes_written": 0}
|
||||
|
||||
last_cursor = get_last_cursor(conn, EXTRACTOR_NAME)
|
||||
|
||||
Reference in New Issue
Block a user