fix(extract,transform): fix COT/prices column name mismatches + OWM rate limit skip

- fct_cot_positioning: quote Swap__Positions_Short_All and Swap__Positions_Spread_All
  (CSV uses double underscore; DuckDB preserves header names exactly)
- fct_cot_positioning: quote Report_Date_as_YYYY-MM-DD (dashes preserved in header)
- fct_coffee_prices: quote "Adj Close" (space in CSV header)
- openmeteo/execute.py: skip API call in backfill when all daily files already exist
  (_count_existing_files pre-check prevents 429 rate limit on re-runs)
- dev_run.sh: open browser as admin@beanflows.coffee instead of pro@

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-26 09:46:34 +01:00
parent 611a4af966
commit 4fae358f97
5 changed files with 47 additions and 24 deletions

View File

@@ -75,6 +75,19 @@ def _write_day_file(location_id: str, date_str: str, record: dict) -> int:
return bytes_written
def _count_existing_files(location_id: str, start: date, end: date) -> int:
    """Count per-day files already on disk for a location between start and end (inclusive)."""
    # Enumerate every date in the closed interval [start, end] lazily.
    span = (end - start).days + 1
    days = (start + timedelta(days=offset) for offset in range(span))
    # A day is "existing" when its gzipped JSON file is present under the
    # year-partitioned landing directory for this location.
    return sum(
        1
        for day in days
        if (
            landing_path(LANDING_DIR, LANDING_SUBDIR, location_id, day.strftime("%Y"))
            / f"{day.isoformat()}.json.gz"
        ).exists()
    )
def _split_and_write(location_id: str, response: dict) -> tuple[int, int, int]:
"""Split an Open-Meteo array response into per-day JSON.gz files.
@@ -171,12 +184,22 @@ def extract_weather_backfill() -> None:
bytes_written_total = 0
try:
start = BACKFILL_START
end = date.fromisoformat(yesterday)
expected_days = (end - start).days + 1
with niquests.Session() as session:
for loc in LOCATIONS:
logger.info(
f"Backfill {loc['id']} ({loc['country']}) "
f"{start_date}{yesterday}"
)
existing = _count_existing_files(loc["id"], start, end)
if existing == expected_days:
logger.info(f" {loc['id']}: 0 new, {existing} already existed (skipped API call)")
files_skipped += existing
continue
response = fetch_archive(
session, loc["lat"], loc["lon"],
start_date=start_date,