fix(extract,transform): fix COT/prices column name mismatches + OWM rate limit skip
- fct_cot_positioning: quote `Swap__Positions_Short_All` and `Swap__Positions_Spread_All` (CSV header uses a double underscore; DuckDB preserves header names exactly)
- fct_cot_positioning: quote `Report_Date_as_YYYY-MM-DD` (dashes are preserved in the header)
- fct_coffee_prices: quote `"Adj Close"` (space in the CSV header)
- openmeteo/execute.py: skip the API call during backfill when all daily files already exist (`_count_existing_files` pre-check prevents 429 rate-limit errors on re-runs)
- dev_run.sh: open the browser as admin@beanflows.coffee instead of pro@

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -75,6 +75,19 @@ def _write_day_file(location_id: str, date_str: str, record: dict) -> int:
|
||||
return bytes_written
|
||||
|
||||
|
||||
def _count_existing_files(location_id: str, start: date, end: date) -> int:
    """Count per-day files already on disk for a location between start and end (inclusive).

    Used by the backfill pre-check: if every expected day file exists, the
    API call can be skipped entirely (avoids hitting the rate limit on re-runs).
    """
    total_days = (end - start).days + 1  # inclusive span; <= 0 yields no iterations
    found = 0
    for offset in range(total_days):
        day = start + timedelta(days=offset)
        # Files are partitioned by year under the landing directory.
        dest_dir = landing_path(LANDING_DIR, LANDING_SUBDIR, location_id, day.strftime("%Y"))
        if (dest_dir / f"{day.isoformat()}.json.gz").exists():
            found += 1
    return found
|
||||
|
||||
|
||||
def _split_and_write(location_id: str, response: dict) -> tuple[int, int, int]:
|
||||
"""Split an Open-Meteo array response into per-day JSON.gz files.
|
||||
|
||||
@@ -171,12 +184,22 @@ def extract_weather_backfill() -> None:
|
||||
bytes_written_total = 0
|
||||
|
||||
try:
|
||||
start = BACKFILL_START
|
||||
end = date.fromisoformat(yesterday)
|
||||
expected_days = (end - start).days + 1
|
||||
|
||||
with niquests.Session() as session:
|
||||
for loc in LOCATIONS:
|
||||
logger.info(
|
||||
f"Backfill {loc['id']} ({loc['country']}) "
|
||||
f"{start_date} → {yesterday}"
|
||||
)
|
||||
existing = _count_existing_files(loc["id"], start, end)
|
||||
if existing == expected_days:
|
||||
logger.info(f" {loc['id']}: 0 new, {existing} already existed (skipped API call)")
|
||||
files_skipped += existing
|
||||
continue
|
||||
|
||||
response = fetch_archive(
|
||||
session, loc["lat"], loc["lon"],
|
||||
start_date=start_date,
|
||||
|
||||
Reference in New Issue
Block a user