fix ice_stocks XLS date parsing: handle 'Feb 20, 2026' format

ICE changed the daily stocks XLS header from 'As of: 1/30/2026' to
'As of: Feb 20, 2026  1:35:39PM'. Expand _build_canonical_csv_from_xls
to try multiple strptime formats ('%m/%d/%Y', '%b %d, %Y', etc.) on both
single-token and three-token date candidates.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-21 22:18:17 +01:00
parent 562e2d1847
commit 493ce64fde

View File

@@ -185,16 +185,33 @@ def _build_canonical_csv_from_xls(xls_bytes: bytes) -> bytes:
"""
rows = xls_to_rows(xls_bytes)
# Extract report date from row 2, cell 0 (e.g. "As of: 1/30/2026")
# Extract report date from row 2, cell 0
# Formats seen: "As of: 1/30/2026", "As of: Feb 20, 2026 1:35:39PM"
header_cell = str(rows[2][0]) if len(rows) > 2 else ""
report_date = ""
if "as of" in header_cell.lower():
date_part = header_cell.lower().replace("as of:", "").replace("as of", "").strip()
try:
dt = datetime.strptime(date_part.split()[0], "%m/%d/%Y")
report_date = dt.strftime("%Y-%m-%d")
except ValueError:
pass
# Strip prefix, preserve original case for month name parsing
after = header_cell.strip()
for prefix in ("As of:", "As Of:", "as of:", "As of", "As Of", "as of"):
if after.lower().startswith(prefix.lower()):
after = after[len(prefix):].strip()
break
tokens = after.split()
# Try "M/D/YYYY" (single token) then "Mon DD, YYYY" (three tokens)
candidates = [tokens[0]] if tokens else []
if len(tokens) >= 3:
candidates.append(" ".join(tokens[:3]))
for candidate in candidates:
candidate = candidate.rstrip(",")
for fmt in ("%m/%d/%Y", "%b %d, %Y", "%b %d %Y", "%B %d, %Y", "%B %d %Y"):
try:
dt = datetime.strptime(candidate, fmt)
report_date = dt.strftime("%Y-%m-%d")
break
except ValueError:
pass
if report_date:
break
if not report_date:
logger.warning(f"Could not parse report date from XLS header: {header_cell!r}")