diff --git a/extract/ice_stocks/src/ice_stocks/execute.py b/extract/ice_stocks/src/ice_stocks/execute.py index 70e3fee..3eddc97 100644 --- a/extract/ice_stocks/src/ice_stocks/execute.py +++ b/extract/ice_stocks/src/ice_stocks/execute.py @@ -185,16 +185,33 @@ def _build_canonical_csv_from_xls(xls_bytes: bytes) -> bytes: """ rows = xls_to_rows(xls_bytes) - # Extract report date from row 2, cell 0 (e.g. "As of: 1/30/2026") + # Extract report date from row 2, cell 0 + # Formats seen: "As of: 1/30/2026", "As of: Feb 20, 2026 1:35:39PM" header_cell = str(rows[2][0]) if len(rows) > 2 else "" report_date = "" if "as of" in header_cell.lower(): - date_part = header_cell.lower().replace("as of:", "").replace("as of", "").strip() - try: - dt = datetime.strptime(date_part.split()[0], "%m/%d/%Y") - report_date = dt.strftime("%Y-%m-%d") - except ValueError: - pass + # Strip prefix, preserve original case for month name parsing + after = header_cell.strip() + for prefix in ("As of:", "As Of:", "as of:", "As of", "As Of", "as of"): + if after.lower().startswith(prefix.lower()): + after = after[len(prefix):].strip() + break + tokens = after.split() + # Try "M/D/YYYY" (single token) then "Mon DD, YYYY" (three tokens) + candidates = [tokens[0]] if tokens else [] + if len(tokens) >= 3: + candidates.append(" ".join(tokens[:3])) + for candidate in candidates: + candidate = candidate.rstrip(",") + for fmt in ("%m/%d/%Y", "%b %d, %Y", "%b %d %Y", "%B %d, %Y", "%B %d %Y"): + try: + dt = datetime.strptime(candidate, fmt) + report_date = dt.strftime("%Y-%m-%d") + break + except ValueError: + pass + if report_date: + break if not report_date: logger.warning(f"Could not parse report date from XLS header: {header_cell!r}")