- Replace brittle ICE_STOCKS_URL env var with API-based URL discovery via the private ICE Report Center JSON API (no auth required)
- Add rolling CSV → XLS fallback in extract_ice_stocks() using find_latest_report() from ice_api.py
- Add ice_api.py: fetch_report_listings(), find_latest_report() with pagination up to MAX_API_PAGES
- Add xls_parse.py: detect_file_format() (magic bytes), xls_to_rows() using xlrd for OLE2/BIFF XLS files
- Add extract_ice_aging(): monthly certified stock aging report by age bucket × port → ice_aging/ landing dir
- Add extract_ice_historical(): 30-year EOM by-port stocks from static ICE URL → ice_stocks_by_port/ landing dir
- Add xlrd>=2.0.1 (parse XLS), xlwt>=1.3.0 (dev, test fixtures)
- Add SQLMesh raw + foundation models for both new datasets
- Add ice_aging_glob(), ice_stocks_by_port_glob() macros
- Add extract_ice_aging + extract_ice_historical pipeline entries
- Add 12 unit tests (format detection, XLS roundtrip, API mock, CSV output)

Seed files (data/landing/ice_aging/seed/ and ice_stocks_by_port/seed/) must be created locally — data/ is gitignored.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
46 lines
1.7 KiB
Python
46 lines
1.7 KiB
Python
import os
|
|
|
|
from sqlmesh import macro
|
|
|
|
|
|
@macro()
def psd_glob(evaluator) -> str:
    """Build the single-quoted glob that matches every PSD ``.csv.gzip`` file.

    The landing root is whatever ``evaluator.var("LANDING_DIR")`` returns when
    that value is truthy; otherwise the ``LANDING_DIR`` environment variable is
    consulted, with ``data/landing`` as the final default.
    """
    root = evaluator.var("LANDING_DIR")
    if not root:
        # Variable missing or empty: fall back to the environment, then the default.
        root = os.environ.get("LANDING_DIR", "data/landing")
    return f"'{root}/psd/**/*.csv.gzip'"
|
|
|
|
|
|
@macro()
def cot_glob(evaluator) -> str:
    """Build the single-quoted glob that matches every COT ``.csv.gzip`` file.

    The landing root is whatever ``evaluator.var("LANDING_DIR")`` returns when
    that value is truthy; otherwise the ``LANDING_DIR`` environment variable is
    consulted, with ``data/landing`` as the final default.
    """
    root = evaluator.var("LANDING_DIR")
    if not root:
        # Variable missing or empty: fall back to the environment, then the default.
        root = os.environ.get("LANDING_DIR", "data/landing")
    return f"'{root}/cot/**/*.csv.gzip'"
|
|
|
|
|
|
@macro()
def prices_glob(evaluator) -> str:
    """Build the single-quoted glob for coffee price ``.csv.gzip`` files.

    Files are matched under ``prices/coffee_kc`` inside the landing root. The
    root is whatever ``evaluator.var("LANDING_DIR")`` returns when that value
    is truthy; otherwise the ``LANDING_DIR`` environment variable is consulted,
    with ``data/landing`` as the final default.
    """
    root = evaluator.var("LANDING_DIR")
    if not root:
        # Variable missing or empty: fall back to the environment, then the default.
        root = os.environ.get("LANDING_DIR", "data/landing")
    return f"'{root}/prices/coffee_kc/**/*.csv.gzip'"
|
|
|
|
|
|
@macro()
def ice_stocks_glob(evaluator) -> str:
    """Build the single-quoted glob for ICE warehouse stock ``.csv.gzip`` files.

    Files are matched under ``ice_stocks`` inside the landing root. The root is
    whatever ``evaluator.var("LANDING_DIR")`` returns when that value is
    truthy; otherwise the ``LANDING_DIR`` environment variable is consulted,
    with ``data/landing`` as the final default.
    """
    root = evaluator.var("LANDING_DIR")
    if not root:
        # Variable missing or empty: fall back to the environment, then the default.
        root = os.environ.get("LANDING_DIR", "data/landing")
    return f"'{root}/ice_stocks/**/*.csv.gzip'"
|
|
|
|
|
|
@macro()
def ice_aging_glob(evaluator) -> str:
    """Build the single-quoted glob for ICE aging report ``.csv.gzip`` files.

    Files are matched under ``ice_aging`` inside the landing root. The root is
    whatever ``evaluator.var("LANDING_DIR")`` returns when that value is
    truthy; otherwise the ``LANDING_DIR`` environment variable is consulted,
    with ``data/landing`` as the final default.
    """
    root = evaluator.var("LANDING_DIR")
    if not root:
        # Variable missing or empty: fall back to the environment, then the default.
        root = os.environ.get("LANDING_DIR", "data/landing")
    return f"'{root}/ice_aging/**/*.csv.gzip'"
|
|
|
|
|
|
@macro()
def ice_stocks_by_port_glob(evaluator) -> str:
    """Build the single-quoted glob for ICE historical by-port ``.csv.gzip`` files.

    Files are matched under ``ice_stocks_by_port`` inside the landing root. The
    root is whatever ``evaluator.var("LANDING_DIR")`` returns when that value
    is truthy; otherwise the ``LANDING_DIR`` environment variable is consulted,
    with ``data/landing`` as the final default.
    """
    root = evaluator.var("LANDING_DIR")
    if not root:
        # Variable missing or empty: fall back to the environment, then the default.
        root = os.environ.get("LANDING_DIR", "data/landing")
    return f"'{root}/ice_stocks_by_port/**/*.csv.gzip'"
|