Fix extract and SQLMesh pipeline to build DuckDB lakehouse

extract: wrap response.content in BytesIO before passing to
normalize_zipped_csv, and call .read() on the returned BytesIO before
write_bytes (two bugs: wrong type in, wrong type out)
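To illustrate the type contract being fixed: `normalize_zipped_csv` expects a file-like object and returns a `BytesIO`, while `response.content` is raw `bytes` and `write_bytes` needs `bytes`. A minimal self-contained sketch (the stand-in `normalize_zipped_csv` body here is hypothetical; only its file-like-in / `BytesIO`-out signature is taken from the commit):

```python
from io import BytesIO
import zipfile

def normalize_zipped_csv(buf: BytesIO) -> BytesIO:
    # Hypothetical stand-in: extract the first CSV member from the zip
    # and normalize CRLF line endings to LF.
    with zipfile.ZipFile(buf) as zf:
        name = zf.namelist()[0]
        data = zf.read(name).replace(b"\r\n", b"\n")
    return BytesIO(data)

# Build a fake HTTP payload: a zip containing one CSV.
raw = BytesIO()
with zipfile.ZipFile(raw, "w") as zf:
    zf.writestr("a.csv", "x,y\r\n1,2\r\n")
content = raw.getvalue()  # stands in for response.content (bytes)

out = normalize_zipped_csv(BytesIO(content))  # bug 1 fix: wrap bytes in BytesIO
payload = out.read()                          # bug 2 fix: read() before write_bytes
```

Without the wrap, `zipfile.ZipFile` rejects the raw `bytes`; without `.read()`, `write_bytes` receives a `BytesIO` instead of `bytes`.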

sqlmesh: {{ var() }} inside SQL string literals is not substituted by
SQLMesh's Jinja (SQL parser treats them as opaque strings). Replace with
a @psd_glob() macro that evaluates LANDING_DIR at render time and returns
a quoted glob path string.
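The macro's job is just to build the quoted glob string at render time. A sketch of that logic in plain Python (in SQLMesh the function would live under `macros/` and be registered with the `@macro()` decorator, taking an `evaluator` argument; the `LANDING_DIR` lookup and default pattern below are assumptions, not taken from the repo):

```python
import os

def psd_glob(pattern: str = "psd_*.csv") -> str:
    # Resolve LANDING_DIR when the model is rendered, not when it is parsed.
    landing_dir = os.environ.get("LANDING_DIR", "/tmp/landing")
    # Return a single-quoted path so it lands in the rendered SQL as a
    # string literal, e.g. read_csv('/data/landing/psd_*.csv') in DuckDB.
    return f"'{landing_dir}/{pattern}'"

os.environ["LANDING_DIR"] = "/data/landing"
rendered = psd_glob()
```

Because the macro returns an already-quoted literal, the SQL parser never has to substitute anything inside a string, which is what broke the `{{ var() }}` approach.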

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Author: Deeman
Date: 2026-02-20 17:02:59 +01:00
parent d05e522c88
commit 423fb8c619
3 changed files with 14 additions and 3 deletions


@@ -4,6 +4,7 @@ import os
 import pathlib
 import sys
 from datetime import datetime
+from io import BytesIO
 import niquests
@@ -50,8 +51,8 @@ def extract_psd_file(url: str, year: int, month: int, http_session: niquests.Ses
     response = http_session.get(url, timeout=HTTP_TIMEOUT_SECONDS)
     logger.info(f"Storing file to {local_file}")
     extract_to_path.mkdir(parents=True, exist_ok=True)
-    normalized_content = normalize_zipped_csv(response.content)
-    local_file.write_bytes(normalized_content)
+    normalized_content = normalize_zipped_csv(BytesIO(response.content))
+    local_file.write_bytes(normalized_content.read())
     assert local_file.exists(), f"File was not written: {local_file}"
     logger.info("Download complete")