Fix extract and SQLMesh pipeline to build DuckDB lakehouse
extract: wrap response.content in BytesIO before passing it to
normalize_zipped_csv, and call .read() on the returned BytesIO before
passing the result to write_bytes. This fixes two bugs: the wrong type
was passed in, and the wrong type was written out.
sqlmesh: {{ var() }} placeholders inside SQL string literals are not
substituted by SQLMesh's Jinja (the SQL parser treats them as opaque
strings). Replace them with a @psd_glob() macro that evaluates LANDING_DIR
at render time and returns a quoted glob path string.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,6 +4,7 @@ import os
|
||||
import pathlib
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from io import BytesIO
|
||||
|
||||
import niquests
|
||||
|
||||
@@ -50,8 +51,8 @@ def extract_psd_file(url: str, year: int, month: int, http_session: niquests.Ses
|
||||
response = http_session.get(url, timeout=HTTP_TIMEOUT_SECONDS)
|
||||
logger.info(f"Storing file to {local_file}")
|
||||
extract_to_path.mkdir(parents=True, exist_ok=True)
|
||||
normalized_content = normalize_zipped_csv(response.content)
|
||||
local_file.write_bytes(normalized_content)
|
||||
normalized_content = normalize_zipped_csv(BytesIO(response.content))
|
||||
local_file.write_bytes(normalized_content.read())
|
||||
assert local_file.exists(), f"File was not written: {local_file}"
|
||||
logger.info("Download complete")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user