- Merge web ruff settings (select E/F/I/UP, line-length 100) into root config - Remove [tool.ruff] section from web/pyproject.toml - Remove "web" from root ruff exclude list - Simplify pre-commit hook to one command: ruff check . - Update CI to use: uv run ruff check . (from repo root) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
55 lines
1.4 KiB
Python
55 lines
1.4 KiB
Python
import gzip
|
|
import pathlib
|
|
import zipfile
|
|
from io import BytesIO
|
|
|
|
|
|
def normalize_zipped_csv(buffer: BytesIO) -> BytesIO:
    """Re-pack the ``psd_alldata.csv`` member of a zip archive as gzip.

    Args:
        buffer: The zip archive to read. It is handed straight to
            ``zipfile.ZipFile``, so besides a binary stream a filesystem path
            also works (``convert_existing`` passes a ``pathlib.Path``).

    Returns:
        An in-memory gzip stream holding the CSV bytes, rewound to position 0
        so it can be read immediately.

    Raises:
        KeyError: If the archive has no ``psd_alldata.csv`` member.
        zipfile.BadZipFile: If ``buffer`` is not a valid zip archive.
    """
    chunk_size = 1024 * 1024
    out = BytesIO()
    # Manage the archive itself with ``with`` so that, when a path was given,
    # the underlying file handle is closed here instead of whenever the
    # ZipFile object happens to be garbage collected. A caller-supplied stream
    # is left open by ZipFile.close().
    with zipfile.ZipFile(buffer, mode="r") as archive:
        with archive.open("psd_alldata.csv", mode="r") as csv:
            with gzip.open(out, "wb") as outfile:
                # Copy in bounded chunks so the uncompressed CSV never has to
                # be held in memory all at once.
                for chunk in iter(lambda: csv.read(chunk_size), b""):
                    outfile.write(chunk)
    out.seek(0)
    return out
|
|
|
|
|
|
def convert_existing():
    """Write a gzip copy of the CSV for every zip archive in the data directory.

    Scans the ``data`` directory next to this module for ``*.zip`` files and,
    for each one, writes ``<stem>.csv.gzip`` alongside it. Archives whose
    output file already exists and is non-empty are skipped. The path of each
    archive that gets converted is printed.
    """
    data_dir = pathlib.Path(__file__).parent / "data"
    for archive in data_dir.glob("*.zip"):
        target = data_dir / f"{archive.stem}.csv.gzip"
        # An empty output file is treated as not converted and is redone.
        already_converted = target.exists() and target.stat().st_size > 0
        if not already_converted:
            print(archive)
            target.write_bytes(normalize_zipped_csv(archive).read())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual round-trip check, kept disabled. It hashes the raw CSV member of
    # one sample archive and the gunzipped output of normalize_zipped_csv for
    # the same archive, then asserts that the two SHA-256 digests match.
    # Uncomment to run it.
    #
    # import pathlib
    # import hashlib
    #
    # test_file = pathlib.Path(__file__).parent / "data/00d6e992d8c81_0.zip"
    #
    # with zipfile.ZipFile(test_file.open("rb"), mode='r').open("psd_alldata.csv", mode='r') as csv:
    #     raw_hash = hashlib.sha256(csv.read()).hexdigest()
    #
    # normalized = normalize_zipped_csv(test_file.open("rb"))
    # print(raw_hash)
    #
    # with gzip.open(normalized, "rb") as normalized_file:
    #     normalized_hash = hashlib.sha256(normalized_file.read()).hexdigest()
    # print(normalized_hash)
    #
    # assert raw_hash == normalized_hash

    # Script entry point: convert every zip archive in the data directory that
    # does not yet have a non-empty gzip output.
    convert_existing()
|
|
|
|
|
|
|