feat: extraction framework overhaul — extract_core shared package + SQLite state tracking
- Add extract/extract_core/ workspace package with three modules:
  - state.py: SQLite run tracking (open_state_db, start_run, end_run, get_last_cursor)
  - http.py: niquests session factory + etag normalization helpers
  - files.py: landing_path, content_hash, write_bytes_atomic (atomic gzip writes)
- State lives at {LANDING_DIR}/.state.sqlite — no extra env var needed
- SQLite chosen over DuckDB: state tracking is OLTP (row inserts/updates), not analytical
- Refactor all 4 extractors (psdonline, cftc_cot, coffee_prices, ice_stocks):
  - Replace inline boilerplate with extract_core helpers
  - Add start_run/end_run tracking to every extraction entry point
  - extract_cot_year returns int (bytes_written) instead of bool
- Update tests: assert result == 0 (not `is False`) for the return type change
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
32
uv.lock
generated
32
uv.lock
generated
@@ -11,6 +11,7 @@ members = [
|
||||
"beanflows",
|
||||
"cftc-cot",
|
||||
"coffee-prices",
|
||||
"extract-core",
|
||||
"ice-stocks",
|
||||
"materia",
|
||||
"psdonline",
|
||||
@@ -365,11 +366,15 @@ name = "cftc-cot"
|
||||
version = "0.1.0"
|
||||
source = { editable = "extract/cftc_cot" }
|
||||
dependencies = [
|
||||
{ name = "extract-core" },
|
||||
{ name = "niquests" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [{ name = "niquests", specifier = ">=3.14.1" }]
|
||||
requires-dist = [
|
||||
{ name = "extract-core", editable = "extract/extract_core" },
|
||||
{ name = "niquests", specifier = ">=3.14.1" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "charset-normalizer"
|
||||
@@ -438,11 +443,15 @@ name = "coffee-prices"
|
||||
version = "0.1.0"
|
||||
source = { editable = "extract/coffee_prices" }
|
||||
dependencies = [
|
||||
{ name = "extract-core" },
|
||||
{ name = "yfinance" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [{ name = "yfinance", specifier = ">=0.2.55" }]
|
||||
requires-dist = [
|
||||
{ name = "extract-core", editable = "extract/extract_core" },
|
||||
{ name = "yfinance", specifier = ">=0.2.55" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
@@ -740,6 +749,17 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017", size = 28317, upload-time = "2025-09-01T09:48:08.5Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "extract-core"
|
||||
version = "0.1.0"
|
||||
source = { editable = "extract/extract_core" }
|
||||
dependencies = [
|
||||
{ name = "niquests" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [{ name = "niquests", specifier = ">=3.14.1" }]
|
||||
|
||||
[[package]]
|
||||
name = "fakeredis"
|
||||
version = "2.34.0"
|
||||
@@ -1059,12 +1079,14 @@ name = "ice-stocks"
|
||||
version = "0.1.0"
|
||||
source = { editable = "extract/ice_stocks" }
|
||||
dependencies = [
|
||||
{ name = "extract-core" },
|
||||
{ name = "niquests" },
|
||||
{ name = "xlrd" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "extract-core", editable = "extract/extract_core" },
|
||||
{ name = "niquests", specifier = ">=3.14.1" },
|
||||
{ name = "xlrd", specifier = ">=2.0.1" },
|
||||
]
|
||||
@@ -2067,11 +2089,15 @@ name = "psdonline"
|
||||
version = "0.1.0"
|
||||
source = { editable = "extract/psdonline" }
|
||||
dependencies = [
|
||||
{ name = "extract-core" },
|
||||
{ name = "niquests" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [{ name = "niquests", specifier = ">=3.14.1" }]
|
||||
requires-dist = [
|
||||
{ name = "extract-core", editable = "extract/extract_core" },
|
||||
{ name = "niquests", specifier = ">=3.14.1" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "psutil"
|
||||
|
||||
Reference in New Issue
Block a user