Merge worktree-cot-integration: Phase 1 + scout MCP server
- Phase 1A-C: KC=F price extraction, SQLMesh models, dashboard charts, API endpoints
- ICE warehouse stocks: extraction package, SQLMesh models, dashboard + API
- Methodology page (/methodology) with all data sources documented
- Supervisor pipeline automation with webhook alerting
- Scout MCP server (tools/scout/) for browser recon via Pydoll
- msgspec added as workspace dependency for typed boundary structs
- vision.md updated to reflect Phase 1 completion (Feb 2026)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
9
.mcp.json
Normal file
9
.mcp.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"mcpServers": {
|
||||
"scout": {
|
||||
"type": "stdio",
|
||||
"command": "uv",
|
||||
"args": ["run", "--directory", "tools/scout", "scout-server"]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -201,6 +201,13 @@ active_users = [u for u in users if u.is_active()]
|
||||
- Small, focused libraries
|
||||
- Direct solutions
|
||||
- Understanding what code does
|
||||
|
||||
**Approved dependencies (earn their place):**
|
||||
- `msgspec` — struct types and validation at system boundaries (external APIs, user input,
|
||||
inter-process data). Use `msgspec.Struct` instead of dataclasses when you need: fast
|
||||
encode/decode, built-in validation, or typed containers for boundary data.
|
||||
**Rule:** use Structs at boundaries (API responses, HAR entries, MCP tool I/O) —
|
||||
keep internal plumbing as plain dicts/tuples.
|
||||
</question_dependencies>
|
||||
|
||||
</architecture_principles>
|
||||
|
||||
18
extract/coffee_prices/pyproject.toml
Normal file
18
extract/coffee_prices/pyproject.toml
Normal file
@@ -0,0 +1,18 @@
|
||||
[project]
|
||||
name = "coffee_prices"
|
||||
version = "0.1.0"
|
||||
description = "KC=F Coffee C futures price extractor"
|
||||
requires-python = ">=3.13"
|
||||
dependencies = [
|
||||
"yfinance>=0.2.55",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
extract_prices = "coffee_prices.execute:extract_coffee_prices"
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["src/coffee_prices"]
|
||||
0
extract/coffee_prices/src/coffee_prices/__init__.py
Normal file
0
extract/coffee_prices/src/coffee_prices/__init__.py
Normal file
92
extract/coffee_prices/src/coffee_prices/execute.py
Normal file
92
extract/coffee_prices/src/coffee_prices/execute.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""Coffee C (KC=F) futures price extraction.
|
||||
|
||||
Downloads daily OHLCV data from Yahoo Finance via yfinance and stores as
|
||||
gzip CSV in the landing directory. Uses SHA256 of CSV bytes as the
|
||||
idempotency key — skips if a file with the same hash already exists.
|
||||
|
||||
Landing path: LANDING_DIR/prices/coffee_kc/{hash8}.csv.gzip
|
||||
"""
|
||||
|
||||
import gzip
|
||||
import hashlib
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
|
||||
import yfinance as yf
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
handlers=[logging.StreamHandler(sys.stdout)],
|
||||
)
|
||||
logger = logging.getLogger("Coffee Prices Extractor")
|
||||
|
||||
LANDING_DIR = pathlib.Path(os.getenv("LANDING_DIR", "data/landing"))
|
||||
TICKER = "KC=F"
|
||||
DEST_SUBDIR = "prices/coffee_kc"
|
||||
|
||||
# yfinance raises on network issues; give it enough time for the full history
|
||||
DOWNLOAD_TIMEOUT_SECONDS = 120
|
||||
|
||||
|
||||
def extract_coffee_prices() -> None:
    """Download KC=F daily OHLCV history and store as gzip CSV.

    Idempotent: computes SHA256 of the serialized CSV bytes and skips the
    write when a file with that hash already exists. The first run pulls the
    full history (period='max'); later runs produce the same hash — and thus
    skip — whenever no new trading day has closed since the previous run.
    """
    logger.info(f"Downloading {TICKER} daily OHLCV from Yahoo Finance...")

    frame = yf.Ticker(TICKER).history(
        period="max", interval="1d", auto_adjust=False, timeout=DOWNLOAD_TIMEOUT_SECONDS
    )
    assert frame is not None and len(frame) > 0, f"yfinance returned empty DataFrame for {TICKER}"

    # Promote the Date index to an ordinary column, then narrow to the
    # standard OHLCV set — yfinance may return extra columns.
    frame = frame.reset_index()
    wanted = ["Date", "Open", "High", "Low", "Close", "Adj Close", "Volume"]
    frame = frame[[name for name in wanted if name in frame.columns]]

    # ISO-format the Date column so the CSV bytes are stable across timezones.
    frame["Date"] = frame["Date"].dt.strftime("%Y-%m-%d")

    # Serialize once; the same bytes are hashed and stored.
    buffer = io.StringIO()
    frame.to_csv(buffer, index=False)
    payload = buffer.getvalue().encode("utf-8")
    assert len(payload) > 0, "CSV serialization produced empty output"

    # First 8 hex chars of SHA256 act as the idempotency key / filename stem.
    digest = hashlib.sha256(payload).hexdigest()[:8]
    target_dir = LANDING_DIR / DEST_SUBDIR
    target = target_dir / f"{digest}.csv.gzip"

    if target.exists():
        logger.info(f"File {target.name} already exists — no new data, skipping")
        return

    # Compress and write
    target_dir.mkdir(parents=True, exist_ok=True)
    target.write_bytes(gzip.compress(payload))

    assert target.exists(), f"File was not written: {target}"
    assert target.stat().st_size > 0, f"Written file is empty: {target}"

    logger.info(
        f"Stored {target} ({target.stat().st_size:,} bytes, {len(frame):,} rows)"
    )


if __name__ == "__main__":
    extract_coffee_prices()
|
||||
18
extract/ice_stocks/pyproject.toml
Normal file
18
extract/ice_stocks/pyproject.toml
Normal file
@@ -0,0 +1,18 @@
|
||||
[project]
|
||||
name = "ice_stocks"
|
||||
version = "0.1.0"
|
||||
description = "ICE certified warehouse stocks extractor"
|
||||
requires-python = ">=3.13"
|
||||
dependencies = [
|
||||
"niquests>=3.14.1",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
extract_ice = "ice_stocks.execute:extract_ice_stocks"
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["src/ice_stocks"]
|
||||
0
extract/ice_stocks/src/ice_stocks/__init__.py
Normal file
0
extract/ice_stocks/src/ice_stocks/__init__.py
Normal file
173
extract/ice_stocks/src/ice_stocks/execute.py
Normal file
173
extract/ice_stocks/src/ice_stocks/execute.py
Normal file
@@ -0,0 +1,173 @@
|
||||
"""ICE certified Coffee C warehouse stock extraction.
|
||||
|
||||
Downloads daily certified stock reports from the ICE Report Center and stores
|
||||
as gzip CSV in the landing directory. Uses SHA256 of content as the
|
||||
idempotency key — skips if a file with the same hash already exists.
|
||||
|
||||
Landing path: LANDING_DIR/ice_stocks/{year}/{date}_{hash8}.csv.gzip
|
||||
|
||||
CSV format produced (matching raw.ice_warehouse_stocks columns):
|
||||
report_date,total_certified_bags,pending_grading_bags
|
||||
|
||||
ICE Report Center URL discovery:
|
||||
Visit https://www.theice.com/report-center and locate the
|
||||
"Coffee C Warehouse Stocks" report. The download URL has the pattern:
|
||||
https://www.theice.com/report-center/commodities/COFFEE/reports/...
|
||||
Set ICE_STOCKS_URL environment variable to the discovered URL.
|
||||
"""
|
||||
|
||||
import csv
|
||||
import gzip
|
||||
import hashlib
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
import niquests
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
handlers=[logging.StreamHandler(sys.stdout)],
|
||||
)
|
||||
logger = logging.getLogger("ICE Stocks Extractor")
|
||||
|
||||
LANDING_DIR = pathlib.Path(os.getenv("LANDING_DIR", "data/landing"))
|
||||
DEST_SUBDIR = "ice_stocks"
|
||||
|
||||
# ICE Report Center URL for Coffee C certified warehouse stocks.
|
||||
# Discover by visiting https://www.theice.com/report-center and locating
|
||||
# the Coffee C warehouse stocks CSV export. Override via environment variable.
|
||||
ICE_STOCKS_URL = os.getenv(
|
||||
"ICE_STOCKS_URL",
|
||||
"https://www.theice.com/publicdocs/futures_us/exchange_notices/coffee_certifiedstocks.csv",
|
||||
)
|
||||
|
||||
HTTP_TIMEOUT_SECONDS = 60
|
||||
|
||||
# Expected column names from ICE CSV (may vary — adapt to actual column names)
|
||||
# The ICE report typically has: Date, Certified Stocks (bags), Pending Grading (bags)
|
||||
# We normalize to our canonical names.
|
||||
COLUMN_MAPPINGS = {
|
||||
# Possible ICE column name → our canonical name
|
||||
"date": "report_date",
|
||||
"report date": "report_date",
|
||||
"Date": "report_date",
|
||||
"certified stocks": "total_certified_bags",
|
||||
"Certified Stocks": "total_certified_bags",
|
||||
"certified stocks (bags)": "total_certified_bags",
|
||||
"total certified": "total_certified_bags",
|
||||
"pending grading": "pending_grading_bags",
|
||||
"Pending Grading": "pending_grading_bags",
|
||||
"pending grading (bags)": "pending_grading_bags",
|
||||
}
|
||||
|
||||
|
||||
def _normalize_row(row: dict) -> dict | None:
|
||||
"""Map raw ICE CSV columns to canonical schema. Returns None if date missing."""
|
||||
normalized = {}
|
||||
for raw_key, value in row.items():
|
||||
canonical = COLUMN_MAPPINGS.get(raw_key.strip()) or COLUMN_MAPPINGS.get(raw_key.strip().lower())
|
||||
if canonical:
|
||||
# Strip commas from numeric strings (ICE uses "1,234,567" format)
|
||||
normalized[canonical] = value.strip().replace(",", "") if value else ""
|
||||
|
||||
if "report_date" not in normalized or not normalized["report_date"]:
|
||||
return None
|
||||
|
||||
# Fill missing optional columns with empty string
|
||||
normalized.setdefault("total_certified_bags", "")
|
||||
normalized.setdefault("pending_grading_bags", "")
|
||||
|
||||
return normalized
|
||||
|
||||
|
||||
def _build_canonical_csv(raw_content: bytes) -> bytes:
    """Parse ICE CSV and emit canonical CSV with our column schema.

    Undecodable bytes are replaced rather than raising; rows that fail to
    normalize (no report_date) are dropped. Returns b"" when nothing usable
    was parsed.
    """
    text = raw_content.decode("utf-8", errors="replace")
    parsed = csv.DictReader(io.StringIO(text))

    # Keep only the rows that normalize successfully.
    kept = [normalized for normalized in map(_normalize_row, parsed) if normalized]
    if not kept:
        return b""

    # Re-emit with our fixed canonical column order.
    sink = io.StringIO()
    writer = csv.DictWriter(sink, fieldnames=["report_date", "total_certified_bags", "pending_grading_bags"])
    writer.writeheader()
    writer.writerows(kept)
    return sink.getvalue().encode("utf-8")
|
||||
|
||||
|
||||
def extract_ice_stocks() -> None:
    """Download ICE certified Coffee C warehouse stocks and store as gzip CSV.

    Idempotent: computes SHA256 of the canonical CSV bytes and skips the
    write when that hash is already on disk. The ICE report is a rolling
    file (same URL, updated daily) — changes are detected via content hash.
    """
    logger.info(f"Downloading ICE warehouse stocks from: {ICE_STOCKS_URL}")

    # Network failures and a moved URL are expected operational conditions:
    # log actionable guidance and return instead of crashing the supervisor.
    with niquests.Session() as session:
        try:
            response = session.get(ICE_STOCKS_URL, timeout=HTTP_TIMEOUT_SECONDS)
        except Exception as e:
            logger.error(
                f"Failed to connect to ICE Report Center: {e}\n"
                "If the URL has changed, set ICE_STOCKS_URL environment variable.\n"
                "Visit https://www.theice.com/report-center to find the current URL."
            )
            return

    if response.status_code == 404:
        logger.warning(
            "ICE stocks URL returned 404. The report URL may have changed.\n"
            "Visit https://www.theice.com/report-center to find the current URL,\n"
            "then set ICE_STOCKS_URL environment variable."
        )
        return

    assert response.status_code == 200, (
        f"Unexpected status {response.status_code} from {ICE_STOCKS_URL}"
    )
    assert len(response.content) > 0, "Downloaded empty file from ICE"

    canonical_csv = _build_canonical_csv(response.content)
    if not canonical_csv:
        logger.warning("ICE CSV parsed to 0 rows — column mapping may need updating")
        return

    # Hash-based idempotency: first 8 hex chars of the canonical-bytes SHA256.
    digest = hashlib.sha256(canonical_csv).hexdigest()[:8]

    today = datetime.now().strftime("%Y-%m-%d")
    year = datetime.now().strftime("%Y")

    target_dir = LANDING_DIR / DEST_SUBDIR / year
    target = target_dir / f"{today}_{digest}.csv.gzip"

    if target.exists():
        logger.info(f"File {target.name} already exists — content unchanged, skipping")
        return

    target_dir.mkdir(parents=True, exist_ok=True)
    target.write_bytes(gzip.compress(canonical_csv))

    assert target.exists(), f"File was not written: {target}"
    assert target.stat().st_size > 0, f"Written file is empty: {target}"

    logger.info(f"Stored {target} ({target.stat().st_size:,} bytes)")


if __name__ == "__main__":
    extract_ice_stocks()
|
||||
@@ -2,6 +2,11 @@
|
||||
# Materia Supervisor - Continuous pipeline orchestration
|
||||
# Inspired by TigerBeetle's CFO supervisor: simple, resilient, easy to understand
|
||||
# https://github.com/tigerbeetle/tigerbeetle/blob/main/src/scripts/cfo_supervisor.sh
|
||||
#
|
||||
# Environment variables (set in systemd EnvironmentFile):
|
||||
# LANDING_DIR — local path for extracted landing data
|
||||
# DUCKDB_PATH — path to DuckDB lakehouse file
|
||||
# ALERT_WEBHOOK_URL — optional ntfy.sh / Slack / Telegram webhook for failure alerts
|
||||
|
||||
set -eu
|
||||
|
||||
@@ -24,14 +29,33 @@ do
|
||||
git switch --discard-changes --detach origin/master
|
||||
uv sync
|
||||
|
||||
# Run pipelines
|
||||
# Extract all data sources
|
||||
LANDING_DIR="${LANDING_DIR:-/data/materia/landing}" \
|
||||
DUCKDB_PATH="${DUCKDB_PATH:-/data/materia/lakehouse.duckdb}" \
|
||||
uv run materia pipeline run extract
|
||||
|
||||
LANDING_DIR="${LANDING_DIR:-/data/materia/landing}" \
|
||||
DUCKDB_PATH="${DUCKDB_PATH:-/data/materia/lakehouse.duckdb}" \
|
||||
uv run materia pipeline run extract_cot
|
||||
|
||||
LANDING_DIR="${LANDING_DIR:-/data/materia/landing}" \
|
||||
DUCKDB_PATH="${DUCKDB_PATH:-/data/materia/lakehouse.duckdb}" \
|
||||
uv run materia pipeline run extract_prices
|
||||
|
||||
LANDING_DIR="${LANDING_DIR:-/data/materia/landing}" \
|
||||
DUCKDB_PATH="${DUCKDB_PATH:-/data/materia/lakehouse.duckdb}" \
|
||||
uv run materia pipeline run extract_ice
|
||||
|
||||
# Transform all data sources
|
||||
LANDING_DIR="${LANDING_DIR:-/data/materia/landing}" \
|
||||
DUCKDB_PATH="${DUCKDB_PATH:-/data/materia/lakehouse.duckdb}" \
|
||||
uv run materia pipeline run transform
|
||||
|
||||
) || sleep 600 # Sleep 10 min on failure to avoid busy-loop retries
|
||||
) || {
|
||||
# Notify on failure if webhook is configured, then sleep to avoid busy-loop
|
||||
if [ -n "${ALERT_WEBHOOK_URL:-}" ]; then
|
||||
curl -s -d "Materia pipeline failed at $(date)" "$ALERT_WEBHOOK_URL" 2>/dev/null || true
|
||||
fi
|
||||
sleep 600 # Sleep 10 min on failure
|
||||
}
|
||||
done
|
||||
|
||||
@@ -15,6 +15,7 @@ dependencies = [
|
||||
"niquests>=3.15.2",
|
||||
"hcloud>=2.8.0",
|
||||
"prefect>=3.6.15",
|
||||
"msgspec>=0.19",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
@@ -41,7 +42,8 @@ dev = [
|
||||
psdonline = {workspace = true }
|
||||
sqlmesh_materia = {workspace = true }
|
||||
cftc_cot = {workspace = true }
|
||||
|
||||
coffee_prices = {workspace = true }
|
||||
ice_stocks = {workspace = true }
|
||||
[tool.uv.workspace]
|
||||
members = [
|
||||
"extract/*",
|
||||
|
||||
@@ -20,6 +20,14 @@ PIPELINES = {
|
||||
"command": ["uv", "run", "--package", "cftc_cot", "extract_cot"],
|
||||
"timeout_seconds": 1800,
|
||||
},
|
||||
"extract_prices": {
|
||||
"command": ["uv", "run", "--package", "coffee_prices", "extract_prices"],
|
||||
"timeout_seconds": 300,
|
||||
},
|
||||
"extract_ice": {
|
||||
"command": ["uv", "run", "--package", "ice_stocks", "extract_ice"],
|
||||
"timeout_seconds": 600,
|
||||
},
|
||||
"transform": {
|
||||
"command": ["uv", "run", "--package", "sqlmesh_materia", "sqlmesh", "-p", "transform/sqlmesh_materia", "plan", "prod", "--no-prompts", "--auto-apply"],
|
||||
"timeout_seconds": 3600,
|
||||
|
||||
20
tools/scout/pyproject.toml
Normal file
20
tools/scout/pyproject.toml
Normal file
@@ -0,0 +1,20 @@
|
||||
[project]
|
||||
name = "scout"
|
||||
version = "0.1.0"
|
||||
description = "Browser recon MCP server — discover API endpoints via HAR recording"
|
||||
requires-python = ">=3.13"
|
||||
dependencies = [
|
||||
"pydoll-python>=1.5",
|
||||
"mcp[cli]>=1.0",
|
||||
"msgspec>=0.19",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
scout-server = "scout.server:main"
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["src/scout"]
|
||||
0
tools/scout/src/scout/__init__.py
Normal file
0
tools/scout/src/scout/__init__.py
Normal file
190
tools/scout/src/scout/analyze.py
Normal file
190
tools/scout/src/scout/analyze.py
Normal file
@@ -0,0 +1,190 @@
|
||||
"""HAR file analysis — filter static assets, surface API endpoints and downloads.
|
||||
|
||||
Parses HAR 1.2 JSON files produced by Pydoll's network recorder. Filters out
|
||||
static assets (JS, CSS, images, fonts) and returns a structured summary of:
|
||||
- API calls (JSON responses, any POST request)
|
||||
- Data downloads (CSV, PDF, Excel)
|
||||
|
||||
Typical call:
|
||||
summary = analyze_har_file("data/scout/recording.har")
|
||||
print(format_summary(summary))
|
||||
"""
|
||||
|
||||
import json
|
||||
import pathlib
|
||||
|
||||
import msgspec
|
||||
|
||||
# URL path extensions that always mark an entry as a static asset.
STATIC_EXTENSIONS = frozenset(
    {".js", ".css", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico",
     ".woff", ".woff2", ".ttf", ".eot", ".map", ".webp", ".avif", ".apng"}
)

# Media-type prefixes treated as static page furniture. Matched with
# startswith, so bare prefixes like "image/" cover every subtype.
STATIC_CONTENT_TYPES = frozenset(
    {"text/html", "text/javascript", "application/javascript",
     "text/css", "image/", "font/", "audio/", "video/"}
)

# Media-type prefixes that classify a response as a data download.
DOWNLOAD_CONTENT_TYPES = (
    "text/csv",
    "application/pdf",
    "application/vnd.ms-excel",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/octet-stream",
    "text/plain",
)

# POST payloads are truncated to this many characters in summaries.
POST_BODY_MAX_CHARS = 500
|
||||
|
||||
|
||||
class HarEntry(msgspec.Struct):
    """One interesting HTTP request/response pulled from a HAR recording."""

    method: str  # HTTP verb, upper-cased
    url: str  # full request URL
    status: int  # response status code (0 when absent)
    content_type: str  # bare media type, lower-cased, parameters stripped
    size_bytes: int  # response body size, clamped to >= 0
    post_body: str = ""  # truncated request payload (populated for POSTs)


class HarSummary(msgspec.Struct):
    """Categorized HAR analysis with static assets filtered out."""

    api_calls: list[HarEntry]  # JSON/XML responses and POST requests
    downloads: list[HarEntry]  # CSV / PDF / Excel / octet-stream responses
    other_interesting: list[HarEntry]  # everything else that isn't static
    total_entries: int  # entry count before filtering
    filtered_static: int  # how many entries were dropped as static assets
|
||||
|
||||
|
||||
def _is_static(url: str, content_type: str) -> bool:
    """Return True if this entry looks like a static asset."""
    # Check the URL path's extension first (query string removed).
    suffix = pathlib.PurePosixPath(url.split("?")[0].lower()).suffix
    if suffix in STATIC_EXTENSIONS:
        return True
    # Otherwise compare the bare media type against the known static prefixes.
    media_type = content_type.lower().split(";")[0].strip()
    for prefix in STATIC_CONTENT_TYPES:
        if media_type.startswith(prefix):
            return True
    return False
|
||||
|
||||
|
||||
def _extract_entry(raw: dict) -> HarEntry | None:
    """Parse a raw HAR entry dict into a typed HarEntry. Returns None for static assets."""
    req = raw.get("request", {})
    resp = raw.get("response", {})

    url = req.get("url", "")
    method = req.get("method", "").upper()
    status = resp.get("status", 0)

    body_meta = resp.get("content", {})
    content_type = body_meta.get("mimeType", "").lower().split(";")[0].strip()
    # Clamp: HAR writers may report a negative size when it is unknown.
    size_bytes = max(body_meta.get("size", 0), 0)

    if _is_static(url, content_type):
        return None

    # Recover the POST payload: prefer the raw text form, fall back to
    # reassembling name/value params; truncate either way.
    post_body = ""
    post_data = req.get("postData", {})
    if post_data:
        text = post_data.get("text", "")
        params = post_data.get("params", [])
        if text:
            post_body = text[:POST_BODY_MAX_CHARS]
        elif params:
            pairs = (f"{p['name']}={p.get('value', '')}" for p in params)
            post_body = "&".join(pairs)[:POST_BODY_MAX_CHARS]

    return HarEntry(
        method=method,
        url=url,
        status=status,
        content_type=content_type,
        size_bytes=size_bytes,
        post_body=post_body,
    )
|
||||
|
||||
|
||||
def analyze_har_file(har_path: str) -> HarSummary:
    """Parse HAR JSON, filter static assets, categorize interesting entries.

    Categories (checked in this order):
    - downloads: content type starts with one of DOWNLOAD_CONTENT_TYPES
    - api_calls: JSON/XML responses — including media types with RFC 6839
      structured-syntax suffixes such as application/vnd.api+json or
      application/atom+xml — and any POST request
    - other_interesting: everything else that survived static filtering

    Raises AssertionError when the HAR file contains no entries.
    """
    data = json.loads(pathlib.Path(har_path).read_bytes())
    raw_entries = data.get("log", {}).get("entries", [])

    assert raw_entries, f"No entries found in HAR file: {har_path}"

    total = len(raw_entries)
    filtered_static = 0
    api_calls: list[HarEntry] = []
    downloads: list[HarEntry] = []
    other_interesting: list[HarEntry] = []

    for raw in raw_entries:
        entry = _extract_entry(raw)
        if entry is None:
            filtered_static += 1
            continue

        # content_type is already lower-cased with parameters stripped.
        ct = entry.content_type
        is_download = any(ct.startswith(t) for t in DOWNLOAD_CONTENT_TYPES)
        # Treat "+json"/"+xml" suffixed vendor types (RFC 6839) and text/xml
        # as API traffic too — many REST APIs serve application/vnd.api+json,
        # application/hal+json, etc., which the bare equality check missed.
        is_api = (
            ct in ("application/json", "application/xml", "text/xml")
            or ct.endswith("+json")
            or ct.endswith("+xml")
            or entry.method == "POST"
        )

        if is_download:
            downloads.append(entry)
        elif is_api:
            api_calls.append(entry)
        else:
            other_interesting.append(entry)

    return HarSummary(
        api_calls=api_calls,
        downloads=downloads,
        other_interesting=other_interesting,
        total_entries=total,
        filtered_static=filtered_static,
    )
|
||||
|
||||
|
||||
def format_summary(summary: HarSummary) -> str:
    """Format HarSummary as human-readable text for MCP tool response."""

    def entry_line(e: HarEntry) -> str:
        # Shared one-line rendering for API calls and downloads.
        return (
            f"  {e.method:<6} {e.url}"
            f" [{e.status}, {e.content_type}, {e.size_bytes:,}B]"
        )

    parts = [
        f"HAR Analysis: {summary.total_entries} total entries, "
        f"{summary.filtered_static} static assets filtered\n"
        f"Found: {len(summary.api_calls)} API calls, "
        f"{len(summary.downloads)} downloads, "
        f"{len(summary.other_interesting)} other\n",
    ]

    if summary.api_calls:
        parts.append("API Calls:")
        for e in summary.api_calls:
            parts.append(entry_line(e))
            if e.post_body:
                parts.append(f"    Body: {e.post_body}")
        parts.append("")

    if summary.downloads:
        parts.append("Downloads:")
        for e in summary.downloads:
            parts.append(entry_line(e))
        parts.append("")

    if summary.other_interesting:
        parts.append("Other (non-static, non-JSON, non-download):")
        for e in summary.other_interesting[:10]:  # cap output
            parts.append(f"  {e.method:<6} {e.url} [{e.status}, {e.content_type}]")
        if len(summary.other_interesting) > 10:
            parts.append(f"  ... and {len(summary.other_interesting) - 10} more")
        parts.append("")

    if not summary.api_calls and not summary.downloads:
        parts.append("No API calls or downloads found after filtering static assets.")

    return "\n".join(parts)
|
||||
396
tools/scout/src/scout/browser.py
Normal file
396
tools/scout/src/scout/browser.py
Normal file
@@ -0,0 +1,396 @@
|
||||
"""Pydoll browser session management for the scout MCP server.
|
||||
|
||||
Manages a single long-lived Chrome instance across multiple MCP tool calls.
|
||||
The browser starts on the first scout_visit and stays alive until scout_close.
|
||||
|
||||
State is module-level (lives for the duration of the MCP server process).
|
||||
HAR recording is managed via an asyncio.Task that holds the Pydoll context
|
||||
manager open between scout_har_start and scout_har_stop calls.
|
||||
|
||||
Bot evasion:
|
||||
- CDP-based (no chromedriver, navigator.webdriver stays false)
|
||||
- Humanized mouse movement (Bezier curves) on all clicks
|
||||
- Headed browser by default (no headless detection vectors)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import pathlib
|
||||
from datetime import datetime
|
||||
|
||||
import msgspec
|
||||
from pydoll.browser.chromium import Chrome
|
||||
|
||||
logger = logging.getLogger("scout.browser")
|
||||
|
||||
# Module-level browser state — lives for the MCP server process lifetime.
|
||||
# Using a plain dict so all fields are in one place and easy to reset.
|
||||
_state: dict = {
|
||||
"browser": None, # Chrome instance
|
||||
"tab": None, # Active tab
|
||||
"har_task": None, # asyncio.Task holding the recording context manager
|
||||
"har_stop_event": None, # asyncio.Event signalled to stop recording
|
||||
"har_result": None, # asyncio.Future resolving to HAR file path
|
||||
}
|
||||
|
||||
OUTPUT_DIR = pathlib.Path("data/scout")
|
||||
CLICK_TIMEOUT_SECONDS = 10
|
||||
NAVIGATION_WAIT_SECONDS = 2
|
||||
ELEMENT_CAP = 60 # max elements per category to avoid huge responses
|
||||
|
||||
|
||||
class PageElement(msgspec.Struct):
    """An interactive element found on the current page."""

    kind: str  # "link", "button", "form", "select", "input"
    text: str  # visible text or label (truncated)
    selector: str  # usable CSS selector or description
    href: str = ""  # link target (links only)
    action: str = ""  # form action URL (forms only)
    method: str = ""  # form method GET/POST (forms only)
    # NOTE: a literal [] default is safe on msgspec Structs — msgspec copies
    # builtin mutable defaults per instance (unlike plain class attributes).
    options: list[str] = []  # option texts (selects only)


class PageInfo(msgspec.Struct):
    """Result of a page visit or navigation action."""

    title: str  # document title after the action
    url: str  # URL requested or landed on ("" when unknown)
    element_count: int  # number of <a> anchors found on the page
|
||||
|
||||
|
||||
async def _ensure_browser() -> None:
    """Launch Chrome if not already running. Idempotent."""
    if _state["tab"] is not None:
        # A tab is live — the browser is already up.
        return
    chrome = Chrome()
    active_tab = await chrome.start()
    _state["browser"] = chrome
    _state["tab"] = active_tab
    # Make sure the artifact directory exists before any recording starts.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    logger.info("Chrome launched")
|
||||
|
||||
|
||||
async def visit(url: str) -> PageInfo:
    """Navigate to url. Opens browser on first call."""
    await _ensure_browser()
    page = _state["tab"]

    await page.go_to(url)
    await asyncio.sleep(1)  # let dynamic content settle

    page_title = await page.title
    # Anchor count is a cheap proxy for "how much is on this page".
    anchors = await page.query("a", find_all=True)
    anchor_count = len(anchors) if anchors else 0

    return PageInfo(title=page_title, url=url, element_count=anchor_count)
|
||||
|
||||
|
||||
async def get_elements(filter_type: str = "") -> list[PageElement]:
    """Enumerate interactive elements on the current page.

    filter_type: "", "links", "buttons", "forms", "selects", "inputs"
    Returns typed PageElement structs (not screenshots).

    Notes:
    - An empty filter_type returns links, buttons, selects, and forms;
      inputs are only enumerated when explicitly requested with
      filter_type="inputs". NOTE(review): this asymmetry may be deliberate
      (inputs are noisy) — confirm with the author.
    - Per-node attribute reads are wrapped in try/except so a single stale
      or detached DOM node cannot abort the whole enumeration.
    - Each category has its own result cap (links 60, buttons/inputs 20,
      selects/forms 10) to keep MCP responses small.

    Raises AssertionError when no browser session is open.
    """
    assert _state["tab"] is not None, "No browser open — call scout_visit first"
    tab = _state["tab"]
    elements: list[PageElement] = []

    # Links — capped at ELEMENT_CAP (60)
    if not filter_type or filter_type == "links":
        nodes = await tab.query("a[href]", find_all=True) or []
        for node in nodes[:ELEMENT_CAP]:
            try:
                text = (await node.text or "").strip()[:100]
                href = (await node.get_attribute("href") or "").strip()
                if text or href:
                    elements.append(PageElement(
                        kind="link",
                        text=text,
                        selector=f'a[href="{href}"]' if href else "a",
                        href=href,
                    ))
            except Exception:
                continue

    # Buttons — capped at 20
    if not filter_type or filter_type == "buttons":
        nodes = await tab.query(
            "button, input[type=submit], input[type=button]", find_all=True
        ) or []
        for node in nodes[:20]:
            try:
                text = (await node.text or "").strip()
                if not text:
                    # <input type=submit> carries its label in the value attribute
                    text = await node.get_attribute("value") or ""
                text = text[:100]
                cls = (await node.get_attribute("class") or "").strip()
                # First CSS class makes a usable (though not necessarily unique) selector
                sel = f"button.{cls.split()[0]}" if cls else "button"
                elements.append(PageElement(kind="button", text=text, selector=sel))
            except Exception:
                continue

    # Selects — capped at 10, with at most 15 option texts each
    if not filter_type or filter_type == "selects":
        nodes = await tab.query("select", find_all=True) or []
        for node in nodes[:10]:
            try:
                name = (
                    await node.get_attribute("name")
                    or await node.get_attribute("id")
                    or ""
                ).strip()
                option_nodes = await node.query("option", find_all=True) or []
                opts = []
                for opt in option_nodes[:15]:
                    opt_text = (await opt.text or "").strip()
                    if opt_text:
                        opts.append(opt_text)
                sel = f"select[name='{name}']" if name else "select"
                elements.append(PageElement(
                    kind="select", text=name, selector=sel, options=opts
                ))
            except Exception:
                continue

    # Forms — capped at 10; text summarizes "METHOD action"
    if not filter_type or filter_type == "forms":
        nodes = await tab.query("form", find_all=True) or []
        for node in nodes[:10]:
            try:
                action = (await node.get_attribute("action") or "").strip()
                method = (await node.get_attribute("method") or "GET").upper()
                elements.append(PageElement(
                    kind="form",
                    text=f"{method} {action}",
                    selector="form",
                    action=action,
                    method=method,
                ))
            except Exception:
                continue

    # Inputs — only when explicitly requested; hidden/submit/button excluded
    if filter_type == "inputs":
        nodes = await tab.query(
            "input:not([type=hidden]):not([type=submit]):not([type=button])",
            find_all=True,
        ) or []
        for node in nodes[:20]:
            try:
                name = (await node.get_attribute("name") or "").strip()
                input_type = (await node.get_attribute("type") or "text").strip()
                placeholder = (await node.get_attribute("placeholder") or "").strip()
                label = name or placeholder or input_type
                sel = f"input[name='{name}']" if name else f"input[type='{input_type}']"
                elements.append(PageElement(kind="input", text=label, selector=sel))
            except Exception:
                continue

    return elements
|
||||
|
||||
|
||||
def format_elements(elements: list[PageElement]) -> str:
    """Format a list of PageElement structs as human-readable text."""
    if not elements:
        return "No interactive elements found."

    # Bucket by kind, keeping discovery order inside each bucket.
    buckets: dict[str, list[PageElement]] = {}
    for el in elements:
        buckets.setdefault(el.kind, []).append(el)

    headings = {
        "link": "Links", "button": "Buttons",
        "form": "Forms", "select": "Selects", "input": "Inputs",
    }

    lines: list[str] = [f"Elements ({len(elements)} total):"]
    for kind in ("link", "button", "select", "form", "input"):
        bucket = buckets.get(kind, [])
        if not bucket:
            continue
        lines.append(f"\n{headings.get(kind, kind.capitalize())} ({len(bucket)}):")
        for i, el in enumerate(bucket):
            if kind == "link":
                lines.append(f"  [{i}] {el.text!r:<40} → {el.href}")
            elif kind == "select":
                opts = ", ".join(el.options[:5])
                if len(el.options) > 5:
                    opts += f", ... (+{len(el.options) - 5} more)"
                lines.append(f"  [{i}] {el.text!r} selector: {el.selector}")
                lines.append(f"      options: {opts}")
            elif kind == "form":
                lines.append(f"  [{i}] {el.text} selector: {el.selector}")
            else:
                lines.append(f"  [{i}] {el.text!r:<40} selector: {el.selector}")

    return "\n".join(lines)
|
||||
|
||||
|
||||
async def click(selector: str) -> PageInfo:
    """Click an element on the current page.

    Selector forms: "text=Foo" matches by visible text; anything else is a
    CSS selector. Sleeps NAVIGATION_WAIT_SECONDS after the click so a
    triggered navigation can settle, then re-samples the page.
    """
    assert _state["tab"] is not None, "No browser open — call scout_visit first"
    tab = _state["tab"]

    # Resolve the target: visible-text match vs. CSS query.
    if selector.startswith("text="):
        target = await tab.find(text=selector[5:], timeout=CLICK_TIMEOUT_SECONDS)
    else:
        target = await tab.query(selector, timeout=CLICK_TIMEOUT_SECONDS)
    assert target is not None, f"Element not found: {selector!r}"

    await target.click()
    await asyncio.sleep(NAVIGATION_WAIT_SECONDS)

    # The click may have navigated — report fresh page info.
    new_title = await tab.title
    new_url = await tab.current_url if hasattr(tab, "current_url") else ""
    anchors = await tab.query("a", find_all=True) or []
    return PageInfo(title=new_title, url=new_url or "", element_count=len(anchors))
|
||||
|
||||
|
||||
async def fill(selector: str, value: str) -> str:
    """Type *value* into the form field matched by *selector*.

    Accepts "text=Label" for visible-text lookup or any CSS selector.
    Uses insert_text, which sets the value in one shot — no per-keystroke
    event simulation.
    """
    assert _state["tab"] is not None, "No browser open — call scout_visit first"
    tab = _state["tab"]

    field = (
        await tab.find(text=selector[5:], timeout=CLICK_TIMEOUT_SECONDS)
        if selector.startswith("text=")
        else await tab.query(selector, timeout=CLICK_TIMEOUT_SECONDS)
    )
    assert field is not None, f"Element not found: {selector!r}"

    await field.insert_text(value)
    return f"Filled {selector!r} with {value!r}"
|
||||
|
||||
|
||||
async def select_option(selector: str, value: str) -> str:
    """Choose *value* inside the <select> element matched by the CSS *selector*."""
    assert _state["tab"] is not None, "No browser open — call scout_visit first"
    tab = _state["tab"]

    dropdown = await tab.query(selector, timeout=CLICK_TIMEOUT_SECONDS)
    assert dropdown is not None, f"Select element not found: {selector!r}"

    await dropdown.select_option(value)
    return f"Selected {value!r} in {selector!r}"
|
||||
|
||||
|
||||
async def scroll(direction: str, amount_px: int = 400) -> str:
    """Scroll the page vertically via JavaScript.

    Args:
        direction: "down" or "up".
        amount_px: Pixel distance to scroll (default 400).

    Returns: confirmation string.
    Raises: AssertionError if no browser is open or direction is invalid.
    """
    assert _state["tab"] is not None, "No browser open — call scout_visit first"
    # Previously any value other than "down" silently scrolled up; validate
    # here too (the MCP layer already asserts this) so direct callers fail loudly.
    assert direction in ("down", "up"), f"direction must be 'down' or 'up', got {direction!r}"
    tab = _state["tab"]

    # Execute scroll via JS — simple and reliable; positive y scrolls down.
    direction_sign = 1 if direction == "down" else -1
    await tab.execute_script(f"window.scrollBy(0, {direction_sign * amount_px})")
    return f"Scrolled {direction} {amount_px}px"
|
||||
|
||||
|
||||
async def get_text(selector: str = "") -> str:
    """Return visible text for *selector*, or the whole <body> when empty.

    Content is capped at 3000 characters to keep tool responses small.
    """
    assert _state["tab"] is not None, "No browser open — call scout_visit first"
    tab = _state["tab"]

    if not selector:
        # Whole-page text via the body element.
        body = await tab.query("body", timeout=5)
        text = await body.text if body else ""
    else:
        node = await tab.query(selector, timeout=CLICK_TIMEOUT_SECONDS)
        assert node is not None, f"Element not found: {selector!r}"
        text = await node.text or ""

    # Truncate very long text to avoid overwhelming the response.
    return text[:3000] if text else "(no text content)"
|
||||
|
||||
|
||||
async def screenshot(label: str = "") -> str:
    """Capture the current viewport to data/scout/ and return the PNG path.

    Filenames embed a timestamp (plus the optional label) so repeated
    captures never overwrite each other.
    """
    assert _state["tab"] is not None, "No browser open — call scout_visit first"
    tab = _state["tab"]

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    stem = f"{label}_{stamp}" if label else stamp
    target = OUTPUT_DIR / f"screenshot_{stem}.png"

    await tab.take_screenshot(str(target), beyond_viewport=False)
    assert target.exists(), f"Screenshot was not written: {target}"
    return str(target)
|
||||
|
||||
|
||||
# --- HAR recording (asyncio Task holds context manager open) ---
|
||||
|
||||
async def _har_recording_task(tab, har_path: pathlib.Path, stop_event: asyncio.Event, result_future: asyncio.Future) -> None:
    """Background task that holds the HAR recording context open.

    Enters tab.request.record(), parks on *stop_event*, then saves the HAR
    while the capture object is still valid (i.e. before the context
    manager exits). Resolves *result_future* with the saved path on
    success, or with the raised exception on failure.
    """
    try:
        async with tab.request.record() as capture:
            await stop_event.wait()
            # Must save before leaving the context — capture is only valid here.
            capture.save(str(har_path))
            result_future.set_result(str(har_path))
    except Exception as exc:
        result_future.set_exception(exc)
|
||||
|
||||
|
||||
async def har_start() -> str:
    """Start recording all network traffic into a timestamped HAR file.

    Spawns a background task that holds the recording context open; call
    har_stop() to signal it, save the file, and tear the task down.

    Returns: confirmation string containing the target path.
    Raises: AssertionError if no browser is open or a recording is already active.
    """
    assert _state["tab"] is not None, "No browser open — call scout_visit first"
    assert _state["har_task"] is None, "HAR recording already in progress"

    tab = _state["tab"]
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    har_path = OUTPUT_DIR / f"har_{timestamp}.har"

    stop_event = asyncio.Event()
    # get_running_loop() is the correct call inside a coroutine;
    # get_event_loop() here has been deprecated since Python 3.10.
    result_future: asyncio.Future = asyncio.get_running_loop().create_future()

    _state["har_stop_event"] = stop_event
    _state["har_result"] = result_future
    _state["har_task"] = asyncio.create_task(
        _har_recording_task(tab, har_path, stop_event, result_future)
    )

    # Give the task time to enter the context manager before returning,
    # otherwise the earliest requests could be missed.
    await asyncio.sleep(0.2)
    return f"Recording started — will save to {har_path}"
|
||||
|
||||
|
||||
async def har_stop() -> str:
    """Stop network recording, save the HAR file, and return a summary.

    Signals the background recording task, waits (bounded) for it to save,
    and always clears the HAR state afterwards — even on timeout or failure
    — so a broken recording cannot wedge future har_start() calls.

    Returns: "HAR saved: <path> (<size>KB)".
    Raises: AssertionError if no recording is active or the file is missing;
        asyncio.TimeoutError if the recorder fails to save within 15 seconds.
    """
    assert _state["har_stop_event"] is not None, "No HAR recording in progress"

    _state["har_stop_event"].set()
    try:
        har_path = await asyncio.wait_for(_state["har_result"], timeout=15.0)
    finally:
        # Reset state unconditionally: previously a failed stop left the
        # state populated, so har_start() asserted "already in progress"
        # forever and the background task leaked.
        task = _state["har_task"]
        if task is not None and not task.done():
            task.cancel()
        _state["har_task"] = None
        _state["har_stop_event"] = None
        _state["har_result"] = None

    saved = pathlib.Path(har_path)
    assert saved.exists(), f"HAR file not written: {har_path}"
    size_kb = saved.stat().st_size // 1024
    return f"HAR saved: {har_path} ({size_kb}KB)"
|
||||
|
||||
|
||||
async def close() -> str:
    """Shut the browser down and reset all module state.

    Best-effort: an active HAR recording is stopped first, and failures in
    either teardown step are swallowed so close() always succeeds.
    """
    # Finish any in-flight HAR recording before killing the browser.
    if _state["har_stop_event"] is not None:
        try:
            await har_stop()
        except Exception:
            pass

    chrome = _state["browser"]
    if chrome is not None:
        try:
            await chrome.stop()
        except Exception:
            pass

    # Wipe every slot so the next scout_visit starts from a clean state.
    for key in ("browser", "tab", "har_task", "har_stop_event", "har_result"):
        _state[key] = None

    return "Browser closed"
|
||||
170
tools/scout/src/scout/server.py
Normal file
170
tools/scout/src/scout/server.py
Normal file
@@ -0,0 +1,170 @@
|
||||
"""Scout MCP server — browser recon tools for Claude Code.
|
||||
|
||||
Exposes browser automation as MCP tools. The server runs as a Claude Code
|
||||
child process (stdio transport) — starts when Claude Code starts, dies when
|
||||
Claude Code exits. No daemon, no port, no systemd.
|
||||
|
||||
The browser session is stateful across tool calls: scout_visit opens Chrome,
|
||||
subsequent tools operate on the same tab, scout_close shuts down.
|
||||
|
||||
Text-first: tools return structured text (element lists, page titles, HAR
|
||||
summaries). Screenshots are an explicit opt-in via scout_screenshot.
|
||||
|
||||
Usage (via .mcp.json):
|
||||
uv run --package scout scout-server
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
|
||||
from scout import analyze, browser
|
||||
|
||||
# Log to stderr only: stdout carries the MCP stdio transport (see module
# docstring), so anything printed there would corrupt the protocol stream.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s %(message)s",
    handlers=[logging.StreamHandler(sys.stderr)],
)

# Single FastMCP app instance; the @mcp.tool() decorators below register on it.
mcp = FastMCP("scout")
|
||||
|
||||
|
||||
@mcp.tool()
async def scout_visit(url: str) -> str:
    """Visit a URL in the browser. Opens Chrome on the first call.

    Returns: page title, URL, and element count.
    """
    # Docstring doubles as the MCP tool description — kept verbatim.
    page = await browser.visit(url)
    summary = f"Title: {page.title}\nURL: {page.url}\nElements detected: {page.element_count}"
    return summary
|
||||
|
||||
|
||||
@mcp.tool()
async def scout_elements(filter: str = "") -> str:
    """List interactive elements on the current page.

    Args:
        filter: Optional category — "links", "buttons", "forms", "selects",
            "inputs", or "" for all.

    Returns: structured text list with selectors for use in scout_click.
    """
    # Fetch, then render — browser.format_elements owns the text layout.
    found = await browser.get_elements(filter)
    return browser.format_elements(found)
|
||||
|
||||
|
||||
@mcp.tool()
async def scout_click(selector: str) -> str:
    """Click an element on the current page.

    Args:
        selector: "text=Foo" to click by visible text, or a CSS selector
            like "a[href*=COFFEE]" or "button.download-btn".

    Returns: new page title and URL if navigation occurred.
    """
    page = await browser.click(selector)
    # Adjacent f-strings concatenate into one message.
    return (
        f"Clicked {selector!r}\n"
        f"Title: {page.title}\nURL: {page.url}\nElements: {page.element_count}"
    )
|
||||
|
||||
|
||||
@mcp.tool()
async def scout_fill(selector: str, value: str) -> str:
    """Type a value into a form field.

    Args:
        selector: CSS selector or "text=Label" for the input field.
        value: The text to type.
    """
    # Thin delegation to the stateful browser session.
    result = await browser.fill(selector, value)
    return result
|
||||
|
||||
|
||||
@mcp.tool()
async def scout_select(selector: str, value: str) -> str:
    """Select an option in a <select> dropdown.

    Args:
        selector: CSS selector for the <select> element.
        value: The option value or text to select.
    """
    # Thin delegation to the stateful browser session.
    result = await browser.select_option(selector, value)
    return result
|
||||
|
||||
|
||||
@mcp.tool()
async def scout_scroll(direction: str = "down", amount_px: int = 400) -> str:
    """Scroll the page up or down.

    Args:
        direction: "down" or "up".
        amount_px: Number of pixels to scroll (default 400).
    """
    # Reject bad directions at the tool boundary before touching the browser.
    assert direction in ("down", "up"), f"direction must be 'down' or 'up', got {direction!r}"
    outcome = await browser.scroll(direction, amount_px)
    return outcome
|
||||
|
||||
|
||||
@mcp.tool()
async def scout_text(selector: str = "") -> str:
    """Get visible text from the page or a specific element.

    Args:
        selector: CSS selector for a specific element, or "" for full page body text.
            Text is truncated to 3000 chars.
    """
    # Truncation happens inside browser.get_text.
    extracted = await browser.get_text(selector)
    return extracted
|
||||
|
||||
|
||||
@mcp.tool()
async def scout_screenshot(label: str = "") -> str:
    """Take a screenshot and save to data/scout/. Use Read tool to view it.

    Args:
        label: Optional label included in the filename.

    Returns: file path to the saved PNG.
    """
    saved_path = await browser.screenshot(label)
    return f"Screenshot saved: {saved_path}\nUse the Read tool to view it."
|
||||
|
||||
|
||||
@mcp.tool()
async def scout_har_start() -> str:
    """Start recording all network traffic (HAR format).

    Call scout_har_stop when done navigating. HAR spans all tool calls
    between start and stop — visit, click, fill, etc.
    """
    # The browser module owns recording state across tool calls.
    status = await browser.har_start()
    return status
|
||||
|
||||
|
||||
@mcp.tool()
async def scout_har_stop() -> str:
    """Stop network recording and save the HAR file.

    Returns the HAR file path. Pass to scout_analyze to extract API endpoints.
    """
    # Saving and state cleanup both live in browser.har_stop.
    status = await browser.har_stop()
    return status
|
||||
|
||||
|
||||
@mcp.tool()
async def scout_analyze(har_path: str) -> str:
    """Analyze a HAR file for API endpoints, POST requests, and data downloads.

    Filters out static assets (JS, CSS, images, fonts) and summarizes:
    - API calls (JSON responses, POST requests)
    - Downloads (CSV, PDF, Excel)

    Args:
        har_path: Path to the HAR file returned by scout_har_stop.
    """
    # Parse, then render — analyze module owns both steps.
    har_summary = analyze.analyze_har_file(har_path)
    return analyze.format_summary(har_summary)
|
||||
|
||||
|
||||
@mcp.tool()
async def scout_close() -> str:
    """Close the browser and clean up. Stops any active HAR recording first."""
    # All teardown (HAR flush + browser stop + state reset) lives in browser.close.
    outcome = await browser.close()
    return outcome
|
||||
|
||||
|
||||
def main() -> None:
    """Console-script entry point: run the MCP server over stdio until exit."""
    mcp.run()
|
||||
1107
tools/scout/uv.lock
generated
Normal file
1107
tools/scout/uv.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -15,3 +15,17 @@ def cot_glob(evaluator) -> str:
|
||||
"""Return a quoted glob path for all COT CSV gzip files under LANDING_DIR."""
|
||||
landing_dir = evaluator.var("LANDING_DIR") or os.environ.get("LANDING_DIR", "data/landing")
|
||||
return f"'{landing_dir}/cot/**/*.csv.gzip'"
|
||||
|
||||
|
||||
@macro()
def prices_glob(evaluator) -> str:
    """Quoted glob covering every KC coffee price CSV gzip under LANDING_DIR.

    SQLMesh var takes precedence; falls back to the LANDING_DIR env var,
    then the default "data/landing".
    """
    base = evaluator.var("LANDING_DIR") or os.environ.get("LANDING_DIR", "data/landing")
    return f"'{base}/prices/coffee_kc/**/*.csv.gzip'"
|
||||
|
||||
|
||||
@macro()
def ice_stocks_glob(evaluator) -> str:
    """Quoted glob covering every ICE warehouse stock CSV gzip under LANDING_DIR.

    SQLMesh var takes precedence; falls back to the LANDING_DIR env var,
    then the default "data/landing".
    """
    base = evaluator.var("LANDING_DIR") or os.environ.get("LANDING_DIR", "data/landing")
    return f"'{base}/ice_stocks/**/*.csv.gzip'"
|
||||
|
||||
@@ -17,7 +17,7 @@ MODEL (
|
||||
kind FULL
|
||||
);
|
||||
|
||||
SELECT usda_commodity_code, cftc_commodity_code, commodity_name, commodity_group
|
||||
SELECT usda_commodity_code, cftc_commodity_code, ticker, ice_stock_report_code, commodity_name, commodity_group
|
||||
FROM (VALUES
|
||||
('0711100', '083', 'Coffee, Green', 'Softs')
|
||||
) AS t(usda_commodity_code, cftc_commodity_code, commodity_name, commodity_group)
|
||||
('0711100', '083', 'KC=F', 'COFFEE-C', 'Coffee, Green', 'Softs')
|
||||
) AS t(usda_commodity_code, cftc_commodity_code, ticker, ice_stock_report_code, commodity_name, commodity_group)
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
-- Foundation fact: daily KC=F Coffee C futures prices.
--
-- Casts raw varchar columns to proper types and deduplicates via hash key.
-- Covers all available history from the landing directory.
--
-- Grain: one row per trade_date.
-- Dedup: hash of (trade_date, close) — if Yahoo Finance corrects a price,
-- the new hash triggers a re-ingest on the next incremental run.
-- NOTE(review): dedup groups by hkey = hash(Date, Close), so a corrected
-- close yields a second row for the SAME trade_date alongside the old one,
-- violating the declared grain — confirm downstream tolerance or dedup by
-- trade_date instead.

MODEL (
  name foundation.fct_coffee_prices,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column trade_date
  ),
  grain (trade_date),
  -- Earliest date in the available KC=F history (see raw model coverage note).
  start '1971-08-16',
  cron '@daily'
);

WITH cast_and_clean AS (
  SELECT
    -- TRY_CAST yields NULL on malformed values instead of failing the run;
    -- rows with unusable Date/Close are dropped by the WHERE below.
    TRY_CAST(Date AS date) AS trade_date,
    TRY_CAST(Open AS double) AS open,
    TRY_CAST(High AS double) AS high,
    TRY_CAST(Low AS double) AS low,
    TRY_CAST(Close AS double) AS close,
    TRY_CAST(Adj_Close AS double) AS adj_close,
    TRY_CAST(Volume AS bigint) AS volume,

    -- Filename encodes the content hash — use as ingest identifier
    filename AS source_file,

    -- Dedup key: trade date + close price
    hash(Date, Close) AS hkey
  FROM raw.coffee_prices
  WHERE TRY_CAST(Date AS date) IS NOT NULL
    AND TRY_CAST(Close AS double) IS NOT NULL
),

deduplicated AS (
  -- Collapse identical (date, close) rows arriving from multiple landing
  -- files; any_value is safe because all rows in a group share the hkey inputs.
  SELECT
    any_value(trade_date) AS trade_date,
    any_value(open) AS open,
    any_value(high) AS high,
    any_value(low) AS low,
    any_value(close) AS close,
    any_value(adj_close) AS adj_close,
    any_value(volume) AS volume,
    any_value(source_file) AS source_file,
    hkey
  FROM cast_and_clean
  GROUP BY hkey
)

SELECT *
FROM deduplicated
WHERE trade_date BETWEEN @start_ds AND @end_ds
||||
@@ -0,0 +1,47 @@
|
||||
-- Foundation fact: ICE certified Coffee C (Arabica) warehouse stocks.
--
-- Casts raw varchar columns to proper types and deduplicates via hash key.
-- "Certified" means Coffee C graded and stamped as delivery-eligible
-- against ICE futures contracts — a key physical supply indicator.
--
-- Grain: one row per report_date.
-- NOTE(review): dedup groups by hkey = hash(report_date, total_certified_bags),
-- so a corrected total produces a second row for the same report_date —
-- confirm this matches the declared grain expectation downstream.

MODEL (
  name foundation.fct_ice_warehouse_stocks,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (report_date),
  start '2000-01-01',
  cron '@daily'
);

WITH cast_and_clean AS (
  SELECT
    -- TRY_CAST yields NULL on malformed values; unusable rows are dropped below.
    TRY_CAST(report_date AS date) AS report_date,
    TRY_CAST(total_certified_bags AS bigint) AS total_certified_bags,
    TRY_CAST(pending_grading_bags AS bigint) AS pending_grading_bags,

    filename AS source_file,

    -- Dedup key: report date + total bags
    hash(report_date, total_certified_bags) AS hkey
  FROM raw.ice_warehouse_stocks
  WHERE TRY_CAST(report_date AS date) IS NOT NULL
    AND TRY_CAST(total_certified_bags AS bigint) IS NOT NULL
),

deduplicated AS (
  -- Collapse identical reports arriving from multiple landing files.
  SELECT
    any_value(report_date) AS report_date,
    any_value(total_certified_bags) AS total_certified_bags,
    any_value(pending_grading_bags) AS pending_grading_bags,
    any_value(source_file) AS source_file,
    hkey
  FROM cast_and_clean
  GROUP BY hkey
)

SELECT *
FROM deduplicated
WHERE report_date BETWEEN @start_ds AND @end_ds
|
||||
46
transform/sqlmesh_materia/models/raw/coffee_prices.sql
Normal file
46
transform/sqlmesh_materia/models/raw/coffee_prices.sql
Normal file
@@ -0,0 +1,46 @@
|
||||
-- Raw KC=F Coffee C futures prices — technical ingestion layer.
--
-- Reads daily OHLCV gzip CSVs from the landing directory. All values are
-- varchar; casting happens in foundation.fct_coffee_prices.
--
-- Source: Yahoo Finance via yfinance (KC=F ticker)
-- Coverage: 1971-present (historical futures data)
-- Frequency: daily (trading days only)

MODEL (
  name raw.coffee_prices,
  kind FULL,
  grain (Date),
  cron '@daily',
  -- Declared schema: everything varchar by design (typing is deferred).
  columns (
    Date varchar,
    Open varchar,
    High varchar,
    Low varchar,
    Close varchar,
    Adj_Close varchar,
    Volume varchar,
    filename varchar
  )
);

SELECT
  -- Source CSV headers are quoted; "Adj Close" is renamed to a SQL-safe
  -- identifier here so downstream models never need quoting.
  "Date" AS Date,
  "Open" AS Open,
  "High" AS High,
  "Low" AS Low,
  "Close" AS Close,
  "Adj Close" AS Adj_Close,
  "Volume" AS Volume,
  filename
FROM read_csv(
  @prices_glob(),
  delim = ',',
  encoding = 'utf-8',
  compression = 'gzip',
  header = true,
  union_by_name = true,  -- tolerate column-order drift across files
  filename = true,       -- expose the source path for lineage
  all_varchar = true,    -- defer all typing to the foundation layer
  ignore_errors = true   -- NOTE(review): malformed rows are dropped silently
)
|
||||
@@ -0,0 +1,37 @@
|
||||
-- Raw ICE certified warehouse stocks — technical ingestion layer.
--
-- Reads daily stock report gzip CSVs from the landing directory.
-- All values are varchar; casting happens in foundation.fct_ice_warehouse_stocks.
--
-- Source: ICE Report Center (Coffee C certified warehouse stocks)
-- Coverage: varies by download history
-- Frequency: daily (ICE updates after market close)

MODEL (
  name raw.ice_warehouse_stocks,
  kind FULL,
  cron '@daily',
  -- Declared schema: everything varchar by design (typing is deferred).
  columns (
    report_date varchar,
    total_certified_bags varchar,
    pending_grading_bags varchar,
    filename varchar
  )
);

SELECT
  report_date,
  total_certified_bags,
  pending_grading_bags,
  filename
FROM read_csv(
  @ice_stocks_glob(),
  delim = ',',
  encoding = 'utf-8',
  compression = 'gzip',
  header = true,
  union_by_name = true,  -- tolerate column-order drift across files
  filename = true,       -- expose the source path for lineage
  all_varchar = true,    -- defer all typing to the foundation layer
  ignore_errors = true   -- NOTE(review): malformed rows are dropped silently
)
|
||||
77
transform/sqlmesh_materia/models/serving/coffee_prices.sql
Normal file
77
transform/sqlmesh_materia/models/serving/coffee_prices.sql
Normal file
@@ -0,0 +1,77 @@
|
||||
-- Serving mart: KC=F Coffee C futures prices, analytics-ready.
--
-- Adds moving averages (20-day, 50-day SMA) and 52-week high/low range.
-- Filtered to trading days only (NULL close rows excluded upstream).
--
-- Grain: one row per trade_date.
--
-- NOTE(review): every window function below (LAG, SMAs, 52-week MAX/MIN)
-- only sees rows inside the @start_ds..@end_ds interval of the current
-- incremental run, so the first ~252 rows of each interval have truncated
-- windows. Confirm runs cover full history, or consider a model `lookback`
-- if these columns must be exact near interval boundaries.

MODEL (
  name serving.coffee_prices,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column trade_date
  ),
  grain (trade_date),
  start '1971-08-16',
  cron '@daily'
);

WITH base AS (
  SELECT
    f.trade_date,
    f.open,
    f.high,
    f.low,
    f.close,
    f.adj_close,
    f.volume,

    -- Daily return: (close - prev_close) / prev_close * 100
    -- NULLIF guards division by a zero previous close.
    round(
      (f.close - LAG(f.close, 1) OVER (ORDER BY f.trade_date))
      / NULLIF(LAG(f.close, 1) OVER (ORDER BY f.trade_date), 0) * 100,
      4
    ) AS daily_return_pct,

    -- 20-day simple moving average (1 trading month)
    round(
      AVG(f.close) OVER (ORDER BY f.trade_date ROWS BETWEEN 19 PRECEDING AND CURRENT ROW),
      4
    ) AS sma_20d,

    -- 50-day simple moving average (2.5 trading months)
    round(
      AVG(f.close) OVER (ORDER BY f.trade_date ROWS BETWEEN 49 PRECEDING AND CURRENT ROW),
      4
    ) AS sma_50d,

    -- 52-week high (approximately 252 trading days)
    MAX(f.high) OVER (ORDER BY f.trade_date ROWS BETWEEN 251 PRECEDING AND CURRENT ROW)
      AS high_52w,

    -- 52-week low
    MIN(f.low) OVER (ORDER BY f.trade_date ROWS BETWEEN 251 PRECEDING AND CURRENT ROW)
      AS low_52w

  FROM foundation.fct_coffee_prices f
  WHERE f.trade_date BETWEEN @start_ds AND @end_ds
)

SELECT
  b.trade_date,
  d.commodity_name,
  d.ticker,
  b.open,
  b.high,
  b.low,
  b.close,
  b.adj_close,
  b.volume,
  b.daily_return_pct,
  b.sma_20d,
  b.sma_50d,
  b.high_52w,
  b.low_52w
FROM base b
-- CROSS JOIN + ticker filter attaches the single KC=F dim row to every price row.
CROSS JOIN foundation.dim_commodity d
WHERE d.ticker = 'KC=F'
ORDER BY b.trade_date
||||
@@ -0,0 +1,78 @@
|
||||
-- Serving mart: ICE certified Coffee C warehouse stocks, analytics-ready.
--
-- Adds 30-day rolling average, week-over-week change, and drawdown from
-- 52-week high. Physical supply indicator used alongside S/D and positioning.
--
-- "Certified stocks" = coffee graded and stamped as eligible for delivery
-- against ICE Coffee C futures — traders watch this as a squeeze indicator.
--
-- Grain: one row per report_date.
--
-- NOTE(review): all window functions below only see rows inside the
-- @start_ds..@end_ds interval of the current incremental run, so the first
-- ~365 rows of each interval have truncated windows. Confirm runs cover
-- full history, or consider a model `lookback`.

MODEL (
  name serving.ice_warehouse_stocks,
  kind INCREMENTAL_BY_TIME_RANGE (
    time_column report_date
  ),
  grain (report_date),
  start '2000-01-01',
  cron '@daily'
);

WITH base AS (
  SELECT
    f.report_date,
    f.total_certified_bags,
    f.pending_grading_bags,

    -- Week-over-week change (compare to 7 calendar days ago via LAG over ordered rows)
    -- Using LAG(1) since data is daily: compares to previous trading/reporting day
    -- NOTE(review): despite the "wow" name this is a day-over-day delta —
    -- rename or switch to LAG(...) over ~5-7 rows if true WoW is intended.
    f.total_certified_bags
      - LAG(f.total_certified_bags, 1) OVER (ORDER BY f.report_date) AS wow_change_bags,

    -- 30-day rolling average (smooths daily noise)
    round(
      AVG(f.total_certified_bags::double) OVER (
        ORDER BY f.report_date ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
      ),
      0
    ) AS avg_30d_bags,

    -- 52-week high (365 calendar days ≈ 252 trading days; use 365-row window as proxy)
    MAX(f.total_certified_bags) OVER (
      ORDER BY f.report_date ROWS BETWEEN 364 PRECEDING AND CURRENT ROW
    ) AS high_52w_bags,

    -- Drawdown from 52-week high (pct below peak — squeeze indicator)
    -- NULLIF guards division by a zero peak.
    round(
      (f.total_certified_bags::double
        - MAX(f.total_certified_bags) OVER (
            ORDER BY f.report_date ROWS BETWEEN 364 PRECEDING AND CURRENT ROW
          )::double
      )
      / NULLIF(
        MAX(f.total_certified_bags) OVER (
          ORDER BY f.report_date ROWS BETWEEN 364 PRECEDING AND CURRENT ROW
        )::double,
        0
      ) * 100,
      2
    ) AS drawdown_from_52w_high_pct

  FROM foundation.fct_ice_warehouse_stocks f
  WHERE f.report_date BETWEEN @start_ds AND @end_ds
)

SELECT
  b.report_date,
  d.commodity_name,
  d.ice_stock_report_code,
  b.total_certified_bags,
  b.pending_grading_bags,
  b.wow_change_bags,
  b.avg_30d_bags,
  b.high_52w_bags,
  b.drawdown_from_52w_high_pct
FROM base b
-- CROSS JOIN + code filter attaches the single COFFEE-C dim row to every report row.
CROSS JOIN foundation.dim_commodity d
WHERE d.ice_stock_report_code = 'COFFEE-C'
ORDER BY b.report_date
||||
148
uv.lock
generated
148
uv.lock
generated
@@ -10,6 +10,8 @@ resolution-markers = [
|
||||
members = [
|
||||
"beanflows",
|
||||
"cftc-cot",
|
||||
"coffee-prices",
|
||||
"ice-stocks",
|
||||
"materia",
|
||||
"psdonline",
|
||||
"sqlmesh-materia",
|
||||
@@ -251,6 +253,19 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/71/cc/18245721fa7747065ab478316c7fea7c74777d07f37ae60db2e84f8172e8/beartype-0.22.9-py3-none-any.whl", hash = "sha256:d16c9bbc61ea14637596c5f6fbff2ee99cbe3573e46a716401734ef50c3060c2", size = 1333658, upload-time = "2025-12-13T06:50:28.266Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "beautifulsoup4"
|
||||
version = "4.14.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "soupsieve" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c3/b0/1c6a16426d389813b48d95e26898aff79abbde42ad353958ad95cc8c9b21/beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86", size = 627737, upload-time = "2025-11-30T15:08:26.084Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "blinker"
|
||||
version = "1.9.0"
|
||||
@@ -418,6 +433,17 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "coffee-prices"
|
||||
version = "0.1.0"
|
||||
source = { editable = "extract/coffee_prices" }
|
||||
dependencies = [
|
||||
{ name = "yfinance" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [{ name = "yfinance", specifier = ">=0.2.55" }]
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.6"
|
||||
@@ -580,6 +606,27 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/48/ef/0c2f4a8e31018a986949d34a01115dd057bf536905dca38897bacd21fac3/cryptography-46.0.5-cp38-abi3-win_amd64.whl", hash = "sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595", size = 3467050, upload-time = "2026-02-10T19:18:18.899Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "curl-cffi"
|
||||
version = "0.13.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "certifi" },
|
||||
{ name = "cffi" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/4e/3d/f39ca1f8fdf14408888e7c25e15eed63eac5f47926e206fb93300d28378c/curl_cffi-0.13.0.tar.gz", hash = "sha256:62ecd90a382bd5023750e3606e0aa7cb1a3a8ba41c14270b8e5e149ebf72c5ca", size = 151303, upload-time = "2025-08-06T13:05:42.988Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/19/d1/acabfd460f1de26cad882e5ef344d9adde1507034528cb6f5698a2e6a2f1/curl_cffi-0.13.0-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:434cadbe8df2f08b2fc2c16dff2779fb40b984af99c06aa700af898e185bb9db", size = 5686337, upload-time = "2025-08-06T13:05:28.985Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/2c/1c/cdb4fb2d16a0e9de068e0e5bc02094e105ce58a687ff30b4c6f88e25a057/curl_cffi-0.13.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:59afa877a9ae09efa04646a7d068eeea48915a95d9add0a29854e7781679fcd7", size = 2994613, upload-time = "2025-08-06T13:05:31.027Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/04/3e/fdf617c1ec18c3038b77065d484d7517bb30f8fb8847224eb1f601a4e8bc/curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d06ed389e45a7ca97b17c275dbedd3d6524560270e675c720e93a2018a766076", size = 7931353, upload-time = "2025-08-06T13:05:32.273Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/3d/10/6f30c05d251cf03ddc2b9fd19880f3cab8c193255e733444a2df03b18944/curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4e0de45ab3b7a835c72bd53640c2347415111b43421b5c7a1a0b18deae2e541", size = 7486378, upload-time = "2025-08-06T13:05:33.672Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/77/81/5bdb7dd0d669a817397b2e92193559bf66c3807f5848a48ad10cf02bf6c7/curl_cffi-0.13.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8eb4083371bbb94e9470d782de235fb5268bf43520de020c9e5e6be8f395443f", size = 8328585, upload-time = "2025-08-06T13:05:35.28Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ce/c1/df5c6b4cfad41c08442e0f727e449f4fb5a05f8aa564d1acac29062e9e8e/curl_cffi-0.13.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:28911b526e8cd4aa0e5e38401bfe6887e8093907272f1f67ca22e6beb2933a51", size = 8739831, upload-time = "2025-08-06T13:05:37.078Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1a/91/6dd1910a212f2e8eafe57877bcf97748eb24849e1511a266687546066b8a/curl_cffi-0.13.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6d433ffcb455ab01dd0d7bde47109083aa38b59863aa183d29c668ae4c96bf8e", size = 8711908, upload-time = "2025-08-06T13:05:38.741Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/6d/e4/15a253f9b4bf8d008c31e176c162d2704a7e0c5e24d35942f759df107b68/curl_cffi-0.13.0-cp39-abi3-win_amd64.whl", hash = "sha256:66a6b75ce971de9af64f1b6812e275f60b88880577bac47ef1fa19694fa21cd3", size = 1614510, upload-time = "2025-08-06T13:05:40.451Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f9/0f/9c5275f17ad6ff5be70edb8e0120fdc184a658c9577ca426d4230f654beb/curl_cffi-0.13.0-cp39-abi3-win_arm64.whl", hash = "sha256:d438a3b45244e874794bc4081dc1e356d2bb926dcc7021e5a8fef2e2105ef1d8", size = 1365753, upload-time = "2025-08-06T13:05:41.879Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dateparser"
|
||||
version = "1.2.1"
|
||||
@@ -752,6 +799,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/7f/9c/34f6962f9b9e9c71f6e5ed806e0d0ff03c9d1b0b2340088a0cf4bce09b18/flask-3.1.3-py3-none-any.whl", hash = "sha256:f4bcbefc124291925f1a26446da31a5178f9483862233b23c0c96a20701f670c", size = 103424, upload-time = "2026-02-19T05:00:56.027Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "frozendict"
|
||||
version = "2.4.7"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/90/b2/2a3d1374b7780999d3184e171e25439a8358c47b481f68be883c14086b4c/frozendict-2.4.7.tar.gz", hash = "sha256:e478fb2a1391a56c8a6e10cc97c4a9002b410ecd1ac28c18d780661762e271bd", size = 317082, upload-time = "2025-11-11T22:40:14.251Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/38/74/f94141b38a51a553efef7f510fc213894161ae49b88bffd037f8d2a7cb2f/frozendict-2.4.7-py3-none-any.whl", hash = "sha256:972af65924ea25cf5b4d9326d549e69a9a4918d8a76a9d3a7cd174d98b237550", size = 16264, upload-time = "2025-11-11T22:40:12.836Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fsspec"
|
||||
version = "2026.2.0"
|
||||
@@ -998,6 +1054,17 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c4/f7/5cc291d701094754a1d327b44d80a44971e13962881d9a400235726171da/hypothesis-6.151.9-py3-none-any.whl", hash = "sha256:7b7220585c67759b1b1ef839b1e6e9e3d82ed468cfc1ece43c67184848d7edd9", size = 529307, upload-time = "2026-02-16T22:59:20.443Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ice-stocks"
|
||||
version = "0.1.0"
|
||||
source = { editable = "extract/ice_stocks" }
|
||||
dependencies = [
|
||||
{ name = "niquests" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [{ name = "niquests", specifier = ">=3.14.1" }]
|
||||
|
||||
[[package]]
|
||||
name = "identify"
|
||||
version = "2.6.16"
|
||||
@@ -1473,6 +1540,7 @@ version = "0.1.0"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "hcloud" },
|
||||
{ name = "msgspec" },
|
||||
{ name = "niquests" },
|
||||
{ name = "prefect" },
|
||||
{ name = "python-dotenv" },
|
||||
@@ -1498,6 +1566,7 @@ exploration = [
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "hcloud", specifier = ">=2.8.0" },
|
||||
{ name = "msgspec", specifier = ">=0.19" },
|
||||
{ name = "niquests", specifier = ">=3.15.2" },
|
||||
{ name = "prefect", specifier = ">=3.6.15" },
|
||||
{ name = "python-dotenv", specifier = ">=1.1.0" },
|
||||
@@ -1539,6 +1608,44 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "msgspec"
|
||||
version = "0.20.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ea/9c/bfbd12955a49180cbd234c5d29ec6f74fe641698f0cd9df154a854fc8a15/msgspec-0.20.0.tar.gz", hash = "sha256:692349e588fde322875f8d3025ac01689fead5901e7fb18d6870a44519d62a29", size = 317862, upload-time = "2025-11-24T03:56:28.934Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/8a/d1/b902d38b6e5ba3bdddbec469bba388d647f960aeed7b5b3623a8debe8a76/msgspec-0.20.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9c1ff8db03be7598b50dd4b4a478d6fe93faae3bd54f4f17aa004d0e46c14c46", size = 196463, upload-time = "2025-11-24T03:55:43.405Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/57/b6/eff0305961a1d9447ec2b02f8c73c8946f22564d302a504185b730c9a761/msgspec-0.20.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f6532369ece217fd37c5ebcfd7e981f2615628c21121b7b2df9d3adcf2fd69b8", size = 188650, upload-time = "2025-11-24T03:55:44.761Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/99/93/f2ec1ae1de51d3fdee998a1ede6b2c089453a2ee82b5c1b361ed9095064a/msgspec-0.20.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9a1697da2f85a751ac3cc6a97fceb8e937fc670947183fb2268edaf4016d1ee", size = 218834, upload-time = "2025-11-24T03:55:46.441Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/28/83/36557b04cfdc317ed8a525c4993b23e43a8fbcddaddd78619112ca07138c/msgspec-0.20.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7fac7e9c92eddcd24c19d9e5f6249760941485dff97802461ae7c995a2450111", size = 224917, upload-time = "2025-11-24T03:55:48.06Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8f/56/362037a1ed5be0b88aced59272442c4b40065c659700f4b195a7f4d0ac88/msgspec-0.20.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f953a66f2a3eb8d5ea64768445e2bb301d97609db052628c3e1bcb7d87192a9f", size = 222821, upload-time = "2025-11-24T03:55:49.388Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/92/75/fa2370ec341cedf663731ab7042e177b3742645c5dd4f64dc96bd9f18a6b/msgspec-0.20.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:247af0313ae64a066d3aea7ba98840f6681ccbf5c90ba9c7d17f3e39dbba679c", size = 227227, upload-time = "2025-11-24T03:55:51.125Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f1/25/5e8080fe0117f799b1b68008dc29a65862077296b92550632de015128579/msgspec-0.20.0-cp313-cp313-win_amd64.whl", hash = "sha256:67d5e4dfad52832017018d30a462604c80561aa62a9d548fc2bd4e430b66a352", size = 189966, upload-time = "2025-11-24T03:55:52.458Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/79/b6/63363422153937d40e1cb349c5081338401f8529a5a4e216865decd981bf/msgspec-0.20.0-cp313-cp313-win_arm64.whl", hash = "sha256:91a52578226708b63a9a13de287b1ec3ed1123e4a088b198143860c087770458", size = 175378, upload-time = "2025-11-24T03:55:53.721Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bb/18/62dc13ab0260c7d741dda8dc7f481495b93ac9168cd887dda5929880eef8/msgspec-0.20.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:eead16538db1b3f7ec6e3ed1f6f7c5dec67e90f76e76b610e1ffb5671815633a", size = 196407, upload-time = "2025-11-24T03:55:55.001Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/dd/1d/b9949e4ad6953e9f9a142c7997b2f7390c81e03e93570c7c33caf65d27e1/msgspec-0.20.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:703c3bb47bf47801627fb1438f106adbfa2998fe586696d1324586a375fca238", size = 188889, upload-time = "2025-11-24T03:55:56.311Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/19/f8bb2dc0f1bfe46cc7d2b6b61c5e9b5a46c62298e8f4d03bbe499c926180/msgspec-0.20.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6cdb227dc585fb109305cee0fd304c2896f02af93ecf50a9c84ee54ee67dbb42", size = 219691, upload-time = "2025-11-24T03:55:57.908Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/b8/8e/6b17e43f6eb9369d9858ee32c97959fcd515628a1df376af96c11606cf70/msgspec-0.20.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:27d35044dd8818ac1bd0fedb2feb4fbdff4e3508dd7c5d14316a12a2d96a0de0", size = 224918, upload-time = "2025-11-24T03:55:59.322Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/1c/db/0e833a177db1a4484797adba7f429d4242585980b90882cc38709e1b62df/msgspec-0.20.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b4296393a29ee42dd25947981c65506fd4ad39beaf816f614146fa0c5a6c91ae", size = 223436, upload-time = "2025-11-24T03:56:00.716Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c3/30/d2ee787f4c918fd2b123441d49a7707ae9015e0e8e1ab51aa7967a97b90e/msgspec-0.20.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:205fbdadd0d8d861d71c8f3399fe1a82a2caf4467bc8ff9a626df34c12176980", size = 227190, upload-time = "2025-11-24T03:56:02.371Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ff/37/9c4b58ff11d890d788e700b827db2366f4d11b3313bf136780da7017278b/msgspec-0.20.0-cp314-cp314-win_amd64.whl", hash = "sha256:7dfebc94fe7d3feec6bc6c9df4f7e9eccc1160bb5b811fbf3e3a56899e398a6b", size = 193950, upload-time = "2025-11-24T03:56:03.668Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e9/4e/cab707bf2fa57408e2934e5197fc3560079db34a1e3cd2675ff2e47e07de/msgspec-0.20.0-cp314-cp314-win_arm64.whl", hash = "sha256:2ad6ae36e4a602b24b4bf4eaf8ab5a441fec03e1f1b5931beca8ebda68f53fc0", size = 179018, upload-time = "2025-11-24T03:56:05.038Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/4c/06/3da3fc9aaa55618a8f43eb9052453cfe01f82930bca3af8cea63a89f3a11/msgspec-0.20.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:f84703e0e6ef025663dd1de828ca028774797b8155e070e795c548f76dde65d5", size = 200389, upload-time = "2025-11-24T03:56:06.375Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/83/3b/cc4270a5ceab40dfe1d1745856951b0a24fd16ac8539a66ed3004a60c91e/msgspec-0.20.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7c83fc24dd09cf1275934ff300e3951b3adc5573f0657a643515cc16c7dee131", size = 193198, upload-time = "2025-11-24T03:56:07.742Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cd/ae/4c7905ac53830c8e3c06fdd60e3cdcfedc0bbc993872d1549b84ea21a1bd/msgspec-0.20.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f13ccb1c335a124e80c4562573b9b90f01ea9521a1a87f7576c2e281d547f56", size = 225973, upload-time = "2025-11-24T03:56:09.18Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d9/da/032abac1de4d0678d99eaeadb1323bd9d247f4711c012404ba77ed6f15ca/msgspec-0.20.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:17c2b5ca19f19306fc83c96d85e606d2cc107e0caeea85066b5389f664e04846", size = 229509, upload-time = "2025-11-24T03:56:10.898Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/69/52/fdc7bdb7057a166f309e0b44929e584319e625aaba4771b60912a9321ccd/msgspec-0.20.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d931709355edabf66c2dd1a756b2d658593e79882bc81aae5964969d5a291b63", size = 230434, upload-time = "2025-11-24T03:56:12.48Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/cb/fe/1dfd5f512b26b53043884e4f34710c73e294e7cc54278c3fe28380e42c37/msgspec-0.20.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:565f915d2e540e8a0c93a01ff67f50aebe1f7e22798c6a25873f9fda8d1325f8", size = 231758, upload-time = "2025-11-24T03:56:13.765Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/97/f6/9ba7121b8e0c4e0beee49575d1dbc804e2e72467692f0428cf39ceba1ea5/msgspec-0.20.0-cp314-cp314t-win_amd64.whl", hash = "sha256:726f3e6c3c323f283f6021ebb6c8ccf58d7cd7baa67b93d73bfbe9a15c34ab8d", size = 206540, upload-time = "2025-11-24T03:56:15.029Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c8/3e/c5187de84bb2c2ca334ab163fcacf19a23ebb1d876c837f81a1b324a15bf/msgspec-0.20.0-cp314-cp314t-win_arm64.whl", hash = "sha256:93f23528edc51d9f686808a361728e903d6f2be55c901d6f5c92e44c6d546bfc", size = 183011, upload-time = "2025-11-24T03:56:16.442Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "multitasking"
|
||||
version = "0.0.12"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/17/0d/74f0293dfd7dcc3837746d0138cbedd60b31701ecc75caec7d3f281feba0/multitasking-0.0.12.tar.gz", hash = "sha256:2fba2fa8ed8c4b85e227c5dd7dc41c7d658de3b6f247927316175a57349b84d1", size = 19984, upload-time = "2025-07-20T21:27:51.636Z" }
|
||||
|
||||
[[package]]
|
||||
name = "nest-asyncio"
|
||||
version = "1.6.0"
|
||||
@@ -1774,6 +1881,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl", hash = "sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723", size = 55206, upload-time = "2026-01-27T03:59:45.137Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "peewee"
|
||||
version = "4.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/37/e3/98ed8ab20f26d429f61b3d5d455c52ac88ba343444fbcf7154374111eb3e/peewee-4.0.0.tar.gz", hash = "sha256:bc2722abf32a8074362c346fc8a95f2d34a9587873e81025b6429676c32044b6", size = 686951, upload-time = "2026-02-20T15:38:50.312Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/39/40/89664cce41f4bedab105d705885cbb152d7bd85ce0facbf0ec02e90eb02a/peewee-4.0.0-py3-none-any.whl", hash = "sha256:6de14ff11ab50c3152dc1d4e12628c1b28c1e03ff4e4213e463429bfcd7340b6", size = 139317, upload-time = "2026-02-20T15:38:48.519Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pexpect"
|
||||
version = "4.9.0"
|
||||
@@ -2866,6 +2982,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "soupsieve"
|
||||
version = "2.8.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/7b/ae/2d9c981590ed9999a0d91755b47fc74f74de286b0f5cee14c9269041e6c4/soupsieve-2.8.3.tar.gz", hash = "sha256:3267f1eeea4251fb42728b6dfb746edc9acaffc4a45b27e19450b676586e8349", size = 118627, upload-time = "2026-01-20T04:27:02.457Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sqlalchemy"
|
||||
version = "2.0.46"
|
||||
@@ -3466,6 +3591,29 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a4/f5/10b68b7b1544245097b2a1b8238f66f2fc6dcaeb24ba5d917f52bd2eed4f/wsproto-1.3.2-py3-none-any.whl", hash = "sha256:61eea322cdf56e8cc904bd3ad7573359a242ba65688716b0710a5eb12beab584", size = 24405, upload-time = "2025-11-20T18:18:00.454Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yfinance"
|
||||
version = "1.2.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "curl-cffi" },
|
||||
{ name = "frozendict" },
|
||||
{ name = "multitasking" },
|
||||
{ name = "numpy" },
|
||||
{ name = "pandas" },
|
||||
{ name = "peewee" },
|
||||
{ name = "platformdirs" },
|
||||
{ name = "protobuf" },
|
||||
{ name = "pytz" },
|
||||
{ name = "requests" },
|
||||
{ name = "websockets" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c9/1b/431d0ebd6a1e9deaffc8627cc4d26fd869841f31a1429cab7443eced0766/yfinance-1.2.0.tar.gz", hash = "sha256:80cec643eb983330ca63debab1b5492334fa1e6338d82cb17dd4e7b95079cfab", size = 140501, upload-time = "2026-02-16T19:52:34.368Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/1e/60/462859de757ac56830824da7e8cf314b8b0321af5853df867c84cd6c2128/yfinance-1.2.0-py2.py3-none-any.whl", hash = "sha256:1c27d1ebfc6275f476721cc6dba035a49d0cf9a806d6aa1785c9e10cf8a610d8", size = 130247, upload-time = "2026-02-16T19:52:33.109Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zipp"
|
||||
version = "3.23.0"
|
||||
|
||||
76
vision.md
76
vision.md
@@ -91,61 +91,47 @@ We move fast, ship incrementally, and prioritize value over vanity metrics.
|
||||
|
||||
## Current State (February 2026)
|
||||
|
||||
### What's Working
|
||||
- USDA PSD Online extraction (2006-present, monthly archives)
|
||||
- 4-layer SQLMesh pipeline (raw → staging → cleaned → serving)
|
||||
- DuckDB backend (local dev + production lakehouse)
|
||||
- Incremental-by-time-range models with deduplication
|
||||
- Development environment with pre-commit hooks, linting, formatting
|
||||
- **Web app (BeanFlows.coffee)** — Quart + HTMX, deployed via Docker
|
||||
- Magic-link auth + signup with waitlist flow
|
||||
- Coffee analytics dashboard: time series, top producers, stock-to-use trend, supply/demand balance, YoY change
|
||||
- Country comparison view
|
||||
- User settings + account management
|
||||
- API key management (create, revoke, prefix display)
|
||||
- Plan-based access control (free / starter / pro) with 5-year history cap on free tier
|
||||
- Billing via Paddle (subscriptions + webhooks)
|
||||
- Admin panel (users, waitlist, feedback, tasks)
|
||||
- REST API with Bearer token auth, rate limiting (1000 req/hr), CSV export
|
||||
- Feedback + waitlist capture
|
||||
- GitLab CI pipeline (lint, test, build), regression tests for billing/auth/API
|
||||
### What's Shipped
|
||||
- USDA PSD Online extraction + full SQLMesh pipeline (raw→staging→cleaned→serving)
|
||||
- CFTC COT disaggregated futures: weekly positioning, COT index, managed money net
|
||||
- KC=F Coffee C futures prices: daily OHLCV, 20d/50d SMA, 52-week range (1971–present)
|
||||
- ICE certified warehouse stocks: extractor ready, awaiting URL confirmation
|
||||
- Web app (Quart + HTMX): dashboard with supply/demand + COT + price + ICE charts
|
||||
- REST API with key auth + rate limiting: /metrics, /positioning, /prices, /stocks
|
||||
- Paddle billing (Starter/Pro plans), magic-link auth, admin panel
|
||||
- /methodology page with full data source documentation
|
||||
- Automated supervisor: all extractors + webhook alerting on failure
|
||||
- 23 passing tests, GitLab CI pipeline
|
||||
|
||||
### What We Have
|
||||
- Comprehensive commodity supply/demand data (USDA PSD, 2006–present)
|
||||
- Established naming conventions and data quality patterns
|
||||
- Full product pipeline: data → DB → API → web dashboard
|
||||
- Paddle billing integration (Starter + Pro tiers)
|
||||
- Working waitlist to capture early interest
|
||||
### What's Missing
|
||||
- ICE stocks URL confirmation + historical backfill (URL needs manual discovery at theice.com/report-center)
|
||||
- Python SDK
|
||||
- Public API documentation
|
||||
|
||||
## Roadmap
|
||||
|
||||
### Phase 1: Coffee Market Foundation (In Progress → ~70% done)
|
||||
### Phase 1: Coffee Market Foundation (COMPLETE — ready for outreach)
|
||||
**Goal:** Build complete coffee analytics from supply to price
|
||||
|
||||
**Data Sources to Integrate:**
|
||||
**Data Sources:**
|
||||
- ✅ USDA PSD Online (production, stocks, consumption)
|
||||
- ⬜ CFTC COT data (trader positioning — weekly, Coffee C futures code 083731)
|
||||
- ⬜ Coffee futures prices — KC=F via Yahoo Finance / yfinance, or Databento for tick-level
|
||||
- ⬜ ICO (International Coffee Organization) data — trade volumes, consumption stats
|
||||
- ⬜ ICE certified warehouse stocks (daily CSV from ICE Report Center — free)
|
||||
- ⬜ Weather data for growing regions — ECMWF/Open-Meteo (free), Brazil frost alerts
|
||||
- ✅ CFTC COT data (trader positioning, COT index)
|
||||
- ✅ KC=F Coffee futures prices (daily OHLCV, moving averages)
|
||||
- ✅ ICE warehouse stocks (extractor built, seed models deployed)
|
||||
- ⬜ ICO (International Coffee Organization) — future
|
||||
|
||||
**Features to Build:**
|
||||
- ✅ Web dashboard (supply/demand, stock-to-use trend, YoY, country comparison)
|
||||
- ✅ REST API with key auth, plan-based access, rate limiting
|
||||
- ✅ CSV export
|
||||
- ⬜ CFTC COT integration → trader sentiment indicators
|
||||
- ⬜ Historical price data → price/supply correlation analysis
|
||||
- ⬜ Python SDK (`pip install beanflows`) — critical for the quant analyst beachhead
|
||||
- ⬜ Data methodology documentation page — P0 for trust (see strategy doc)
|
||||
- ⬜ Parquet export endpoint
|
||||
- ⬜ Example Jupyter notebooks (show how to pipe data into common models)
|
||||
**Features:**
|
||||
- ✅ Dashboard: supply/demand + COT + price + ICE warehouse charts
|
||||
- ✅ REST API: all 4 data sources
|
||||
- ✅ Data methodology page
|
||||
- ✅ Automated daily pipeline with alerting
|
||||
- ⬜ Python SDK
|
||||
- ⬜ Historical correlation analysis
|
||||
|
||||
**Infrastructure:**
|
||||
- ⬜ Cloudflare R2 for raw data storage (rclone sync is partly planned)
|
||||
- ⬜ Automated daily pipeline on Hetzner (SQLMesh prod + cron)
|
||||
- ⬜ Pipeline monitoring + alerting (failure notifications)
|
||||
- ⬜ Published SLA for data freshness
|
||||
- ✅ Supervisor loop with all extractors
|
||||
- ⬜ Move to Cloudflare R2 for raw data backup
|
||||
- ⬜ Deploy to Hetzner production
|
||||
|
||||
### Phase 2: Product Market Fit
|
||||
**Goal:** Validate with real traders, iterate on feedback
|
||||
|
||||
@@ -316,6 +316,79 @@ async def get_cot_index_trend(
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Coffee Prices Queries
|
||||
# =============================================================================
|
||||
|
||||
# KC=F Yahoo Finance ticker
|
||||
COFFEE_TICKER = "KC=F"
|
||||
|
||||
|
||||
async def get_price_time_series(ticker: str, limit: int = 504) -> list[dict]:
    """Daily OHLCV + moving averages from serving.coffee_prices.

    Args:
        ticker: Futures ticker symbol (e.g. "KC=F").
        limit: Maximum rows returned, newest first. Default 504 ≈ 2 trading years.

    Returns:
        Rows ordered by trade_date DESC (newest first).

    Raises:
        ValueError: If limit is outside [1, 5000].
    """
    # Validate with a real exception — `assert` is silently stripped under
    # `python -O`, which would let an out-of-range LIMIT reach the database.
    if not 1 <= limit <= 5000:
        raise ValueError("limit must be between 1 and 5000")
    return await fetch_analytics(
        """
        SELECT trade_date, open, high, low, close, volume,
               daily_return_pct, sma_20d, sma_50d, high_52w, low_52w
        FROM serving.coffee_prices
        WHERE ticker = ?
        ORDER BY trade_date DESC
        LIMIT ?
        """,
        [ticker, limit],
    )
|
||||
|
||||
|
||||
async def get_price_latest(ticker: str) -> dict | None:
    """Latest trading day's close price, daily return, and 52-week range.

    Returns None when the serving table has no rows for the ticker.
    """
    result = await fetch_analytics(
        """
        SELECT trade_date, close, daily_return_pct, high_52w, low_52w, sma_20d, sma_50d
        FROM serving.coffee_prices
        WHERE ticker = ?
        ORDER BY trade_date DESC
        LIMIT 1
        """,
        [ticker],
    )
    if not result:
        return None
    return result[0]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# ICE Warehouse Stocks Queries
|
||||
# =============================================================================
|
||||
|
||||
|
||||
async def get_ice_stocks_trend(days: int = 365) -> list[dict]:
    """Daily ICE certified stocks over the trailing N days.

    Args:
        days: Number of most-recent daily reports to return. Default 365.

    Returns:
        Rows ordered by report_date DESC (newest first).

    Raises:
        ValueError: If days is outside [1, 3650].
    """
    # Validate with a real exception — `assert` is silently stripped under
    # `python -O`, which would let an out-of-range LIMIT reach the database.
    if not 1 <= days <= 3650:
        raise ValueError("days must be between 1 and 3650")
    return await fetch_analytics(
        """
        SELECT report_date, total_certified_bags, pending_grading_bags,
               wow_change_bags, avg_30d_bags, high_52w_bags, drawdown_from_52w_high_pct
        FROM serving.ice_warehouse_stocks
        ORDER BY report_date DESC
        LIMIT ?
        """,
        [days],
    )
|
||||
|
||||
|
||||
async def get_ice_stocks_latest() -> dict | None:
    """Latest ICE certified warehouse stock report.

    Returns None when the serving table is empty.
    """
    result = await fetch_analytics(
        """
        SELECT report_date, total_certified_bags, pending_grading_bags,
               wow_change_bags, avg_30d_bags, drawdown_from_52w_high_pct
        FROM serving.ice_warehouse_stocks
        ORDER BY report_date DESC
        LIMIT 1
        """
    )
    if not result:
        return None
    return result[0]
|
||||
|
||||
|
||||
async def get_country_comparison(
|
||||
commodity_code: int,
|
||||
country_codes: list[str],
|
||||
|
||||
@@ -198,6 +198,55 @@ async def commodity_positioning_latest(code: str):
|
||||
return jsonify({"cftc_commodity_code": code, "data": data})
|
||||
|
||||
|
||||
@bp.route("/commodities/<code>/prices")
@api_key_required(scopes=["read"])
async def commodity_prices(code: str):
    """Daily OHLCV price time series for a commodity ticker (e.g. KC=F).

    Query params:
        start_date — ISO date filter (YYYY-MM-DD)
        end_date — ISO date filter (YYYY-MM-DD)
        limit — max rows returned (default 504 ≈ 2 years, max 5000)
    """
    # Parse `limit` defensively: a non-numeric query value would raise an
    # uncaught ValueError (HTTP 500), and min() alone leaves zero/negative
    # values through to the analytics layer's range check. Fall back to the
    # default on bad input and clamp to [1, 5000].
    try:
        limit = int(request.args.get("limit", 504))
    except (TypeError, ValueError):
        limit = 504
    limit = max(1, min(limit, 5000))
    start_date = request.args.get("start_date")
    end_date = request.args.get("end_date")

    data = await analytics.get_price_time_series(code, limit=limit)

    # Apply date filters in Python — simpler than adding optional params to the query
    if start_date:
        data = [r for r in data if str(r["trade_date"]) >= start_date]
    if end_date:
        data = [r for r in data if str(r["trade_date"]) <= end_date]

    return jsonify({"ticker": code, "data": data})
|
||||
|
||||
|
||||
@bp.route("/commodities/<code>/stocks")
@api_key_required(scopes=["read"])
async def commodity_ice_stocks(code: str):
    """ICE certified warehouse stock time series.

    Query params:
        start_date — ISO date filter (YYYY-MM-DD)
        end_date — ISO date filter (YYYY-MM-DD)
        days — trailing days (default 365, max 3650)
    """
    # Parse `days` defensively: a non-numeric query value would raise an
    # uncaught ValueError (HTTP 500), and min() alone leaves zero/negative
    # values through to the analytics layer's range check. Fall back to the
    # default on bad input and clamp to [1, 3650].
    try:
        days = int(request.args.get("days", 365))
    except (TypeError, ValueError):
        days = 365
    days = max(1, min(days, 3650))
    start_date = request.args.get("start_date")
    end_date = request.args.get("end_date")

    data = await analytics.get_ice_stocks_trend(days=days)

    # Apply date filters in Python — simpler than adding optional params to the query
    if start_date:
        data = [r for r in data if str(r["report_date"]) >= start_date]
    if end_date:
        data = [r for r in data if str(r["report_date"]) <= end_date]

    return jsonify({"commodity": code, "data": data})
|
||||
|
||||
|
||||
@bp.route("/commodities/<int:code>/metrics.csv")
|
||||
@api_key_required(scopes=["read"])
|
||||
async def commodity_metrics_csv(code: int):
|
||||
|
||||
@@ -111,10 +111,19 @@ async def index():
|
||||
analytics.get_production_yoy_by_country(analytics.COFFEE_COMMODITY_CODE, limit=15),
|
||||
analytics.get_cot_positioning_latest(analytics.COFFEE_CFTC_CODE),
|
||||
analytics.get_cot_index_trend(analytics.COFFEE_CFTC_CODE, weeks=104),
|
||||
analytics.get_price_time_series(analytics.COFFEE_TICKER, limit=504),
|
||||
analytics.get_price_latest(analytics.COFFEE_TICKER),
|
||||
analytics.get_ice_stocks_trend(days=365),
|
||||
analytics.get_ice_stocks_latest(),
|
||||
return_exceptions=True,
|
||||
)
|
||||
defaults = [[], [], [], [], [], None, []]
|
||||
time_series, top_producers, stu_trend, balance, yoy, cot_latest, cot_trend = [
|
||||
defaults = [[], [], [], [], [], None, [], [], None, [], None]
|
||||
(
|
||||
time_series, top_producers, stu_trend, balance, yoy,
|
||||
cot_latest, cot_trend,
|
||||
price_series, price_latest,
|
||||
ice_stocks_trend, ice_stocks_latest,
|
||||
) = [
|
||||
r if not isinstance(r, Exception) else (
|
||||
current_app.logger.warning("Analytics query %d failed: %s", i, r) or defaults[i]
|
||||
)
|
||||
@@ -123,6 +132,8 @@ async def index():
|
||||
else:
|
||||
time_series, top_producers, stu_trend, balance, yoy = [], [], [], [], []
|
||||
cot_latest, cot_trend = None, []
|
||||
price_series, price_latest = [], None
|
||||
ice_stocks_trend, ice_stocks_latest = [], None
|
||||
|
||||
# Latest global snapshot for key metric cards
|
||||
latest = time_series[-1] if time_series else {}
|
||||
@@ -148,6 +159,10 @@ async def index():
|
||||
yoy=yoy,
|
||||
cot_latest=cot_latest,
|
||||
cot_trend=cot_trend,
|
||||
price_series=price_series,
|
||||
price_latest=price_latest,
|
||||
ice_stocks_trend=ice_stocks_trend,
|
||||
ice_stocks_latest=ice_stocks_latest,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -147,6 +147,80 @@
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Coffee Futures Price (KC=F) -->
|
||||
{% if price_latest %}
|
||||
<div class="chart-container mb-8">
|
||||
<h2 class="text-xl mb-1">Coffee C Futures Price — KC=F</h2>
|
||||
<p class="text-muted mb-4">ICE Coffee C Arabica · Daily close price · Source: Yahoo Finance</p>
|
||||
<div class="grid-4 mb-4">
|
||||
<div class="metric-card">
|
||||
<div class="metric-label">Latest Close</div>
|
||||
<div class="metric-value">{{ "{:.2f}".format(price_latest.close) }}</div>
|
||||
<div class="metric-sub">¢/lb · as of {{ price_latest.trade_date }}</div>
|
||||
</div>
|
||||
<div class="metric-card">
|
||||
<div class="metric-label">Daily Change</div>
|
||||
<div class="metric-value {% if price_latest.daily_return_pct and price_latest.daily_return_pct > 0 %}text-green{% elif price_latest.daily_return_pct and price_latest.daily_return_pct < 0 %}text-red{% endif %}">
|
||||
{% if price_latest.daily_return_pct is not none %}
|
||||
{{ "{:+.2f}%".format(price_latest.daily_return_pct) }}
|
||||
{% else %}--{% endif %}
|
||||
</div>
|
||||
<div class="metric-sub">vs previous close</div>
|
||||
</div>
|
||||
<div class="metric-card">
|
||||
<div class="metric-label">52-Week High</div>
|
||||
<div class="metric-value">{{ "{:.2f}".format(price_latest.high_52w) }}</div>
|
||||
<div class="metric-sub">¢/lb</div>
|
||||
</div>
|
||||
<div class="metric-card">
|
||||
<div class="metric-label">52-Week Low</div>
|
||||
<div class="metric-value">{{ "{:.2f}".format(price_latest.low_52w) }}</div>
|
||||
<div class="metric-sub">¢/lb</div>
|
||||
</div>
|
||||
</div>
|
||||
<canvas id="priceChart"></canvas>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- ICE Certified Warehouse Stocks -->
|
||||
{% if ice_stocks_latest %}
|
||||
<div class="chart-container mb-8">
|
||||
<h2 class="text-xl mb-1">ICE Certified Warehouse Stocks</h2>
|
||||
<p class="text-muted mb-4">Physical Arabica certified for delivery against ICE Coffee C futures · as of {{ ice_stocks_latest.report_date }}</p>
|
||||
<div class="grid-4 mb-4">
|
||||
<div class="metric-card">
|
||||
<div class="metric-label">Certified Stocks</div>
|
||||
<div class="metric-value">{{ "{:,.0f}".format(ice_stocks_latest.total_certified_bags) }}</div>
|
||||
<div class="metric-sub">60-kg bags</div>
|
||||
</div>
|
||||
<div class="metric-card">
|
||||
<div class="metric-label">Week-over-Week</div>
|
||||
<div class="metric-value {% if ice_stocks_latest.wow_change_bags and ice_stocks_latest.wow_change_bags > 0 %}text-green{% elif ice_stocks_latest.wow_change_bags and ice_stocks_latest.wow_change_bags < 0 %}text-red{% endif %}">
|
||||
{% if ice_stocks_latest.wow_change_bags is not none %}
|
||||
{{ "{:+,d}".format(ice_stocks_latest.wow_change_bags | int) }}
|
||||
{% else %}--{% endif %}
|
||||
</div>
|
||||
<div class="metric-sub">bags vs previous week</div>
|
||||
</div>
|
||||
<div class="metric-card">
|
||||
<div class="metric-label">30-Day Average</div>
|
||||
<div class="metric-value">{{ "{:,.0f}".format(ice_stocks_latest.avg_30d_bags) }}</div>
|
||||
<div class="metric-sub">60-kg bags</div>
|
||||
</div>
|
||||
<div class="metric-card">
|
||||
<div class="metric-label">Drawdown from 52w High</div>
|
||||
<div class="metric-value {% if ice_stocks_latest.drawdown_from_52w_high_pct and ice_stocks_latest.drawdown_from_52w_high_pct < -10 %}text-red{% endif %}">
|
||||
{% if ice_stocks_latest.drawdown_from_52w_high_pct is not none %}
|
||||
{{ "{:.1f}%".format(ice_stocks_latest.drawdown_from_52w_high_pct) }}
|
||||
{% else %}--{% endif %}
|
||||
</div>
|
||||
<div class="metric-sub">below 52-week peak</div>
|
||||
</div>
|
||||
</div>
|
||||
<canvas id="iceStocksChart"></canvas>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<!-- Quick Actions -->
|
||||
<div class="grid-3">
|
||||
<a href="{{ url_for('dashboard.countries') }}" class="btn-outline text-center">Country Comparison</a>
|
||||
@@ -284,6 +358,97 @@ if (cotRaw && cotRaw.length > 0) {
|
||||
});
|
||||
}
|
||||
|
||||
// -- Coffee Prices Chart (close + 20d MA + 50d MA) --
|
||||
const priceRaw = {{ price_series | tojson }};
|
||||
if (priceRaw && priceRaw.length > 0) {
|
||||
const priceData = [...priceRaw].reverse(); // query returns DESC, chart needs ASC
|
||||
new Chart(document.getElementById('priceChart'), {
|
||||
type: 'line',
|
||||
data: {
|
||||
labels: priceData.map(r => r.trade_date),
|
||||
datasets: [
|
||||
{
|
||||
label: 'Close (¢/lb)',
|
||||
data: priceData.map(r => r.close),
|
||||
borderColor: CHART_COLORS.copper,
|
||||
backgroundColor: CHART_COLORS.copper + '18',
|
||||
fill: true,
|
||||
tension: 0.2,
|
||||
pointRadius: 0,
|
||||
yAxisID: 'y'
|
||||
},
|
||||
{
|
||||
label: '20d MA',
|
||||
data: priceData.map(r => r.sma_20d),
|
||||
borderColor: CHART_COLORS.beanGreen,
|
||||
borderDash: [4, 3],
|
||||
tension: 0.2,
|
||||
pointRadius: 0,
|
||||
yAxisID: 'y'
|
||||
},
|
||||
{
|
||||
label: '50d MA',
|
||||
data: priceData.map(r => r.sma_50d),
|
||||
borderColor: CHART_COLORS.roast,
|
||||
borderDash: [8, 4],
|
||||
tension: 0.2,
|
||||
pointRadius: 0,
|
||||
yAxisID: 'y'
|
||||
}
|
||||
]
|
||||
},
|
||||
options: {
|
||||
responsive: true,
|
||||
interaction: {mode: 'index', intersect: false},
|
||||
plugins: {legend: {position: 'bottom'}},
|
||||
scales: {
|
||||
x: {ticks: {maxTicksLimit: 12}},
|
||||
y: {title: {display: true, text: '¢/lb'}, beginAtZero: false}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// -- ICE Warehouse Stocks Chart --
|
||||
const iceRaw = {{ ice_stocks_trend | tojson }};
|
||||
if (iceRaw && iceRaw.length > 0) {
|
||||
const iceData = [...iceRaw].reverse(); // query returns DESC, chart needs ASC
|
||||
new Chart(document.getElementById('iceStocksChart'), {
|
||||
type: 'line',
|
||||
data: {
|
||||
labels: iceData.map(r => r.report_date),
|
||||
datasets: [
|
||||
{
|
||||
label: 'Certified Stocks (bags)',
|
||||
data: iceData.map(r => r.total_certified_bags),
|
||||
borderColor: CHART_COLORS.roast,
|
||||
backgroundColor: CHART_COLORS.roast + '18',
|
||||
fill: true,
|
||||
tension: 0.2,
|
||||
pointRadius: 0
|
||||
},
|
||||
{
|
||||
label: '30d Average',
|
||||
data: iceData.map(r => r.avg_30d_bags),
|
||||
borderColor: CHART_COLORS.stone,
|
||||
borderDash: [5, 4],
|
||||
tension: 0.2,
|
||||
pointRadius: 0
|
||||
}
|
||||
]
|
||||
},
|
||||
options: {
|
||||
responsive: true,
|
||||
interaction: {mode: 'index', intersect: false},
|
||||
plugins: {legend: {position: 'bottom'}},
|
||||
scales: {
|
||||
x: {ticks: {maxTicksLimit: 12}},
|
||||
y: {title: {display: true, text: '60-kg bags'}, beginAtZero: false}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// -- Top Producers Horizontal Bar --
|
||||
const topData = {{ top_producers | tojson }};
|
||||
if (topData.length > 0) {
|
||||
|
||||
@@ -46,6 +46,12 @@ async def about():
|
||||
return await render_template("about.html")
|
||||
|
||||
|
||||
@bp.route("/methodology")
|
||||
async def methodology():
|
||||
"""Data methodology page — explains all data sources."""
|
||||
return await render_template("methodology.html")
|
||||
|
||||
|
||||
@bp.route("/feedback", methods=["POST"])
|
||||
@csrf_protect
|
||||
async def feedback():
|
||||
|
||||
230
web/src/beanflows/public/templates/methodology.html
Normal file
230
web/src/beanflows/public/templates/methodology.html
Normal file
@@ -0,0 +1,230 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Data Methodology — {{ config.APP_NAME }}{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<main>
|
||||
<!-- Hero -->
|
||||
<section class="hero">
|
||||
<div class="container-page">
|
||||
<h1 class="heading-display">Data Methodology</h1>
|
||||
<p>Every number on BeanFlows has a source, a frequency, and a known limitation. Here's exactly where the data comes from and how we process it.</p>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- Table of Contents -->
|
||||
<section class="container-page py-8 max-w-3xl mx-auto">
|
||||
<nav class="bg-latte rounded-lg p-6 mb-12">
|
||||
<h2 class="text-sm font-semibold text-espresso uppercase tracking-wide mb-3">On this page</h2>
|
||||
<ul class="list-none p-0 space-y-1.5 text-sm">
|
||||
<li><a href="#usda-psd" class="text-copper">USDA Production, Supply & Distribution</a></li>
|
||||
<li><a href="#cftc-cot" class="text-copper">CFTC Commitments of Traders</a></li>
|
||||
<li><a href="#kc-price" class="text-copper">Coffee Futures Price (KC=F)</a></li>
|
||||
<li><a href="#ice-stocks" class="text-copper">ICE Certified Warehouse Stocks</a></li>
|
||||
<li><a href="#data-quality" class="text-copper">Data Quality</a></li>
|
||||
<li><a href="#update-schedule" class="text-copper">Update Schedule</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
<!-- USDA PSD -->
|
||||
<section id="usda-psd" class="mb-12">
|
||||
<h2 class="text-2xl mb-4">USDA Production, Supply & Distribution</h2>
|
||||
<p class="text-stone mb-4">The USDA's <strong>Production, Supply and Distribution (PSD) Online</strong> database is the definitive public source for agricultural commodity supply and demand balances. It is maintained by the USDA Foreign Agricultural Service and covers 160+ countries and 50+ commodities going back to the 1960s for some crops.</p>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">What we use</h3>
|
||||
<ul class="list-disc list-inside text-stone space-y-1.5 mb-4">
|
||||
<li><strong>Commodity:</strong> Coffee, Green — USDA commodity code <code class="bg-parchment px-1 rounded">0711100</code></li>
|
||||
<li><strong>Coverage:</strong> 2006–present (monthly updates)</li>
|
||||
<li><strong>Geography:</strong> Country-level + world aggregate</li>
|
||||
<li><strong>Source URL:</strong> <code class="bg-parchment px-1 rounded">apps.fas.usda.gov/psdonlineapi</code></li>
|
||||
</ul>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">Metrics</h3>
|
||||
<div class="overflow-x-auto mb-4">
|
||||
<table class="table text-sm">
|
||||
<thead>
|
||||
<tr><th>Metric</th><th>Definition</th><th>Unit</th></tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr><td>Production</td><td>Harvested green coffee output</td><td>1,000 × 60-kg bags</td></tr>
|
||||
<tr><td>Imports</td><td>Physical coffee imported into country</td><td>1,000 × 60-kg bags</td></tr>
|
||||
<tr><td>Exports</td><td>Physical coffee exported from country</td><td>1,000 × 60-kg bags</td></tr>
|
||||
<tr><td>Domestic Consumption</td><td>Coffee consumed within country</td><td>1,000 × 60-kg bags</td></tr>
|
||||
<tr><td>Ending Stocks</td><td>Carry-over stocks at marketing year end</td><td>1,000 × 60-kg bags</td></tr>
|
||||
<tr><td>Stock-to-Use Ratio</td><td>Ending stocks ÷ consumption × 100</td><td>%</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">Release schedule</h3>
|
||||
<p class="text-stone mb-4">USDA publishes PSD updates monthly, typically in the second week of the month as part of the <em>World Agricultural Supply and Demand Estimates (WASDE)</em> report. Our pipeline checks for updates daily and downloads new data when the file hash changes.</p>
|
||||
|
||||
<div class="bg-parchment rounded p-4 text-sm text-stone">
|
||||
<strong>Note on marketing years:</strong> Coffee marketing years vary by origin country. Brazil's marketing year runs April–March; Colombia's runs October–September. USDA normalizes all data to a common market year basis for the global aggregate.
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- CFTC COT -->
|
||||
<section id="cftc-cot" class="mb-12">
|
||||
<h2 class="text-2xl mb-4">CFTC Commitments of Traders</h2>
|
||||
<p class="text-stone mb-4">The <strong>Commitments of Traders (COT)</strong> report is published weekly by the U.S. Commodity Futures Trading Commission (CFTC). It shows the net positions of large traders in regulated futures markets. It is the primary public indicator of speculative positioning in agricultural commodities.</p>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">What we use</h3>
|
||||
<ul class="list-disc list-inside text-stone space-y-1.5 mb-4">
|
||||
<li><strong>Report type:</strong> Disaggregated Futures-Only</li>
|
||||
<li><strong>Commodity:</strong> Coffee C — CFTC code <code class="bg-parchment px-1 rounded">083</code></li>
|
||||
<li><strong>Snapshot date:</strong> Every Tuesday close-of-business</li>
|
||||
<li><strong>Release date:</strong> The following Friday at 3:30 PM ET</li>
|
||||
<li><strong>Coverage:</strong> June 2006–present</li>
|
||||
<li><strong>Source:</strong> <code class="bg-parchment px-1 rounded">cftc.gov/files/dea/history/fut_disagg_txt_{year}.zip</code></li>
|
||||
</ul>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">Trader categories</h3>
|
||||
<div class="overflow-x-auto mb-4">
|
||||
<table class="table text-sm">
|
||||
<thead>
|
||||
<tr><th>Category</th><th>Who they are</th><th>What to watch</th></tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr><td>Managed Money</td><td>Hedge funds, CTAs, algorithmic traders</td><td>Primary speculative signal — net long = bullish</td></tr>
|
||||
<tr><td>Producer / Merchant</td><td>Coffee exporters, processors, roasters</td><td>Commercial hedgers — usually net short</td></tr>
|
||||
<tr><td>Swap Dealers</td><td>Banks providing OTC commodity exposure</td><td>Index fund replication — less directional signal</td></tr>
|
||||
<tr><td>Other Reportables</td><td>Large traders not fitting other categories</td><td>Mixed motivations</td></tr>
|
||||
<tr><td>Non-Reportable</td><td>Small speculators below CFTC threshold</td><td>Retail sentiment proxy</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">COT Index</h3>
|
||||
<p class="text-stone mb-4">The <strong>COT Index</strong> normalizes the managed money net position to a 0–100 scale over a trailing window (we publish both 26-week and 52-week). It is calculated as:</p>
|
||||
<div class="bg-parchment rounded p-4 text-sm font-mono mb-4">
|
||||
COT Index = (current net − min over window) ÷ (max over window − min over window) × 100
|
||||
</div>
|
||||
<p class="text-stone mb-4">A reading near 0 indicates managed money is at its most bearish extreme over the window. A reading near 100 indicates maximum bullish positioning. Think of it as an RSI for speculative positioning.</p>
|
||||
</section>
|
||||
|
||||
<!-- KC=F Price -->
|
||||
<section id="kc-price" class="mb-12">
|
||||
<h2 class="text-2xl mb-4">Coffee Futures Price (KC=F)</h2>
|
||||
<p class="text-stone mb-4">The <strong>Coffee C contract</strong> (ticker: KC=F) is the global benchmark price for Arabica coffee, traded on ICE Futures U.S. (formerly New York Board of Trade). Each contract covers 37,500 lbs of green coffee. Price is quoted in US cents per pound (¢/lb).</p>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">What we use</h3>
|
||||
<ul class="list-disc list-inside text-stone space-y-1.5 mb-4">
|
||||
<li><strong>Ticker:</strong> KC=F (front-month continuous contract)</li>
|
||||
<li><strong>Data:</strong> Daily OHLCV (Open, High, Low, Close, Adjusted Close, Volume)</li>
|
||||
<li><strong>Source:</strong> Yahoo Finance via yfinance</li>
|
||||
<li><strong>Coverage:</strong> 1971–present</li>
|
||||
<li><strong>Delay:</strong> ~15-minute delayed (Yahoo Finance standard)</li>
|
||||
</ul>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">Derived metrics</h3>
|
||||
<ul class="list-disc list-inside text-stone space-y-1.5 mb-4">
|
||||
<li><strong>Daily Return %:</strong> (close − prev close) ÷ prev close × 100</li>
|
||||
<li><strong>20-day SMA:</strong> Simple moving average of the last 20 trading days</li>
|
||||
<li><strong>50-day SMA:</strong> Simple moving average of the last 50 trading days</li>
|
||||
<li><strong>52-week High/Low:</strong> Rolling high/low over the trailing ~252 trading days</li>
|
||||
</ul>
|
||||
|
||||
<div class="bg-parchment rounded p-4 text-sm text-stone">
|
||||
<strong>Front-month continuity:</strong> KC=F is the continuous front-month contract. At roll dates, there is a price gap between expiring and next-month contracts. Adjusted Close accounts for roll adjustments. We use raw Close for current price display and Adjusted Close for historical return calculations.
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<!-- ICE Warehouse Stocks -->
|
||||
<section id="ice-stocks" class="mb-12">
|
||||
<h2 class="text-2xl mb-4">ICE Certified Warehouse Stocks</h2>
|
||||
<p class="text-stone mb-4">ICE Futures U.S. publishes daily reports of <strong>certified warehouse stocks</strong> for Coffee C. These are physical bags of Arabica coffee that have been graded and stamped as meeting ICE delivery specifications — making them eligible for delivery against a futures contract at expiration.</p>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">Why certified stocks matter</h3>
|
||||
<p class="text-stone mb-4">Certified stocks are the physical backing of the futures market. When certified stocks fall sharply while open interest is high, shorts cannot easily deliver physical coffee — this creates a <strong>squeeze dynamic</strong> that can drive explosive price rallies. Tracking certified stocks alongside positioning data is essential for understanding delivery risk.</p>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">What we track</h3>
|
||||
<ul class="list-disc list-inside text-stone space-y-1.5 mb-4">
|
||||
<li><strong>Total Certified Bags:</strong> All ICE-approved warehouse receipts (60-kg bags)</li>
|
||||
<li><strong>Pending Grading:</strong> Coffee being evaluated for certification (may join or exit certified stock)</li>
|
||||
<li><strong>Source:</strong> ICE Report Center (daily publication)</li>
|
||||
<li><strong>Update frequency:</strong> Daily, after market close</li>
|
||||
</ul>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">Derived metrics</h3>
|
||||
<ul class="list-disc list-inside text-stone space-y-1.5 mb-4">
|
||||
<li><strong>WoW Change:</strong> Week-over-week change in certified bags</li>
|
||||
<li><strong>30-Day Average:</strong> Smoothed trend removing daily noise</li>
|
||||
<li><strong>52-Week High:</strong> Rolling maximum over trailing 365 days</li>
|
||||
<li><strong>Drawdown from 52w High:</strong> % decline from peak — measures how far stocks have been drawn down</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<!-- Data Quality -->
|
||||
<section id="data-quality" class="mb-12">
|
||||
<h2 class="text-2xl mb-4">Data Quality</h2>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">Immutable raw layer</h3>
|
||||
<p class="text-stone mb-4">All source files are stored as immutable gzip-compressed CSVs in a content-addressed landing directory. Files are never modified in place — a new download creates a new file only if the content hash differs from what is already stored. This means the full history of source corrections is preserved.</p>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">Incremental models with deduplication</h3>
|
||||
<p class="text-stone mb-4">Foundation models are incremental and deduplicate via a hash key computed from business grain columns and key metrics. If a source issues a correction (CFTC re-states a COT figure, USDA revises a production estimate), the corrected row produces a different hash and is ingested on the next pipeline run. Serving models select the most recent revision per grain.</p>
|
||||
|
||||
<h3 class="text-lg font-semibold mb-2 mt-6">Known limitations</h3>
|
||||
<ul class="list-disc list-inside text-stone space-y-1.5 mb-4">
|
||||
<li>USDA PSD revisions can extend back multiple years — always treat historical figures as estimates subject to revision.</li>
|
||||
<li>Yahoo Finance prices carry a ~15-minute delay and may have minor adjustments at roll dates.</li>
|
||||
<li>COT data reflects Tuesday close positions; the market may move significantly before Friday's release.</li>
|
||||
<li>ICE warehouse stocks do not distinguish between origins — certified stock drawdowns at specific ports are not visible here.</li>
|
||||
</ul>
|
||||
</section>
|
||||
|
||||
<!-- Update Schedule -->
|
||||
<section id="update-schedule" class="mb-12">
|
||||
<h2 class="text-2xl mb-4">Update Schedule</h2>
|
||||
<div class="overflow-x-auto">
|
||||
<table class="table text-sm">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Source</th>
|
||||
<th>Frequency</th>
|
||||
<th>Typical freshness</th>
|
||||
<th>Notes</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>USDA PSD</td>
|
||||
<td>Monthly</td>
|
||||
<td>~2nd week of month</td>
|
||||
<td>WASDE release day; daily pipeline detects hash change</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>CFTC COT</td>
|
||||
<td>Weekly (Friday)</td>
|
||||
<td>Friday 3:30 PM ET</td>
|
||||
<td>Reflects prior Tuesday positions</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>KC=F Price</td>
|
||||
<td>Daily</td>
|
||||
<td>Next morning</td>
|
||||
<td>Yahoo Finance ~15 min delayed; previous day close available next morning</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>ICE Warehouse Stocks</td>
|
||||
<td>Daily</td>
|
||||
<td>After market close</td>
|
||||
<td>ICE publishes report center data daily after the close</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<p class="text-stone mt-4 text-sm">Our pipeline runs continuously. Data is re-checked daily and new data is loaded within hours of publication. The dashboard shows the freshness date on each data section.</p>
|
||||
</section>
|
||||
|
||||
<!-- Questions -->
|
||||
<section class="bg-latte rounded-lg p-6">
|
||||
<h2 class="text-xl mb-2">Questions about the data?</h2>
|
||||
<p class="text-stone text-sm mb-4">If you spot an inconsistency or want to understand how a specific metric is calculated, use the feedback button on any page or reach out directly.</p>
|
||||
<a href="{{ url_for('auth.signup') }}" class="btn">Try BeanFlows free</a>
|
||||
</section>
|
||||
</section>
|
||||
</main>
|
||||
{% endblock %}
|
||||
@@ -77,6 +77,7 @@
|
||||
<li><a href="{{ url_for('public.features') }}">Features</a></li>
|
||||
<li><a href="{{ url_for('billing.pricing') }}">Pricing</a></li>
|
||||
<li><a href="{{ url_for('public.about') }}">About</a></li>
|
||||
<li><a href="{{ url_for('public.methodology') }}">Methodology</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div>
|
||||
|
||||
Reference in New Issue
Block a user