fix(extract): default worker count to 200 when proxies configured
All checks were successful
CI / test (push) Successful in 49s
CI / tag (push) Successful in 3s

Previously fell back to len(tiers[0]) (e.g. 10 for Webshare) when
PROXY_CONCURRENCY was unset. Default is now MAX_PROXY_CONCURRENCY=200
so single-URL rotating proxies (DC/residential) run at full concurrency
without needing an explicit env var.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-28 18:06:55 +01:00
parent 710624f417
commit c1cdeec6be

View File

@@ -52,8 +52,7 @@ MAX_VENUES_PER_RUN = 20_000
MAX_RETRIES_PER_VENUE = 2
RECHECK_WINDOW_MINUTES = int(os.environ.get("RECHECK_WINDOW_MINUTES", "30"))
CIRCUIT_BREAKER_THRESHOLD = int(os.environ.get("CIRCUIT_BREAKER_THRESHOLD") or "10")
# Override worker count — useful when tier 0 is a single rotating endpoint (DC/residential)
# that supports many concurrent connections. Defaults to len(tiers[0]) when unset.
# Worker count: defaults to MAX_PROXY_CONCURRENCY (200). Override via PROXY_CONCURRENCY env var.
_PROXY_CONCURRENCY = os.environ.get("PROXY_CONCURRENCY", "").strip()
MAX_PROXY_CONCURRENCY = 200
@@ -300,8 +299,7 @@ def extract(
# Set up tiered proxy cycler with circuit breaker
tiers = load_proxy_tiers()
default_workers = len(tiers[0]) if tiers else 1
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else default_workers
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else (MAX_PROXY_CONCURRENCY if tiers else 1)
cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD)
start_min_str = start_min.strftime("%Y-%m-%dT%H:%M:%S")
@@ -491,8 +489,7 @@ def extract_recheck(
# Set up tiered proxy cycler with circuit breaker
tiers = load_proxy_tiers()
default_workers = len(tiers[0]) if tiers else 1
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else default_workers
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else (MAX_PROXY_CONCURRENCY if tiers else 1)
cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD)
if worker_count > 1 and len(venues_to_recheck) > 10: