fix(extract): default worker count to 200 when proxies configured
Previously, the worker count fell back to len(tiers[0]) (e.g. 10 for Webshare) when PROXY_CONCURRENCY was unset. The default is now MAX_PROXY_CONCURRENCY (200) whenever proxy tiers are configured, so single-URL rotating proxies (DC/residential) run at full concurrency without needing an explicit env var. With no proxy tiers configured, the worker count remains 1.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -52,8 +52,7 @@ MAX_VENUES_PER_RUN = 20_000
|
|||||||
MAX_RETRIES_PER_VENUE = 2
|
MAX_RETRIES_PER_VENUE = 2
|
||||||
RECHECK_WINDOW_MINUTES = int(os.environ.get("RECHECK_WINDOW_MINUTES", "30"))
|
RECHECK_WINDOW_MINUTES = int(os.environ.get("RECHECK_WINDOW_MINUTES", "30"))
|
||||||
CIRCUIT_BREAKER_THRESHOLD = int(os.environ.get("CIRCUIT_BREAKER_THRESHOLD") or "10")
|
CIRCUIT_BREAKER_THRESHOLD = int(os.environ.get("CIRCUIT_BREAKER_THRESHOLD") or "10")
|
||||||
# Override worker count — useful when tier 0 is a single rotating endpoint (DC/residential)
|
# Worker count: defaults to MAX_PROXY_CONCURRENCY (200) when proxy tiers are loaded, otherwise 1. Override via the PROXY_CONCURRENCY env var (capped at MAX_PROXY_CONCURRENCY).
|
||||||
# that supports many concurrent connections. Defaults to len(tiers[0]) when unset.
|
|
||||||
_PROXY_CONCURRENCY = os.environ.get("PROXY_CONCURRENCY", "").strip()
|
_PROXY_CONCURRENCY = os.environ.get("PROXY_CONCURRENCY", "").strip()
|
||||||
MAX_PROXY_CONCURRENCY = 200
|
MAX_PROXY_CONCURRENCY = 200
|
||||||
|
|
||||||
@@ -300,8 +299,7 @@ def extract(
|
|||||||
|
|
||||||
# Set up tiered proxy cycler with circuit breaker
|
# Set up tiered proxy cycler with circuit breaker
|
||||||
tiers = load_proxy_tiers()
|
tiers = load_proxy_tiers()
|
||||||
default_workers = len(tiers[0]) if tiers else 1
|
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else (MAX_PROXY_CONCURRENCY if tiers else 1)
|
||||||
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else default_workers
|
|
||||||
cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD)
|
cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD)
|
||||||
|
|
||||||
start_min_str = start_min.strftime("%Y-%m-%dT%H:%M:%S")
|
start_min_str = start_min.strftime("%Y-%m-%dT%H:%M:%S")
|
||||||
@@ -491,8 +489,7 @@ def extract_recheck(
|
|||||||
|
|
||||||
# Set up tiered proxy cycler with circuit breaker
|
# Set up tiered proxy cycler with circuit breaker
|
||||||
tiers = load_proxy_tiers()
|
tiers = load_proxy_tiers()
|
||||||
default_workers = len(tiers[0]) if tiers else 1
|
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else (MAX_PROXY_CONCURRENCY if tiers else 1)
|
||||||
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else default_workers
|
|
||||||
cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD)
|
cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD)
|
||||||
|
|
||||||
if worker_count > 1 and len(venues_to_recheck) > 10:
|
if worker_count > 1 and len(venues_to_recheck) > 10:
|
||||||
|
|||||||
Reference in New Issue
Block a user