diff --git a/extract/padelnomics_extract/src/padelnomics_extract/playtomic_availability.py b/extract/padelnomics_extract/src/padelnomics_extract/playtomic_availability.py index 9ca3694..f1e68a6 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/playtomic_availability.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/playtomic_availability.py @@ -52,8 +52,7 @@ MAX_VENUES_PER_RUN = 20_000 MAX_RETRIES_PER_VENUE = 2 RECHECK_WINDOW_MINUTES = int(os.environ.get("RECHECK_WINDOW_MINUTES", "30")) CIRCUIT_BREAKER_THRESHOLD = int(os.environ.get("CIRCUIT_BREAKER_THRESHOLD") or "10") -# Override worker count — useful when tier 0 is a single rotating endpoint (DC/residential) -# that supports many concurrent connections. Defaults to len(tiers[0]) when unset. +# Worker count: defaults to MAX_PROXY_CONCURRENCY (200) when load_proxy_tiers() returns any tiers, otherwise 1. The PROXY_CONCURRENCY env var overrides the default and is capped at MAX_PROXY_CONCURRENCY. _PROXY_CONCURRENCY = os.environ.get("PROXY_CONCURRENCY", "").strip() MAX_PROXY_CONCURRENCY = 200 @@ -300,8 +299,7 @@ def extract( # Set up tiered proxy cycler with circuit breaker tiers = load_proxy_tiers() - default_workers = len(tiers[0]) if tiers else 1 - worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else default_workers + worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else (MAX_PROXY_CONCURRENCY if tiers else 1) cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD) start_min_str = start_min.strftime("%Y-%m-%dT%H:%M:%S") @@ -491,8 +489,7 @@ def extract_recheck( # Set up tiered proxy cycler with circuit breaker tiers = load_proxy_tiers() - default_workers = len(tiers[0]) if tiers else 1 - worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else default_workers + worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else (MAX_PROXY_CONCURRENCY if tiers else 1) cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD) if worker_count > 1 
and len(venues_to_recheck) > 10: