|
|
|
@@ -52,8 +52,7 @@ MAX_VENUES_PER_RUN = 20_000
|
|
|
|
MAX_RETRIES_PER_VENUE = 2
|
|
|
|
MAX_RETRIES_PER_VENUE = 2
|
|
|
|
RECHECK_WINDOW_MINUTES = int(os.environ.get("RECHECK_WINDOW_MINUTES", "30"))
|
|
|
|
RECHECK_WINDOW_MINUTES = int(os.environ.get("RECHECK_WINDOW_MINUTES", "30"))
|
|
|
|
CIRCUIT_BREAKER_THRESHOLD = int(os.environ.get("CIRCUIT_BREAKER_THRESHOLD") or "10")
|
|
|
|
CIRCUIT_BREAKER_THRESHOLD = int(os.environ.get("CIRCUIT_BREAKER_THRESHOLD") or "10")
|
|
|
|
# Override worker count — useful when tier 0 is a single rotating endpoint (DC/residential)
|
|
|
|
# Worker count: defaults to MAX_PROXY_CONCURRENCY (200). Override via PROXY_CONCURRENCY env var.
|
|
|
|
# that supports many concurrent connections. Defaults to len(tiers[0]) when unset.
|
|
|
|
|
|
|
|
_PROXY_CONCURRENCY = os.environ.get("PROXY_CONCURRENCY", "").strip()
|
|
|
|
_PROXY_CONCURRENCY = os.environ.get("PROXY_CONCURRENCY", "").strip()
|
|
|
|
MAX_PROXY_CONCURRENCY = 200
|
|
|
|
MAX_PROXY_CONCURRENCY = 200
|
|
|
|
|
|
|
|
|
|
|
|
@@ -300,8 +299,7 @@ def extract(
|
|
|
|
|
|
|
|
|
|
|
|
# Set up tiered proxy cycler with circuit breaker
|
|
|
|
# Set up tiered proxy cycler with circuit breaker
|
|
|
|
tiers = load_proxy_tiers()
|
|
|
|
tiers = load_proxy_tiers()
|
|
|
|
default_workers = len(tiers[0]) if tiers else 1
|
|
|
|
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else (MAX_PROXY_CONCURRENCY if tiers else 1)
|
|
|
|
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else default_workers
|
|
|
|
|
|
|
|
cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD)
|
|
|
|
cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD)
|
|
|
|
|
|
|
|
|
|
|
|
start_min_str = start_min.strftime("%Y-%m-%dT%H:%M:%S")
|
|
|
|
start_min_str = start_min.strftime("%Y-%m-%dT%H:%M:%S")
|
|
|
|
@@ -491,8 +489,7 @@ def extract_recheck(
|
|
|
|
|
|
|
|
|
|
|
|
# Set up tiered proxy cycler with circuit breaker
|
|
|
|
# Set up tiered proxy cycler with circuit breaker
|
|
|
|
tiers = load_proxy_tiers()
|
|
|
|
tiers = load_proxy_tiers()
|
|
|
|
default_workers = len(tiers[0]) if tiers else 1
|
|
|
|
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else (MAX_PROXY_CONCURRENCY if tiers else 1)
|
|
|
|
worker_count = min(int(_PROXY_CONCURRENCY), MAX_PROXY_CONCURRENCY) if _PROXY_CONCURRENCY else default_workers
|
|
|
|
|
|
|
|
cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD)
|
|
|
|
cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD)
|
|
|
|
|
|
|
|
|
|
|
|
if worker_count > 1 and len(venues_to_recheck) > 10:
|
|
|
|
if worker_count > 1 and len(venues_to_recheck) > 10:
|
|
|
|
|