diff --git a/extract/padelnomics_extract/src/padelnomics_extract/proxy.py b/extract/padelnomics_extract/src/padelnomics_extract/proxy.py index 9904c80..394d2f1 100644 --- a/extract/padelnomics_extract/src/padelnomics_extract/proxy.py +++ b/extract/padelnomics_extract/src/padelnomics_extract/proxy.py @@ -3,10 +3,9 @@ Proxies are configured via environment variables. When unset, all functions return None/no-op — extractors fall back to direct requests. -Three-tier escalation: free → datacenter → residential. - Tier 1 (free): WEBSHARE_DOWNLOAD_URL — auto-fetched from Webshare API - Tier 2 (datacenter): PROXY_URLS_DATACENTER — comma-separated paid DC proxies - Tier 3 (residential): PROXY_URLS_RESIDENTIAL — comma-separated paid residential proxies +Two-tier escalation: datacenter → residential. + Tier 1 (datacenter): PROXY_URLS_DATACENTER — comma-separated paid DC proxies + Tier 2 (residential): PROXY_URLS_RESIDENTIAL — comma-separated paid residential proxies Tiered circuit breaker: Active tier is used until consecutive failures >= threshold, then escalates @@ -69,22 +68,15 @@ def fetch_webshare_proxies(download_url: str, max_proxies: int = MAX_WEBSHARE_PR def load_proxy_tiers() -> list[list[str]]: - """Assemble proxy tiers in escalation order: free → datacenter → residential. + """Assemble proxy tiers in escalation order: datacenter → residential. - Tier 1 (free): fetched from WEBSHARE_DOWNLOAD_URL if set. - Tier 2 (datacenter): PROXY_URLS_DATACENTER (comma-separated). - Tier 3 (residential): PROXY_URLS_RESIDENTIAL (comma-separated). + Tier 1 (datacenter): PROXY_URLS_DATACENTER (comma-separated). + Tier 2 (residential): PROXY_URLS_RESIDENTIAL (comma-separated). Empty tiers are omitted. Returns [] if no proxies configured anywhere. """ tiers: list[list[str]] = [] - webshare_url = os.environ.get("WEBSHARE_DOWNLOAD_URL", "").strip() - if webshare_url: - free_proxies = fetch_webshare_proxies(webshare_url) - if free_proxies: - tiers.append(free_proxies) - for var in ("PROXY_URLS_DATACENTER", "PROXY_URLS_RESIDENTIAL"): raw = os.environ.get(var, "") urls = [u.strip() for u in raw.split(",") if u.strip()]