Compare commits

10 Commits: v202603010...v202603011

- 169092c8ea
- 6ae16f6c1f
- 8b33daa4f3
- a898a06575
- 219554b7cb
- 1aedf78ec6
- 8f2ffd432b
- c9dec066f7
- fea4f85da3
- 2590020014

@@ -32,10 +32,6 @@ LITESTREAM_R2_BUCKET=ENC[AES256_GCM,data:pAqSkoJzsw==,iv:5J1Js7JPH/j1oTmEBdNXjwd
LITESTREAM_R2_ACCESS_KEY_ID=ENC[AES256_GCM,data:e89yGzousImmdO7WVqmRWLJNejDFH5eTaw7G74CyZSw=,iv:bR1jgqSzJlxPA8LMMg2Mc1Lnp01iZgaqa9dgAoV0RpY=,tag:m92xzCP0qaP2onK7ChwA1Q==,type:str]
LITESTREAM_R2_SECRET_ACCESS_KEY=ENC[AES256_GCM,data:yzXeb8c/Y0d+EluY7g6buo4BnFvBDEVblOi7doNgOp3siLvfMmPkjdRLqZzA14ET6CW5vef9i51yijPYwuhnbw==,iv:IYQRZ8SsquUQpsHH3X/iovz2wFskR4iHyvr0arY7Ag4=,tag:9G5lpHloacjQbEhSk9T2pw==,type:str]
LITESTREAM_R2_ENDPOINT=ENC[AES256_GCM,data:qqDLfsPeiWOfwtgpZeItypnYNmIOD07fV0IPlZfphhUFeY0Z/BRpkVXA7nfqQ2M6PmcYKVIlBiBY,iv:hsEBxxv1+fvUY4v3nhBP8puKlu216eAGZDUNBAjibas=,tag:MvnsJ8W3oSrv4ZrWW/p+dg==,type:str]
#ENC[AES256_GCM,data:YGV2exKdGOUkblNZZos=,iv:NuabFM/gNHIzYmDMRZ2tglFYdMPVFuHFGd+AAWvvu6Q=,tag:gZRoNNEmjL9v3nC8j9YkHw==,type:comment]
DUCKDB_PATH=ENC[AES256_GCM,data:GgOEQ5B1KeQrVavhoMU/JGXcVu3H,iv:XY8JiaosxaUDv5PwizrZFWuNKMSOeuE3cfVyp51r++8=,tag:RnoDE5+7WQolFLejfRZ//w==,type:str]
SERVING_DUCKDB_PATH=ENC[AES256_GCM,data:U2X9KmlgnWXM9uCfhHCJ03HMGCLm,iv:KHHdBTq+ct4AG7Jt4zLog/5jbDC7LvHA6KzWNTDS/Yw=,tag:m5uIG/bS4vaBooSYoYa6SA==,type:str]
LANDING_DIR=ENC[AES256_GCM,data:NkEmV8LOwEiN9Sal,iv:mQHBVT6lNoEEEVbl7a5bNN5qoF/LvTyWXQvvkv/z/B0=,tag:IgA5A1nfF91fOBdYxEN71g==,type:str]
#ENC[AES256_GCM,data:jvZYm7ceM4jtNRg=,iv:nuv65SDTZiaVukVZ40seBZevpqP8uiKCgJyQcIrY524=,tag:cq6gB3vmJzJWIXCLHaIc9g==,type:comment]
REPO_DIR=ENC[AES256_GCM,data:ae8i6PpGFaiYFA/gGIhczg==,iv:nmsIRMPJYocIO6Z2Gz4OIzAOvSpdgDYmUaIr2hInFo0=,tag:EmAYG5NujnHg8lPaO/uAnQ==,type:str]
WORKFLOWS_PATH=ENC[AES256_GCM,data:sGU4l68Pbb1thsPyG104mWXWD+zJGTIcR/TqVbPmew==,iv:+xhGkX+ep4kFEAU65ELdDrfjrl/WyuaOi35JI3OB/zM=,tag:brauZhFq8twPXmvhZKjhDQ==,type:str]

@@ -62,7 +58,7 @@ sops_age__list_1__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb2
sops_age__list_1__map_recipient=age1wjepykv3glvsrtegu25tevg7vyn3ngpl607u3yjc9ucay04s045s796msw
sops_age__list_2__map_enc=-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBFeHhaOURNZnRVMEwxNThu\nUjF4Q0kwUXhTUE1QSzZJbmpubnh3RnpQTmdvCjRmWWxpNkxFUmVGb3NRbnlydW5O\nWEg3ZXJQTU4vcndzS2pUQXY3Q0ttYjAKLS0tIE9IRFJ1c2ZxbGVHa2xTL0swbGN1\nTzgwMThPUDRFTWhuZHJjZUYxOTZrU00KY62qrNBCUQYxwcLMXFEnLkwncxq3BPJB\nKm4NzeHBU87XmPWVrgrKuf+PH1mxJlBsl7Hev8xBTy7l6feiZjLIvQ==\n-----END AGE ENCRYPTED FILE-----\n
sops_age__list_2__map_recipient=age1c783ym2q5x9tv7py5d28uc4k44aguudjn03g97l9nzs00dd9tsrqum8h4d
sops_lastmodified=2026-02-28T17:03:44Z
sops_mac=ENC[AES256_GCM,data:IQ9jpRxVUssaMK+qFcM3nPdzXHkiqp6E+DhEey1TfqUu5GCBNsWeVy9m9A6p9RWhu2NtJV7aKdUeqneuMtD1q5Tnm6L96zuyot2ESnx2N2ssD9ilrDauQxoBJcrJVnGV61CgaCz9458w8BuVUZydn3MoHeRaU7bOBBzQlTI6vZk=,iv:qHqdt3av/KZRQHr/OS/9KdAJUgKlKEDgan7qI3Zzkck=,tag:fOvdO9iRTTF1Siobu2mLqg==,type:str]
sops_lastmodified=2026-03-01T00:26:54Z
sops_mac=ENC[AES256_GCM,data:DdcABGVm9KbAcFrF0iuZlAaugsouNs7Hon2mZISaHs15/2H/Pd9FniXW3KeQ0+/NdZFQkz/h3i3bVFampcpFS1AxuOE5+1/IgWn8sKtaqPc7E9y8g6lxMnwTkUX2z+n/Q2nR8KAcO9IyE0GNjIluMWkxPWQuLzlRYDOjRN4/1e0=,iv:rm+6lXhYu6VUmrdCIrU0BRN2/ooa21Fw1ESWxr7vATg=,tag:GZmLLZf/LQaNeNNAAEg5bA==,type:str]
sops_unencrypted_suffix=_unencrypted
sops_version=3.12.1

@@ -6,6 +6,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## [Unreleased]

### Changed

- **Per-proxy dead tracking in tiered cycler** — `make_tiered_cycler` now accepts a `proxy_failure_limit` parameter (default 3). Individual proxies that hit the limit are marked dead and permanently skipped by `next_proxy()`. If all proxies in the active tier are dead, `next_proxy()` auto-escalates to the next tier without needing the tier-level threshold. `record_failure(proxy_url)` and `record_success(proxy_url)` accept an optional `proxy_url` argument for per-proxy tracking; callers without `proxy_url` are fully backward-compatible. New `dead_proxy_count()` callable exposed for monitoring.
  - `extract/padelnomics_extract/src/padelnomics_extract/proxy.py`: added per-proxy state (`proxy_failure_counts`, `dead_proxies`), updated `next_proxy`/`record_failure`/`record_success`, added `dead_proxy_count`
  - `extract/padelnomics_extract/src/padelnomics_extract/playtomic_tenants.py`: `_fetch_page_via_cycler` passes `proxy_url` to `record_success`/`record_failure`
  - `extract/padelnomics_extract/src/padelnomics_extract/playtomic_availability.py`: `_worker` returns `(proxy_url, result)` tuple; serial loops in `extract` and `extract_recheck` capture `proxy_url` before passing to `record_success`/`record_failure`
  - `web/tests/test_supervisor.py`: 11 new tests in `TestTieredCyclerDeadProxyTracking` covering dead proxy skipping, auto-escalation, `dead_proxy_count`, backward compat, and thread safety

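A minimal usage sketch of the new cycler API, based on the signatures documented in this entry; the tier URLs are placeholders and the request call is illustrative (it assumes the requests-style `proxies` keyword that niquests supports):

```python
import niquests

from padelnomics_extract.proxy import make_tiered_cycler

# Illustrative tiers: free -> datacenter (URLs are placeholders)
tiers = [["http://free-1:8080", "http://free-2:8080"], ["http://dc-1:8080"]]
cycler = make_tiered_cycler(tiers, threshold=10, proxy_failure_limit=3)

proxy_url = cycler["next_proxy"]()  # skips proxies already marked dead
try:
    niquests.get("https://api.playtomic.io/v1/tenants",
                 proxies={"https": proxy_url}, timeout=10)
    cycler["record_success"](proxy_url)  # resets that proxy's failure counter
except Exception:
    cycler["record_failure"](proxy_url)  # 3rd consecutive failure marks it dead

print(cycler["dead_proxy_count"]())  # monitoring hook
```
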
### Added

- **Affiliate programs management** — centralised retailer config (`affiliate_programs` table) with URL template + tracking tag + commission %. Products now use a program dropdown + product identifier (e.g. ASIN) instead of manually baking full URLs. URL is assembled at redirect time via `build_affiliate_url()`, so changing a tag propagates instantly to all products. Legacy products (baked `affiliate_url`) continue to work via fallback. Amazon OneLink configured in the Associates dashboard handles geo-redirect to local marketplaces — no per-country programs needed.
  - `web/src/padelnomics/migrations/versions/0027_affiliate_programs.py`: `affiliate_programs` table, nullable `program_id` + `product_identifier` columns on `affiliate_products`, seeds "Amazon" program, backfills ASINs from existing URLs

@@ -17,6 +24,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

  - 15 new tests in `web/tests/test_affiliate.py` (41 total)

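To make the redirect-time assembly concrete, here is a hypothetical sketch of `build_affiliate_url()`; the template placeholder syntax, column names, and signature are assumptions for illustration, not the actual implementation:

```python
def build_affiliate_url(program: dict, product_identifier: str) -> str:
    """Hypothetical sketch: fill a program's URL template at redirect time.

    Assumes a template format like "https://www.amazon.de/dp/{id}?tag={tag}";
    the real schema and placeholder syntax may differ.
    """
    return program["url_template"].format(id=product_identifier,
                                          tag=program["tracking_tag"])


program = {
    "url_template": "https://www.amazon.de/dp/{id}?tag={tag}",  # assumed format
    "tracking_tag": "example-tag-21",  # placeholder tag
}
print(build_affiliate_url(program, "B0EXAMPLE1"))
# -> https://www.amazon.de/dp/B0EXAMPLE1?tag=example-tag-21
```

Because the URL is built per redirect, updating `tracking_tag` on the program row changes every product that references it, with no per-product URL rewrites.
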
### Fixed

- **Data Platform admin view showing stale/zero row counts** — Docker web containers were mounting `/opt/padelnomics/data` (stale copy) instead of `/data/padelnomics` (live supervisor output). Fixed volume mount in all 6 containers (blue/green × app/worker/scheduler) and added `LANDING_DIR=/app/data/pipeline/landing` so extraction stats and landing zone file stats are visible to the web app.
- **`workflows.toml` never found in dev** — `_REPO_ROOT` in `pipeline_routes.py` used `parents[5]` (one level too far up) instead of `parents[4]`. Workflow schedules now display correctly on the pipeline overview tab in dev.
- **Article preview frontmatter bug** — `_rebuild_article()` in `admin/routes.py` now strips YAML frontmatter before passing markdown to `mistune.html()`, preventing raw `title:`, `slug:` etc. from appearing as visible text in article previews.

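The `parents` indexing behind the `workflows.toml` fix is easy to get off by one; counting from a file at `web/src/padelnomics/admin/pipeline_routes.py` (the `/repo` prefix is shown for illustration):

```python
from pathlib import Path

p = Path("/repo/web/src/padelnomics/admin/pipeline_routes.py")
# parents[0] = .../admin        (containing directory)
# parents[1] = .../padelnomics
# parents[2] = .../src
# parents[3] = .../web
# parents[4] = /repo            (repo root)
assert p.parents[4] == Path("/repo")
```
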
### Added

@@ -1,67 +1,67 @@
---
title: "Padelschläger für Fortgeschrittene: Die besten Modelle 2026"
slug: padelschlaeger-fortgeschrittene-de
language: de
url_path: /padelschlaeger-fortgeschrittene
meta_description: "Die besten Padelschläger für fortgeschrittene und ambitionierte Spieler. High-End-Modelle mit Carbon, Kevlar und ausgereifter Schlagbalance für Spieler ab 3.0."
---

# Padelschläger für Fortgeschrittene: Die besten Modelle 2026

<!-- TODO: Einleitung — wann ist man bereit für einen Fortgeschrittenenschläger? -->

Ab einem gewissen Spielniveau lohnt sich der Griff zu einem anspruchsvolleren Schläger. Wer sauber trifft, kann von einer härteren Bespannung und einer präziseren Balance profitieren. Die Schläger in dieser Liste sind kein Selbstläufer — aber in den richtigen Händen ein echter Vorteil.

---

## Top-Schläger für Fortgeschrittene im Überblick

[product-group:racket]

---

## Carbon, Kevlar, Glasfaser: Was steckt drin?

<!-- TODO: Materialüberblick mit Vor- und Nachteilen -->

### Carbon-Rahmen

<!-- TODO -->

### 3K vs. 12K Carbon

<!-- TODO -->

### Kevlar-Einlagen

<!-- TODO -->

---

## Testbericht: Unser Empfehlungsschläger

[product:platzhalter-fortgeschrittene-schlaeger-amazon]

<!-- TODO: Praxistest -->

---

## Häufige Fragen

<details>
<summary>Ab welcher Spielstufe lohnt sich ein Fortgeschrittenenschläger?</summary>

<!-- TODO -->

Wer regelmäßig spielt (2–3 Mal pro Woche), seit mindestens einem Jahr dabei ist und an Taktik und Technik arbeitet, kann von einem hochwertigeren Schläger profitieren. Für gelegentliche Spieler ist der Unterschied zu einem Mittelklassemodell kaum spürbar.

</details>

<details>
<summary>Müssen Fortgeschrittenenschläger teurer sein?</summary>

<!-- TODO -->

Nicht zwingend. Es gibt ausgezeichnete Modelle im 150–200-Euro-Segment, die professionell verarbeitete Carbon-Elemente enthalten. Alles über 300 Euro richtet sich meist an Spieler mit Wettkampfambitionen.

</details>

@@ -60,9 +60,10 @@ services:
    environment:
      - DATABASE_PATH=/app/data/app.db
      - SERVING_DUCKDB_PATH=/app/data/pipeline/analytics.duckdb
      - LANDING_DIR=/app/data/pipeline/landing
    volumes:
      - app-data:/app/data
      - /opt/padelnomics/data:/app/data/pipeline:ro
      - /data/padelnomics:/app/data/pipeline:ro
    networks:
      - net
    healthcheck:

@@ -82,9 +83,10 @@ services:
    environment:
      - DATABASE_PATH=/app/data/app.db
      - SERVING_DUCKDB_PATH=/app/data/pipeline/analytics.duckdb
      - LANDING_DIR=/app/data/pipeline/landing
    volumes:
      - app-data:/app/data
      - /opt/padelnomics/data:/app/data/pipeline:ro
      - /data/padelnomics:/app/data/pipeline:ro
    networks:
      - net

@@ -98,9 +100,10 @@ services:
    environment:
      - DATABASE_PATH=/app/data/app.db
      - SERVING_DUCKDB_PATH=/app/data/pipeline/analytics.duckdb
      - LANDING_DIR=/app/data/pipeline/landing
    volumes:
      - app-data:/app/data
      - /opt/padelnomics/data:/app/data/pipeline:ro
      - /data/padelnomics:/app/data/pipeline:ro
    networks:
      - net

@@ -115,9 +118,10 @@ services:
    environment:
      - DATABASE_PATH=/app/data/app.db
      - SERVING_DUCKDB_PATH=/app/data/pipeline/analytics.duckdb
      - LANDING_DIR=/app/data/pipeline/landing
    volumes:
      - app-data:/app/data
      - /opt/padelnomics/data:/app/data/pipeline:ro
      - /data/padelnomics:/app/data/pipeline:ro
    networks:
      - net
    healthcheck:

@@ -137,9 +141,10 @@ services:
    environment:
      - DATABASE_PATH=/app/data/app.db
      - SERVING_DUCKDB_PATH=/app/data/pipeline/analytics.duckdb
      - LANDING_DIR=/app/data/pipeline/landing
    volumes:
      - app-data:/app/data
      - /opt/padelnomics/data:/app/data/pipeline:ro
      - /data/padelnomics:/app/data/pipeline:ro
    networks:
      - net

@@ -153,9 +158,10 @@ services:
    environment:
      - DATABASE_PATH=/app/data/app.db
      - SERVING_DUCKDB_PATH=/app/data/pipeline/analytics.duckdb
      - LANDING_DIR=/app/data/pipeline/landing
    volumes:
      - app-data:/app/data
      - /opt/padelnomics/data:/app/data/pipeline:ro
      - /data/padelnomics:/app/data/pipeline:ro
    networks:
      - net

@@ -213,9 +213,10 @@ def _fetch_venues_parallel(
    completed_count = 0
    lock = threading.Lock()

    def _worker(tenant_id: str) -> dict | None:
    def _worker(tenant_id: str) -> tuple[str | None, dict | None]:
        proxy_url = cycler["next_proxy"]()
        return _fetch_venue_availability(tenant_id, start_min_str, start_max_str, proxy_url)
        result = _fetch_venue_availability(tenant_id, start_min_str, start_max_str, proxy_url)
        return proxy_url, result

    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        for batch_start in range(0, len(tenant_ids), PARALLEL_BATCH_SIZE):

@@ -231,17 +232,17 @@ def _fetch_venues_parallel(
            batch_futures = {pool.submit(_worker, tid): tid for tid in batch}

            for future in as_completed(batch_futures):
                result = future.result()
                proxy_url, result = future.result()
                with lock:
                    completed_count += 1
                    if result is not None:
                        venues_data.append(result)
                        cycler["record_success"]()
                        cycler["record_success"](proxy_url)
                        if on_result is not None:
                            on_result(result)
                    else:
                        venues_errored += 1
                        cycler["record_failure"]()
                        cycler["record_failure"](proxy_url)

                    if completed_count % 500 == 0:
                        logger.info(

@@ -336,16 +337,17 @@ def extract(
    else:
        logger.info("Serial mode: 1 worker, %d venues", len(venues_to_process))
        for i, tenant_id in enumerate(venues_to_process):
            proxy_url = cycler["next_proxy"]()
            result = _fetch_venue_availability(
                tenant_id, start_min_str, start_max_str, cycler["next_proxy"](),
                tenant_id, start_min_str, start_max_str, proxy_url,
            )
            if result is not None:
                new_venues_data.append(result)
                cycler["record_success"]()
                cycler["record_success"](proxy_url)
                _on_result(result)
            else:
                venues_errored += 1
                cycler["record_failure"]()
                cycler["record_failure"](proxy_url)
            if cycler["is_exhausted"]():
                logger.error("All proxy tiers exhausted — writing partial results")
                break

@@ -500,13 +502,14 @@ def extract_recheck(
    venues_data = []
    venues_errored = 0
    for tid in venues_to_recheck:
        result = _fetch_venue_availability(tid, start_min_str, start_max_str, cycler["next_proxy"]())
        proxy_url = cycler["next_proxy"]()
        result = _fetch_venue_availability(tid, start_min_str, start_max_str, proxy_url)
        if result is not None:
            venues_data.append(result)
            cycler["record_success"]()
            cycler["record_success"](proxy_url)
        else:
            venues_errored += 1
            cycler["record_failure"]()
            cycler["record_failure"](proxy_url)
        if cycler["is_exhausted"]():
            logger.error("All proxy tiers exhausted — writing partial recheck results")
            break

@@ -10,11 +10,11 @@ API notes (discovered 2026-02):
- `size=100` is the maximum effective page size
- ~14K venues globally as of Feb 2026

Parallel mode: when PROXY_URLS is set, fires batch_size = len(proxy_urls)
pages concurrently. Each page gets its own fresh session + proxy. Pages beyond
the last one return empty lists (safe — just triggers the done condition).
Without proxies, falls back to single-threaded with THROTTLE_SECONDS between
pages.
Parallel mode: when proxy tiers are configured, fires BATCH_SIZE pages
concurrently. Each page gets its own fresh session + proxy from the tiered
cycler. On failure the cycler escalates through free → datacenter →
residential tiers. Without proxies, falls back to single-threaded with
THROTTLE_SECONDS between pages.

Rate: 1 req / 2 s per IP (see docs/data-sources-inventory.md §1.2).

@@ -22,6 +22,7 @@ Landing: {LANDING_DIR}/playtomic/{year}/{month}/tenants.jsonl.gz
"""

import json
import os
import sqlite3
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

@@ -31,7 +32,7 @@ from pathlib import Path
import niquests

from ._shared import HTTP_TIMEOUT_SECONDS, run_extractor, setup_logging, ua_for_proxy
from .proxy import load_proxy_tiers, make_round_robin_cycler
from .proxy import load_proxy_tiers, make_tiered_cycler
from .utils import compress_jsonl_atomic, landing_path

logger = setup_logging("padelnomics.extract.playtomic_tenants")

@@ -42,6 +43,9 @@ PLAYTOMIC_TENANTS_URL = "https://api.playtomic.io/v1/tenants"
THROTTLE_SECONDS = 2
PAGE_SIZE = 100
MAX_PAGES = 500  # safety bound — ~50K venues max, well above current ~14K
BATCH_SIZE = 20  # concurrent pages per batch (fixed, independent of proxy count)
CIRCUIT_BREAKER_THRESHOLD = int(os.environ.get("CIRCUIT_BREAKER_THRESHOLD") or "10")
MAX_PAGE_ATTEMPTS = 5  # max retries per individual page before giving up

def _fetch_one_page(proxy_url: str | None, page: int) -> tuple[int, list[dict]]:

@@ -61,22 +65,57 @@ def _fetch_one_page(proxy_url: str | None, page: int) -> tuple[int, list[dict]]:
    return (page, tenants)


def _fetch_pages_parallel(pages: list[int], next_proxy) -> list[tuple[int, list[dict]]]:
    """Fetch multiple pages concurrently. Returns [(page_num, tenants_list), ...]."""
def _fetch_page_via_cycler(cycler: dict, page: int) -> tuple[int, list[dict]]:
    """Fetch a single page, retrying across proxy tiers via the circuit breaker.

    On each attempt, pulls the next proxy from the active tier. Records
    success/failure so the circuit breaker can escalate tiers. Raises
    RuntimeError if all tiers are exhausted or MAX_PAGE_ATTEMPTS is exceeded.
    """
    last_exc: Exception | None = None
    for attempt in range(MAX_PAGE_ATTEMPTS):
        proxy_url = cycler["next_proxy"]()
        if proxy_url is None:  # all tiers exhausted
            raise RuntimeError(f"All proxy tiers exhausted fetching page {page}")
        try:
            result = _fetch_one_page(proxy_url, page)
            cycler["record_success"](proxy_url)
            return result
        except Exception as exc:
            last_exc = exc
            logger.warning(
                "Page %d attempt %d/%d failed (proxy=%s): %s",
                page,
                attempt + 1,
                MAX_PAGE_ATTEMPTS,
                proxy_url,
                exc,
            )
            cycler["record_failure"](proxy_url)
            if cycler["is_exhausted"]():
                raise RuntimeError(f"All proxy tiers exhausted fetching page {page}") from exc
    raise RuntimeError(f"Page {page} failed after {MAX_PAGE_ATTEMPTS} attempts") from last_exc


def _fetch_pages_parallel(pages: list[int], cycler: dict) -> list[tuple[int, list[dict]]]:
    """Fetch multiple pages concurrently using the tiered cycler.

    Returns [(page_num, tenants_list), ...]. Raises if any page exhausts all tiers.
    """
    with ThreadPoolExecutor(max_workers=len(pages)) as pool:
        futures = [pool.submit(_fetch_one_page, next_proxy(), p) for p in pages]
        futures = [pool.submit(_fetch_page_via_cycler, cycler, p) for p in pages]
        return [f.result() for f in as_completed(futures)]

def extract(
    landing_dir: Path,
    year_month: str,  # noqa: ARG001 — unused; tenants uses ISO week partition instead
    year_month: str,  # noqa: ARG001 — unused; tenants uses daily partition instead
    conn: sqlite3.Connection,
    session: niquests.Session,
) -> dict:
    """Fetch all Playtomic venues via global pagination. Returns run metrics.

    Partitioned by ISO week (e.g. 2026/W09) so each weekly run produces a
    Partitioned by day (e.g. 2026/03/01) so each daily run produces a
    fresh file. _load_tenant_ids() in playtomic_availability globs across all
    partitions and picks the most recent one.
    """

@@ -89,12 +128,16 @@ def extract(
        return {"files_written": 0, "files_skipped": 1, "bytes_written": 0}

    tiers = load_proxy_tiers()
    all_proxies = [url for tier in tiers for url in tier]
    next_proxy = make_round_robin_cycler(all_proxies) if all_proxies else None
    batch_size = len(all_proxies) if all_proxies else 1
    cycler = make_tiered_cycler(tiers, CIRCUIT_BREAKER_THRESHOLD) if tiers else None
    batch_size = BATCH_SIZE if cycler else 1

    if next_proxy:
        logger.info("Parallel mode: %d pages per batch (%d proxies across %d tier(s))", batch_size, len(all_proxies), len(tiers))
    if cycler:
        logger.info(
            "Parallel mode: %d pages/batch, %d tier(s), threshold=%d",
            batch_size,
            cycler["tier_count"](),
            CIRCUIT_BREAKER_THRESHOLD,
        )
    else:
        logger.info("Serial mode: 1 page at a time (no proxies)")

@@ -104,15 +147,33 @@ def extract(
    done = False

    while not done and page < MAX_PAGES:
        if cycler and cycler["is_exhausted"]():
            logger.error(
                "All proxy tiers exhausted — stopping at page %d (%d venues collected)",
                page,
                len(all_tenants),
            )
            break

        batch_end = min(page + batch_size, MAX_PAGES)
        pages_to_fetch = list(range(page, batch_end))

        if next_proxy and len(pages_to_fetch) > 1:
        if cycler and len(pages_to_fetch) > 1:
            logger.info(
                "Fetching pages %d-%d in parallel (%d workers, total so far: %d)",
                page, batch_end - 1, len(pages_to_fetch), len(all_tenants),
                page,
                batch_end - 1,
                len(pages_to_fetch),
                len(all_tenants),
            )
            results = _fetch_pages_parallel(pages_to_fetch, next_proxy)
            try:
                results = _fetch_pages_parallel(pages_to_fetch, cycler)
            except RuntimeError:
                logger.error(
                    "Proxy tiers exhausted mid-batch — writing partial results (%d venues)",
                    len(all_tenants),
                )
                break
        else:
            # Serial: reuse the shared session, throttle between pages
            page_num = pages_to_fetch[0]

@@ -126,7 +187,7 @@ def extract(
            )
            results = [(page_num, tenants)]

        # Process pages in order so the done-detection on < PAGE_SIZE is deterministic
        # Process pages in order so done-detection on < PAGE_SIZE is deterministic
        for p, tenants in sorted(results):
            new_count = 0
            for tenant in tenants:

@@ -137,7 +198,11 @@ def extract(
                    new_count += 1

            logger.info(
                "page=%d got=%d new=%d total=%d", p, len(tenants), new_count, len(all_tenants),
                "page=%d got=%d new=%d total=%d",
                p,
                len(tenants),
                new_count,
                len(all_tenants),
            )

            # Last page — fewer than PAGE_SIZE results means we've exhausted the list

@@ -146,7 +211,7 @@ def extract(
                break

        page = batch_end
        if not next_proxy:
        if not cycler:
            time.sleep(THROTTLE_SECONDS)

    # Write each tenant as a JSONL line, then compress atomically

@@ -134,8 +134,8 @@ def make_sticky_selector(proxy_urls: list[str]):
    return select_proxy


def make_tiered_cycler(tiers: list[list[str]], threshold: int) -> dict:
    """Thread-safe N-tier proxy cycler with circuit breaker.
def make_tiered_cycler(tiers: list[list[str]], threshold: int, proxy_failure_limit: int = 3) -> dict:
    """Thread-safe N-tier proxy cycler with circuit breaker and per-proxy dead tracking.

    Uses tiers[0] until consecutive failures >= threshold, then escalates
    to tiers[1], then tiers[2], etc. Once all tiers are exhausted,

@@ -144,13 +144,21 @@ def make_tiered_cycler(tiers: list[list[str]], threshold: int) -> dict:
    Failure counter resets on each escalation — the new tier gets a fresh start.
    Once exhausted, further record_failure() calls are no-ops.

    Per-proxy dead tracking (when proxy_failure_limit > 0):
    Individual proxies are marked dead after proxy_failure_limit failures and
    skipped by next_proxy(). If all proxies in the active tier are dead,
    next_proxy() auto-escalates to the next tier. Both mechanisms coexist:
    per-proxy dead tracking removes broken individuals; tier-level threshold
    catches systemic failure even before any single proxy hits the limit.

    Returns a dict of callables:
    next_proxy() -> str | None — URL from the active tier, or None
    record_success() -> None — resets consecutive failure counter
    record_failure() -> bool — True if just escalated to next tier
    next_proxy() -> str | None — URL from active tier (skips dead), or None
    record_success(proxy_url=None) -> None — resets consecutive failure counter
    record_failure(proxy_url=None) -> bool — True if just escalated to next tier
    is_exhausted() -> bool — True if all tiers exhausted
    active_tier_index() -> int — 0-based index of current tier
    tier_count() -> int — total number of tiers
    dead_proxy_count() -> int — number of individual proxies marked dead

    Edge cases:
    Empty tiers list: next_proxy() always returns None, is_exhausted() True.

@@ -158,28 +166,75 @@ def make_tiered_cycler(tiers: list[list[str]], threshold: int) -> dict:
    """
    assert threshold > 0, f"threshold must be positive, got {threshold}"
    assert isinstance(tiers, list), f"tiers must be a list, got {type(tiers)}"
    assert proxy_failure_limit >= 0, f"proxy_failure_limit must be >= 0, got {proxy_failure_limit}"

    lock = threading.Lock()
    cycles = [itertools.cycle(t) for t in tiers]
    state = {
        "active_tier": 0,
        "consecutive_failures": 0,
        "proxy_failure_counts": {},  # proxy_url -> int
        "dead_proxies": set(),  # proxy URLs marked dead
    }

    def next_proxy() -> str | None:
        with lock:
            idx = state["active_tier"]
            if idx >= len(cycles):
                return None
            return next(cycles[idx])
            # Try each remaining tier (bounded: at most len(tiers) escalations)
            for _ in range(len(tiers) + 1):
                idx = state["active_tier"]
                if idx >= len(cycles):
                    return None

    def record_success() -> None:
                tier_proxies = tiers[idx]
                tier_len = len(tier_proxies)

                # Find a live proxy in this tier (bounded: try each proxy at most once)
                for _ in range(tier_len):
                    candidate = next(cycles[idx])
                    if candidate not in state["dead_proxies"]:
                        return candidate

                # All proxies in this tier are dead — auto-escalate
                state["consecutive_failures"] = 0
                state["active_tier"] += 1
                new_idx = state["active_tier"]
                if new_idx < len(tiers):
                    logger.warning(
                        "All proxies in tier %d are dead — auto-escalating to tier %d/%d",
                        idx + 1,
                        new_idx + 1,
                        len(tiers),
                    )
                else:
                    logger.error(
                        "All proxies in all %d tier(s) are dead — no more fallbacks",
                        len(tiers),
                    )

            return None  # safety fallback

    def record_success(proxy_url: str | None = None) -> None:
        with lock:
            state["consecutive_failures"] = 0
            if proxy_url is not None:
                state["proxy_failure_counts"][proxy_url] = 0

    def record_failure() -> bool:
    def record_failure(proxy_url: str | None = None) -> bool:
        """Increment failure counter. Returns True if just escalated to next tier."""
        with lock:
            # Per-proxy dead tracking (additional to tier-level circuit breaker)
            if proxy_url is not None and proxy_failure_limit > 0:
                count = state["proxy_failure_counts"].get(proxy_url, 0) + 1
                state["proxy_failure_counts"][proxy_url] = count
                if count >= proxy_failure_limit and proxy_url not in state["dead_proxies"]:
                    state["dead_proxies"].add(proxy_url)
                    logger.warning(
                        "Proxy %s marked dead after %d consecutive failures",
                        proxy_url,
                        count,
                    )

            # Tier-level circuit breaker (existing behavior)
            idx = state["active_tier"]
            if idx >= len(tiers):
                # Already exhausted — no-op

@@ -219,6 +274,10 @@ def make_tiered_cycler(tiers: list[list[str]], threshold: int) -> dict:
    def tier_count() -> int:
        return len(tiers)

    def dead_proxy_count() -> int:
        with lock:
            return len(state["dead_proxies"])

    return {
        "next_proxy": next_proxy,
        "record_success": record_success,

@@ -226,4 +285,5 @@ def make_tiered_cycler(tiers: list[list[str]], threshold: int) -> dict:
        "is_exhausted": is_exhausted,
        "active_tier_index": active_tier_index,
        "tier_count": tier_count,
        "dead_proxy_count": dead_proxy_count,
    }

@@ -19,8 +19,10 @@
-- 4. Country-level income (global fallback from stg_income / ilc_di03)
--
-- Distance calculations use ST_Distance_Sphere (DuckDB spatial extension).
-- A bounding-box pre-filter (~0.5°, ≈55km) reduces the cross-join before the
-- exact sphere distance is computed.
-- Spatial joins use BETWEEN predicates (not ABS()) to enable DuckDB's IEJoin
-- (interval join) optimization: O((N+M) log M) vs O(N×M) nested-loop.
-- Country pre-filters restrict the left side to ~20K rows for padel/tennis CTEs
-- (~8 countries each), down from ~140K global locations.

MODEL (
  name foundation.dim_locations,

@@ -147,6 +149,8 @@ padel_courts AS (
  WHERE lat IS NOT NULL AND lon IS NOT NULL
),
-- Nearest padel court distance per location (bbox pre-filter → exact sphere distance)
-- BETWEEN enables DuckDB IEJoin (O((N+M) log M)) vs ABS() nested-loop (O(N×M)).
-- Country pre-filter reduces left side from ~140K to ~20K rows (padel is ~8 countries).
nearest_padel AS (
  SELECT
    l.geoname_id,

@@ -158,9 +162,12 @@ nearest_padel AS (
    ) AS nearest_padel_court_km
  FROM locations l
  JOIN padel_courts p
    -- ~55km bounding box pre-filter to limit cross-join before sphere calc
    ON ABS(l.lat - p.lat) < 0.5
    AND ABS(l.lon - p.lon) < 0.5
    -- ~55km bounding box pre-filter; BETWEEN triggers IEJoin optimization
    ON l.lat BETWEEN p.lat - 0.5 AND p.lat + 0.5
    AND l.lon BETWEEN p.lon - 0.5 AND p.lon + 0.5
  WHERE l.country_code IN (
    SELECT DISTINCT country_code FROM padel_courts WHERE country_code IS NOT NULL
  )
  GROUP BY l.geoname_id
),
-- Padel venues within 5km of each location (counts as "local padel supply")

@@ -170,24 +177,35 @@ padel_local AS (
    COUNT(*) AS padel_venue_count
  FROM locations l
  JOIN padel_courts p
    ON ABS(l.lat - p.lat) < 0.05 -- ~5km bbox pre-filter
    AND ABS(l.lon - p.lon) < 0.05
  WHERE ST_Distance_Sphere(
    -- ~5km bbox pre-filter; BETWEEN triggers IEJoin optimization
    ON l.lat BETWEEN p.lat - 0.05 AND p.lat + 0.05
    AND l.lon BETWEEN p.lon - 0.05 AND p.lon + 0.05
  WHERE l.country_code IN (
    SELECT DISTINCT country_code FROM padel_courts WHERE country_code IS NOT NULL
  )
  AND ST_Distance_Sphere(
    ST_Point(l.lon, l.lat),
    ST_Point(p.lon, p.lat)
  ) / 1000.0 <= 5.0
  GROUP BY l.geoname_id
),
-- Tennis courts within 25km of each location (sports culture proxy)
-- Country pre-filter reduces left side from ~140K to ~20K rows (tennis courts are European only).
tennis_nearby AS (
  SELECT
    l.geoname_id,
    COUNT(*) AS tennis_courts_within_25km
  FROM locations l
  JOIN staging.stg_tennis_courts t
    ON ABS(l.lat - t.lat) < 0.23 -- ~25km bbox pre-filter
    AND ABS(l.lon - t.lon) < 0.23
  WHERE ST_Distance_Sphere(
    -- ~25km bbox pre-filter; BETWEEN triggers IEJoin optimization
    ON l.lat BETWEEN t.lat - 0.23 AND t.lat + 0.23
    AND l.lon BETWEEN t.lon - 0.23 AND t.lon + 0.23
  WHERE l.country_code IN (
    SELECT DISTINCT country_code
    FROM staging.stg_tennis_courts
    WHERE country_code IS NOT NULL
  )
  AND ST_Distance_Sphere(
    ST_Point(l.lon, l.lat),
    ST_Point(t.lon, t.lat)
  ) / 1000.0 <= 25.0

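A quick way to verify the IEJoin claim from the model comments above, assuming the `duckdb` Python client: EXPLAIN should show an interval-join operator for the BETWEEN form but a nested-loop style plan for the ABS() form. Table and column names here are illustrative, not the model's real tables:

```python
import duckdb

con = duckdb.connect()
con.sql("CREATE TABLE a AS SELECT random() * 90 AS lat FROM range(1000)")
con.sql("CREATE TABLE b AS SELECT random() * 90 AS lat FROM range(1000)")

# BETWEEN exposes the interval condition, so the optimizer can pick IEJoin
print(con.sql(
    "EXPLAIN SELECT count(*) FROM a JOIN b "
    "ON a.lat BETWEEN b.lat - 0.5 AND b.lat + 0.5"
))

# Wrapping the condition in ABS() hides the interval and forces a slower plan
print(con.sql(
    "EXPLAIN SELECT count(*) FROM a JOIN b ON ABS(a.lat - b.lat) < 0.5"
))
```
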
@@ -49,7 +49,7 @@ _LANDING_DIR = os.environ.get("LANDING_DIR", "data/landing")
_SERVING_DUCKDB_PATH = os.environ.get("SERVING_DUCKDB_PATH", "data/analytics.duckdb")

# Repo root: web/src/padelnomics/admin/ → up 4 levels
_REPO_ROOT = Path(__file__).resolve().parents[5]
_REPO_ROOT = Path(__file__).resolve().parents[4]
_WORKFLOWS_TOML = _REPO_ROOT / "infra" / "supervisor" / "workflows.toml"

# A "running" row older than this is considered stale/crashed.

@@ -169,7 +169,6 @@ async def pseo_generate_gaps(slug: str):
        "template_slug": slug,
        "start_date": date.today().isoformat(),
        "articles_per_day": 500,
        "limit": 500,
    })
    await flash(
        f"Queued generation for {len(gaps)} missing articles in '{config['name']}'.",

@@ -1865,7 +1865,7 @@ async def template_preview(slug: str, row_key: str):
@csrf_protect
async def template_generate(slug: str):
    """Generate articles from template + DuckDB data."""
    from ..content import fetch_template_data, load_template
    from ..content import count_template_data, load_template

    try:
        config = load_template(slug)

@@ -1873,8 +1873,7 @@ async def template_generate(slug: str):
        await flash("Template not found.", "error")
        return redirect(url_for("admin.templates"))

    data_rows = await fetch_template_data(config["data_table"], limit=501)
    row_count = len(data_rows)
    row_count = await count_template_data(config["data_table"])

    if request.method == "POST":
        form = await request.form

@@ -1888,7 +1887,6 @@ async def template_generate(slug: str):
            "template_slug": slug,
            "start_date": start_date.isoformat(),
            "articles_per_day": articles_per_day,
            "limit": 500,
        })
        await flash(
            f"Article generation queued for '{config['name']}'. "

@@ -1923,7 +1921,6 @@ async def template_regenerate(slug: str):
        "template_slug": slug,
        "start_date": date.today().isoformat(),
        "articles_per_day": 500,
        "limit": 500,
    })
    await flash("Regeneration queued. The worker will process it in the background.", "success")
    return redirect(url_for("admin.template_detail", slug=slug))

@@ -2729,7 +2726,6 @@ async def rebuild_all():
            "template_slug": t["slug"],
            "start_date": date.today().isoformat(),
            "articles_per_day": 500,
            "limit": 500,
        })

    # Manual articles still need inline rebuild

@@ -3037,6 +3033,7 @@ async def outreach():
        current_search=search,
        current_follow_up=follow_up,
        page=page,
        outreach_email=EMAIL_ADDRESSES["outreach"],
    )

@@ -40,8 +40,10 @@
.admin-subnav {
  display: flex; align-items: stretch; padding: 0 2rem;
  background: #fff; border-bottom: 1px solid #E2E8F0;
  flex-shrink: 0; overflow-x: auto; gap: 0;
  flex-shrink: 0; overflow-x: auto; overflow-y: hidden; gap: 0;
  scrollbar-width: none;
}
.admin-subnav::-webkit-scrollbar { display: none; }
.admin-subnav a {
  display: flex; align-items: center; gap: 5px;
  padding: 0 1px; margin: 0 13px 0 0; height: 42px;

@@ -3,6 +3,19 @@

{% block title %}Admin Dashboard - {{ config.APP_NAME }}{% endblock %}

{% block admin_head %}
<style>
.funnel-grid {
  display: grid;
  grid-template-columns: repeat(2, 1fr);
  gap: 0.75rem;
}
@media (min-width: 768px) {
  .funnel-grid { grid-template-columns: repeat(5, 1fr); }
}
</style>
{% endblock %}

{% block admin_content %}
<header class="flex justify-between items-center mb-8">
  <div>

@@ -47,7 +60,7 @@

<!-- Lead Funnel -->
<p class="text-xs font-semibold text-slate uppercase tracking-wider mb-2">Lead Funnel</p>
<div style="display:grid;grid-template-columns:repeat(5,1fr);gap:0.75rem" class="mb-8">
<div class="funnel-grid mb-8">
  <div class="card text-center border-l-4 border-l-electric" style="padding:0.75rem">
    <p class="text-xs text-slate">Planner Users</p>
    <p class="text-xl font-bold text-navy">{{ stats.planner_users }}</p>

@@ -72,7 +85,7 @@

<!-- Supplier Stats -->
<p class="text-xs font-semibold text-slate uppercase tracking-wider mb-2">Supplier Funnel</p>
<div style="display:grid;grid-template-columns:repeat(5,1fr);gap:0.75rem" class="mb-8">
<div class="funnel-grid mb-8">
  <div class="card text-center border-l-4 border-l-accent" style="padding:0.75rem">
    <p class="text-xs text-slate">Claimed Suppliers</p>
    <p class="text-xl font-bold text-navy">{{ stats.suppliers_claimed }}</p>

@@ -2,13 +2,30 @@
{% set admin_page = "outreach" %}
{% block title %}Outreach Pipeline - Admin - {{ config.APP_NAME }}{% endblock %}

{% block admin_head %}
<style>
.pipeline-status-grid {
  display: grid;
  grid-template-columns: repeat(2, 1fr);
  gap: 0.75rem;
  margin-bottom: 1.5rem;
}
@media (min-width: 640px) {
  .pipeline-status-grid { grid-template-columns: repeat(3, 1fr); }
}
@media (min-width: 1024px) {
  .pipeline-status-grid { grid-template-columns: repeat(6, 1fr); }
}
</style>
{% endblock %}

{% block admin_content %}
<header class="flex justify-between items-center mb-6">
  <div>
    <h1 class="text-2xl">Outreach</h1>
    <p class="text-sm text-slate mt-1">
      {{ pipeline.total }} supplier{{ 's' if pipeline.total != 1 else '' }} in pipeline
      · Sending domain: <span class="mono text-xs">hello.padelnomics.io</span>
      · Sending from: <span class="mono text-xs">{{ outreach_email }}</span>
    </p>
  </div>
  <div class="flex gap-2">

@@ -18,7 +35,7 @@
</header>

<!-- Pipeline cards -->
<div style="display:grid;grid-template-columns:repeat(6,1fr);gap:0.75rem;margin-bottom:1.5rem">
<div class="pipeline-status-grid">
  {% set status_colors = {
    'prospect': '#E2E8F0',
    'contacted': '#DBEAFE',

@@ -1,5 +1,6 @@
{% if emails %}
<div class="card">
  <div style="overflow-x:auto">
  <table class="table">
    <thead>
      <tr>

@@ -38,6 +39,7 @@
      {% endfor %}
    </tbody>
  </table>
  </div>
</div>
{% else %}
<div class="card text-center" style="padding:2rem">

@@ -25,6 +25,7 @@

{% if leads %}
<div class="card">
  <div style="overflow-x:auto">
  <table class="table">
    <thead>
      <tr>

@@ -58,6 +59,7 @@
      {% endfor %}
    </tbody>
  </table>
  </div>
</div>

<!-- Pagination -->

@@ -1,5 +1,6 @@
{% if suppliers %}
<div class="card">
  <div style="overflow-x:auto">
  <table class="table">
    <thead>
      <tr>

@@ -19,6 +20,7 @@
      {% endfor %}
    </tbody>
  </table>
  </div>
</div>
{% else %}
<div class="card text-center" style="padding:2rem">

@@ -57,7 +57,7 @@
</div>

<!-- Two-column row: Serving Freshness + Landing Zone -->
<div style="display:grid;grid-template-columns:1fr 1fr;gap:1rem">
<div class="pipeline-two-col">

  <!-- Serving Freshness -->
  <div class="card">

@@ -68,6 +68,7 @@
    </p>
    {% endif %}
    {% if serving_tables %}
    <div style="overflow-x:auto">
    <table class="table" style="font-size:0.8125rem">
      <thead>
        <tr>

@@ -86,6 +87,7 @@
        {% endfor %}
      </tbody>
    </table>
    </div>
    {% else %}
    <p class="text-sm text-slate">No serving tables found — run the pipeline first.</p>
    {% endif %}

@@ -99,6 +101,7 @@
      </span>
    </p>
    {% if landing_stats %}
    <div style="overflow-x:auto">
    <table class="table" style="font-size:0.8125rem">
      <thead>
        <tr>

@@ -119,6 +122,7 @@
        {% endfor %}
      </tbody>
    </table>
    </div>
    {% else %}
    <p class="text-sm text-slate">
      Landing zone empty or not found at <code>data/landing</code>.

@@ -1,5 +1,6 @@
{% if suppliers %}
<div class="card">
  <div style="overflow-x:auto">
  <table class="table">
    <thead>
      <tr>

@@ -47,6 +48,7 @@
      {% endfor %}
    </tbody>
  </table>
  </div>
</div>
{% else %}
<div class="card text-center" style="padding:2rem">

@@ -4,8 +4,18 @@

{% block admin_head %}
<style>
.pipeline-stat-grid {
  display: grid;
  grid-template-columns: repeat(2, 1fr);
  gap: 0.75rem;
}
@media (min-width: 768px) {
  .pipeline-stat-grid { grid-template-columns: repeat(4, 1fr); }
}

.pipeline-tabs {
  display: flex; gap: 0; border-bottom: 2px solid #E2E8F0; margin-bottom: 1.5rem;
  overflow-x: auto; -webkit-overflow-scrolling: touch;
}
.pipeline-tabs button {
  padding: 0.625rem 1.25rem; font-size: 0.8125rem; font-weight: 600;

@@ -24,6 +34,15 @@
.status-dot.stale { background: #D97706; }
.status-dot.running { background: #3B82F6; }
.status-dot.pending { background: #CBD5E1; }

.pipeline-two-col {
  display: grid;
  grid-template-columns: 1fr;
  gap: 1rem;
}
@media (min-width: 640px) {
  .pipeline-two-col { grid-template-columns: 1fr 1fr; }
}
</style>
{% endblock %}

@@ -46,7 +65,7 @@
</header>

<!-- Health stat cards -->
<div style="display:grid;grid-template-columns:repeat(4,1fr);gap:0.75rem" class="mb-6">
<div class="pipeline-stat-grid mb-6">
  <div class="card text-center" style="padding:0.875rem">
    <p class="text-xs text-slate">Total Runs</p>
    <p class="text-2xl font-bold text-navy metric">{{ summary.total | default(0) }}</p>

@@ -123,17 +123,19 @@ async def get_table_columns(data_table: str) -> list[dict]:
async def fetch_template_data(
    data_table: str,
    order_by: str | None = None,
    limit: int = 500,
    limit: int = 0,
) -> list[dict]:
    """Fetch all rows from a DuckDB serving table."""
    """Fetch rows from a DuckDB serving table. limit=0 means all rows."""
    assert "." in data_table, "data_table must be schema-qualified"
    _validate_table_name(data_table)

    order_clause = f"ORDER BY {order_by} DESC" if order_by else ""
    return await fetch_analytics(
        f"SELECT * FROM {data_table} {order_clause} LIMIT ?",
        [limit],
    )
    if limit:
        return await fetch_analytics(
            f"SELECT * FROM {data_table} {order_clause} LIMIT ?",
            [limit],
        )
    return await fetch_analytics(f"SELECT * FROM {data_table} {order_clause}")


async def count_template_data(data_table: str) -> int:

@@ -290,7 +292,7 @@ async def generate_articles(
    start_date: date,
    articles_per_day: int,
    *,
    limit: int = 500,
    limit: int = 0,
    base_url: str = "https://padelnomics.io",
    task_id: int | None = None,
) -> int:

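A small sketch of how callers interact with the changed defaults: `limit=0` now means all rows, and counting no longer requires fetching. The module path is inferred from the `from ..content import ...` lines in the routes above, so treat it as an assumption:

```python
from padelnomics.content import count_template_data, fetch_template_data


async def template_summary(data_table: str) -> tuple[int, list[dict]]:
    # Cheap count for the admin view: no row data is transferred
    row_count = await count_template_data(data_table)
    # An explicit limit still works for a bounded preview
    preview_rows = await fetch_template_data(data_table, limit=10)
    # limit=0 (the new default) would return every row, e.g. for full generation:
    # all_rows = await fetch_template_data(data_table)
    return row_count, preview_rows
```
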
@@ -218,9 +218,7 @@
.nav-bar[data-navopen="true"] .nav-mobile {
  display: flex;
}
.nav-mobile a,
.nav-mobile button.nav-auth-btn,
.nav-mobile a.nav-auth-btn {
.nav-mobile a:not(.nav-auth-btn) {
  display: block;
  padding: 0.625rem 0;
  border-bottom: 1px solid #F1F5F9;

@@ -230,15 +228,18 @@
  text-decoration: none;
  transition: color 0.15s;
}
.nav-mobile a:last-child { border-bottom: none; }
.nav-mobile a:hover { color: #1D4ED8; }
.nav-mobile a:not(.nav-auth-btn):last-child { border-bottom: none; }
.nav-mobile a:not(.nav-auth-btn):hover { color: #1D4ED8; }
/* nav-auth-btn in mobile menu: override block style, keep button colours */
.nav-mobile a.nav-auth-btn,
.nav-mobile button.nav-auth-btn {
  display: inline-flex;
  margin-top: 0.5rem;
  padding: 6px 16px;
  border-bottom: none;
  width: auto;
  align-self: flex-start;
  color: #fff;
}
.nav-mobile .nav-mobile-section {
  font-size: 0.6875rem;

@@ -745,7 +745,7 @@ async def handle_generate_articles(payload: dict) -> None:
    slug = payload["template_slug"]
    start_date = date_cls.fromisoformat(payload["start_date"])
    articles_per_day = payload.get("articles_per_day", 3)
    limit = payload.get("limit", 500)
    limit = payload.get("limit", 0)
    task_id = payload.get("_task_id")

    count = await generate_articles(

@@ -500,3 +500,131 @@ class TestTieredCyclerNTier:
            t.join()

        assert errors == [], f"Thread safety errors: {errors}"


class TestTieredCyclerDeadProxyTracking:
    """Per-proxy dead tracking: individual proxies marked dead are skipped."""

    def test_dead_proxy_skipped_in_next_proxy(self):
        """After a proxy hits the failure limit it is never returned again."""
        tiers = [["http://dead", "http://live"]]
        cycler = make_tiered_cycler(tiers, threshold=10, proxy_failure_limit=1)
        # Mark http://dead as dead
        cycler["record_failure"]("http://dead")
        # next_proxy must always return the live one
        for _ in range(6):
            assert cycler["next_proxy"]() == "http://live"

    def test_dead_proxy_count_increments(self):
        tiers = [["http://a", "http://b", "http://c"]]
        cycler = make_tiered_cycler(tiers, threshold=10, proxy_failure_limit=2)
        assert cycler["dead_proxy_count"]() == 0
        cycler["record_failure"]("http://a")
        assert cycler["dead_proxy_count"]() == 0  # only 1 failure, limit is 2
        cycler["record_failure"]("http://a")
        assert cycler["dead_proxy_count"]() == 1
        cycler["record_failure"]("http://b")
        cycler["record_failure"]("http://b")
        assert cycler["dead_proxy_count"]() == 2

    def test_auto_escalates_when_all_proxies_in_tier_dead(self):
        """If all proxies in the active tier are dead, next_proxy auto-escalates."""
        tiers = [["http://t0a", "http://t0b"], ["http://t1"]]
        cycler = make_tiered_cycler(tiers, threshold=10, proxy_failure_limit=1)
        # Kill all proxies in tier 0
        cycler["record_failure"]("http://t0a")
        cycler["record_failure"]("http://t0b")
        # next_proxy should transparently escalate and return tier 1 proxy
        assert cycler["next_proxy"]() == "http://t1"

    def test_auto_escalates_updates_active_tier_index(self):
        """Auto-escalation via dead proxies bumps active_tier_index."""
        tiers = [["http://t0a", "http://t0b"], ["http://t1"]]
        cycler = make_tiered_cycler(tiers, threshold=10, proxy_failure_limit=1)
        cycler["record_failure"]("http://t0a")
        cycler["record_failure"]("http://t0b")
        cycler["next_proxy"]()  # triggers auto-escalation
        assert cycler["active_tier_index"]() == 1

    def test_returns_none_when_all_tiers_exhausted_by_dead_proxies(self):
        tiers = [["http://t0"], ["http://t1"]]
        cycler = make_tiered_cycler(tiers, threshold=10, proxy_failure_limit=1)
        cycler["record_failure"]("http://t0")
        cycler["record_failure"]("http://t1")
        assert cycler["next_proxy"]() is None

    def test_record_success_resets_per_proxy_counter(self):
        """Success resets the failure count so proxy is not marked dead."""
        tiers = [["http://a", "http://b"]]
        cycler = make_tiered_cycler(tiers, threshold=10, proxy_failure_limit=3)
        # Two failures — not dead yet
        cycler["record_failure"]("http://a")
        cycler["record_failure"]("http://a")
        assert cycler["dead_proxy_count"]() == 0
        # Success resets the counter
        cycler["record_success"]("http://a")
        # Two more failures — still not dead (counter was reset)
        cycler["record_failure"]("http://a")
        cycler["record_failure"]("http://a")
        assert cycler["dead_proxy_count"]() == 0
        # Third failure after reset — now dead
        cycler["record_failure"]("http://a")
        assert cycler["dead_proxy_count"]() == 1

    def test_dead_proxy_stays_dead_after_success(self):
        """Once marked dead, a proxy is not revived by record_success."""
        tiers = [["http://a", "http://b"]]
        cycler = make_tiered_cycler(tiers, threshold=10, proxy_failure_limit=1)
        cycler["record_failure"]("http://a")
        assert cycler["dead_proxy_count"]() == 1
        cycler["record_success"]("http://a")
        assert cycler["dead_proxy_count"]() == 1
        # http://a is still skipped
        for _ in range(6):
            assert cycler["next_proxy"]() == "http://b"

    def test_backward_compat_no_proxy_url(self):
        """Calling record_failure/record_success without proxy_url still works."""
        tiers = [["http://t0"], ["http://t1"]]
        cycler = make_tiered_cycler(tiers, threshold=2)
        cycler["record_failure"]()
        cycler["record_failure"]()  # escalates
        assert cycler["active_tier_index"]() == 1
        cycler["record_success"]()
        assert cycler["dead_proxy_count"]() == 0  # no per-proxy tracking happened

    def test_proxy_failure_limit_zero_disables_per_proxy_tracking(self):
        """proxy_failure_limit=0 disables per-proxy dead tracking entirely."""
        tiers = [["http://a", "http://b"]]
        cycler = make_tiered_cycler(tiers, threshold=10, proxy_failure_limit=0)
        for _ in range(100):
            cycler["record_failure"]("http://a")
        assert cycler["dead_proxy_count"]() == 0

    def test_thread_safety_with_per_proxy_tracking(self):
        """Concurrent record_failure(proxy_url) calls don't corrupt state."""
        import threading as _threading

        tiers = [["http://t0a", "http://t0b", "http://t0c"], ["http://t1a"]]
        cycler = make_tiered_cycler(tiers, threshold=50, proxy_failure_limit=5)
        errors = []
        lock = _threading.Lock()

        def worker():
            try:
                for _ in range(30):
                    p = cycler["next_proxy"]()
                    if p is not None:
                        cycler["record_failure"](p)
                        cycler["record_success"](p)
            except Exception as e:
                with lock:
                    errors.append(e)

        threads = [_threading.Thread(target=worker) for _ in range(10)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        assert errors == [], f"Thread safety errors: {errors}"