scout: add German DSGVO text patterns + Usercentrics shadow DOM support

- German accept texts: Alle akzeptieren, Akzeptieren, Zustimmen, Einverstanden, etc.
- Usercentrics (shadow DOM) support — very common with German publishers
  (Bild, Spiegel, Focus, etc.) — requires shadowRoot traversal, not addressable
  by normal CSS selectors
- Consentmanager selectors — another common German CMP
- Note: the German sites tested (Spiegel, Zeit, finanzen.net, Bild) showed no
  banners because Pydoll reuses the existing Chrome user profile, which already
  has stored consents. The added patterns are meant to handle sites for which
  no consent has been stored yet.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Deeman
2026-02-21 17:23:33 +01:00
parent ec7cfda605
commit ab9dc62dd6

View File

@@ -111,6 +111,9 @@ _COOKIE_SELECTORS = [
".cookie-law-accept", ".cookie-law-accept",
# Termly # Termly
"#termly-code-snippet-support", "#termly-code-snippet-support",
# Consentmanager (common in Germany)
"#cmpwrapper .cmpboxbtnyes",
"#cmpbox .cmptxt_btn_yes",
# Generic accept buttons (text-based fallback) # Generic accept buttons (text-based fallback)
"button[id*='accept']", "button[id*='accept']",
"button[class*='accept']", "button[class*='accept']",
@@ -120,16 +123,32 @@ _COOKIE_SELECTORS = [
"button[class*='consent']", "button[class*='consent']",
] ]
# Visible button labels treated as "accept cookies" — English + German (DSGVO).
# Spelling/case variants are listed explicitly.
# NOTE(review): how these labels are matched (exact vs. substring,
# case-sensitive or not) is decided by the consumer, which is not visible
# here; short generic labels such as "OK", "Continue" and "Weiter" may also
# appear on non-consent buttons — confirm the matcher is scoped accordingly.
_COOKIE_ACCEPT_TEXTS = [
    # English
    "Accept All", "Accept all", "Accept all cookies",
    "Accept Cookies", "Accept cookies",
    "I Accept", "I accept", "Accept",
    "Allow All", "Allow all",
    "Agree", "I Agree", "OK", "Got it",
    "Continue", "Dismiss",
    # German (DSGVO)
    "Alle akzeptieren", "Akzeptieren", "Zustimmen",
    "Alle zustimmen", "Einverstanden",
    "Alle Cookies akzeptieren", "Cookies akzeptieren",
    "Akzeptieren und weiter", "Weiter",
    "Ich stimme zu", "OK, verstanden",
]
# JavaScript snippet for the Usercentrics consent banner. The banner is looked
# up through the shadowRoot of the `#usercentrics-root` host element, and the
# "accept all" button is located by its data-testid. The snippet's last
# evaluated expression is `true` when the button was found and clicked, and
# `false` otherwise (host missing, no shadow root, or no button).
_USERCENTRICS_JS = "".join(
    (
        "var host = document.querySelector('#usercentrics-root');",
        "if (host && host.shadowRoot) {",
        ' var btn = host.shadowRoot.querySelector(\'[data-testid="uc-accept-all-button"]\');',
        " if (btn) { btn.click(); true; } else { false; }",
        "} else { false; }",
    )
)
async def _click_via_js(tab, selector: str) -> bool: async def _click_via_js(tab, selector: str) -> bool:
"""Click an element via JS injection — bypasses pointer-events/z-index issues.""" """Click an element via JS injection — bypasses pointer-events/z-index issues."""
@@ -150,7 +169,19 @@ async def _dismiss_cookie_banner(tab) -> bool:
Uses JS injection (click_js / execute_script) as primary method since cookie Uses JS injection (click_js / execute_script) as primary method since cookie
banners often have z-index/pointer-events issues that block Pydoll's mouse simulation. banners often have z-index/pointer-events issues that block Pydoll's mouse simulation.
Covers: OneTrust, Cookiebot, CookieYes, Consentmanager, Usercentrics (shadow DOM),
and generic text patterns in English + German (DSGVO).
""" """
# Usercentrics (shadow DOM) — common in German publishers, requires special handling
try:
result = await tab.execute_script(_USERCENTRICS_JS)
if result:
await asyncio.sleep(0.8)
logger.info("Cookie banner dismissed via Usercentrics shadow DOM")
return True
except Exception:
pass
# Try CSS selectors via JS click (bypasses visibility/pointer-events issues) # Try CSS selectors via JS click (bypasses visibility/pointer-events issues)
for selector in _COOKIE_SELECTORS: for selector in _COOKIE_SELECTORS:
try: try: