scout: add German DSGVO text patterns + Usercentrics shadow DOM support
- German accept texts: Alle akzeptieren, Akzeptieren, Zustimmen, Einverstanden, etc.
- Usercentrics (shadow DOM) support — very common with German publishers (Bild, Spiegel, Focus, etc.); requires shadowRoot traversal because the accept button is not addressable by normal CSS selectors.
- Consentmanager selectors — another common German CMP.
- Note: the German sites tested (Spiegel, Zeit, finanzen.net, Bild) showed no banners because Pydoll reuses the existing Chrome user profile with stored consents. Behaviour on newly visited sites should be handled by the added patterns.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -111,6 +111,9 @@ _COOKIE_SELECTORS = [
|
||||
".cookie-law-accept",
|
||||
# Termly
|
||||
"#termly-code-snippet-support",
|
||||
# Consentmanager (common in Germany)
|
||||
"#cmpwrapper .cmpboxbtnyes",
|
||||
"#cmpbox .cmptxt_btn_yes",
|
||||
# Generic accept buttons (text-based fallback)
|
||||
"button[id*='accept']",
|
||||
"button[class*='accept']",
|
||||
@@ -120,16 +123,32 @@ _COOKIE_SELECTORS = [
|
||||
"button[class*='consent']",
|
||||
]
|
||||
|
||||
# Generic visible text patterns for cookie accept buttons
|
||||
# Text patterns for cookie accept buttons — English + German (DSGVO)
|
||||
_COOKIE_ACCEPT_TEXTS = [
|
||||
# English
|
||||
"Accept All", "Accept all", "Accept all cookies",
|
||||
"Accept Cookies", "Accept cookies",
|
||||
"I Accept", "I accept", "Accept",
|
||||
"Allow All", "Allow all",
|
||||
"Agree", "I Agree", "OK", "Got it",
|
||||
"Continue", "Dismiss",
|
||||
# German (DSGVO)
|
||||
"Alle akzeptieren", "Akzeptieren", "Zustimmen",
|
||||
"Alle zustimmen", "Einverstanden",
|
||||
"Alle Cookies akzeptieren", "Cookies akzeptieren",
|
||||
"Akzeptieren und weiter", "Weiter",
|
||||
"Ich stimme zu", "OK, verstanden",
|
||||
]
|
||||
|
||||
# JS snippet that clicks the Usercentrics accept-all button inside its shadow DOM — Usercentrics is common among German publishers (Bild, Spiegel, etc.)
|
||||
_USERCENTRICS_JS = (
|
||||
"var host = document.querySelector('#usercentrics-root');"
|
||||
"if (host && host.shadowRoot) {"
|
||||
" var btn = host.shadowRoot.querySelector('[data-testid=\"uc-accept-all-button\"]');"
|
||||
" if (btn) { btn.click(); true; } else { false; }"
|
||||
"} else { false; }"
|
||||
)
|
||||
|
||||
|
||||
async def _click_via_js(tab, selector: str) -> bool:
|
||||
"""Click an element via JS injection — bypasses pointer-events/z-index issues."""
|
||||
@@ -150,7 +169,19 @@ async def _dismiss_cookie_banner(tab) -> bool:
|
||||
|
||||
Uses JS injection (click_js / execute_script) as primary method since cookie
|
||||
banners often have z-index/pointer-events issues that block Pydoll's mouse simulation.
|
||||
Covers: OneTrust, Cookiebot, CookieYes, Consentmanager, Usercentrics (shadow DOM),
|
||||
and generic text patterns in English + German (DSGVO).
|
||||
"""
|
||||
# Usercentrics (shadow DOM) — common in German publishers, requires special handling
|
||||
try:
|
||||
result = await tab.execute_script(_USERCENTRICS_JS)
|
||||
if result:
|
||||
await asyncio.sleep(0.8)
|
||||
logger.info("Cookie banner dismissed via Usercentrics shadow DOM")
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Try CSS selectors via JS click (bypasses visibility/pointer-events issues)
|
||||
for selector in _COOKIE_SELECTORS:
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user