scout: add German DSGVO text patterns + Usercentrics shadow DOM support
- German accept texts: Alle akzeptieren, Akzeptieren, Zustimmen, Einverstanden, etc.
- Usercentrics (shadow DOM) support — very common with German publishers (Bild, Spiegel, Focus, etc.); requires shadowRoot traversal because the button is not addressable by normal CSS selectors.
- Consentmanager selectors — another common German CMP.
- Note: the German sites tested (Spiegel, Zeit, finanzen.net, Bild) showed no banners because Pydoll reuses the existing Chrome user profile, which already has stored consents. Behaviour on sites without stored consent will be handled by the added patterns.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -111,6 +111,9 @@ _COOKIE_SELECTORS = [
|
|||||||
".cookie-law-accept",
|
".cookie-law-accept",
|
||||||
# Termly
|
# Termly
|
||||||
"#termly-code-snippet-support",
|
"#termly-code-snippet-support",
|
||||||
|
# Consentmanager (common in Germany)
|
||||||
|
"#cmpwrapper .cmpboxbtnyes",
|
||||||
|
"#cmpbox .cmptxt_btn_yes",
|
||||||
# Generic accept buttons (text-based fallback)
|
# Generic accept buttons (text-based fallback)
|
||||||
"button[id*='accept']",
|
"button[id*='accept']",
|
||||||
"button[class*='accept']",
|
"button[class*='accept']",
|
||||||
@@ -120,16 +123,32 @@ _COOKIE_SELECTORS = [
|
|||||||
"button[class*='consent']",
|
"button[class*='consent']",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Generic visible text patterns for cookie accept buttons
|
# Text patterns for cookie accept buttons — English + German (DSGVO)
|
||||||
_COOKIE_ACCEPT_TEXTS = [
|
_COOKIE_ACCEPT_TEXTS = [
|
||||||
|
# English
|
||||||
"Accept All", "Accept all", "Accept all cookies",
|
"Accept All", "Accept all", "Accept all cookies",
|
||||||
"Accept Cookies", "Accept cookies",
|
"Accept Cookies", "Accept cookies",
|
||||||
"I Accept", "I accept", "Accept",
|
"I Accept", "I accept", "Accept",
|
||||||
"Allow All", "Allow all",
|
"Allow All", "Allow all",
|
||||||
"Agree", "I Agree", "OK", "Got it",
|
"Agree", "I Agree", "OK", "Got it",
|
||||||
"Continue", "Dismiss",
|
"Continue", "Dismiss",
|
||||||
|
# German (DSGVO)
|
||||||
|
"Alle akzeptieren", "Akzeptieren", "Zustimmen",
|
||||||
|
"Alle zustimmen", "Einverstanden",
|
||||||
|
"Alle Cookies akzeptieren", "Cookies akzeptieren",
|
||||||
|
"Akzeptieren und weiter", "Weiter",
|
||||||
|
"Ich stimme zu", "OK, verstanden",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Usercentrics shadow DOM JS — common in German publishers (Bild, Spiegel, etc.)
|
||||||
|
# JavaScript snippet, built from adjacent string literals, that is passed to
# tab.execute_script() by _dismiss_cookie_banner.
# It looks up the '#usercentrics-root' host element; if that element exposes a
# shadowRoot, it searches inside it for '[data-testid="uc-accept-all-button"]'
# and clicks the button when found.
# The script has no `return`: its outcome is the completion value of the last
# expression statement evaluated (`true` after a click, `false` otherwise).
# NOTE(review): the caller tests the result with `if result:`. If
# execute_script returns a response wrapper (e.g. a dict) rather than the bare
# boolean, that check is always truthy — confirm and unwrap the value there.
# NOTE(review): `var host` / `var btn` are declared at script top level, so
# they presumably land in the page's global scope — confirm this is acceptable.
_USERCENTRICS_JS = (
|
||||||
|
"var host = document.querySelector('#usercentrics-root');"
|
||||||
|
"if (host && host.shadowRoot) {"
|
||||||
|
" var btn = host.shadowRoot.querySelector('[data-testid=\"uc-accept-all-button\"]');"
|
||||||
|
" if (btn) { btn.click(); true; } else { false; }"
|
||||||
|
"} else { false; }"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def _click_via_js(tab, selector: str) -> bool:
|
async def _click_via_js(tab, selector: str) -> bool:
|
||||||
"""Click an element via JS injection — bypasses pointer-events/z-index issues."""
|
"""Click an element via JS injection — bypasses pointer-events/z-index issues."""
|
||||||
@@ -150,7 +169,19 @@ async def _dismiss_cookie_banner(tab) -> bool:
|
|||||||
|
|
||||||
Uses JS injection (click_js / execute_script) as primary method since cookie
|
Uses JS injection (click_js / execute_script) as primary method since cookie
|
||||||
banners often have z-index/pointer-events issues that block Pydoll's mouse simulation.
|
banners often have z-index/pointer-events issues that block Pydoll's mouse simulation.
|
||||||
|
Covers: OneTrust, Cookiebot, CookieYes, Consentmanager, Usercentrics (shadow DOM),
|
||||||
|
and generic text patterns in English + German (DSGVO).
|
||||||
"""
|
"""
|
||||||
|
# Usercentrics (shadow DOM) — common in German publishers, requires special handling
|
||||||
|
try:
|
||||||
|
result = await tab.execute_script(_USERCENTRICS_JS)
|
||||||
|
if result:
|
||||||
|
await asyncio.sleep(0.8)
|
||||||
|
logger.info("Cookie banner dismissed via Usercentrics shadow DOM")
|
||||||
|
return True
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# Try CSS selectors via JS click (bypasses visibility/pointer-events issues)
|
# Try CSS selectors via JS click (bypasses visibility/pointer-events issues)
|
||||||
for selector in _COOKIE_SELECTORS:
|
for selector in _COOKIE_SELECTORS:
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user