diff --git a/tools/scout/src/scout/browser.py b/tools/scout/src/scout/browser.py index ecb7888..4df75f7 100644 --- a/tools/scout/src/scout/browser.py +++ b/tools/scout/src/scout/browser.py @@ -111,6 +111,9 @@ _COOKIE_SELECTORS = [ ".cookie-law-accept", # Termly "#termly-code-snippet-support", + # Consentmanager (common in Germany) + "#cmpwrapper .cmpboxbtnyes", + "#cmpbox .cmptxt_btn_yes", # Generic accept buttons (text-based fallback) "button[id*='accept']", "button[class*='accept']", @@ -120,16 +123,32 @@ _COOKIE_SELECTORS = [ "button[class*='consent']", ] -# Generic visible text patterns for cookie accept buttons +# Text patterns for cookie accept buttons — English + German (DSGVO) _COOKIE_ACCEPT_TEXTS = [ + # English "Accept All", "Accept all", "Accept all cookies", "Accept Cookies", "Accept cookies", "I Accept", "I accept", "Accept", "Allow All", "Allow all", "Agree", "I Agree", "OK", "Got it", "Continue", "Dismiss", + # German (DSGVO) + "Alle akzeptieren", "Akzeptieren", "Zustimmen", + "Alle zustimmen", "Einverstanden", + "Alle Cookies akzeptieren", "Cookies akzeptieren", + "Akzeptieren und weiter", "Weiter", + "Ich stimme zu", "OK, verstanden", ] +# Usercentrics shadow DOM JS — common in German publishers (Bild, Spiegel, etc.) +_USERCENTRICS_JS = ( + "var host = document.querySelector('#usercentrics-root');" + "if (host && host.shadowRoot) {" + " var btn = host.shadowRoot.querySelector('[data-testid=\"uc-accept-all-button\"]');" + " if (btn) { btn.click(); true; } else { false; }" + "} else { false; }" +) + async def _click_via_js(tab, selector: str) -> bool: """Click an element via JS injection — bypasses pointer-events/z-index issues.""" @@ -150,7 +169,19 @@ async def _dismiss_cookie_banner(tab) -> bool: Uses JS injection (click_js / execute_script) as primary method since cookie banners often have z-index/pointer-events issues that block Pydoll's mouse simulation. 
+ Covers: OneTrust, Cookiebot, CookieYes, Consentmanager, Usercentrics (shadow DOM), + and generic text patterns in English + German (DSGVO). """ + # Usercentrics (shadow DOM) — common in German publishers, requires special handling. execute_script may hand back the raw CDP reply (an always-truthy dict), so unwrap result.result.value instead of testing the reply itself + try: + result = await tab.execute_script(_USERCENTRICS_JS) + if result is True or (isinstance(result, dict) and result.get("result", {}).get("result", {}).get("value") is True): + await asyncio.sleep(0.8) + logger.info("Cookie banner dismissed via Usercentrics shadow DOM") + return True + except Exception as exc: + logger.debug(f"Usercentrics shadow DOM probe failed: {exc!r}") + + # Try CSS selectors via JS click (bypasses visibility/pointer-events issues) for selector in _COOKIE_SELECTORS: try: