"""Bing Webmaster Tools sync via REST API. Uses an API key for auth. Fetches query stats and page stats. """ from datetime import UTC, datetime, timedelta from urllib.parse import urlparse import httpx from ..core import config, execute, utcnow, utcnow_iso _TIMEOUT_SECONDS = 30 def _normalize_url(full_url: str) -> str: """Strip a full URL to just the path.""" parsed = urlparse(full_url) return parsed.path or "/" async def sync_bing(days_back: int = 3, timeout_seconds: int = _TIMEOUT_SECONDS) -> int: """Sync Bing Webmaster query stats into seo_search_metrics. Returns rows synced.""" assert 1 <= days_back <= 90, "days_back must be 1-90" assert 1 <= timeout_seconds <= 120, "timeout_seconds must be 1-120" if not config.BING_WEBMASTER_API_KEY or not config.BING_SITE_URL: return 0 # Bing not configured — skip silently started_at = utcnow() try: rows_synced = 0 async with httpx.AsyncClient(timeout=timeout_seconds) as client: # Fetch query stats for the date range response = await client.get( "https://ssl.bing.com/webmaster/api.svc/json/GetQueryStats", params={ "apikey": config.BING_WEBMASTER_API_KEY, "siteUrl": config.BING_SITE_URL, }, ) response.raise_for_status() data = response.json() # Bing returns {"d": [{"Query": ..., "Date": ..., ...}, ...]} entries = data.get("d", []) if isinstance(data, dict) else data if not isinstance(entries, list): entries = [] cutoff = utcnow() - timedelta(days=days_back) for entry in entries: # Bing date format: "/Date(1708905600000)/" (ms since epoch) date_str = entry.get("Date", "") if "/Date(" in date_str: ms = int(date_str.split("(")[1].split(")")[0]) entry_date = datetime.fromtimestamp(ms / 1000, tz=UTC) else: continue if entry_date < cutoff: continue metric_date = entry_date.strftime("%Y-%m-%d") query = entry.get("Query", "") await execute( """INSERT OR REPLACE INTO seo_search_metrics (source, metric_date, page_url, query, country, device, clicks, impressions, ctr, position_avg) VALUES ('bing', ?, '/', ?, NULL, NULL, ?, ?, ?, ?)""", ( metric_date, query, entry.get("Clicks", 0), entry.get("Impressions", 0), entry.get("AvgCTR", 0.0), entry.get("AvgClickPosition", 0.0), ), ) rows_synced += 1 # Also fetch page-level stats page_response = await client.get( "https://ssl.bing.com/webmaster/api.svc/json/GetPageStats", params={ "apikey": config.BING_WEBMASTER_API_KEY, "siteUrl": config.BING_SITE_URL, }, ) page_response.raise_for_status() page_data = page_response.json() page_entries = page_data.get("d", []) if isinstance(page_data, dict) else page_data if not isinstance(page_entries, list): page_entries = [] for entry in page_entries: date_str = entry.get("Date", "") if "/Date(" in date_str: ms = int(date_str.split("(")[1].split(")")[0]) entry_date = datetime.fromtimestamp(ms / 1000, tz=UTC) else: continue if entry_date < cutoff: continue metric_date = entry_date.strftime("%Y-%m-%d") page_url = _normalize_url(entry.get("Url", "/")) await execute( """INSERT OR REPLACE INTO seo_search_metrics (source, metric_date, page_url, query, country, device, clicks, impressions, ctr, position_avg) VALUES ('bing', ?, ?, '', NULL, NULL, ?, ?, NULL, NULL)""", ( metric_date, page_url, entry.get("Clicks", 0), entry.get("Impressions", 0), ), ) rows_synced += 1 duration_ms = int((utcnow() - started_at).total_seconds() * 1000) await execute( """INSERT INTO seo_sync_log (source, status, rows_synced, started_at, completed_at, duration_ms) VALUES ('bing', 'success', ?, ?, ?, ?)""", (rows_synced, started_at.strftime("%Y-%m-%dT%H:%M:%S"), utcnow_iso(), duration_ms), ) return rows_synced except Exception as exc: duration_ms = int((utcnow() - started_at).total_seconds() * 1000) await execute( """INSERT INTO seo_sync_log (source, status, rows_synced, error, started_at, completed_at, duration_ms) VALUES ('bing', 'failed', 0, ?, ?, ?, ?)""", (str(exc), started_at.strftime("%Y-%m-%dT%H:%M:%S"), utcnow_iso(), duration_ms), ) raise