Files
padelnomics/web/src/padelnomics/seo/_bing.py
Deeman 5644a1ebf8 fix: replace datetime.utcnow() with utcnow()/utcnow_iso() across all source files
Migrates 15 source files from the deprecated datetime.utcnow() API.
Uses utcnow() for in-memory math and utcnow_iso() (strftime format)
for SQLite TEXT column writes to preserve lexicographic sort order.
Also fixes datetime.utcfromtimestamp() in seo/_bing.py.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-24 10:22:42 +01:00

143 lines
5.4 KiB
Python

"""Bing Webmaster Tools sync via REST API.
Uses an API key for auth. Fetches query stats and page stats.
"""
from datetime import UTC, datetime, timedelta
from urllib.parse import urlparse
import httpx
from ..core import config, execute, utcnow, utcnow_iso
_TIMEOUT_SECONDS = 30
def _normalize_url(full_url: str) -> str:
"""Strip a full URL to just the path."""
parsed = urlparse(full_url)
return parsed.path or "/"
def _parse_bing_date(date_str: str) -> datetime | None:
    """Parse Bing's "/Date(<ms since epoch>)/" timestamp; None if not that shape."""
    if "/Date(" not in date_str:
        return None
    millis = int(date_str.split("(")[1].split(")")[0])
    return datetime.fromtimestamp(millis / 1000, tz=UTC)


def _unwrap_entries(payload: object) -> list:
    """Unwrap Bing's {"d": [...]} JSON envelope; empty list on unexpected shape."""
    entries = payload.get("d", []) if isinstance(payload, dict) else payload
    return entries if isinstance(entries, list) else []


async def sync_bing(days_back: int = 3, timeout_seconds: int = _TIMEOUT_SECONDS) -> int:
    """Sync Bing Webmaster query stats into seo_search_metrics. Returns rows synced.

    Fetches GetQueryStats (per-query rows) and GetPageStats (per-page rows)
    from the Bing Webmaster REST API, upserts entries newer than the
    ``days_back`` cutoff into seo_search_metrics, and records the run in
    seo_sync_log — 'success' on completion, or 'failed' with the error text
    before re-raising.

    Args:
        days_back: Only entries dated within this many days are kept (1-90).
        timeout_seconds: Per-request HTTP timeout in seconds (1-120).

    Returns:
        Number of rows written to seo_search_metrics, or 0 when Bing
        credentials are not configured.

    Raises:
        ValueError: If ``days_back`` or ``timeout_seconds`` is out of range.
    """
    # Explicit raises instead of assert: asserts are stripped under `python -O`,
    # which would silently drop these guards.
    if not 1 <= days_back <= 90:
        raise ValueError("days_back must be 1-90")
    if not 1 <= timeout_seconds <= 120:
        raise ValueError("timeout_seconds must be 1-120")
    if not config.BING_WEBMASTER_API_KEY or not config.BING_SITE_URL:
        return 0  # Bing not configured — skip silently
    started_at = utcnow()
    try:
        rows_synced = 0
        cutoff = utcnow() - timedelta(days=days_back)
        # Both endpoints take the same auth/site parameters.
        params = {
            "apikey": config.BING_WEBMASTER_API_KEY,
            "siteUrl": config.BING_SITE_URL,
        }
        async with httpx.AsyncClient(timeout=timeout_seconds) as client:
            # --- Query-level stats ---
            response = await client.get(
                "https://ssl.bing.com/webmaster/api.svc/json/GetQueryStats",
                params=params,
            )
            response.raise_for_status()
            for entry in _unwrap_entries(response.json()):
                entry_date = _parse_bing_date(entry.get("Date", ""))
                if entry_date is None or entry_date < cutoff:
                    continue
                await execute(
                    """INSERT OR REPLACE INTO seo_search_metrics
                    (source, metric_date, page_url, query, country, device,
                    clicks, impressions, ctr, position_avg)
                    VALUES ('bing', ?, '/', ?, NULL, NULL, ?, ?, ?, ?)""",
                    (
                        entry_date.strftime("%Y-%m-%d"),
                        entry.get("Query", ""),
                        entry.get("Clicks", 0),
                        entry.get("Impressions", 0),
                        entry.get("AvgCTR", 0.0),
                        entry.get("AvgClickPosition", 0.0),
                    ),
                )
                rows_synced += 1
            # --- Page-level stats (no per-query breakdown; ctr/position unknown) ---
            page_response = await client.get(
                "https://ssl.bing.com/webmaster/api.svc/json/GetPageStats",
                params=params,
            )
            page_response.raise_for_status()
            for entry in _unwrap_entries(page_response.json()):
                entry_date = _parse_bing_date(entry.get("Date", ""))
                if entry_date is None or entry_date < cutoff:
                    continue
                await execute(
                    """INSERT OR REPLACE INTO seo_search_metrics
                    (source, metric_date, page_url, query, country, device,
                    clicks, impressions, ctr, position_avg)
                    VALUES ('bing', ?, ?, '', NULL, NULL, ?, ?, NULL, NULL)""",
                    (
                        entry_date.strftime("%Y-%m-%d"),
                        _normalize_url(entry.get("Url", "/")),
                        entry.get("Clicks", 0),
                        entry.get("Impressions", 0),
                    ),
                )
                rows_synced += 1
        duration_ms = int((utcnow() - started_at).total_seconds() * 1000)
        await execute(
            """INSERT INTO seo_sync_log
            (source, status, rows_synced, started_at, completed_at, duration_ms)
            VALUES ('bing', 'success', ?, ?, ?, ?)""",
            # started_at is formatted with the same strftime shape utcnow_iso()
            # writes, keeping the TEXT column lexicographically sortable.
            (rows_synced, started_at.strftime("%Y-%m-%dT%H:%M:%S"), utcnow_iso(), duration_ms),
        )
        return rows_synced
    except Exception as exc:
        duration_ms = int((utcnow() - started_at).total_seconds() * 1000)
        await execute(
            """INSERT INTO seo_sync_log
            (source, status, rows_synced, error, started_at, completed_at, duration_ms)
            VALUES ('bing', 'failed', 0, ?, ?, ?, ?)""",
            (str(exc), started_at.strftime("%Y-%m-%dT%H:%M:%S"), utcnow_iso(), duration_ms),
        )
        raise