Migrates 15 source files from the deprecated datetime.utcnow() API. Uses utcnow() for in-memory math and utcnow_iso() (strftime format) for SQLite TEXT column writes to preserve lexicographic sort order. Also fixes datetime.utcfromtimestamp() in seo/_bing.py. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
143 lines
5.4 KiB
Python
143 lines
5.4 KiB
Python
"""Bing Webmaster Tools sync via REST API.
|
|
|
|
Uses an API key for auth. Fetches query stats and page stats.
|
|
"""
|
|
|
|
from datetime import UTC, datetime, timedelta
|
|
from urllib.parse import urlparse
|
|
|
|
import httpx
|
|
|
|
from ..core import config, execute, utcnow, utcnow_iso
|
|
|
|
_TIMEOUT_SECONDS = 30
|
|
|
|
|
|
def _normalize_url(full_url: str) -> str:
|
|
"""Strip a full URL to just the path."""
|
|
parsed = urlparse(full_url)
|
|
return parsed.path or "/"
|
|
|
|
|
|
def _parse_bing_date(date_str: str) -> datetime | None:
    """Parse Bing's "/Date(1708905600000)/" (ms since epoch) into an aware UTC datetime, or None."""
    if "/Date(" not in date_str:
        return None
    ms = int(date_str.split("(")[1].split(")")[0])
    return datetime.fromtimestamp(ms / 1000, tz=UTC)


def _extract_entries(data: object) -> list:
    """Unwrap Bing's {"d": [...]} response envelope, tolerating bare lists and malformed payloads."""
    entries = data.get("d", []) if isinstance(data, dict) else data
    return entries if isinstance(entries, list) else []


async def sync_bing(days_back: int = 3, timeout_seconds: int = _TIMEOUT_SECONDS) -> int:
    """Sync Bing Webmaster query stats into seo_search_metrics. Returns rows synced.

    Fetches query-level (GetQueryStats) and page-level (GetPageStats) metrics
    from the Bing Webmaster REST API and upserts them into seo_search_metrics.
    Every run — success or failure — is recorded in seo_sync_log.

    Args:
        days_back: Only entries newer than this many days are kept (1-90).
        timeout_seconds: HTTP client timeout in seconds (1-120).

    Returns:
        Number of metric rows written; 0 when Bing credentials are not configured.

    Raises:
        Exception: Any HTTP or database error is logged to seo_sync_log as
            'failed' and then re-raised.
    """
    assert 1 <= days_back <= 90, "days_back must be 1-90"
    assert 1 <= timeout_seconds <= 120, "timeout_seconds must be 1-120"

    if not config.BING_WEBMASTER_API_KEY or not config.BING_SITE_URL:
        return 0  # Bing not configured — skip silently

    started_at = utcnow()
    # Both endpoints take the same credentials; build the query string once.
    params = {
        "apikey": config.BING_WEBMASTER_API_KEY,
        "siteUrl": config.BING_SITE_URL,
    }

    try:
        rows_synced = 0
        cutoff = utcnow() - timedelta(days=days_back)

        async with httpx.AsyncClient(timeout=timeout_seconds) as client:
            # Query-level stats: one row per (date, query); page_url pinned to '/'.
            response = await client.get(
                "https://ssl.bing.com/webmaster/api.svc/json/GetQueryStats",
                params=params,
            )
            response.raise_for_status()
            for entry in _extract_entries(response.json()):
                entry_date = _parse_bing_date(entry.get("Date", ""))
                if entry_date is None or entry_date < cutoff:
                    continue
                await execute(
                    """INSERT OR REPLACE INTO seo_search_metrics
                    (source, metric_date, page_url, query, country, device,
                    clicks, impressions, ctr, position_avg)
                    VALUES ('bing', ?, '/', ?, NULL, NULL, ?, ?, ?, ?)""",
                    (
                        entry_date.strftime("%Y-%m-%d"),
                        entry.get("Query", ""),
                        entry.get("Clicks", 0),
                        entry.get("Impressions", 0),
                        entry.get("AvgCTR", 0.0),
                        entry.get("AvgClickPosition", 0.0),
                    ),
                )
                rows_synced += 1

            # Page-level stats: one row per (date, page). Bing does not report
            # CTR/position at page granularity, so those columns stay NULL.
            page_response = await client.get(
                "https://ssl.bing.com/webmaster/api.svc/json/GetPageStats",
                params=params,
            )
            page_response.raise_for_status()
            for entry in _extract_entries(page_response.json()):
                entry_date = _parse_bing_date(entry.get("Date", ""))
                if entry_date is None or entry_date < cutoff:
                    continue
                await execute(
                    """INSERT OR REPLACE INTO seo_search_metrics
                    (source, metric_date, page_url, query, country, device,
                    clicks, impressions, ctr, position_avg)
                    VALUES ('bing', ?, ?, '', NULL, NULL, ?, ?, NULL, NULL)""",
                    (
                        entry_date.strftime("%Y-%m-%d"),
                        _normalize_url(entry.get("Url", "/")),
                        entry.get("Clicks", 0),
                        entry.get("Impressions", 0),
                    ),
                )
                rows_synced += 1

        duration_ms = int((utcnow() - started_at).total_seconds() * 1000)
        await execute(
            """INSERT INTO seo_sync_log
            (source, status, rows_synced, started_at, completed_at, duration_ms)
            VALUES ('bing', 'success', ?, ?, ?, ?)""",
            (rows_synced, started_at.strftime("%Y-%m-%dT%H:%M:%S"), utcnow_iso(), duration_ms),
        )
        return rows_synced

    except Exception as exc:
        # Best-effort failure record, then propagate the original error.
        duration_ms = int((utcnow() - started_at).total_seconds() * 1000)
        await execute(
            """INSERT INTO seo_sync_log
            (source, status, rows_synced, error, started_at, completed_at, duration_ms)
            VALUES ('bing', 'failed', 0, ?, ?, ?, ?)""",
            (str(exc), started_at.strftime("%Y-%m-%dT%H:%M:%S"), utcnow_iso(), duration_ms),
        )
        raise
|