252 lines
9.8 KiB
Python
252 lines
9.8 KiB
Python
from __future__ import annotations
|
|
|
|
import importlib.resources
|
|
import json
|
|
import re
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import Any
|
|
from urllib.parse import quote
|
|
|
|
|
|
# Scheme + host shared by every Sentinel URL built in this module.
SENTINEL_ORIGIN = "https://sentinel.openai.com"

# Bootstrap script; its text is searched with SDK_VERSION_RE to discover the
# versioned SDK URL.
SENTINEL_BOOTSTRAP_URL = f"{SENTINEL_ORIGIN}/backend-api/sentinel/sdk.js"

# Matches a single-quoted `script.src = '<origin>/sentinel/<version>/sdk.js'`
# assignment and captures <version> as group 1.
SDK_VERSION_RE = re.compile(r"script\.src\s*=\s*'https://sentinel\.openai\.com/sentinel/([^']+)/sdk\.js'")
|
|
|
|
|
|
class SentinelSolveError(RuntimeError):
    """Raised when a Sentinel token cannot be produced.

    Used in this module for failed or malformed HTTP responses, Node.js
    runner failures (missing binary, timeout, non-zero exit, bad output),
    and runner results that lack the expected value.
    """
|
|
|
|
|
|
class SentinelSolver:
    """Build Sentinel tokens using an HTTP client plus a Node.js runner script.

    The solver fetches the active SDK version, asks the runner for a
    "prepare" proof, posts that proof to the Sentinel ``req`` endpoint, and
    then feeds the response back into the runner to produce the final token
    fields. Per-flow preparation results are cached on the instance.
    """

    def __init__(
        self,
        http,
        *,
        node_binary: str = "node",
        runner_path: str | Path | None = None,
        timeout_seconds: float = 30.0,
    ) -> None:
        """Store collaborators and configuration.

        Args:
            http: Client object exposing ``request(method, url, ...)``.
                ``session.cookies.jar``, ``session.headers`` and
                ``device_id`` are read from it when present.
            node_binary: Executable used to launch the runner script.
            runner_path: Path to the runner script; when falsy, the
                ``sentinel_runner.js`` located by ``_default_runner_path``
                is used.
            timeout_seconds: Timeout applied to each runner invocation.
        """
        self.http = http
        self.node_binary = node_binary
        self.runner_path = Path(runner_path) if runner_path else self._default_runner_path()
        self.timeout_seconds = timeout_seconds
        # flow name -> {"sdk_version", "env", "prepare_proof", "chat_req"}
        self._flow_cache: dict[str, dict[str, Any]] = {}

    def build_token(self, flow: str) -> str:
        """Return the compact JSON token for ``flow``.

        The token carries the enforcement proof (``p``), the optional
        turnstile result (``t``, ``None`` when the req payload has no
        ``turnstile.dx``), the req token (``c``), the flow name, and the
        device id (``id``) when one is known.

        Raises:
            SentinelSolveError: If preparation fails or the runner does not
                return a non-empty string for a required field.
        """
        cached = self._prepare_flow(flow)
        chat_req = cached["chat_req"]
        env = cached["env"]
        proof = cached["prepare_proof"]

        turnstile = chat_req.get("turnstile") if isinstance(chat_req, dict) else None
        dx = turnstile.get("dx") if isinstance(turnstile, dict) else None
        encoded_turnstile = None
        if dx:
            # .get() so a missing key reaches the validation below instead of
            # escaping as a bare KeyError.
            encoded_turnstile = self._run_node("turnstile", {**env, "p": proof, "dx": dx}).get("t")
            if not isinstance(encoded_turnstile, str) or not encoded_turnstile:
                raise SentinelSolveError(
                    f"sentinel turnstile runner returned invalid result: {encoded_turnstile!r}"
                )

        enforcement_proof = self._run_node("enforcement", {**env, "chat_req": chat_req}).get("p")
        if not isinstance(enforcement_proof, str) or not enforcement_proof:
            raise SentinelSolveError(
                f"sentinel enforcement runner returned invalid proof: {enforcement_proof!r}"
            )

        token: dict[str, Any] = {
            "p": enforcement_proof,
            "t": encoded_turnstile,
            "c": chat_req.get("token") if isinstance(chat_req, dict) else None,
            "flow": flow,
        }
        did = self._device_id()
        if did:
            token["id"] = did
        return json.dumps(token, separators=(",", ":"))

    def build_session_observer_token(self, flow: str) -> str:
        """Return the compact JSON session-observer token for ``flow``.

        Requires ``so.snapshot_dx`` in the req payload; the runner's
        ``session-observer`` mode turns it into the ``so`` field.

        Raises:
            SentinelSolveError: If the snapshot is missing, preparation
                fails, or the runner does not return a non-empty ``so``.
        """
        cached = self._prepare_flow(flow)
        chat_req = cached["chat_req"]
        so_payload = chat_req.get("so") if isinstance(chat_req, dict) else None
        snapshot_dx = so_payload.get("snapshot_dx") if isinstance(so_payload, dict) else None
        if not snapshot_dx:
            raise SentinelSolveError(f"sentinel session observer snapshot missing for flow {flow!r}")

        req_token = chat_req.get("token") if isinstance(chat_req, dict) else None
        so_value = self._run_node(
            "session-observer",
            {
                **cached["env"],
                "flow": flow,
                "c": req_token,
                "snapshot_dx": snapshot_dx,
                "chat_req": chat_req,
            },
        ).get("so")
        if not isinstance(so_value, str) or not so_value:
            raise SentinelSolveError(
                f"sentinel session observer runner returned invalid payload: {so_value!r}"
            )

        token: dict[str, Any] = {
            "so": so_value,
            "c": req_token,
            "flow": flow,
        }
        did = self._device_id()
        if did:
            token["id"] = did
        return json.dumps(token, separators=(",", ":"))

    def _device_id(self) -> str:
        """Return the ``oai-did`` cookie value, else ``http.device_id``, else ""."""
        return self._cookie_value("oai-did") or getattr(self.http, "device_id", "") or ""

    def _fetch_sdk_version(self) -> str:
        """Download the bootstrap script and extract the SDK version from it.

        Raises:
            SentinelSolveError: If the version pattern is not found.
        """
        response = self.http.request("GET", SENTINEL_BOOTSTRAP_URL)
        text = getattr(response, "text", "") or ""
        match = SDK_VERSION_RE.search(text)
        if not match:
            raise SentinelSolveError("failed to parse active sentinel SDK version")
        return match.group(1)

    def _fetch_req_payload(self, flow: str, proof: str, sdk_version: str) -> dict[str, Any]:
        """POST the prepare proof to the ``req`` endpoint and return its JSON.

        The frame page is requested first (see ``_warm_frame``) and then used
        as the ``Referer`` of the POST.

        Raises:
            SentinelSolveError: On a non-200 status, a non-JSON body, or a
                JSON body that is not an object.
        """
        payload: dict[str, Any] = {"p": proof, "flow": flow}
        did = self._device_id()
        if did:
            payload["id"] = did

        frame_url = f"{SENTINEL_ORIGIN}/backend-api/sentinel/frame.html?sv={quote(sdk_version)}"
        self._warm_frame(frame_url)
        response = self.http.request(
            "POST",
            f"{SENTINEL_ORIGIN}/backend-api/sentinel/req",
            data=json.dumps(payload),
            headers={
                "Content-Type": "text/plain;charset=UTF-8",
                "Accept": "*/*",
                "Origin": SENTINEL_ORIGIN,
                "Referer": frame_url,
            },
        )
        if getattr(response, "status_code", 0) != 200:
            raise SentinelSolveError(
                f"sentinel req failed: {getattr(response, 'status_code', 'unknown')} {self._response_excerpt(response)}"
            )
        try:
            data = response.json()
        except Exception as error:  # pragma: no cover - defensive
            raise SentinelSolveError(f"sentinel req returned non-JSON response: {error}") from error
        if not isinstance(data, dict):
            raise SentinelSolveError(f"sentinel req returned invalid payload: {data!r}")
        return data

    def _prepare_flow(self, flow: str) -> dict[str, Any]:
        """Return the cached preparation result for ``flow``, creating it once.

        The result holds the SDK version, the runner environment, the
        prepare proof, and the req payload.

        Raises:
            SentinelSolveError: If any preparation step fails.
        """
        # NOTE(review): entries are never evicted, so the same req payload is
        # reused for every later call with this flow - confirm that is intended.
        cached = self._flow_cache.get(flow)
        if cached:
            return cached

        version = self._fetch_sdk_version()
        env = self._node_environment(version)
        # .get() so a missing key reaches the validation below instead of
        # escaping as a bare KeyError.
        proof = self._run_node("prepare", env).get("p")
        if not isinstance(proof, str) or not proof:
            raise SentinelSolveError(f"sentinel prepare returned invalid proof: {proof!r}")

        chat_req = self._fetch_req_payload(flow, proof, version)
        cached = {
            "sdk_version": version,
            "env": env,
            "prepare_proof": proof,
            "chat_req": chat_req,
        }
        self._flow_cache[flow] = cached
        return cached

    def _node_environment(self, sdk_version: str) -> dict[str, Any]:
        """Build the base payload handed to every runner invocation.

        Combines SDK URLs derived from ``sdk_version``, the session's
        ``User-Agent`` / ``Accept-Language`` headers, and fixed numeric
        values (presumably the browser characteristics the runner should
        report - confirm against the runner script).
        """
        sdk_url = f"{SENTINEL_ORIGIN}/sentinel/{sdk_version}/sdk.js"
        accept_language = self._session_header("Accept-Language")
        return {
            "sdk_version": sdk_version,
            "sdk_url": sdk_url,
            "sdk_bootstrap_url": SENTINEL_BOOTSTRAP_URL,
            "sdk_version_url": sdk_url,
            "location_search_prepare": f"?sv={sdk_version}",
            "location_search_enforcement": "",
            "user_agent": self._session_header("User-Agent"),
            "accept_language": accept_language,
            "hardware_concurrency": 8,
            "screen_width": 1920,
            "screen_height": 1080,
            "js_heap_size_limit": 4294967296,
        }

    def _run_node(self, mode: str, payload: dict[str, Any]) -> dict[str, Any]:
        """Run the runner script in ``mode`` and return its JSON object output.

        ``payload`` is sent as JSON on stdin; stdout must contain a single
        JSON object.

        Raises:
            SentinelSolveError: If the executable is missing, the run times
                out, the exit code is non-zero, or stdout is empty, not
                valid JSON, or not a JSON object.
        """
        command = [self.node_binary, str(self.runner_path), mode]
        try:
            completed = subprocess.run(
                command,
                input=json.dumps(payload),
                text=True,
                # Decode explicitly as UTF-8 rather than with the locale
                # encoding; "replace" keeps undecodable diagnostics readable
                # instead of raising UnicodeDecodeError.
                encoding="utf-8",
                errors="replace",
                capture_output=True,
                timeout=self.timeout_seconds,
                check=False,
            )
        except FileNotFoundError as error:
            raise SentinelSolveError(f"Node.js executable not found: {self.node_binary}") from error
        except subprocess.TimeoutExpired as error:
            raise SentinelSolveError(f"sentinel {mode} timed out") from error

        stdout = (completed.stdout or "").strip()
        if completed.returncode != 0:
            stderr = (completed.stderr or "").strip()
            # Prefer stderr, then stdout, then the bare exit code.
            detail = stderr or stdout or f"exit code {completed.returncode}"
            raise SentinelSolveError(f"sentinel {mode} failed: {detail}")

        if not stdout:
            raise SentinelSolveError(f"sentinel {mode} returned empty stdout")
        try:
            data = json.loads(stdout)
        except json.JSONDecodeError as error:
            raise SentinelSolveError(f"sentinel {mode} returned invalid JSON: {stdout!r}") from error
        if not isinstance(data, dict):
            raise SentinelSolveError(f"sentinel {mode} returned invalid payload: {data!r}")
        return data

    def _cookie_value(self, name: str) -> str:
        """Return the value of the first cookie called ``name``, or ""."""
        session = getattr(self.http, "session", None)
        cookies = getattr(session, "cookies", None)
        jar = getattr(cookies, "jar", None)
        if not jar:
            return ""
        for cookie in jar:
            if getattr(cookie, "name", "") == name:
                # Normalise a None value so the declared str return holds.
                return getattr(cookie, "value", "") or ""
        return ""

    def _session_header(self, name: str) -> str:
        """Return session header ``name``, or "" when absent or unset."""
        session = getattr(self.http, "session", None)
        headers = getattr(session, "headers", {}) or {}
        return headers.get(name, "") or ""

    def _warm_frame(self, frame_url: str) -> None:
        """GET the frame page before the ``req`` POST.

        Raises:
            SentinelSolveError: If the status is neither 200 nor 204.
        """
        response = self.http.request(
            "GET",
            frame_url,
            headers={
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Referer": SENTINEL_BOOTSTRAP_URL,
            },
        )
        if getattr(response, "status_code", 0) not in {200, 204}:
            raise SentinelSolveError(
                f"sentinel frame warmup failed: {getattr(response, 'status_code', 'unknown')} {self._response_excerpt(response)}"
            )

    @staticmethod
    def _response_excerpt(response, limit: int = 300) -> str:
        """Return up to ``limit`` characters of the response text on one line."""
        text = getattr(response, "text", "") or ""
        return text.replace("\n", " ").replace("\r", " ")[:limit]

    @staticmethod
    def _default_runner_path() -> Path:
        """Locate ``sentinel_runner.js`` for this module.

        Tries the package resource lookup first and falls back to the file
        next to this source file when that lookup fails for any reason.
        """
        try:
            return Path(importlib.resources.files(__package__).joinpath("sentinel_runner.js"))
        except Exception:
            # Deliberate best-effort fallback (e.g. no package context).
            return Path(__file__).with_name("sentinel_runner.js")
|