From 71e31c570388aec06080e7e084b03ec9f92f08c3 Mon Sep 17 00:00:00 2001 From: Raincloud Date: Sat, 25 Apr 2026 13:41:30 -0600 Subject: [PATCH] prism ingest attempts from 2 days previous --- .gitignore | 4 + Assets/Scripts/prism_ingest.py | 241 ++++++++++++++++++++++++++++++++- 2 files changed, 238 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 258ba39..4bb56a6 100644 --- a/.gitignore +++ b/.gitignore @@ -129,3 +129,7 @@ RnR/ # Microsoft Office temporary files ~$* + +# Local Prism ingest secrets (see Assets/Scripts/prism_ingest.py) +prism.local.env +Assets/Scripts/prism.local.env diff --git a/Assets/Scripts/prism_ingest.py b/Assets/Scripts/prism_ingest.py index 53cdb17..a476f0e 100644 --- a/Assets/Scripts/prism_ingest.py +++ b/Assets/Scripts/prism_ingest.py @@ -5,7 +5,14 @@ Push Story/*.md Prism profiles to prism.tal.one via the production API. Auth (pick one — use environment variables, never commit secrets): PRISM_API_KEY If set, sends Authorization: Bearer by default. PRISM_AUTH_HEADER Optional full header value, e.g. "Bearer sk-..." or "ApiKey ...". - PRISM_SESSION_COOKIE Raw session cookie string (same value as browser Cookie "session"). + PRISM_SESSION_COOKIE Session only: the cookie *value* (script sends as session=), or a full Cookie + fragment e.g. session=…; XSRF-TOKEN=… (used as-is — do not double-prefix). + +Persistent local config (no PowerShell paste each run): + Create Assets/Scripts/prism.local.env (gitignored) with lines like PRISM_ALLOW_WRITE=1, PRISM_SESSION_COOKIE=…, + PRISM_CSRF_TOKEN=… (optional if bootstrap works), PRISM_API_KEY=… (best — skips CSRF entirely). + Or set PRISM_ENV_FILE to another path. Optional CLI: --prism-env-file PATH (must appear before the subcommand). + Real environment variables always override values from the file. Optional: PRISM_BASE_URL Default https://prism.tal.one @@ -13,6 +20,9 @@ Optional: PRISM_ALLOW_WRITE Must be exactly "1" for sync --apply PRISM_USER_AGENT Override browser-like User-Agent (default: Chrome on Windows) PRISM_ORIGIN Default https://prism.tal.one (Origin + Referer for API calls) + PRISM_CSRF_TOKEN If writes return CSRF_ERROR with session cookie auth: paste token from DevTools, or rely on auto-bootstrap below. + PRISM_CSRF_HEADER Header name for that token (default X-CSRFToken — matches /static/js/csrf.js on prism.tal.one). + PRISM_CSRF_SEND_BOTH Set to 1 to also send X-CSRF-Token and X-XSRF-TOKEN with the same value (if a proxy strips one header). Examples: python Assets/Scripts/prism_ingest.py list @@ -37,8 +47,12 @@ import re import sys import urllib.error import urllib.request +from urllib.parse import urlparse +from http.cookiejar import CookieJar from pathlib import Path from typing import Any +from urllib.parse import unquote + FIELD_BULLET = re.compile(r"^-\s+\*\*(.+?)\*\*:\s*(.*)\s*$") # Alternate: `- **Label:** value` (colon inside bold — common typo / variant) @@ -53,6 +67,81 @@ _DEFAULT_UA = ( ) +def _strip_prism_env_file_from_argv() -> Path | None: + """Remove --prism-env-file PATH from sys.argv; return path if present.""" + argv = sys.argv + out: list[str] = [argv[0]] + i = 1 + found: Path | None = None + while i < len(argv): + a = argv[i] + if a == "--prism-env-file" and i + 1 < len(argv): + found = Path(argv[i + 1]) + i += 2 + continue + if a.startswith("--prism-env-file="): + found = Path(a.split("=", 1)[1]) + i += 1 + continue + out.append(a) + i += 1 + if found is not None: + sys.argv[:] = out + return found + + +def _load_prism_env_file(path: Path, *, warn_missing: bool) -> None: + """Set os.environ for keys from a simple KEY=value file. Does not override existing env.""" + if not path.is_file(): + if warn_missing: + print(f"Prism: env file not found: {path}", file=sys.stderr) + return + raw = path.read_text(encoding="utf-8") + for line in raw.splitlines(): + s = line.strip() + if not s or s.startswith("#"): + continue + if "=" not in s: + continue + k, _, v = s.partition("=") + k = k.strip() + v = v.strip() + if not k: + continue + if len(v) >= 2 and v[0] == v[-1] and v[0] in "\"'": + v = v[1:-1] + if k not in os.environ: + os.environ[k] = v + + +def _apply_prism_env_files() -> None: + cli_path = _strip_prism_env_file_from_argv() + explicit = bool(cli_path) or bool(os.environ.get("PRISM_ENV_FILE", "").strip()) + path = cli_path + if path is None: + e = os.environ.get("PRISM_ENV_FILE", "").strip() + if e: + path = Path(e) + if path is None: + default = Path(__file__).resolve().parent / "prism.local.env" + if default.is_file(): + path = default + explicit = False + if path is not None: + _load_prism_env_file(path, warn_missing=explicit) + + +def _cookie_header_from_session_env() -> str: + """Build Cookie header from PRISM_SESSION_COOKIE without duplicating session=.""" + s = os.environ.get("PRISM_SESSION_COOKIE", "").strip() + if not s: + return "" + sl = s.lower() + if sl.startswith("session=") or ";" in s or s.count("=") > 1: + return s + return f"session={s}" + + def _auth_headers() -> dict[str, str]: """Authorization / session only (no browser fingerprint).""" h: dict[str, str] = {} @@ -68,9 +157,9 @@ def _auth_headers() -> dict[str, str]: if key: h["Authorization"] = f"Bearer {key}" return h - sess = os.environ.get("PRISM_SESSION_COOKIE", "").strip() - if sess: - h["Cookie"] = f"session={sess}" + ck = _cookie_header_from_session_env() + if ck: + h["Cookie"] = ck return h return h @@ -94,6 +183,93 @@ def _full_headers(*, json_body: bool) -> dict[str, str]: return h +_csrf_extra_cache: dict[str, str] | None = None +_csrf_extra_pid: str | None = None + + +def _session_cookie_auth() -> bool: + return bool(os.environ.get("PRISM_SESSION_COOKIE", "").strip()) + + +def _api_key_auth() -> bool: + return bool(os.environ.get("PRISM_API_KEY", "").strip() or os.environ.get("PRISM_AUTH_HEADER", "").strip()) + + +def _bootstrap_write_extras(production_id: str) -> dict[str, str]: + """ + Hit a same-origin HTML route so Set-Cookie can attach XSRF (etc.). + Returns extra headers for mutating API calls: optional Cookie merge + CSRF header. + """ + origin = os.environ.get("PRISM_ORIGIN", "https://prism.tal.one").rstrip("/") + paths = (f"/productions/{production_id}", "/productions", "/") + base_h = _full_headers(json_body=False) + base_sess = _cookie_header_from_session_env() + for path in paths: + url = f"{origin}{path}" + jar = CookieJar() + opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(jar)) + req = urllib.request.Request(url, headers=base_h) + try: + with opener.open(req, timeout=60) as resp: + html = resp.read().decode("utf-8", errors="replace") + except urllib.error.HTTPError as e: + html = e.read().decode("utf-8", errors="replace") + except OSError: + continue + from_jar = [f"{c.name}={c.value}" for c in jar] + xsrf: str | None = None + for c in jar: + norm = re.sub(r"[_-]", "", c.name.lower()) + if norm in ("xsrftoken", "csrftoken", "csrf"): + xsrf = unquote(c.value) + break + if not xsrf: + m = re.search( + r' dict[str, str]: + global _csrf_extra_cache, _csrf_extra_pid + if _csrf_extra_cache is not None and _csrf_extra_pid == production_id: + return _csrf_extra_cache + out: dict[str, str] = {} + if _api_key_auth(): + _csrf_extra_cache, _csrf_extra_pid = out, production_id + return out + hname = os.environ.get("PRISM_CSRF_HEADER", "X-CSRFToken").strip() or "X-CSRFToken" + tok = os.environ.get("PRISM_CSRF_TOKEN", "").strip() + if _session_cookie_auth(): + out = _bootstrap_write_extras(production_id) + if tok: + out = dict(out) + out[hname] = tok + _csrf_extra_cache, _csrf_extra_pid = out, production_id + return out + + def _req( method: str, url: str, @@ -108,6 +284,17 @@ def _req( except urllib.error.HTTPError as e: body = e.read().decode("utf-8", errors="replace") return e.code, body + except urllib.error.URLError as e: + host = urlparse(url).hostname or "?" + reason = repr(e.reason) + hint = "" + if "getaddrinfo" in reason or "11001" in reason or "Name or service not known" in reason: + hint = ( + f"\n DNS could not resolve host {host!r}. Fix PRISM_BASE_URL to match the site you open in the " + f'browser (no typos), check network/VPN, then: Resolve-DnsName {host} in PowerShell.' + ) + print(f"Request failed {method} {url}\n {e!r}{hint}", file=sys.stderr) + raise SystemExit(1) from e def _normalize_list_payload(data: Any) -> list[dict[str, Any]]: @@ -904,6 +1091,24 @@ def cmd_sync(args: argparse.Namespace) -> int: filt = set(args.only) paths = [p for p in paths if p.name in filt] + write_json_headers: dict[str, str] | None = None + if args.apply: + write_json_headers = dict(_full_headers(json_body=True)) + csrf_extra = _csrf_headers_for_write(production_id=str(pid)) + write_json_headers.update(csrf_extra) + web_origin = os.environ.get("PRISM_ORIGIN", base).rstrip("/") + if _session_cookie_auth() and not _api_key_auth(): + write_json_headers["Referer"] = os.environ.get( + "PRISM_REFERER", + f"{web_origin}/productions/{pid}/", + ) + if _session_cookie_auth() and not _api_key_auth() and not csrf_extra: + print( + "Warning: session auth but CSRF bootstrap found no token; writes may fail with CSRF_ERROR. " + "Set PRISM_CSRF_TOKEN (or add it to prism.local.env) or use PRISM_API_KEY if available.", + file=sys.stderr, + ) + for path in paths: if not args.no_skip_done and path.name in SYNC_SKIP_MANUAL: print(f"SKIP manual-done\t{path.name}", file=sys.stderr) @@ -958,18 +1163,39 @@ def cmd_sync(args: argparse.Namespace) -> int: args.attach_field, _attach_value(merged, args.attach_field, blob), ) + assert write_json_headers is not None write_obj = _prism_write_payload(merged) payload = json.dumps(write_obj, ensure_ascii=False).encode("utf-8") - pc, pbody = _req("PUT", detail_url, data=payload, headers=_full_headers(json_body=True)) + pc, pbody = _req("PUT", detail_url, data=payload, headers=write_json_headers) + if pc == 400 and args.map_md and isinstance(write_obj.get("data"), dict): + flat = json.dumps(write_obj["data"], ensure_ascii=False).encode("utf-8") + pc, pbody = _req("PUT", detail_url, data=flat, headers=write_json_headers) if pc not in (200, 204): + print( + f"{path.name}\tPUT {pc}\t(response {min(len(pbody), 2000)} chars)\t{pbody[:2000]}", + file=sys.stderr, + ) + if "CSRF_ERROR" in pbody: + print( + " → CSRF rejected: refresh PRISM_CSRF_TOKEN (meta csrf-token) and ensure PRISM_SESSION_COOKIE " + "matches the same browser session (include cf_clearance if the browser sends it). " + "Or use PRISM_API_KEY. If using prism.local.env, edit that file — real env vars override it.", + file=sys.stderr, + ) patch_body = _minimal_patch(args, write_obj, prism_patch if args.map_md else None) pc2, pbody2 = _req( "PATCH", detail_url, data=json.dumps(patch_body, ensure_ascii=False).encode("utf-8"), - headers=_full_headers(json_body=True), + headers=write_json_headers, ) - print(f"{path.name}\tPUT {pc}\tPATCH {pc2}\t{pbody2[:500]}", file=sys.stderr) + if pc2 not in (200, 204): + print( + f"{path.name}\tPATCH {pc2}\t{pbody2[:2000]}", + file=sys.stderr, + ) + else: + print(f"OK\t{path.name}\tPATCH HTTP {pc2}", file=sys.stderr) else: print(f"OK\t{path.name}\tHTTP {pc}", file=sys.stderr) return 0 @@ -1052,6 +1278,7 @@ def main() -> int: sys.stdout.reconfigure(encoding="utf-8") except Exception: pass + _apply_prism_env_files() ap = argparse.ArgumentParser(description="Prism production API helper for Story/*.md profiles.") sub = ap.add_subparsers(dest="cmd", required=True)