prism ingest attempts

from 2 days previous
This commit is contained in:
2026-04-25 13:41:30 -06:00
parent 345816e3d8
commit 71e31c5703
2 changed files with 238 additions and 7 deletions
+4
View File
@@ -129,3 +129,7 @@ RnR/
# Microsoft Office temporary files
~$*
# Local Prism ingest secrets (see Assets/Scripts/prism_ingest.py)
prism.local.env
Assets/Scripts/prism.local.env
+234 -7
View File
@@ -5,7 +5,14 @@ Push Story/*.md Prism profiles to prism.tal.one via the production API.
Auth (pick one — use environment variables, never commit secrets):
PRISM_API_KEY If set, sends Authorization: Bearer <value> by default.
PRISM_AUTH_HEADER Optional full header value, e.g. "Bearer sk-..." or "ApiKey ...".
PRISM_SESSION_COOKIE Raw session cookie string (same value as browser Cookie "session").
PRISM_SESSION_COOKIE Session only: the cookie *value* (script sends as session=<value>), or a full Cookie
fragment e.g. session=…; XSRF-TOKEN=… (used as-is — do not double-prefix).
Persistent local config (no PowerShell paste each run):
Create Assets/Scripts/prism.local.env (gitignored) with lines like PRISM_ALLOW_WRITE=1, PRISM_SESSION_COOKIE=…,
PRISM_CSRF_TOKEN=… (optional if bootstrap works), PRISM_API_KEY=… (best — skips CSRF entirely).
Or set PRISM_ENV_FILE to another path. Optional CLI: --prism-env-file PATH or --prism-env-file=PATH (must appear before the subcommand).
Real environment variables always override values from the file.
Optional:
PRISM_BASE_URL Default https://prism.tal.one
@@ -13,6 +20,9 @@ Optional:
PRISM_ALLOW_WRITE Must be exactly "1" for sync --apply
PRISM_USER_AGENT Override browser-like User-Agent (default: Chrome on Windows)
PRISM_ORIGIN Default https://prism.tal.one (Origin + Referer for API calls)
PRISM_CSRF_TOKEN If writes return CSRF_ERROR with session cookie auth: paste token from DevTools, or rely on auto-bootstrap below.
PRISM_CSRF_HEADER Header name for that token (default X-CSRFToken — matches /static/js/csrf.js on prism.tal.one).
PRISM_CSRF_SEND_BOTH Set to 1 to also send X-CSRF-Token and X-XSRF-TOKEN with the same value (if a proxy strips one header).
Examples:
python Assets/Scripts/prism_ingest.py list
@@ -37,8 +47,12 @@ import re
import sys
import urllib.error
import urllib.request
from urllib.parse import urlparse
from http.cookiejar import CookieJar
from pathlib import Path
from typing import Any
from urllib.parse import unquote
# Matches a Markdown bullet of the form `- **Label**: value` (colon outside the bold);
# group 1 captures the label, group 2 the remainder of the line.
FIELD_BULLET = re.compile(r"^-\s+\*\*(.+?)\*\*:\s*(.*)\s*$")
# Alternate: `- **Label:** value` (colon inside bold — common typo / variant)
@@ -53,6 +67,81 @@ _DEFAULT_UA = (
)
def _strip_prism_env_file_from_argv() -> Path | None:
"""Remove --prism-env-file PATH from sys.argv; return path if present."""
argv = sys.argv
out: list[str] = [argv[0]]
i = 1
found: Path | None = None
while i < len(argv):
a = argv[i]
if a == "--prism-env-file" and i + 1 < len(argv):
found = Path(argv[i + 1])
i += 2
continue
if a.startswith("--prism-env-file="):
found = Path(a.split("=", 1)[1])
i += 1
continue
out.append(a)
i += 1
if found is not None:
sys.argv[:] = out
return found
def _load_prism_env_file(path: Path, *, warn_missing: bool) -> None:
    """Populate ``os.environ`` from a simple ``KEY=value`` file.

    Blank lines, ``#`` comment lines, lines without ``=`` and lines with an
    empty key are ignored. Whitespace around key and value is trimmed and one
    pair of matching surrounding quotes (``"`` or ``'``) is removed from the
    value. Variables already present in the environment are never overridden.

    Args:
        path: Location of the env file.
        warn_missing: When true, print a notice to stderr if *path* is not a file.
    """
    if not path.is_file():
        if warn_missing:
            print(f"Prism: env file not found: {path}", file=sys.stderr)
        return

    # "utf-8-sig" drops a leading byte-order mark. Windows editors commonly
    # write one, and with plain "utf-8" it would become part of the first key,
    # so the first entry of the file would silently never take effect.
    raw = path.read_text(encoding="utf-8-sig")
    for line in raw.splitlines():
        entry = line.strip()
        if not entry or entry.startswith("#"):
            continue
        key, sep, value = entry.partition("=")
        key = key.strip()
        if not sep or not key:
            continue
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        # Real environment variables take precedence over the file.
        os.environ.setdefault(key, value)
def _apply_prism_env_files() -> None:
    """Load the Prism env file selected by CLI flag, PRISM_ENV_FILE, or the default.

    Precedence: ``--prism-env-file`` on the command line, then the
    ``PRISM_ENV_FILE`` environment variable, then ``prism.local.env`` next to
    this script. A missing file is reported only for the first two (explicit)
    choices; the default file is loaded only when it exists.
    """
    cli_choice = _strip_prism_env_file_from_argv()
    if cli_choice is not None:
        _load_prism_env_file(cli_choice, warn_missing=True)
        return

    env_choice = os.environ.get("PRISM_ENV_FILE", "").strip()
    if env_choice:
        _load_prism_env_file(Path(env_choice), warn_missing=True)
        return

    fallback = Path(__file__).resolve().parent / "prism.local.env"
    if fallback.is_file():
        _load_prism_env_file(fallback, warn_missing=False)
def _cookie_header_from_session_env() -> str:
    """Return the Cookie header value derived from PRISM_SESSION_COOKIE.

    An unset or blank variable yields ``""``. A value that already looks like
    a Cookie fragment (starts with ``session=`` in any letter case, contains
    ``;``, or contains more than one ``=``) is returned unchanged; anything
    else is treated as the bare session value and prefixed with ``session=``.
    """
    raw = os.environ.get("PRISM_SESSION_COOKIE", "").strip()
    if not raw:
        return ""
    looks_like_fragment = (
        raw.lower().startswith("session=")
        or ";" in raw
        or raw.count("=") > 1
    )
    return raw if looks_like_fragment else f"session={raw}"
def _auth_headers() -> dict[str, str]:
"""Authorization / session only (no browser fingerprint)."""
h: dict[str, str] = {}
@@ -68,9 +157,9 @@ def _auth_headers() -> dict[str, str]:
if key:
h["Authorization"] = f"Bearer {key}"
return h
sess = os.environ.get("PRISM_SESSION_COOKIE", "").strip()
if sess:
h["Cookie"] = f"session={sess}"
ck = _cookie_header_from_session_env()
if ck:
h["Cookie"] = ck
return h
return h
@@ -94,6 +183,93 @@ def _full_headers(*, json_body: bool) -> dict[str, str]:
return h
# Extra write headers last computed by _csrf_headers_for_write (None until first computed).
_csrf_extra_cache: dict[str, str] | None = None
# Production id the cached headers were computed for; a different id triggers recomputation.
_csrf_extra_pid: str | None = None
def _session_cookie_auth() -> bool:
    """Return True when PRISM_SESSION_COOKIE is set to a non-blank value."""
    cookie = os.environ.get("PRISM_SESSION_COOKIE", "")
    return cookie.strip() != ""
def _api_key_auth() -> bool:
    """Return True when PRISM_API_KEY or PRISM_AUTH_HEADER holds a non-blank value."""
    return any(
        os.environ.get(name, "").strip()
        for name in ("PRISM_API_KEY", "PRISM_AUTH_HEADER")
    )
def _merge_cookie_pairs(base: str, fresh: list[tuple[str, str]]) -> str:
    """Combine a Cookie header string with fresh (name, value) pairs, one entry per name."""
    merged: dict[str, str] = {}
    for piece in base.split(";"):
        piece = piece.strip()
        if piece:
            merged[piece.partition("=")[0].strip()] = piece
    for name, value in fresh:
        # A cookie the server just (re)issued replaces the configured one of the
        # same name, the way a browser's cookie store would.
        merged[name] = f"{name}={value}"
    return "; ".join(merged.values())


def _bootstrap_write_extras(production_id: str) -> dict[str, str]:
    """
    Hit a same-origin HTML route so Set-Cookie can attach XSRF (etc.).

    Tries ``/productions/<production_id>``, ``/productions`` and ``/`` under
    PRISM_ORIGIN in that order and stops at the first route that yields any
    cookie or CSRF token. The token is taken from a cookie whose normalised
    name is ``xsrftoken``/``csrftoken``/``csrf``, else from a
    ``<meta name="csrf-token">`` tag, else from a ``"csrfToken": "..."`` JSON
    fragment in the page.

    Args:
        production_id: Production whose page is tried first.

    Returns:
        Extra headers for mutating API calls: an optional merged ``Cookie``
        header and the CSRF header (plus alternates when
        PRISM_CSRF_SEND_BOTH is ``1``). Empty when nothing was found or every
        request failed at the network level.
    """
    origin = os.environ.get("PRISM_ORIGIN", "https://prism.tal.one").rstrip("/")
    paths = (f"/productions/{production_id}", "/productions", "/")
    base_h = _full_headers(json_body=False)
    base_sess = _cookie_header_from_session_env()
    hname = os.environ.get("PRISM_CSRF_HEADER", "X-CSRFToken").strip() or "X-CSRFToken"
    send_both = os.environ.get("PRISM_CSRF_SEND_BOTH") == "1"

    for path in paths:
        url = f"{origin}{path}"
        # Fresh jar per route so cookies reflect only this response chain.
        jar = CookieJar()
        opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(jar))
        req = urllib.request.Request(url, headers=base_h)
        try:
            with opener.open(req, timeout=60) as resp:
                html = resp.read().decode("utf-8", errors="replace")
        except urllib.error.HTTPError as e:
            # Error pages can still set cookies / embed a token, so keep the body.
            html = e.read().decode("utf-8", errors="replace")
        except OSError:
            continue

        jar_pairs = [(c.name, c.value) for c in jar]

        xsrf: str | None = None
        for c in jar:
            norm = re.sub(r"[_-]", "", c.name.lower())
            if norm in ("xsrftoken", "csrftoken", "csrf"):
                xsrf = unquote(c.value)
                break
        if not xsrf:
            m = re.search(
                r'<meta\s+name=["\']csrf-token["\']\s+content=["\']([^"\']+)["\']',
                html,
                re.I,
            )
            if m:
                xsrf = m.group(1)
        if not xsrf:
            m = re.search(r'"csrfToken"\s*:\s*"([^"]+)"', html)
            if m:
                xsrf = m.group(1)

        out: dict[str, str] = {}
        if jar_pairs:
            # Merge by cookie name rather than appending: if the server re-issued
            # e.g. "session", sending both the old and the new value leaves it to
            # the server which one is read, and the stale one may not match the
            # CSRF token obtained here.
            out["Cookie"] = _merge_cookie_pairs(base_sess, jar_pairs)
        if xsrf:
            out[hname] = xsrf
            if send_both:
                for alt in ("X-CSRFToken", "X-CSRF-Token", "X-XSRF-TOKEN"):
                    if alt.lower() != hname.lower():
                        out[alt] = xsrf
        if out:
            return out
    return {}
def _csrf_headers_for_write(*, production_id: str) -> dict[str, str]:
    """Return the extra headers (CSRF token, merged cookies) for mutating calls.

    The result is cached per *production_id* for the life of the process.
    With API-key auth (PRISM_API_KEY / PRISM_AUTH_HEADER) no extras are
    produced. With session-cookie auth the extras come from
    ``_bootstrap_write_extras``. An explicit PRISM_CSRF_TOKEN always wins over
    a bootstrapped token, and when PRISM_CSRF_SEND_BOTH is ``1`` it is also
    written to the alternate header names so every CSRF header carries the
    same value.

    Args:
        production_id: Production the write targets; also the cache key.

    Returns:
        Header name -> value mapping (possibly empty). Callers should treat
        it as read-only because the same dict is returned from the cache.
    """
    global _csrf_extra_cache, _csrf_extra_pid
    if _csrf_extra_cache is not None and _csrf_extra_pid == production_id:
        return _csrf_extra_cache

    out: dict[str, str] = {}
    if not _api_key_auth():
        hname = os.environ.get("PRISM_CSRF_HEADER", "X-CSRFToken").strip() or "X-CSRFToken"
        tok = os.environ.get("PRISM_CSRF_TOKEN", "").strip()
        if _session_cookie_auth():
            out = dict(_bootstrap_write_extras(production_id))
        if tok:
            out[hname] = tok
            # Keep the alternates in step with the explicit token; previously
            # they were either missing or still held the bootstrapped value.
            if os.environ.get("PRISM_CSRF_SEND_BOTH") == "1":
                for alt in ("X-CSRFToken", "X-CSRF-Token", "X-XSRF-TOKEN"):
                    if alt.lower() != hname.lower():
                        out[alt] = tok

    _csrf_extra_cache, _csrf_extra_pid = out, production_id
    return out
def _req(
method: str,
url: str,
@@ -108,6 +284,17 @@ def _req(
except urllib.error.HTTPError as e:
body = e.read().decode("utf-8", errors="replace")
return e.code, body
except urllib.error.URLError as e:
host = urlparse(url).hostname or "?"
reason = repr(e.reason)
hint = ""
if "getaddrinfo" in reason or "11001" in reason or "Name or service not known" in reason:
hint = (
f"\n DNS could not resolve host {host!r}. Fix PRISM_BASE_URL to match the site you open in the "
f'browser (no typos), check network/VPN, then: Resolve-DnsName {host} in PowerShell.'
)
print(f"Request failed {method} {url}\n {e!r}{hint}", file=sys.stderr)
raise SystemExit(1) from e
def _normalize_list_payload(data: Any) -> list[dict[str, Any]]:
@@ -904,6 +1091,24 @@ def cmd_sync(args: argparse.Namespace) -> int:
filt = set(args.only)
paths = [p for p in paths if p.name in filt]
write_json_headers: dict[str, str] | None = None
if args.apply:
write_json_headers = dict(_full_headers(json_body=True))
csrf_extra = _csrf_headers_for_write(production_id=str(pid))
write_json_headers.update(csrf_extra)
web_origin = os.environ.get("PRISM_ORIGIN", base).rstrip("/")
if _session_cookie_auth() and not _api_key_auth():
write_json_headers["Referer"] = os.environ.get(
"PRISM_REFERER",
f"{web_origin}/productions/{pid}/",
)
if _session_cookie_auth() and not _api_key_auth() and not csrf_extra:
print(
"Warning: session auth but CSRF bootstrap found no token; writes may fail with CSRF_ERROR. "
"Set PRISM_CSRF_TOKEN (or add it to prism.local.env) or use PRISM_API_KEY if available.",
file=sys.stderr,
)
for path in paths:
if not args.no_skip_done and path.name in SYNC_SKIP_MANUAL:
print(f"SKIP manual-done\t{path.name}", file=sys.stderr)
@@ -958,18 +1163,39 @@ def cmd_sync(args: argparse.Namespace) -> int:
args.attach_field,
_attach_value(merged, args.attach_field, blob),
)
assert write_json_headers is not None
write_obj = _prism_write_payload(merged)
payload = json.dumps(write_obj, ensure_ascii=False).encode("utf-8")
pc, pbody = _req("PUT", detail_url, data=payload, headers=_full_headers(json_body=True))
pc, pbody = _req("PUT", detail_url, data=payload, headers=write_json_headers)
if pc == 400 and args.map_md and isinstance(write_obj.get("data"), dict):
flat = json.dumps(write_obj["data"], ensure_ascii=False).encode("utf-8")
pc, pbody = _req("PUT", detail_url, data=flat, headers=write_json_headers)
if pc not in (200, 204):
print(
f"{path.name}\tPUT {pc}\t(response {min(len(pbody), 2000)} chars)\t{pbody[:2000]}",
file=sys.stderr,
)
if "CSRF_ERROR" in pbody:
print(
" → CSRF rejected: refresh PRISM_CSRF_TOKEN (meta csrf-token) and ensure PRISM_SESSION_COOKIE "
"matches the same browser session (include cf_clearance if the browser sends it). "
"Or use PRISM_API_KEY. If using prism.local.env, edit that file — real env vars override it.",
file=sys.stderr,
)
patch_body = _minimal_patch(args, write_obj, prism_patch if args.map_md else None)
pc2, pbody2 = _req(
"PATCH",
detail_url,
data=json.dumps(patch_body, ensure_ascii=False).encode("utf-8"),
headers=_full_headers(json_body=True),
headers=write_json_headers,
)
print(f"{path.name}\tPUT {pc}\tPATCH {pc2}\t{pbody2[:500]}", file=sys.stderr)
if pc2 not in (200, 204):
print(
f"{path.name}\tPATCH {pc2}\t{pbody2[:2000]}",
file=sys.stderr,
)
else:
print(f"OK\t{path.name}\tPATCH HTTP {pc2}", file=sys.stderr)
else:
print(f"OK\t{path.name}\tHTTP {pc}", file=sys.stderr)
return 0
@@ -1052,6 +1278,7 @@ def main() -> int:
sys.stdout.reconfigure(encoding="utf-8")
except Exception:
pass
_apply_prism_env_files()
ap = argparse.ArgumentParser(description="Prism production API helper for Story/*.md profiles.")
sub = ap.add_subparsers(dest="cmd", required=True)