prism ingest attempts

from 2 days previous
This commit is contained in:
2026-04-25 13:41:30 -06:00
parent 345816e3d8
commit 71e31c5703
2 changed files with 238 additions and 7 deletions
+4
View File
@@ -129,3 +129,7 @@ RnR/
# Microsoft Office temporary files # Microsoft Office temporary files
~$* ~$*
# Local Prism ingest secrets (see Assets/Scripts/prism_ingest.py)
prism.local.env
Assets/Scripts/prism.local.env
+234 -7
View File
@@ -5,7 +5,14 @@ Push Story/*.md Prism profiles to prism.tal.one via the production API.
Auth (pick one — use environment variables, never commit secrets): Auth (pick one — use environment variables, never commit secrets):
PRISM_API_KEY If set, sends Authorization: Bearer <value> by default. PRISM_API_KEY If set, sends Authorization: Bearer <value> by default.
PRISM_AUTH_HEADER Optional full header value, e.g. "Bearer sk-..." or "ApiKey ...". PRISM_AUTH_HEADER Optional full header value, e.g. "Bearer sk-..." or "ApiKey ...".
PRISM_SESSION_COOKIE Raw session cookie string (same value as browser Cookie "session"). PRISM_SESSION_COOKIE Session only: the cookie *value* (script sends as session=<value>), or a full Cookie
fragment e.g. session=…; XSRF-TOKEN=… (used as-is — do not double-prefix).
Persistent local config (no PowerShell paste each run):
Create Assets/Scripts/prism.local.env (gitignored) with lines like PRISM_ALLOW_WRITE=1, PRISM_SESSION_COOKIE=…,
PRISM_CSRF_TOKEN=… (optional if bootstrap works), PRISM_API_KEY=… (best — skips CSRF entirely).
Or set PRISM_ENV_FILE to another path. Optional CLI: --prism-env-file PATH (must appear before the subcommand).
Real environment variables always override values from the file.
Optional: Optional:
PRISM_BASE_URL Default https://prism.tal.one PRISM_BASE_URL Default https://prism.tal.one
@@ -13,6 +20,9 @@ Optional:
PRISM_ALLOW_WRITE Must be exactly "1" for sync --apply PRISM_ALLOW_WRITE Must be exactly "1" for sync --apply
PRISM_USER_AGENT Override browser-like User-Agent (default: Chrome on Windows) PRISM_USER_AGENT Override browser-like User-Agent (default: Chrome on Windows)
PRISM_ORIGIN Default https://prism.tal.one (Origin + Referer for API calls) PRISM_ORIGIN Default https://prism.tal.one (Origin + Referer for API calls)
PRISM_CSRF_TOKEN If writes return CSRF_ERROR with session cookie auth: paste token from DevTools, or rely on auto-bootstrap below.
PRISM_CSRF_HEADER Header name for that token (default X-CSRFToken — matches /static/js/csrf.js on prism.tal.one).
PRISM_CSRF_SEND_BOTH Set to 1 to also send X-CSRF-Token and X-XSRF-TOKEN with the same value (if a proxy strips one header).
Examples: Examples:
python Assets/Scripts/prism_ingest.py list python Assets/Scripts/prism_ingest.py list
@@ -37,8 +47,12 @@ import re
import sys import sys
import urllib.error import urllib.error
import urllib.request import urllib.request
from urllib.parse import urlparse
from http.cookiejar import CookieJar
from pathlib import Path from pathlib import Path
from typing import Any from typing import Any
from urllib.parse import unquote
FIELD_BULLET = re.compile(r"^-\s+\*\*(.+?)\*\*:\s*(.*)\s*$") FIELD_BULLET = re.compile(r"^-\s+\*\*(.+?)\*\*:\s*(.*)\s*$")
# Alternate: `- **Label:** value` (colon inside bold — common typo / variant) # Alternate: `- **Label:** value` (colon inside bold — common typo / variant)
@@ -53,6 +67,81 @@ _DEFAULT_UA = (
) )
def _strip_prism_env_file_from_argv() -> Path | None:
"""Remove --prism-env-file PATH from sys.argv; return path if present."""
argv = sys.argv
out: list[str] = [argv[0]]
i = 1
found: Path | None = None
while i < len(argv):
a = argv[i]
if a == "--prism-env-file" and i + 1 < len(argv):
found = Path(argv[i + 1])
i += 2
continue
if a.startswith("--prism-env-file="):
found = Path(a.split("=", 1)[1])
i += 1
continue
out.append(a)
i += 1
if found is not None:
sys.argv[:] = out
return found
def _load_prism_env_file(path: Path, *, warn_missing: bool) -> None:
    """Populate ``os.environ`` from a simple ``KEY=value`` file.

    Blank lines, lines starting with ``#`` and lines without ``=`` or without
    a key are skipped. Keys and values are stripped of surrounding whitespace,
    and one pair of matching single or double quotes around the value is
    removed. Variables already present in the environment are never
    overridden.

    Args:
        path: Location of the env file.
        warn_missing: When true, print a notice to stderr if *path* is not a
            regular file; otherwise a missing file is ignored silently.
    """
    if not path.is_file():
        if warn_missing:
            print(f"Prism: env file not found: {path}", file=sys.stderr)
        return
    # "utf-8-sig" drops a leading byte-order mark. Windows editors and
    # PowerShell often write one, and with plain "utf-8" it would be glued to
    # the first key, so that setting would silently never take effect.
    raw = path.read_text(encoding="utf-8-sig")
    for line in raw.splitlines():
        entry = line.strip()
        if not entry or entry.startswith("#"):
            continue
        key, sep, value = entry.partition("=")
        key = key.strip()
        if not sep or not key:
            continue
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        # Real environment variables win over the file.
        os.environ.setdefault(key, value)
def _apply_prism_env_files() -> None:
    """Load the Prism env file, choosing its location by precedence.

    Order: the ``--prism-env-file`` command-line option, then the
    ``PRISM_ENV_FILE`` environment variable, then ``prism.local.env`` next to
    this script. A missing file is reported only when the location was given
    explicitly (option or variable); the default file is used only if it
    exists.
    """
    from_cli = _strip_prism_env_file_from_argv()
    if from_cli is not None:
        _load_prism_env_file(from_cli, warn_missing=True)
        return

    from_env = os.environ.get("PRISM_ENV_FILE", "").strip()
    if from_env:
        _load_prism_env_file(Path(from_env), warn_missing=True)
        return

    fallback = Path(__file__).resolve().parent / "prism.local.env"
    if fallback.is_file():
        _load_prism_env_file(fallback, warn_missing=False)
def _cookie_header_from_session_env() -> str:
    """Build the Cookie header value from ``PRISM_SESSION_COOKIE``.

    The variable may hold either a bare session value or a ready-made cookie
    fragment. A fragment is used as-is; a bare value is sent as
    ``session=<value>`` so the name is never duplicated.

    Returns:
        The Cookie header value, or ``""`` when the variable is unset or blank.
    """
    raw = os.environ.get("PRISM_SESSION_COOKIE", "").strip()
    if not raw:
        return ""
    if raw.lower().startswith("session=") or ";" in raw:
        return raw
    # A bare value may end in base64 padding ("...=="). Those trailing "="
    # are part of the value, not name/value separators, so they must not
    # make the value look like a multi-pair cookie fragment.
    if "=" not in raw.rstrip("="):
        return f"session={raw}"
    if raw.count("=") > 1:
        return raw
    return f"session={raw}"
def _auth_headers() -> dict[str, str]: def _auth_headers() -> dict[str, str]:
"""Authorization / session only (no browser fingerprint).""" """Authorization / session only (no browser fingerprint)."""
h: dict[str, str] = {} h: dict[str, str] = {}
@@ -68,9 +157,9 @@ def _auth_headers() -> dict[str, str]:
if key: if key:
h["Authorization"] = f"Bearer {key}" h["Authorization"] = f"Bearer {key}"
return h return h
sess = os.environ.get("PRISM_SESSION_COOKIE", "").strip() ck = _cookie_header_from_session_env()
if sess: if ck:
h["Cookie"] = f"session={sess}" h["Cookie"] = ck
return h return h
return h return h
@@ -94,6 +183,93 @@ def _full_headers(*, json_body: bool) -> dict[str, str]:
return h return h
# Extra write headers last computed by _csrf_headers_for_write(); None until
# the first call.
_csrf_extra_cache: dict[str, str] | None = None
# Production id the cached headers were computed for; a call with a different
# id recomputes them.
_csrf_extra_pid: str | None = None
def _session_cookie_auth() -> bool:
    """Report whether ``PRISM_SESSION_COOKIE`` holds a non-blank value."""
    configured = os.environ.get("PRISM_SESSION_COOKIE", "")
    return configured.strip() != ""
def _api_key_auth() -> bool:
    """Report whether ``PRISM_API_KEY`` or ``PRISM_AUTH_HEADER`` is non-blank."""
    for name in ("PRISM_API_KEY", "PRISM_AUTH_HEADER"):
        if os.environ.get(name, "").strip():
            return True
    return False
def _bootstrap_find_csrf_token(jar: CookieJar, html: str) -> str | None:
    """Return a CSRF token taken from *jar* or, failing that, scraped from *html*."""
    token = ""
    for cookie in jar:
        # Ignore case, "_" and "-" so XSRF-TOKEN, csrf_token, csrftoken all match.
        if re.sub(r"[_-]", "", cookie.name.lower()) in ("xsrftoken", "csrftoken", "csrf"):
            # A valueless cookie has value None; treat it as "no token here".
            token = unquote(cookie.value or "")
            break
    if token:
        return token
    meta = re.search(
        r'<meta\s+name=["\']csrf-token["\']\s+content=["\']([^"\']+)["\']',
        html,
        re.I,
    )
    if meta:
        return meta.group(1)
    inline = re.search(r'"csrfToken"\s*:\s*"([^"]+)"', html)
    return inline.group(1) if inline else None


def _bootstrap_merge_cookies(base: str, jar: CookieJar) -> str:
    """Join the configured cookie fragment with *jar*; jar cookies replace same-named pairs."""
    issued = {cookie.name for cookie in jar}
    kept: list[str] = []
    for part in base.split(";"):
        part = part.strip()
        # Drop configured pairs the server has just re-issued, so the header
        # never carries the same cookie name twice with different values.
        if part and part.partition("=")[0].strip() not in issued:
            kept.append(part)
    kept.extend(f"{cookie.name}={cookie.value}" for cookie in jar)
    return "; ".join(kept)


def _bootstrap_write_extras(production_id: str) -> dict[str, str]:
    """
    Hit a same-origin HTML route so Set-Cookie can attach XSRF (etc.).
    Returns extra headers for mutating API calls: optional Cookie merge + CSRF header.

    The routes ``/productions/<id>``, ``/productions`` and ``/`` under
    ``PRISM_ORIGIN`` are tried in that order. The first response that yields a
    CSRF token (from a cookie, a ``csrf-token`` meta tag or an inline
    ``"csrfToken"`` value) decides the result, so the token and the cookies
    always come from the same response. If no route yields a token, the
    cookies of the first route that set any are returned on their own; if
    nothing was obtained at all the result is empty.

    The token is sent under ``PRISM_CSRF_HEADER`` (default ``X-CSRFToken``);
    with ``PRISM_CSRF_SEND_BOTH=1`` it is also sent under the other common
    header spellings.
    """
    origin = os.environ.get("PRISM_ORIGIN", "https://prism.tal.one").rstrip("/")
    paths = (f"/productions/{production_id}", "/productions", "/")
    base_h = _full_headers(json_body=False)
    base_sess = _cookie_header_from_session_env()
    hname = os.environ.get("PRISM_CSRF_HEADER", "X-CSRFToken").strip() or "X-CSRFToken"
    send_all = os.environ.get("PRISM_CSRF_SEND_BOTH") == "1"
    cookie_only: dict[str, str] = {}
    for path in paths:
        url = f"{origin}{path}"
        jar = CookieJar()
        opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(jar))
        req = urllib.request.Request(url, headers=base_h)
        try:
            with opener.open(req, timeout=60) as resp:
                html = resp.read().decode("utf-8", errors="replace")
        except urllib.error.HTTPError as e:
            # An error page can still carry cookies or a token; inspect it too.
            html = e.read().decode("utf-8", errors="replace")
        except OSError:
            continue
        out: dict[str, str] = {}
        if len(jar):
            out["Cookie"] = _bootstrap_merge_cookies(base_sess, jar)
        xsrf = _bootstrap_find_csrf_token(jar, html)
        if xsrf:
            out[hname] = xsrf
            if send_all:
                for alt in ("X-CSRFToken", "X-CSRF-Token", "X-XSRF-TOKEN"):
                    if alt.lower() != hname.lower():
                        out[alt] = xsrf
            return out
        # Cookies but no token: remember them, but keep probing the remaining
        # routes, since one of them may still expose a token.
        if out and not cookie_only:
            cookie_only = out
    return cookie_only
def _csrf_headers_for_write(*, production_id: str) -> dict[str, str]:
    """Return the extra headers needed for mutating API calls, cached per production.

    With API-key auth (``PRISM_API_KEY`` / ``PRISM_AUTH_HEADER``) no extras
    are needed and the result is empty. With session-cookie auth the extras
    come from the bootstrap request. An explicit ``PRISM_CSRF_TOKEN`` always
    takes precedence over a bootstrapped token and is sent under
    ``PRISM_CSRF_HEADER`` (default ``X-CSRFToken``); with
    ``PRISM_CSRF_SEND_BOTH=1`` the same explicit value is also sent under the
    alternate header spellings, so every CSRF header carries one token.

    Args:
        production_id: Production the writes target; a repeated call with the
            same id returns the cached headers.

    Returns:
        A dict of extra headers (possibly empty). The same dict object is
        returned on cache hits, so callers should copy before mutating.
    """
    global _csrf_extra_cache, _csrf_extra_pid
    if _csrf_extra_cache is not None and _csrf_extra_pid == production_id:
        return _csrf_extra_cache
    out: dict[str, str] = {}
    if not _api_key_auth():
        if _session_cookie_auth():
            out = dict(_bootstrap_write_extras(production_id))
        tok = os.environ.get("PRISM_CSRF_TOKEN", "").strip()
        if tok:
            hname = os.environ.get("PRISM_CSRF_HEADER", "X-CSRFToken").strip() or "X-CSRFToken"
            out[hname] = tok
            if os.environ.get("PRISM_CSRF_SEND_BOTH") == "1":
                # Keep the alternate headers in step with the explicit token;
                # otherwise they would be missing or still hold the
                # bootstrapped value.
                for alt in ("X-CSRFToken", "X-CSRF-Token", "X-XSRF-TOKEN"):
                    if alt.lower() != hname.lower():
                        out[alt] = tok
    _csrf_extra_cache, _csrf_extra_pid = out, production_id
    return out
def _req( def _req(
method: str, method: str,
url: str, url: str,
@@ -108,6 +284,17 @@ def _req(
except urllib.error.HTTPError as e: except urllib.error.HTTPError as e:
body = e.read().decode("utf-8", errors="replace") body = e.read().decode("utf-8", errors="replace")
return e.code, body return e.code, body
except urllib.error.URLError as e:
host = urlparse(url).hostname or "?"
reason = repr(e.reason)
hint = ""
if "getaddrinfo" in reason or "11001" in reason or "Name or service not known" in reason:
hint = (
f"\n DNS could not resolve host {host!r}. Fix PRISM_BASE_URL to match the site you open in the "
f'browser (no typos), check network/VPN, then: Resolve-DnsName {host} in PowerShell.'
)
print(f"Request failed {method} {url}\n {e!r}{hint}", file=sys.stderr)
raise SystemExit(1) from e
def _normalize_list_payload(data: Any) -> list[dict[str, Any]]: def _normalize_list_payload(data: Any) -> list[dict[str, Any]]:
@@ -904,6 +1091,24 @@ def cmd_sync(args: argparse.Namespace) -> int:
filt = set(args.only) filt = set(args.only)
paths = [p for p in paths if p.name in filt] paths = [p for p in paths if p.name in filt]
write_json_headers: dict[str, str] | None = None
if args.apply:
write_json_headers = dict(_full_headers(json_body=True))
csrf_extra = _csrf_headers_for_write(production_id=str(pid))
write_json_headers.update(csrf_extra)
web_origin = os.environ.get("PRISM_ORIGIN", base).rstrip("/")
if _session_cookie_auth() and not _api_key_auth():
write_json_headers["Referer"] = os.environ.get(
"PRISM_REFERER",
f"{web_origin}/productions/{pid}/",
)
if _session_cookie_auth() and not _api_key_auth() and not csrf_extra:
print(
"Warning: session auth but CSRF bootstrap found no token; writes may fail with CSRF_ERROR. "
"Set PRISM_CSRF_TOKEN (or add it to prism.local.env) or use PRISM_API_KEY if available.",
file=sys.stderr,
)
for path in paths: for path in paths:
if not args.no_skip_done and path.name in SYNC_SKIP_MANUAL: if not args.no_skip_done and path.name in SYNC_SKIP_MANUAL:
print(f"SKIP manual-done\t{path.name}", file=sys.stderr) print(f"SKIP manual-done\t{path.name}", file=sys.stderr)
@@ -958,18 +1163,39 @@ def cmd_sync(args: argparse.Namespace) -> int:
args.attach_field, args.attach_field,
_attach_value(merged, args.attach_field, blob), _attach_value(merged, args.attach_field, blob),
) )
assert write_json_headers is not None
write_obj = _prism_write_payload(merged) write_obj = _prism_write_payload(merged)
payload = json.dumps(write_obj, ensure_ascii=False).encode("utf-8") payload = json.dumps(write_obj, ensure_ascii=False).encode("utf-8")
pc, pbody = _req("PUT", detail_url, data=payload, headers=_full_headers(json_body=True)) pc, pbody = _req("PUT", detail_url, data=payload, headers=write_json_headers)
if pc == 400 and args.map_md and isinstance(write_obj.get("data"), dict):
flat = json.dumps(write_obj["data"], ensure_ascii=False).encode("utf-8")
pc, pbody = _req("PUT", detail_url, data=flat, headers=write_json_headers)
if pc not in (200, 204): if pc not in (200, 204):
print(
f"{path.name}\tPUT {pc}\t(response {min(len(pbody), 2000)} chars)\t{pbody[:2000]}",
file=sys.stderr,
)
if "CSRF_ERROR" in pbody:
print(
" → CSRF rejected: refresh PRISM_CSRF_TOKEN (meta csrf-token) and ensure PRISM_SESSION_COOKIE "
"matches the same browser session (include cf_clearance if the browser sends it). "
"Or use PRISM_API_KEY. If using prism.local.env, edit that file — real env vars override it.",
file=sys.stderr,
)
patch_body = _minimal_patch(args, write_obj, prism_patch if args.map_md else None) patch_body = _minimal_patch(args, write_obj, prism_patch if args.map_md else None)
pc2, pbody2 = _req( pc2, pbody2 = _req(
"PATCH", "PATCH",
detail_url, detail_url,
data=json.dumps(patch_body, ensure_ascii=False).encode("utf-8"), data=json.dumps(patch_body, ensure_ascii=False).encode("utf-8"),
headers=_full_headers(json_body=True), headers=write_json_headers,
) )
print(f"{path.name}\tPUT {pc}\tPATCH {pc2}\t{pbody2[:500]}", file=sys.stderr) if pc2 not in (200, 204):
print(
f"{path.name}\tPATCH {pc2}\t{pbody2[:2000]}",
file=sys.stderr,
)
else:
print(f"OK\t{path.name}\tPATCH HTTP {pc2}", file=sys.stderr)
else: else:
print(f"OK\t{path.name}\tHTTP {pc}", file=sys.stderr) print(f"OK\t{path.name}\tHTTP {pc}", file=sys.stderr)
return 0 return 0
@@ -1052,6 +1278,7 @@ def main() -> int:
sys.stdout.reconfigure(encoding="utf-8") sys.stdout.reconfigure(encoding="utf-8")
except Exception: except Exception:
pass pass
_apply_prism_env_files()
ap = argparse.ArgumentParser(description="Prism production API helper for Story/*.md profiles.") ap = argparse.ArgumentParser(description="Prism production API helper for Story/*.md profiles.")
sub = ap.add_subparsers(dest="cmd", required=True) sub = ap.add_subparsers(dest="cmd", required=True)