Files
blender-portable-repo/scripts/addons/flamenco/bat/shaman.py
T
2026-03-17 14:58:51 -06:00

545 lines
20 KiB
Python

# SPDX-License-Identifier: GPL-3.0-or-later
"""BAT interface for sending files to the Manager via the Shaman API."""
import email.header
import logging
import random
import platform
from collections import deque
from pathlib import Path, PurePath, PurePosixPath, PureWindowsPath
from typing import TYPE_CHECKING, Optional, Any, Iterable, Iterator
from . import cache, submodules
if TYPE_CHECKING:
from ..manager import ApiClient as _ApiClient
from ..manager.models import (
ShamanCheckoutResult as _ShamanCheckoutResult,
ShamanRequirementsRequest as _ShamanRequirementsRequest,
ShamanFileSpec as _ShamanFileSpec,
)
else:
_ApiClient = object
_ShamanCheckoutResult = object
_ShamanRequirementsRequest = object
_ShamanFileSpec = object
# Module-level logger; Transferrer derives a child logger from it per class.
log = logging.getLogger(__name__)
# Maximum number of uploads that may be deferred (HTTP 425 "Too Early" from
# the Shaman, i.e. another client is uploading the same file) before we stop
# offering to defer; see Transferrer._upload_files().
MAX_DEFERRED_PATHS = 8
# Maximum number of failed uploads in one attempt before aborting the
# iteration and re-requesting a fresh to-upload list from the Shaman.
MAX_FAILED_PATHS = 8
HashableShamanFileSpec = tuple[str, int, str]
"""Tuple of the 'sha', 'size', and 'path' fields of a ShamanFileSpec."""
# Mypy doesn't understand that submodules.pack.Packer exists.
class Packer(submodules.pack.Packer):  # type: ignore
    """Creates BAT Packs on a Shaman server."""

    def __init__(
        self,
        blendfile: Path,
        project_root: Path,
        target: str,
        *,
        api_client: _ApiClient,
        checkout_path: str,
        **kwargs: dict[Any, Any],
    ) -> None:
        """Constructor

        :param target: mock target root directory to construct project-relative paths.
        """
        super().__init__(blendfile, project_root, target, **kwargs)
        self.api_client = api_client
        self.checkout_path = checkout_path
        self.shaman_transferrer: Optional[Transferrer] = None

    # Mypy doesn't understand that submodules.transfer.FileTransferer exists.
    def _create_file_transferer(self) -> submodules.transfer.FileTransferer:  # type: ignore
        """Create the Shaman transferrer and remember it for later queries."""
        transferrer = Transferrer(self.api_client, self.project, self.checkout_path)
        self.shaman_transferrer = transferrer
        return transferrer

    def _make_target_path(self, target: str) -> PurePath:
        # For Shaman the target is irrelevant; any absolute path will do.
        return _root_path()

    @property
    def output_path(self) -> PurePath:
        """The path of the packed blend file in the target directory."""
        assert self._output_path is not None
        relative = self._output_path.relative_to(self._target_path)
        packed: PurePath = self.actual_checkout_path / relative
        return packed

    @property
    def actual_checkout_path(self) -> PurePosixPath:
        """The actual Shaman checkout path.

        Only valid after packing is complete. Shaman ensures that the checkout
        is unique, and thus the actual path can be different than the requested
        one.
        """
        assert self.shaman_transferrer is not None
        return PurePosixPath(self.shaman_transferrer.checkout_path)

    def execute(self):
        try:
            super().execute()
        except Exception as err:
            log.exception("Error communicating with Shaman")
            self.abort(str(err))
            self._check_aborted()
class Transferrer(submodules.transfer.FileTransferer):  # type: ignore
    """Sends files to a Shaman server.

    Runs in its own thread (see `run()`); errors are reported to the main
    thread via `error_set()` rather than raised.
    """

    class AbortUpload(Exception):
        """Raised from the upload callback to abort an upload."""

    def __init__(
        self,
        api_client: _ApiClient,
        local_project_root: Path,
        checkout_path: str,
    ) -> None:
        """Constructor.

        :param api_client: Flamenco Manager API client used for all Shaman calls.
        :param local_project_root: root of the local project on disk.
        :param checkout_path: the requested checkout path; overwritten in
            `_run()` with the actual path the Shaman created.
        """
        super().__init__()
        # Local import, matching the style used in the other methods of this class.
        from ..manager.apis import ShamanApi

        self.shaman_api = ShamanApi(api_client)
        self.project_root = local_project_root
        self.checkout_path = checkout_path
        self.log = log.getChild(self.__class__.__name__)
        # Upload statistics, reset at the start of each `_run()`.
        self.uploaded_files = 0
        self.uploaded_bytes = 0
        # Mapping from the relative path (as used in Shaman requests) to the
        # absolute path where we can find the local file. This is typically just
        # the same as the relative path (relative to the project root), but can
        # also point to a temporary file when it had to be rewritten.
        self._rel_to_local_path: dict[str, Path] = {}
        # Temporary files that should be deleted before stopping.
        self._delete_when_done: list[Path] = []

    # noinspection PyBroadException
    def run(self) -> None:
        """Thread entry point: transfer all queued files, then clean up."""
        try:
            self._run()
        except Exception as ex:
            # We have to catch exceptions in a broad way, as this is running in
            # a separate thread, and exceptions won't otherwise be seen.
            self.log.exception("Error transferring files to Shaman")
            self.error_set("Unexpected exception transferring files to Shaman: %s" % ex)
        finally:
            # Delete the files that were supposed to be moved.
            for src in self._delete_when_done:
                self.delete_file(src)
            self._delete_when_done.clear()

    def _run(self) -> None:
        """Main transfer sequence: define checkout, upload files, request checkout."""
        self.uploaded_files = 0
        self.uploaded_bytes = 0

        # Construct the Shaman Checkout Definition file.
        shaman_file_specs = self._create_checkout_definition()
        if not shaman_file_specs:
            # An error has already been logged.
            return

        failed_files = self._upload_missing_files(shaman_file_specs)
        if failed_files:
            self.log.error("Aborting upload due to too many failures")
            self.error_set("Giving up after multiple attempts to upload the files")
            return
        self.log.info("All files uploaded successfully")

        checkout_result = self._request_checkout(shaman_file_specs)
        # NOTE(review): `_request_checkout()` returns None on API error or when
        # `self.checkout_path` is empty, which would trip this assert — confirm
        # callers always set a checkout path.
        assert checkout_result is not None
        # Update our checkout path to match the one received from the Manager.
        self.checkout_path = checkout_result.checkout_path

    def _upload_missing_files(
        self, shaman_file_specs: _ShamanRequirementsRequest
    ) -> list[_ShamanFileSpec]:
        """Upload whichever of the given files the Shaman does not have yet.

        :return: the files that could not be uploaded; empty on success.
        """
        self.log.info("Feeding %d files to the Shaman", len(shaman_file_specs.files))
        if self.log.isEnabledFor(logging.INFO):
            for spec in shaman_file_specs.files:
                self.log.info(" - %s", spec.path)

        # Try to upload all the files.
        failed_files: set[HashableShamanFileSpec] = set()
        max_tries = 50
        for try_index in range(max_tries):
            # Send the file to the Shaman and see what we still need to send there.
            to_upload = self._send_checkout_def_to_shaman(shaman_file_specs)
            if to_upload is None:
                # An error has already been logged.
                return make_file_specs_regular_list(failed_files)
            if not to_upload:
                break

            # Send the files that still need to be sent.
            self.log.info("Upload attempt %d", try_index + 1)
            failed_files = self._upload_files(to_upload)
            if not failed_files:
                break

            # Having failed paths at this point is expected when multiple
            # clients are sending the same files. Instead of retrying on a
            # file-by-file basis, we just re-send the checkout definition
            # file to the Shaman and obtain a new list of files to upload.
        return make_file_specs_regular_list(failed_files)

    def _create_checkout_definition(self) -> Optional[_ShamanRequirementsRequest]:
        """Create the checkout definition file for this BAT pack.

        :returns: the checkout definition, or None if there was an error and
            the file transfer was aborted (the error is logged and passed to
            `error_set()` before returning).
        """
        from ..manager.models import (
            ShamanRequirementsRequest,
            ShamanFileSpec,
        )

        filespecs: list[ShamanFileSpec] = []
        for src, dst, act in self.iter_queue():
            try:
                checksum = cache.compute_cached_checksum(src)
                filesize = src.stat().st_size
                relpath = str(_root_path_strip(dst))
                filespec = ShamanFileSpec(
                    sha=checksum,
                    size=filesize,
                    path=relpath,
                )
                if filespec in filespecs:
                    # FIXME: there is an issue in BAT that some UDIM files are
                    # reported twice. There is no use asking Shaman to check
                    # them out twice, so avoid duplicates here for now.
                    # ShamanFileSpec is not a hashable type, so unfortunately we
                    # can't use a set() here.
                    continue
                filespecs.append(filespec)
                self._rel_to_local_path[relpath] = src
                # Files queued for MOVE are deleted in `run()`'s finally-block.
                if act == submodules.transfer.Action.MOVE:
                    self._delete_when_done.append(src)
            except Exception:
                # We have to catch exceptions in a broad way, as this is running in
                # a separate thread, and exceptions won't otherwise be seen.
                msg = "Error transferring %s to %s" % (src, dst)
                self.log.exception(msg)
                # Put the files to copy back into the queue, and abort. This allows
                # the main thread to inspect the queue and see which files were not
                # copied. The one we just failed (due to this exception) should also
                # be reported there.
                self.queue.put((src, dst, act))
                self.error_set(msg)
                return None
        cache.cleanup_cache()
        specs: ShamanRequirementsRequest = ShamanRequirementsRequest(files=filespecs)
        return specs

    def _send_checkout_def_to_shaman(
        self,
        requirements: _ShamanRequirementsRequest,
    ) -> Optional[deque[_ShamanFileSpec]]:
        """Send the checkout definition file to the Shaman.

        :return: An iterable of file specs that still need to be uploaded, or
            None if there was an error.
        """
        from ..manager.exceptions import ApiException
        from ..manager.models import ShamanRequirementsResponse

        try:
            resp = self.shaman_api.shaman_checkout_requirements(requirements)
        except ApiException as ex:
            # TODO: the body should be JSON of a predefined type, parse it to get the actual message.
            msg = "Error from Shaman, code %d: %s" % (ex.status, ex.body)
            self.log.error(msg)
            self.error_set(msg)
            return None
        assert isinstance(resp, ShamanRequirementsResponse)

        to_upload: deque[_ShamanFileSpec] = deque()
        for file_spec in resp.files:
            if file_spec.path not in self._rel_to_local_path:
                msg = (
                    "Shaman requested path we did not intend to upload: %r" % file_spec
                )
                self.log.error(msg)
                self.error_set(msg)
                return None

            self.log.debug(" %s: %s", file_spec.status, file_spec.path)
            status = file_spec.status.value
            # 'unknown' files go to the front of the queue, 'uploading' ones to
            # the back — presumably so files another client is already sending
            # are tried last and can be deferred; see `_upload_files()`.
            if status == "unknown":
                to_upload.appendleft(file_spec)
            elif status == "uploading":
                to_upload.append(file_spec)
            else:
                msg = "Unknown status in response from Shaman: %r" % file_spec
                self.log.error(msg)
                self.error_set(msg)
                return None
        return to_upload

    def _upload_files(
        self, to_upload: deque[_ShamanFileSpec]
    ) -> set[HashableShamanFileSpec]:
        """Actually upload the files to Shaman.

        Returns the set of files that we did not upload.

        May raise `AbortUpload` (via `report_transferred()`) when an abort was
        requested; that propagates to `run()`'s broad exception handler.
        """
        if not to_upload:
            self.log.info("All files are at the Shaman already")
            self.report_transferred(0)
            return set()

        from ..manager.exceptions import ApiException

        failed_specs: set[HashableShamanFileSpec] = set()
        deferred_specs: set[HashableShamanFileSpec] = set()

        def defer(filespec: _ShamanFileSpec) -> None:
            """Mark the file as deferred and shuffle the remaining queue."""
            nonlocal to_upload

            self.log.info(
                " %s deferred (already being uploaded by someone else)", filespec.path
            )
            deferred_specs.add(make_file_spec_hashable(filespec))

            # Instead of deferring this one file, randomize the files to upload.
            # This prevents multiple deferrals when someone else is uploading
            # files from the same project (because it probably happens alphabetically).
            all_files = list(to_upload)
            random.shuffle(all_files)
            to_upload = deque(all_files)

        self.log.info(
            "Going to upload %d of %d files",
            len(to_upload),
            len(self._rel_to_local_path),
        )
        while to_upload:
            # After too many failures, just retry to get a fresh set of files to upload.
            if len(failed_specs) > MAX_FAILED_PATHS:
                self.log.info("Too many failures, going to abort this iteration")
                failed_specs.update(make_file_specs_hashable_gen(to_upload))
                return failed_specs

            file_spec = to_upload.popleft()
            self.log.info(" %s", file_spec.path)

            # Pre-flight check. The generated API code will load the entire file into
            # memory before sending it to the Shaman. It's faster to do a check at
            # Shaman first, to see if we need uploading at all.
            check_resp = self.shaman_api.shaman_file_store_check(
                checksum=file_spec.sha,
                filesize=file_spec.size,
            )
            if check_resp.status.value == "stored":
                self.log.info(" %s: skipping, already on server", file_spec.path)
                continue

            # Let the Shaman know whether we can defer uploading this file or not.
            # Deferral is only offered while under the deferral budget, for files
            # not already deferred once, and when other files remain to upload.
            hashable_file_spec = make_file_spec_hashable(file_spec)
            can_defer = bool(
                len(deferred_specs) < MAX_DEFERRED_PATHS
                and hashable_file_spec not in deferred_specs
                and len(to_upload)
            )

            local_filepath = self._rel_to_local_path[file_spec.path]
            filename_header = _encode_original_filename_header(file_spec.path)
            try:
                with local_filepath.open("rb") as file_reader:
                    self.shaman_api.shaman_file_store(
                        checksum=file_spec.sha,
                        filesize=file_spec.size,
                        body=file_reader,
                        x_shaman_can_defer_upload=can_defer,
                        x_shaman_original_filename=filename_header,
                    )
            except ApiException as ex:
                if ex.status == 425:
                    # Too Early, i.e. defer uploading this file.
                    self.log.info(
                        " %s: someone else is uploading this file, deferring",
                        file_spec.path,
                    )
                    defer(file_spec)
                    continue
                elif ex.status == 417:
                    # Expectation Failed; mismatch of checksum or file size.
                    msg = "Error from Shaman uploading %s, code %d: %s" % (
                        file_spec.path,
                        ex.status,
                        ex.body,
                    )
                else:  # Unknown error
                    msg = "API exception\nHeaders: %s\nBody: %s\n" % (
                        ex.headers,
                        ex.body,
                    )
                self.log.error(msg)
                self.error_set(msg)
                failed_specs.add(make_file_spec_hashable(file_spec))
                return failed_specs

            failed_specs.discard(make_file_spec_hashable(file_spec))
            self.uploaded_files += 1
            file_size = local_filepath.stat().st_size
            self.uploaded_bytes += file_size
            self.report_transferred(file_size)

        if failed_specs:
            self.log.info(
                "Uploaded %d bytes in %d files so far",
                self.uploaded_bytes,
                self.uploaded_files,
            )
            return failed_specs

        self.log.info(
            "Done uploading %d bytes in %d files",
            self.uploaded_bytes,
            self.uploaded_files,
        )
        return set()

    def report_transferred(self, bytes_transferred: int) -> None:
        """Report progress, raising `AbortUpload` when an abort was requested."""
        if self._abort.is_set():
            self.log.warning("Interrupting ongoing upload")
            raise self.AbortUpload("interrupting ongoing upload")
        super().report_transferred(bytes_transferred)

    def _request_checkout(
        self, shaman_file_specs: _ShamanRequirementsRequest
    ) -> Optional[_ShamanCheckoutResult]:
        """Ask the Shaman to create a checkout of this BAT pack.

        :return: the checkout result, or None when no checkout path was set
            or the Shaman reported an error.
        """
        if not self.checkout_path:
            self.log.warning("NOT requesting checkout at Shaman")
            return None

        from ..manager.models import ShamanCheckout, ShamanCheckoutResult
        from ..manager.exceptions import ApiException

        self.log.info(
            "Requesting checkout at Shaman for checkout_path=%s", self.checkout_path
        )
        checkoutRequest = ShamanCheckout(
            files=shaman_file_specs.files,
            checkout_path=str(self.checkout_path),
        )
        try:
            result: ShamanCheckoutResult = self.shaman_api.shaman_checkout(
                checkoutRequest
            )
        except ApiException as ex:
            if ex.status == 424:  # Files were missing
                msg = "We did not upload some files, checkout aborted"
            elif ex.status == 409:  # Checkout already exists
                msg = "There is already an existing checkout at %s" % self.checkout_path
            else:  # Unknown error
                msg = "API exception\nHeaders: %s\nBody: %s\n" % (
                    ex.headers,
                    ex.body,
                )
            self.log.error(msg)
            self.error_set(msg)
            return None
        self.log.info("Shaman created checkout at %s", result.checkout_path)
        return result
def make_file_spec_hashable(spec: _ShamanFileSpec) -> HashableShamanFileSpec:
    """Reduce the given spec to an immutable tuple, usable as set/dict key."""
    return spec.sha, spec.size, spec.path
def make_file_spec_regular(hashable_spec: HashableShamanFileSpec) -> _ShamanFileSpec:
    """Reconstruct a real ShamanFileSpec from its hashable tuple form."""
    from ..manager.models import ShamanFileSpec

    sha, size, path = hashable_spec
    result: ShamanFileSpec = ShamanFileSpec(sha, size, path)
    return result
def make_file_specs_hashable_gen(
    specs: Iterable[_ShamanFileSpec],
) -> Iterator[HashableShamanFileSpec]:
    """Lazily yield the hashable representation of each given spec."""
    return map(make_file_spec_hashable, specs)
def make_file_specs_regular_list(
    hashable_specs: Iterable[HashableShamanFileSpec],
) -> list[_ShamanFileSpec]:
    """Convert hashable filespecs into a list of real ones."""
    return list(map(make_file_spec_regular, hashable_specs))
def _root_path() -> PurePath:
    """Return an arbitrary root path for the current platform.

    When packing, BAT needs to know the "target path", and for Shaman use this
    is kind of irrelevant. Any path will do, as long as it's absolute.
    """
    if platform.system() != "Windows":
        return PurePosixPath("/")
    # A path on Windows can only be absolute if it has a drive letter. The
    # letter itself doesn't matter, as it'll only be used to compute
    # relative paths between various files rooted here.
    return PureWindowsPath("X:/")
def _root_path_strip(path: PurePath) -> PurePosixPath:
    """Strip off the leading / (POSIX) and drive letter (Windows).

    Note that this is limited to paths of the current platform. In other words,
    a `PurePosixPath('X:/path/to/file')` will be returned as-is, as it's
    considered relative on a POSIX platform. This is not an issue as this
    function is just meant to strip off the platform-specific root path returned
    by `_root_path()`.
    """
    if not path.is_absolute():
        # Already relative: pass through unchanged (re-typed as POSIX).
        return PurePosixPath(path)
    # The first component of an absolute path is its anchor ('/' or 'X:\\').
    _, *remainder = path.parts
    return PurePosixPath(*remainder)
def _encode_original_filename_header(filename: str) -> str:
    """Encode the 'original filename' as valid HTTP Header.

    Non-ASCII names become RFC 2047 encoded words; ASCII names pass through.

    See the specs for the X-Shaman-Original-Filename header in the OpenAPI
    operation `shamanFileStore`, defined in flamenco-openapi.yaml.
    """
    header = email.header.Header(filename, charset="utf-8")
    return header.encode()