Files
blender-portable-repo/scripts/addons/flamenco/bat_v2/pack_shaman.py
T
Nathan 6c3b78075b work: restore shift+spacebar for media play/pause
maybe put in maya config? idk what funiman's preference is
2026-05-29 14:58:59 -06:00

595 lines
21 KiB
Python

# SPDX-FileCopyrightText: 2026 Blender Authors
# SPDX-License-Identifier: GPL-3.0-or-later
"""BAT v2 packing to a Shaman server."""
from __future__ import annotations
__all__ = ("pack_start",)
import dataclasses
import email.header
import logging
import random
from functools import partial
from pathlib import Path, PurePosixPath
from typing import TYPE_CHECKING, Any, TypeAlias
import bpy
if TYPE_CHECKING:
# _BATPacker: TypeAlias = pack.BATPacker
from ..manager import ApiClient as _ApiClient
from ..manager.apis import ShamanApi as _ShamanApi
from ..manager.models import (
ShamanCheckoutResult as _ShamanCheckoutResult,
)
from ..manager.models import (
ShamanFileSpec as _ShamanFileSpec,
)
from ..manager.models import (
ShamanRequirementsRequest as _ShamanRequirementsRequest,
)
from .submodules.file_usage import FileInfo as _FileInfo
from .submodules.pack import BATPacker as _BATPacker
from .submodules.pack import QueueingExecutor as _QueueingExecutor
else:
_ApiClient = object
_ShamanApi = object
_ShamanCheckoutResult = object
_ShamanRequirementsRequest = object
_ShamanFileSpec = object
_BATPacker = object
_FileInfo = object
_QueueingExecutor = object
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Limit on the number of deferred uploads tracked at once; when this many are
# tracked, further uploads are sent without permission to be deferred.
MAX_DEFERRED_PATHS = 8
# NOTE(review): not referenced in the visible code; presumably the intended
# limit on the number of failed uploads before giving up -- confirm.
MAX_FAILED_PATHS = 8
# Storage location handed to the disk file hash service when hashing the files
# to pack. Lives inside Blender's cache directory.
HASH_STORAGE_PATH = Path(bpy.app.cachedir) / "flamenco/shaman"
# Hash method requested from the disk file hash service; the resulting value is
# sent to Shaman as the file's 'sha'.
HASH_METHOD = "sha256"
# Literal placeholder that is put in front of the checkout path when building
# the location of the blend file in the pack.
SHAMAN_JOBS_VARIABLE = "{jobs}"
# NOTE(review): this alias is not referenced in the visible code.
HashableShamanFileSpec = tuple[str, int, str]
"""Tuple of the 'sha', 'size', and 'path' fields of a ShamanFileSpec."""
# Alias some types from blender_asset_tracer so that we can use type annotations
# without having to import from BATv2.
BATPackReporter: TypeAlias = Any
BATPacker: TypeAlias = Any
def pack_start(
    project_root: Path,
    reporter: BATPackReporter,
    *,
    use_relative_only: bool,
    api_client: _ApiClient,
    checkout_path: PurePosixPath,
    ignore_globs: set[str] | None = None,
) -> BATPacker:
    """Create a BAT packer that transfers its files to a Shaman server.

    :param project_root: passed on to the BAT packer as the project root.
    :param reporter: receives progress and error reports, both from the BAT
        packer and from the Shaman file transfer.
    :param use_relative_only: passed on to the file usage options.
    :param api_client: API client used to construct the Shaman API.
    :param checkout_path: the checkout path to request from the Shaman.
    :param ignore_globs: glob patterns passed on to the file usage options.
        When omitted, a fresh empty set is used for every call.
    :return: the constructed BAT packer, with a ShamanPacker as file transfer.
    """
    from ..manager.apis import ShamanApi
    from .submodules import file_usage, pack

    # Never use a mutable object as default value: it would be shared between
    # all calls, and any mutation downstream would leak into the next pack.
    if ignore_globs is None:
        ignore_globs = set()

    shaman_api = ShamanApi(api_client)
    executor = pack.QueueingExecutor()
    shaman_transferer = ShamanPacker(shaman_api, checkout_path, executor, reporter)

    batpacker = pack.BATPacker(
        project_root,
        file_usage.Options(
            use_relative_only=use_relative_only,
            ignore_globs=ignore_globs,
        ),
        reporter,
        file_transfer=shaman_transferer,
    )
    return batpacker
@dataclasses.dataclass
class ShamanUploadProgress:
    """Bookkeeping of uploads that were deferred or that failed."""

    # Uploads that were postponed because another client was already busy
    # uploading the same file. By the time we get back to such a file, the
    # other upload may have finished, which saves us the transfer.
    #
    # Maps 'path in pack' to the BAT FileInfo and the Shaman FileSpec.
    deferred: dict[PurePosixPath, tuple[_FileInfo, _ShamanFileSpec]] = (
        dataclasses.field(default_factory=dict)
    )

    # Uploads that did not succeed, kept around so that they can be retried.
    # They are reported as actual errors when too many files fail or when the
    # retry counter reaches its maximum.
    #
    # Maps 'path in pack' to the BAT FileInfo, the Shaman FileSpec, and the
    # error message.
    failures: dict[PurePosixPath, tuple[_FileInfo, _ShamanFileSpec, str]] = (
        dataclasses.field(default_factory=dict)
    )

    retry_counter: int = 0
    max_retries: int = 50

    @property
    def num_deferred(self) -> int:
        """Number of uploads currently registered as deferred."""
        return len(self.deferred)

    @property
    def num_failed(self) -> int:
        """Number of uploads currently registered as failed."""
        return len(self.failures)

    def is_deferred(self, relpath_in_pack: PurePosixPath) -> bool:
        """Return whether the given path in the pack is registered as deferred."""
        return relpath_in_pack in self.deferred

    def defer(
        self,
        bat_file_info: _FileInfo,
        shaman_file_spec: _ShamanFileSpec,
    ) -> None:
        """Register the upload of this file as deferred."""
        key = bat_file_info.relpath_in_pack
        # Deferring happens at most once per file; the next upload attempt of
        # an already-deferred file must not be deferred again.
        assert key not in self.deferred
        self.deferred[key] = bat_file_info, shaman_file_spec

    def failed(
        self,
        bat_file_info: _FileInfo,
        shaman_file_spec: _ShamanFileSpec,
        errormsg: str,
    ) -> None:
        """Register the upload of this file as failed, with its error message."""
        key = bat_file_info.relpath_in_pack
        # A file is only registered as failure once. Whoever retries the upload
        # has to remove it from 'failures' before trying again.
        assert key not in self.failures
        self.failures[key] = bat_file_info, shaman_file_spec, errormsg
@dataclasses.dataclass
class ShamanPacker:
    """File transfer of a BAT pack to a Shaman server.

    The work is split into small steps that are queued on ``executor``:
    hashing the files, sending the requirements to the Shaman, uploading the
    files the Shaman asks for (with retries for deferred and failed uploads),
    and finally requesting the checkout. Call :meth:`start` once, then call
    :meth:`step` until it returns False.
    """

    shaman_api: _ShamanApi
    checkout_path: PurePosixPath
    executor: _QueueingExecutor
    reporter: BATPackReporter

    # Shaman may decide to create the checkout at another path than requested.
    # This will be set to the actually-used path on the farm, relative to the
    # Shaman's "jobs" directory.
    #
    # NOTE: It is the checkout path of the job, NOT the path to the blend file.
    _checkout_path_final: PurePosixPath | None = None
    _source_file_relpath_in_pack: PurePosixPath | None = None
    _num_files_to_transfer_total: int = -1
    _num_files_to_transfer_done: int = 0

    @property
    def is_succes(self) -> bool:
        """Return whether the Shaman operation was completed successfully.

        This is the case once the Shaman has reported the final checkout path.
        """
        return bool(self._checkout_path_final)

    def start(self, batpacker: _BATPacker) -> None:
        """Reset the progress counters and queue the first step (hashing).

        :param batpacker: provides the files to copy and the info about the
            main blend file.
        """
        files_to_copy = batpacker.all_files_to_copy()

        # Initial value is the total number of files to copy. Once the Shaman
        # server has told us how many files to submit, this will be adjusted.
        self._num_files_to_transfer_total = len(files_to_copy)
        self._num_files_to_transfer_done = 0

        # Remember where the main blend file sits in the BAT pack.
        source_file_info = batpacker.deps_repo.source_file_info()
        assert source_file_info.relpath_in_pack is not None
        self._source_file_relpath_in_pack = PurePosixPath(
            source_file_info.relpath_in_pack.as_posix()
        )

        self.executor.queue(partial(self._step_queue_hashing, files_to_copy))

    def step(self) -> bool:
        """Perform a single step in the Shaman file transfer.

        Returns whether there are more steps to do (True) or the process is
        done (False).
        """
        if self.executor.is_done:
            return False
        self.executor.run_step()
        return not self.executor.is_done

    def blendfile_location_in_pack(self) -> PurePosixPath:
        """Return '{jobs}' / final checkout path / path of the blend file in the pack.

        Only valid after the checkout was created and :meth:`start` was called;
        both are asserted.
        """
        assert self._checkout_path_final is not None
        assert self._source_file_relpath_in_pack is not None
        return (
            PurePosixPath(SHAMAN_JOBS_VARIABLE)
            / self._checkout_path_final
            / self._source_file_relpath_in_pack
        )

    def num_files_to_transfer(self) -> tuple[int, int]:
        """Return the number of files that need to be transferred.

        This is a tuple [total, done] with the total number of files to
        transfer, and the number of transferred files so far.

        The number may change during the packing process, as it takes time
        for the Shaman protocol to get this information. Or some paths may
        turn out to be multiple paths (UDIMs for example).
        """
        return self._num_files_to_transfer_total, self._num_files_to_transfer_done

    def _step_queue_hashing(self, files_to_copy: dict[Path, _FileInfo]) -> None:
        """Queue one hashing step per file, followed by the upload-queueing step."""
        from ..manager.models import ShamanRequirementsRequest

        # Shaman Spec that's shared between the queued function calls. They
        # can all just append to the same list.
        shaman_spec = ShamanRequirementsRequest(files=[])
        assert isinstance(shaman_spec, ShamanRequirementsRequest)

        # Tracks deferred files and failed uploads.
        upload_progress = ShamanUploadProgress()

        # Queue up all the hash computations.
        for file_info in files_to_copy.values():
            self.executor.queue(
                partial(
                    self._step_hash_file,
                    file_info,
                    shaman_spec,
                )
            )

        # After the hashes are gathered in 'shaman_spec', send it to Shaman.
        self.executor.queue(
            partial(
                self._step_queue_uploads_of_files,
                files_to_copy,
                shaman_spec,
                upload_progress,
            )
        )

    def _step_hash_file(
        self,
        file_info: _FileInfo,
        shaman_spec: _ShamanRequirementsRequest,
    ) -> None:
        """Hash one file and append its file spec to ``shaman_spec.files``.

        A file that does not exist is reported as missing and is left out of
        the spec.
        """
        from _bpy_internal import disk_file_hash_service

        from ..manager.models import ShamanFileSpec

        path_to_pack = file_info.path_to_pack
        if not path_to_pack.exists():
            # If the file is missing, there's little else to do than reporting
            # it as such and continue with the next file.
            if path_to_pack == file_info.source_path:
                log.info("File missing: %s", path_to_pack)
            else:
                log.info(
                    "File missing after rewriting %s to %s",
                    file_info.source_path,
                    path_to_pack,
                )
            self.reporter.on_missing_file(
                file_info.source_path, file_info.relpath_in_pack
            )
            return

        # It might be tempting to use the same Disk File Hash Service as BAT's
        # path rewriting system is using. However, that only hashes the files
        # that need rewriting, and the code below only deals with paths after
        # rewriting (or where rewriting was not necessary). That means that
        # there is no benefit in sharing the same database.
        dfhs = disk_file_hash_service.get_service(HASH_STORAGE_PATH)
        checksum = dfhs.get_hash(path_to_pack, HASH_METHOD)
        filesize = path_to_pack.stat().st_size

        filespec = ShamanFileSpec(
            sha=checksum,
            size=filesize,
            path=str(file_info.relpath_in_pack),
        )
        assert isinstance(filespec, ShamanFileSpec)
        shaman_spec.files.append(filespec)

    def _step_queue_uploads_of_files(
        self,
        files_to_copy: dict[Path, _FileInfo],
        shaman_spec: _ShamanRequirementsRequest,
        upload_progress: ShamanUploadProgress,
    ) -> None:
        """Send the spec file to Shaman, and queue file uploads.

        This runs once per upload round: the first time after hashing, and
        again for every retry.
        """
        # Query Shaman to figure out which files still need uploading.
        to_upload = self._send_spec_to_shaman(shaman_spec)
        if to_upload is None:
            # Errors have been reported already, so just stop.
            return

        log.info(
            "Feeding %d/%d files to the Shaman", len(to_upload), len(shaman_spec.files)
        )
        self._num_files_to_transfer_total = len(to_upload)

        # Files the Shaman no longer asks for are on the server by now (for
        # example because another client finished uploading them). Forget
        # them, otherwise they would keep triggering retries forever.
        self._prune_upload_progress(upload_progress, to_upload)

        # Create a mapping from the path in the pack (which is used in
        # the file specs) to the FileInfo.
        path_in_pack_to_abs: dict[str, _FileInfo] = {
            str(file_info.relpath_in_pack): file_info
            for file_info in files_to_copy.values()
        }

        # Queue the file uploads.
        last_index = len(to_upload) - 1
        for index, file_spec in enumerate(to_upload):
            file_info = path_in_pack_to_abs[file_spec.path]
            # The last file of the round cannot be deferred, as there is no
            # other work left to do in the mean time.
            is_last_file = index == last_index
            self.executor.queue(
                partial(
                    self._step_queue_upload_file,
                    file_info,
                    file_spec,
                    is_last_file,
                    upload_progress,
                )
            )

        self.executor.queue(
            partial(
                self._step_check_upload_success,
                files_to_copy,
                shaman_spec,
                upload_progress,
            )
        )

    @staticmethod
    def _prune_upload_progress(
        upload_progress: ShamanUploadProgress,
        to_upload: list[_ShamanFileSpec],
    ) -> None:
        """Forget deferred/failed entries of files that are not in ``to_upload``."""
        # File specs identify files by the string form of the path in the pack.
        pending_paths = {file_spec.path for file_spec in to_upload}
        for tracked in (upload_progress.deferred, upload_progress.failures):
            stale = [relpath for relpath in tracked if str(relpath) not in pending_paths]
            for relpath in stale:
                del tracked[relpath]

    @staticmethod
    def _forget_upload_progress(
        upload_progress: ShamanUploadProgress,
        relpath_in_pack: PurePosixPath,
    ) -> None:
        """Forget that this file was ever deferred or failed."""
        upload_progress.deferred.pop(relpath_in_pack, None)
        upload_progress.failures.pop(relpath_in_pack, None)

    def _step_queue_upload_file(
        self,
        file_info: _FileInfo,
        file_spec: _ShamanFileSpec,
        is_last_file: bool,
        upload_progress: ShamanUploadProgress,
    ) -> None:
        """Check whether the file is on the Shaman; queue its upload if not."""
        # Pre-flight check. The generated API code will load the entire file
        # into memory before sending it to the Shaman. It's faster to do a check
        # at Shaman first, to see if we need uploading at all.
        check_resp = self.shaman_api.shaman_file_store_check(
            checksum=file_spec.sha,
            filesize=file_spec.size,
        )
        if check_resp.status.value == "stored":
            log.info(" %s: skipping, already on server", file_spec.path)
            # Nothing left to retry for this file.
            self._forget_upload_progress(upload_progress, file_info.relpath_in_pack)
            return

        # Do the 'start' reporting in a separate step, so that the Blender UI
        # can be updated for it. The 'done'/'error' reports are done at the end
        # of the file upload step, and so these don't need a separate step.
        self.executor.queue(partial(self._step_report_upload_file_start, file_info))
        self.executor.queue(
            partial(
                self._step_upload_file,
                file_info,
                file_spec,
                is_last_file,
                upload_progress,
            )
        )

    def _step_report_upload_file_start(self, file_info: _FileInfo) -> None:
        """Report to the reporter that the copy of this file starts."""
        self.reporter.on_copy_start(file_info.source_path, file_info.relpath_in_pack)

    def _step_upload_file(
        self,
        file_info: _FileInfo,
        file_spec: _ShamanFileSpec,
        is_last_file: bool,
        upload_progress: ShamanUploadProgress,
    ) -> None:
        """Upload one file to the Shaman.

        The outcome is either a 'copy done' report, a registration as deferred
        upload, or a registration as failed upload.
        """
        from ..manager.exceptions import ApiException

        relpath_in_pack = file_info.relpath_in_pack

        # This is a new attempt, so a failure of a previous attempt is no longer
        # relevant. If it fails again, it gets registered again below.
        upload_progress.failures.pop(relpath_in_pack, None)

        # See whether we may be able to defer uploading this file or not.
        was_deferred = upload_progress.is_deferred(relpath_in_pack)
        can_defer = bool(
            not is_last_file
            and upload_progress.num_deferred < MAX_DEFERRED_PATHS
            and not was_deferred
        )

        filename_header = _encode_original_filename_header(file_spec.path)
        try:
            with file_info.path_to_pack.open("rb") as file_reader:
                self.shaman_api.shaman_file_store(
                    checksum=file_spec.sha,
                    filesize=file_spec.size,
                    body=file_reader,
                    x_shaman_can_defer_upload=can_defer,
                    x_shaman_original_filename=filename_header,
                )
        except ApiException as ex:
            if ex.status == 425 and not was_deferred:
                # Too Early, i.e. defer uploading this file.
                log.info(
                    " %s: someone else is uploading this file, deferring",
                    file_spec.path,
                )
                upload_progress.defer(file_info, file_spec)
                return

            if ex.status == 417:
                # Expectation Failed; mismatch of checksum or file size.
                msg = "Error from Shaman uploading %s, code %d: %s" % (
                    file_spec.path,
                    ex.status,
                    ex.body,
                )
            else:  # Unknown error, or a second deferral of the same file.
                msg = "API exception\nHeaders: %s\nBody: %s\n" % (
                    ex.headers,
                    ex.body,
                )
            log.error(msg)
            upload_progress.failed(file_info, file_spec, msg)
            return

        # The file is on the server now, so it is no longer waiting for a retry.
        upload_progress.deferred.pop(relpath_in_pack, None)

        self._num_files_to_transfer_done += 1
        self.reporter.on_copy_done(file_info.source_path, file_info.relpath_in_pack)

    def _step_check_upload_success(
        self,
        files_to_copy: dict[Path, _FileInfo],
        shaman_spec: _ShamanRequirementsRequest,
        upload_progress: ShamanUploadProgress,
    ) -> None:
        """See if there were any deferred or failed files.

        If there were, re-queue the uploading of the remaining files.
        Unless the number of retries has been exceeded, in which case the
        failures are final.
        """
        if upload_progress.num_deferred == 0 and upload_progress.num_failed == 0:
            # Nothing left to do, so move on to the next stage.
            self.executor.queue(partial(self._step_request_checkout, shaman_spec))
            return

        upload_progress.retry_counter += 1
        if upload_progress.retry_counter >= upload_progress.max_retries:
            # Failed uploads have really failed now.
            #
            # Deferred uploads shouldn't be mentioned, because they only get
            # deferred on the first upload attempt. After that, if they fail,
            # they get into the failures.
            for fileinfo, _, errormsg in upload_progress.failures.values():
                self.reporter.on_copy_error(
                    fileinfo.source_path, fileinfo.relpath_in_pack, errormsg
                )
            return

        # Retry uploading.
        self.executor.queue(
            partial(
                self._step_queue_uploads_of_files,
                files_to_copy,
                shaman_spec,
                upload_progress,
            )
        )

    def _step_request_checkout(self, shaman_spec: _ShamanRequirementsRequest) -> None:
        """Ask the Shaman to create a checkout of this BAT pack.

        On success the checkout path reported by the Shaman is remembered; on
        failure the error is logged and sent to the reporter.
        """
        assert self.checkout_path

        from ..manager.exceptions import ApiException
        from ..manager.models import ShamanCheckout, ShamanCheckoutResult

        log.info(
            "Requesting checkout at Shaman for checkout_path=%s", self.checkout_path
        )

        checkout_request = ShamanCheckout(
            files=shaman_spec.files,
            checkout_path=str(self.checkout_path),
        )

        try:
            result: ShamanCheckoutResult = self.shaman_api.shaman_checkout(
                checkout_request
            )
        except ApiException as ex:
            if ex.status == 424:  # Files were missing
                msg = "We did not upload some files, checkout aborted"
            elif ex.status == 409:  # Checkout already exists
                msg = "There is already an existing checkout at %s" % self.checkout_path
            else:  # Unknown error
                msg = "API exception\nHeaders: %s\nBody: %s\n" % (
                    ex.headers,
                    ex.body,
                )
            log.error(msg)
            self.reporter.on_error(msg)
            return

        log.info("Shaman created checkout at %s", result.checkout_path)
        self._checkout_path_final = result.checkout_path

    def _send_spec_to_shaman(
        self,
        requirements: _ShamanRequirementsRequest,
    ) -> list[_ShamanFileSpec] | None:
        """Send the checkout definition file to the Shaman.

        :return: A list of file specs that still need to be uploaded, or
            None if there was an error.
        """
        from ..manager.exceptions import ApiException
        from ..manager.models import ShamanRequirementsResponse

        requested_relpaths = {file.path for file in requirements.files}

        try:
            resp = self.shaman_api.shaman_checkout_requirements(requirements)
        except ApiException as ex:
            # TODO: the body should be JSON of a predefined type, parse it to get the actual message.
            msg = "Error from Shaman, code %d: %s" % (ex.status, ex.body)
            log.error(msg)
            self.reporter.on_error(msg)
            return None
        assert isinstance(resp, ShamanRequirementsResponse)

        # Go over the response, and create two queues for uploading. Any file
        # that's already being uploaded by somebody else will be put in the
        # low-priority queue.
        to_upload_normal_prio: list[_ShamanFileSpec] = []
        to_upload_low_prio: list[_ShamanFileSpec] = []
        for file_spec in resp.files:
            if file_spec.path not in requested_relpaths:
                msg = (
                    "Shaman requested path we did not intend to upload: %r" % file_spec
                )
                log.error(msg)
                self.reporter.on_error(msg)
                return None

            log.debug(" %s: %s", file_spec.status, file_spec.path)
            status = file_spec.status.value
            if status == "unknown":
                to_upload_normal_prio.append(file_spec)
            elif status == "uploading":
                to_upload_low_prio.append(file_spec)
            else:
                msg = "Unknown status in response from Shaman: %r" % file_spec
                log.error(msg)
                self.reporter.on_error(msg)
                return None

        # Randomize the two lists, so that when two clients upload similar sets
        # of files, collisions are minimized.
        random.shuffle(to_upload_normal_prio)
        random.shuffle(to_upload_low_prio)
        return to_upload_normal_prio + to_upload_low_prio
def _encode_original_filename_header(filename: str) -> str:
    """Return 'filename' encoded for use as the value of an HTTP header.

    The header in question is X-Shaman-Original-Filename; its specification
    is part of the OpenAPI operation `shamanFileStore`, which is defined in
    flamenco-openapi.yaml.
    """
    # The filename is fed to the email header encoder as UTF-8 text. A maximum
    # line length of zero is passed so that the encoder does not wrap lines.
    encoded_header = email.header.Header(
        filename,
        charset="utf-8",
        maxlinelen=0,
    ).encode()

    # The returned value may not span multiple lines: HTTP Header line folding
    # is obsolete, see RFC9112 section 5.2 in
    # https://www.rfc-editor.org/rfc/rfc9112#name-obsolete-line-folding
    assert "\n" not in encoded_header
    return encoded_header