Files
blender-portable-repo/extensions/.local/lib/python3.11/site-packages/phonemizer/utils.py
T
2026-03-17 14:58:51 -06:00

132 lines
4.1 KiB
Python

# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Provides utility functions for the phonemizer"""
import os
from numbers import Number
from pathlib import Path
from typing import Union, List, Tuple, Iterable
import importlib
def cumsum(iterable: Iterable[Number]) -> List[Number]:
    """Returns the list of running totals of the values in `iterable`

    The i-th element of the result is the sum of the first i+1 values of
    `iterable`. An empty `iterable` gives an empty list.

    """
    running_total = 0
    partial_sums = []
    for term in iterable:
        running_total += term
        partial_sums.append(running_total)
    return partial_sums
def str2list(text: Union[str, List[str]]) -> List[str]:
    """Returns the string `text` as a list of lines

    When `text` is a string, leading and trailing line separator characters
    are stripped and the remainder is split on the platform line separator
    (`os.linesep`). When `text` is not a string it is returned unchanged.

    """
    if not isinstance(text, str):
        return text

    separator = os.linesep
    stripped = text.strip(separator)
    return stripped.split(separator)
def list2str(text: Union[str, List[str]]) -> str:
    """Returns the list of lines `text` as a single string

    The lines are joined with the platform line separator (`os.linesep`).
    When `text` is already a string it is returned unchanged.

    """
    return text if isinstance(text, str) else os.linesep.join(text)
def chunks(text: Union[str, List[str]], num: int) \
        -> Tuple[List[List[str]], List[int]]:
    """Splits a `text` in a maximum of `num` chunks of lines

    This method is useful when phonemizing a single text on multiple jobs.

    The exact number of chunks returned is `m = min(num, len(str2list(text)))`.
    Only the m-1 first chunks have equal size. The last chunk can be longer.
    The input `text` can be a list or a string.

    Parameters
    ----------
    text (str or list) : The text to divide in chunks

    num (int) : The number of chunks to build, must be a strictly positive
      integer.

    Returns
    -------
    chunks (list of list of str) : The chunked text, each chunk being a list
      of consecutive lines of the input text.

    offsets (list of int) : For each chunk, the index in the input text of
      the first line of that chunk. Used to recover the line numbers in the
      input text from the chunks.

    """
    lines: List[str] = str2list(text)

    # every chunk but the last one holds exactly `size` lines
    size = int(max(1, len(lines) / num))  # noqa
    nchunks = min(num, len(lines))

    # index of the first line of the last chunk, which takes all the remainder
    tail_start = (nchunks - 1) * size
    text_chunks = [
        lines[start:start + size] for start in range(0, tail_start, size)]

    tail = lines[tail_start:]
    if tail:
        text_chunks.append(tail)

    offsets = [0] + cumsum(len(chunk) for chunk in text_chunks[:-1])
    return text_chunks, offsets
def get_package_resource(path: str) -> Path:
    """Returns the absolute path to a phonemizer resource file or directory

    The resource is looked up in the 'share' directory of the installed
    'phonemizer' package.

    Parameters
    ----------
    path (str) : the file or directory to get, must be relative to
      'phonemizer/share'.

    Raises
    ------
    ValueError if the required `path` is not found

    Returns
    -------
    The absolute path to the required resource as a `pathlib.Path`

    """
    # A bare `import importlib` does not guarantee that the `resources`
    # submodule is loaded. Import it explicitly so the attribute lookups below
    # do not depend on what other modules happened to import beforehand.
    import importlib.resources  # pylint: disable=import-outside-toplevel

    # Select the API explicitly instead of catching AttributeError, which
    # would also hide unrelated AttributeErrors raised during the lookup.
    if hasattr(importlib.resources, 'files'):
        # new in python-3.9
        resource = importlib.resources.files('phonemizer') / 'share' / path
    else:  # pragma: nocover
        with importlib.resources.path('phonemizer', 'share') as share:
            resource = share / path

    if not resource.exists():  # pragma: nocover
        raise ValueError(
            f'the requested resource does not exist: {resource}')

    return resource.resolve()
def version_as_tuple(version: str) -> Tuple[int, ...]:
    """Converts a dot-separated version string to a tuple of integers

    Any '-dev' in the version string is removed before the conversion. For
    instance, returns (1, 2, 3) from '1.2.3' or (0, 2) from '0.2-dev'.

    """
    without_dev = version.replace('-dev', '')
    return tuple(map(int, without_dev.split('.')))