2025-12-01
This commit is contained in:
@@ -0,0 +1,131 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Provides utility functions for the phonemizer"""
|
||||
|
||||
import os
|
||||
from numbers import Number
|
||||
from pathlib import Path
|
||||
from typing import Union, List, Tuple, Iterable
|
||||
|
||||
import importlib
|
||||
|
||||
|
||||
def cumsum(iterable: Iterable[Number]) -> List[Number]:
|
||||
"""Returns the cumulative sum of the `iterable` as a list"""
|
||||
res = []
|
||||
cumulative = 0
|
||||
for value in iterable:
|
||||
cumulative += value
|
||||
res.append(cumulative)
|
||||
return res
|
||||
|
||||
|
||||
def str2list(text: Union[str, List[str]]) -> List[str]:
|
||||
"""Returns the string `text` as a list of lines, split by \n"""
|
||||
if isinstance(text, str):
|
||||
return text.strip(os.linesep).split(os.linesep)
|
||||
return text
|
||||
|
||||
|
||||
def list2str(text: Union[str, List[str]]) -> str:
|
||||
"""Returns the list of lines `text` as a single string separated by \n"""
|
||||
if isinstance(text, str):
|
||||
return text
|
||||
return os.linesep.join(text)
|
||||
|
||||
|
||||
def chunks(text: Union[str, List[str]], num: int) \
|
||||
-> Tuple[List[List[str]], List[int]]:
|
||||
"""Return a maximum of `num` equally sized chunks of a `text`
|
||||
|
||||
This method is usefull when phonemizing a single text on multiple jobs.
|
||||
|
||||
The exact number of chunks returned is `m = min(num, len(str2list(text)))`.
|
||||
Only the m-1 first chunks have equal size. The last chunk can be longer.
|
||||
The input `text` can be a list or a string. Return a list of `m` strings.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
text (str or list) : The text to divide in chunks
|
||||
|
||||
num (int) : The number of chunks to build, must be a strictly positive
|
||||
integer.
|
||||
|
||||
Returns
|
||||
-------
|
||||
chunks (list of list of str) : The chunked text with utterances separated
|
||||
by '\n'.
|
||||
|
||||
offsets (list of int) : offset used below to recover the line numbers in
|
||||
the input text wrt the chunks
|
||||
|
||||
"""
|
||||
text: List[str] = str2list(text)
|
||||
size = int(max(1, len(text) / num)) # noqa
|
||||
nchunks = min(num, len(text))
|
||||
|
||||
text_chunks = [
|
||||
text[i * size:(i + 1) * size] for i in range(nchunks - 1)]
|
||||
|
||||
last = text[(nchunks - 1) * size:]
|
||||
if last:
|
||||
text_chunks.append(last)
|
||||
|
||||
offsets = [0] + cumsum((len(c) for c in text_chunks[:-1]))
|
||||
return text_chunks, offsets
|
||||
|
||||
|
||||
def get_package_resource(path: str) -> Path:
|
||||
"""Returns the absolute path to a phonemizer resource file or directory
|
||||
|
||||
The packages resource are stored within the source tree in the
|
||||
'phonemizer/share' directory and, once the package is installed, are moved
|
||||
to another system directory (e.g. /share/phonemizer).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
path (str) : the file or directory to get, must be relative to
|
||||
'phonemizer/share'.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError if the required `path` is not found
|
||||
|
||||
Returns
|
||||
-------
|
||||
The absolute path to the required resource as a `pathlib.Path`
|
||||
|
||||
"""
|
||||
try:
|
||||
# new in python-3.9
|
||||
path = importlib.resources.files('phonemizer') / 'share' / path
|
||||
except AttributeError: # pragma: nocover
|
||||
with importlib.resources.path('phonemizer', 'share') as share:
|
||||
path = share / path
|
||||
|
||||
if not path.exists(): # pragma: nocover
|
||||
raise ValueError(f'the requested resource does not exist: {path}')
|
||||
|
||||
return path.resolve()
|
||||
|
||||
|
||||
def version_as_tuple(version: str) -> Tuple[int, ...]:
|
||||
"""Returns a tuple of integers from a version string
|
||||
|
||||
Any '-dev' in version string is ignored. For instance, returns (1, 2, 3)
|
||||
from '1.2.3' or (0, 2) from '0.2-dev'
|
||||
|
||||
"""
|
||||
return tuple(int(v) for v in version.replace('-dev', '').split('.'))
|
||||
Reference in New Issue
Block a user