Files
blender-portable-repo/extensions/.local/lib/python3.11/site-packages/phonemizer/phonemize.py
T
2026-03-17 14:58:51 -06:00

329 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Provides the phonemize function
To use it in your own code, type:
from phonemizer import phonemize
"""
import os
import sys
from logging import Logger
from typing import Optional, Union, List, Pattern
from typing_extensions import Literal
from phonemizer.backend import BACKENDS
from phonemizer.backend.base import BaseBackend
from phonemizer.backend.espeak.language_switch import LanguageSwitch
from phonemizer.backend.espeak.words_mismatch import WordMismatch
from phonemizer.logger import get_logger
from phonemizer.punctuation import Punctuation
from phonemizer.separator import default_separator, Separator
from phonemizer.utils import list2str, str2list
# Names of the phonemization backends accepted by phonemize()
Backend = Literal['espeak', 'espeak-mbrola', 'festival', 'segments']
def phonemize(  # pylint: disable=too-many-arguments
        text,
        language: str = 'en-us',
        backend: Backend = 'espeak',
        separator: Optional[Separator] = default_separator,
        strip: bool = False,
        prepend_text: bool = False,
        preserve_empty_lines: bool = False,
        preserve_punctuation: bool = False,
        punctuation_marks: Union[str, Pattern] = Punctuation.default_marks(),
        with_stress: bool = False,
        tie: Union[bool, str] = False,
        language_switch: LanguageSwitch = 'keep-flags',
        words_mismatch: WordMismatch = 'ignore',
        njobs: int = 1,
        logger: Logger = get_logger()):
    """Multilingual text to phonemes converter

    Return a phonemized version of an input `text`, given its `language` and a
    phonemization `backend`.

    Note
    ----
    To improve the processing speed it is better to minimize the calls to this
    function: providing the input text as a list and calling phonemize() a
    single time is much more efficient than calling it on each element of the
    list. Indeed the initialization of the phonemization backend can be
    expensive, especially for espeak.

    Do this:

    >>> text = [line1, line2, ...]
    >>> phonemize(text, ...)

    Not this:

    >>> for line in text:
    >>>     phonemize(line, ...)

    Parameters
    ----------
    text: str or list of str
        The text to be phonemized. Empty lines are ignored unless
        ``preserve_empty_lines`` is True. If ``text`` is an str, it can be
        multiline (lines being separated by ``\\n``). If ``text`` is a list,
        each element is considered as a separated line. Each line is
        considered as a text utterance.

    language: str
        The language code of the input text, must be supported by the
        backend. If ``backend`` is 'segments', the language can be a file with
        a grapheme to phoneme mapping.

    backend: str, optional
        The software backend to use for phonemization, must be 'festival' (US
        English only is supported, coded 'en-us'), 'espeak', 'espeak-mbrola'
        or 'segments'.

    separator: Separator, optional
        String separators between phonemes, syllables and words, default to
        separator.default_separator (also used when None is given). Syllable
        separator is considered only for the festival backend. Word separator
        is ignored by the 'espeak-mbrola' backend. Initialize it as follows:

        >>> from phonemizer.separator import Separator
        >>> separator = Separator(phone='-', word=' ')

    strip: bool, optional
        If True, don't output the last word and phone separators of a token,
        default to False.

    prepend_text: bool, optional
        When True, returns a pair (input utterance, phonemized utterance) for
        each line of the input text. When False, returns only the phonemized
        utterances. Default to False.

    preserve_empty_lines: bool, optional
        When True, will keep the empty lines in the phonemized output. Default
        to False and remove all empty lines.

    preserve_punctuation: bool, optional
        When True, will keep the punctuation in the phonemized output. Not
        supported by the 'espeak-mbrola' backend. Default to False and remove
        all the punctuation.

    punctuation_marks: str or re.Pattern, optional
        The punctuation marks to consider when dealing with punctuation,
        either for removal or preservation. Can be defined as a string or
        regular expression. Default to Punctuation.default_marks().

    with_stress: bool, optional
        This option is only valid for the 'espeak' backend. When True the
        stresses on phonemes are present (stresses characters are ˈ'ˌ). When
        False stresses are removed. Default to False.

    tie: bool or char, optional
        This option is only valid for the 'espeak' backend with espeak>=1.49.
        It is incompatible with phone separator. When not False, use a tie
        character within multi-letter phoneme names. When True, the char
        'U+361' is used (as in d͡ʒ), 'z' means ZWJ character, default to
        False.

    language_switch: str, optional
        Espeak can output some words in another language (typically English)
        when phonemizing a text. This option sets up the policy to use when
        such a language switch occurs. Three values are available:
        'keep-flags' (the default), 'remove-flags' or 'remove-utterance'. The
        'keep-flags' policy keeps the language switching flags, for example
        "(en) or (jp)", in the output. The 'remove-flags' policy removes them
        and the 'remove-utterance' policy removes the whole line of text
        including a language switch. This option is only valid for the
        'espeak' backend.

    words_mismatch: str, optional
        Espeak can join two consecutive words or drop some words, yielding a
        word count mismatch between orthographic and phonemized text. This
        option sets up the policy to use when such a words count mismatch
        occurs. Three values are available: 'ignore' (the default) which does
        nothing, 'warn' which issues a warning for each mismatched line, and
        'remove' which removes the mismatched lines from the output. This
        option is only valid for the 'espeak' backend.

    njobs: int
        The number of parallel jobs to launch. The input text is split in
        ``njobs`` parts, phonemized on parallel instances of the backend and
        the outputs are finally collapsed.

    logger: logging.Logger
        The logging instance where to send messages. If not specified, use
        the default system logger.

    Returns
    -------
    phonemized text: str or list of str
        The input ``text`` phonemized for the given ``language`` and
        ``backend``. The returned value has the same type as the input text
        (either a list or a string), except if ``prepend_text`` is True where
        the output is forced as a list of pairs (input_text, phonemized
        text).

    Raises
    ------
    RuntimeError
        if the ``backend`` is not valid or is valid but not installed, if the
        ``language`` is not supported by the ``backend``, if any incompatible
        options are used.

    """
    # ensure we are using a compatible Python version
    if sys.version_info < (3, 6):  # pragma: nocover
        # version_info holds integers, they must be converted before joining
        logger.error(
            'Your are using python-%s which is unsupported by the phonemizer, '
            'please update to python>=3.6',
            ".".join(str(part) for part in sys.version_info[:3]))

    # the signature accepts None as separator: fall back to the default one
    # instead of failing on attribute access below
    if separator is None:
        separator = default_separator

    # ensure the arguments are valid
    _check_arguments(
        backend, with_stress, tie, separator, language_switch, words_mismatch)

    # preserve_punctuation and word separator not valid for espeak-mbrola
    if backend == 'espeak-mbrola':
        if preserve_punctuation:
            logger.warning(
                'espeak-mbrola backend cannot preserve punctuation')
        if separator.word:
            logger.warning(
                'espeak-mbrola backend cannot preserve word separation')

    # collect the constructor options understood by the requested backend:
    # every backend takes a logger, all but espeak-mbrola take the
    # punctuation options, and espeak alone takes the remaining ones
    backend_options = {'logger': logger}
    if backend != 'espeak-mbrola':
        backend_options.update(
            punctuation_marks=punctuation_marks,
            preserve_punctuation=preserve_punctuation)
    if backend == 'espeak':
        backend_options.update(
            with_stress=with_stress,
            tie=tie,
            language_switch=language_switch,
            words_mismatch=words_mismatch)

    # initialize the phonemization backend
    phonemizer = BACKENDS[backend](language, **backend_options)

    # do the phonemization
    return _phonemize(
        phonemizer, text, separator, strip, njobs,
        prepend_text, preserve_empty_lines)
def _check_arguments(  # pylint: disable=too-many-arguments
        backend: Backend,
        with_stress: bool,
        tie: Union[bool, str],
        separator: Separator,
        language_switch: LanguageSwitch,
        words_mismatch: WordMismatch):
    """Auxiliary function to phonemize()

    Validates that the options given to phonemize() can be used together.
    Checks are done in a fixed order and a RuntimeError is raised for the
    first problem found; nothing is returned when all the checks pass.

    """
    supported = ('espeak', 'espeak-mbrola', 'festival', 'segments')

    # the backend must be one of the known ones
    if backend not in supported:
        raise RuntimeError(
            f'{backend} is not a supported backend, '
            f'choose in {", ".join(supported)}.')

    if backend != 'espeak':
        # options only espeak implements, paired with whether the caller
        # requested them, listed in the order they are reported
        espeak_only = (
            ('with_stress', with_stress),
            ('tie', tie),
            ('language_switch', language_switch != 'keep-flags'),
            ('words_mismatch', words_mismatch != 'ignore'))

        for option, requested in espeak_only:
            if requested:
                raise RuntimeError(
                    f'the "{option}" option is available for espeak '
                    f'backend only, but you are using {backend} backend')
        return

    # from here the backend is espeak: a tie character cannot be combined
    # with a phone separator
    if tie and separator.phone:
        raise RuntimeError(
            'the "tie" option is incompatible with phone separator '
            f'(which is "{separator.phone}")')
def _phonemize(  # pylint: disable=too-many-arguments
        backend: BaseBackend,
        text: Union[str, List[str]],
        separator: Separator,
        strip: bool,
        njobs: int,
        prepend_text: bool,
        preserve_empty_lines: bool):
    """Auxiliary function to phonemize()

    Runs the phonemization of `text` on an initialized `backend` and formats
    the result. Any exception raised by the backend is propagated unchanged.

    Parameters
    ----------
    backend: BaseBackend
        The initialized backend, its ``phonemize`` method is called once with
        the non-empty lines (and not called at all when there are none).
    text: str or list of str
        The text to phonemize, converted to a list of lines with ``str2list``.
    separator, strip, njobs:
        Forwarded as they are to ``backend.phonemize``.
    prepend_text: bool
        When True, return a list of (input line, phonemized line) pairs.
    preserve_empty_lines: bool
        When True, the lines being empty (or made only of whitespace) in the
        input are restored as empty strings at the same index in the output.
        When False they are dropped.

    Returns
    -------
    A list of pairs if `prepend_text` is True. Otherwise the result of
    ``list2str`` applied to the phonemized lines if `text` is a string, else
    the list of phonemized lines.

    """
    # remember whether the input is a string, so as to answer with a string.
    # isinstance (not an exact type comparison) so that str subclasses are
    # handled as strings too
    return_as_str = isinstance(text, str)

    # force the text as a list of lines, without trailing line breaks
    lines = [line.strip(os.linesep) for line in str2list(text)]

    # if preserving empty lines, note the index of each empty line
    empty_lines = []
    if preserve_empty_lines:
        empty_lines = [
            index for index, line in enumerate(lines) if not line.strip()]

    # ignore empty lines
    lines = [line for line in lines if line.strip()]

    # phonemize the text, the backend is not called when nothing is left
    if lines:
        phonemized = backend.phonemize(
            lines, separator=separator, strip=strip, njobs=njobs)
    else:
        phonemized = []

    # reinsert the empty lines. The indices are in increasing order, so each
    # insertion restores the position the line had in the input
    for index in empty_lines:
        if prepend_text:
            lines.insert(index, '')
        phonemized.insert(index, '')

    # at that point, the phonemized text is a list of str. Format it as
    # expected by the parameters
    if prepend_text:
        return list(zip(lines, phonemized))
    if return_as_str:
        return list2str(phonemized)
    return phonemized