2025-12-01
This commit is contained in:
@@ -0,0 +1,21 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonologizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonologizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonologizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Multilingual text to phones converter"""
|
||||
|
||||
from .phonemize import phonemize # pylint: disable=unused-import
|
||||
|
||||
|
||||
__version__ = '3.3.0'
|
||||
"""Phonemizer version"""
|
||||
@@ -0,0 +1,27 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonologizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonologizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonologizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Multilingual text to phonemes converter"""
|
||||
|
||||
# pylint: disable=unused-import
|
||||
|
||||
from .espeak.espeak import EspeakBackend
|
||||
from .espeak.mbrola import EspeakMbrolaBackend
|
||||
from .festival.festival import FestivalBackend
|
||||
from .segments import SegmentsBackend
|
||||
|
||||
|
||||
# Registry of the backend classes imported above, keyed by the value each
# class returns from its name() method.
BACKENDS = {b.name(): b for b in (
    EspeakBackend, FestivalBackend, SegmentsBackend, EspeakMbrolaBackend)}
"""The different phonemization backends as a mapping (name, class)"""
|
||||
@@ -0,0 +1,255 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Abstract base class for phonemization backends"""
|
||||
|
||||
import abc
|
||||
import itertools
|
||||
import re
|
||||
from logging import Logger
|
||||
from typing import Optional, List, Any, Dict, Tuple, Union, Pattern
|
||||
|
||||
import joblib
|
||||
|
||||
from phonemizer.logger import get_logger
|
||||
from phonemizer.punctuation import Punctuation
|
||||
from phonemizer.separator import Separator, default_separator
|
||||
from phonemizer.utils import chunks
|
||||
|
||||
|
||||
class BaseBackend(abc.ABC):
|
||||
"""Abstract base class of all the phonemization backends
|
||||
|
||||
Provides a common interface to all backends. The central method is
|
||||
`phonemize()`
|
||||
|
||||
Parameters
|
||||
----------
|
||||
language: str
|
||||
The language code of the input text, must be supported by
|
||||
the backend. If ``backend`` is 'segments', the language can be a file with
|
||||
a grapheme to phoneme mapping.
|
||||
|
||||
preserve_punctuation: bool
|
||||
When True, will keep the punctuation in the
|
||||
phonemized output. Not supported by the 'espeak-mbrola' backend. Default
|
||||
to False and remove all the punctuation.
|
||||
|
||||
punctuation_marks: str
|
||||
The punctuation marks to consider when dealing with punctuation, either for removal or preservation.
|
||||
Can be defined as a string or regular expression. Default to Punctuation.default_marks().
|
||||
|
||||
logger: logging.Logger
|
||||
the logging instance where to send
|
||||
messages. If not specified, use the default system logger.
|
||||
|
||||
Raises
|
||||
------
|
||||
RuntimeError
|
||||
if the backend is not available or if the `language` cannot be initialized.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, language: str,
             punctuation_marks: Optional[Union[str, Pattern]] = None,
             preserve_punctuation: bool = False,
             logger: Optional[Logger] = None):
    """Check the backend is usable, then setup language and punctuation

    Raises
    ------
    RuntimeError
        if the backend is not available or if `language` is rejected by
        `_init_language()`.

    """
    # fall back to the default punctuation marks and the default logger
    marks = (
        Punctuation.default_marks()
        if punctuation_marks is None else punctuation_marks)
    log = get_logger() if logger is None else logger

    # nothing can be done when the backend is missing from the system
    if not self.is_available():
        raise RuntimeError(  # pragma: nocover
            f'{self.name()} not installed on your system')

    self._logger = log
    backend_name = self.name()
    backend_version = '.'.join(str(v) for v in self.version())
    self._logger.info(
        'initializing backend %s-%s', backend_name, backend_version)

    # raises when the backend does not support the requested language
    self._language = self._init_language(language)

    # punctuation is either stripped or preserved during phonemization
    self._preserve_punctuation = preserve_punctuation
    self._punctuator = Punctuation(marks)
|
||||
|
||||
@classmethod
|
||||
def _init_language(cls, language):
|
||||
"""Language initialization
|
||||
|
||||
This method may be overloaded in child classes (see Segments backend)
|
||||
|
||||
"""
|
||||
if not cls.is_supported_language(language):
|
||||
raise RuntimeError(
|
||||
f'language "{language}" is not supported by the '
|
||||
f'{cls.name()} backend')
|
||||
return language
|
||||
|
||||
@property
|
||||
def logger(self) -> Logger:
    """A logging.Logger instance where to send messages"""
    # set once in __init__, either from the `logger` argument or get_logger()
    return self._logger
|
||||
|
||||
@property
|
||||
def language(self):
    """The language code configured to be used for phonemization"""
    # this is the value returned by _init_language() in __init__
    return self._language
|
||||
|
||||
@staticmethod
|
||||
@abc.abstractmethod
|
||||
def name():
    """The name of the backend"""
    # abstract: no implementation here, each concrete backend overrides it
|
||||
|
||||
@classmethod
|
||||
@abc.abstractmethod
|
||||
def is_available(cls) -> bool:
    """Returns True if the backend is installed, False otherwise"""
    # abstract: no implementation here, each concrete backend overrides it
|
||||
|
||||
@classmethod
|
||||
@abc.abstractmethod
|
||||
def version(cls):
    """Return the backend version as a tuple (major, minor, patch)"""
    # abstract: no implementation here. The returned items are joined with
    # '.' after a str() conversion when the backend is initialized.
|
||||
|
||||
@staticmethod
|
||||
@abc.abstractmethod
|
||||
def supported_languages() -> Dict[str, str]:
    """Return a dict of language codes -> name supported by the backend"""
    # abstract: no implementation here. is_supported_language() only relies
    # on the `in` operator of the returned mapping.
|
||||
|
||||
@classmethod
|
||||
def is_supported_language(cls, language: str) -> bool:
    """Returns True if `language` is supported by the backend"""
    # membership test on the language codes of supported_languages()
    return language in cls.supported_languages()
|
||||
|
||||
def phonemize(self, text: List[str],
              separator: Optional[Separator] = None,
              strip: bool = False,
              njobs: int = 1) -> List[str]:
    """Returns the `text` phonemized for the given language

    Parameters
    ----------
    text: list of str
        The text to be phonemized. Each string in the list is considered
        as a separated line. Each line is considered as a text utterance.
        Any empty utterance will be ignored.

    separator: Separator
        String separators between phonemes, syllables and words, default
        to separator.default_separator. Syllable separator is considered
        only for the festival backend. Word separator is ignored by the
        'espeak-mbrola' backend.

    strip: bool
        If True, don't output the last word and phone separators of a
        token, default to False.

    njobs : int
        The number of parallel jobs to launch. The input text is split in
        ``njobs`` parts, phonemized on parallel instances of the backend
        and the outputs are finally collapsed.

    Returns
    -------
    phonemized text: list of str
        The input ``text`` phonemized for the given ``language`` and
        ``backend``.

    Raises
    ------
    RuntimeError
        if `text` is a str instead of a list of str, or if something went
        wrong during the phonemization

    """
    # a bare str was accepted before phonemizer-3.0, reject it explicitly
    if isinstance(text, str):
        raise RuntimeError(
            'input text to phonemize() is str but it must be list of str')

    separator = default_separator if separator is None else separator
    text, punctuation_marks = self._phonemize_preprocess(text)

    if njobs != 1:
        self.logger.info('running %s on %s jobs', self.name(), njobs)

        # one delayed call per chunk: chunk[0] is the text of the chunk
        # and chunk[1] its offset
        jobs = (
            joblib.delayed(self._phonemize_aux)(
                chunk[0], chunk[1], separator, strip)
            for chunk in zip(*chunks(text, njobs)))

        # the per-chunk outputs are merged back into a single result
        phonemized = self._flatten(joblib.Parallel(n_jobs=njobs)(jobs))
    else:
        # single job: the whole text is processed at once, with offset 0
        phonemized = self._phonemize_aux(text, 0, separator, strip)

    return self._phonemize_postprocess(
        phonemized, punctuation_marks, separator, strip)
|
||||
|
||||
@staticmethod
|
||||
def _flatten(phonemized: List[List[Any]]):
|
||||
"""Flatten a list of lists into a single one
|
||||
|
||||
From [[1, 2], [3], [4]] returns [1, 2, 3, 4]. This method is used to
|
||||
format the output as obtained using multiple jobs.
|
||||
|
||||
"""
|
||||
return list(itertools.chain(*phonemized))
|
||||
|
||||
@abc.abstractmethod
|
||||
def _phonemize_aux(self, text: List[str], offset: int, separator: Separator, strip: bool) -> List[str]:
    """The "concrete" phonemization method

    Must be implemented in child classes. `separator` and `strip`
    parameters are as given to the phonemize() method. `text` is as
    returned by _phonemize_preprocess(). `offset` is the line number of the
    first line in `text` with respect to the original text (this is only
    useful when running on chunks in multiple jobs; when using a single
    job the offset is 0).

    """
|
||||
|
||||
def _phonemize_preprocess(self, text: List[str]) -> Tuple[Union[str, List[str]], List]:
|
||||
"""Preprocess the text before phonemization
|
||||
|
||||
Removes the punctuation (keep trace of punctuation marks for further
|
||||
restoration if required by the `preserve_punctuation` option).
|
||||
|
||||
"""
|
||||
if self._preserve_punctuation:
|
||||
# a tuple (text, punctuation marks)
|
||||
return self._punctuator.preserve(text)
|
||||
return self._punctuator.remove(text), []
|
||||
|
||||
def _phonemize_postprocess(self, phonemized: List[str],
|
||||
punctuation_marks,
|
||||
separator: Separator,
|
||||
strip: bool):
|
||||
"""Postprocess the raw phonemized output
|
||||
|
||||
Restores the punctuation as needed.
|
||||
|
||||
"""
|
||||
if self._preserve_punctuation:
|
||||
return self._punctuator.restore(phonemized, punctuation_marks, separator, strip)
|
||||
return phonemized
|
||||
@@ -0,0 +1,15 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonologizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonologizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonologizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Phonemizer module for espeak backend implementation"""
|
||||
@@ -0,0 +1,275 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Low-level bindings to the espeak API"""
|
||||
|
||||
import atexit
|
||||
import ctypes
|
||||
import pathlib
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
import weakref
|
||||
from ctypes import CDLL
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
from phonemizer.backend.espeak.voice import EspeakVoice
|
||||
|
||||
if sys.platform != 'win32':
|
||||
# cause a crash on Windows
|
||||
import dlinfo
|
||||
|
||||
|
||||
class EspeakAPI:
|
||||
"""Exposes the espeak API to the EspeakWrapper
|
||||
|
||||
This class exposes only low-level bindings to the API and should not be
|
||||
used directly.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, library: Union[str, Path]):
    """Load a private copy of the espeak `library` and initialize it

    Parameters
    ----------
    library (str or Path) : the espeak shared library to load

    Raises
    ------
    RuntimeError if the library cannot be loaded, if its path cannot be
      retrieved or if its initialization fails

    """
    # set to None to avoid an AttributeError in _delete if the __init__
    # method raises, will be properly initialized below
    self._library = None

    # Because the library is not designed to be wrapped nor to be used in
    # multithreaded/multiprocess contexts (massive use of global variables)
    # we need a copy of the original library for each instance of the
    # wrapper... (see "man dlopen" on Linux/MacOS: we cannot load two times
    # the same library because a reference is then returned by dlopen). The
    # tweak is therefore to make a copy of the original library in a
    # different (temporary) directory.
    try:
        # load the original library in order to retrieve its full path.
        # Forced as str as it is required on Windows.
        espeak: CDLL = ctypes.cdll.LoadLibrary(str(library))
        library_path = self._shared_library_path(espeak)
        del espeak
    except OSError as error:
        raise RuntimeError(
            f'failed to load espeak library: {str(error)}') from None

    # mkdtemp() does not remove the directory by itself: this is done by
    # _delete(), registered as a cleanup callback just below
    self._tempdir = tempfile.mkdtemp()

    # properly exit when the wrapper object is destroyed (see
    # https://docs.python.org/3/library/weakref.html#comparing-finalizers-with-del-methods).
    # But... weakref implementation does not work on windows so we register
    # the cleanup with atexit. This means that, on Windows, all the
    # temporary directories created by EspeakAPI instances will remain on
    # disk until the Python process exit.
    if sys.platform == 'win32':  # pragma: nocover
        atexit.register(self._delete_win32)
    else:
        # NOTE(review): self._library is still None at this point, so the
        # finalizer receives None as its `library` argument and _delete()
        # only removes the temporary directory -- confirm this is intended.
        weakref.finalize(self, self._delete, self._library, self._tempdir)

    # NOTE(review): an OSError raised by the copy or by the second
    # LoadLibrary below is not converted to RuntimeError, unlike the first
    # load above -- confirm callers do not rely on RuntimeError only.
    espeak_copy = pathlib.Path(self._tempdir) / library_path.name
    shutil.copy(library_path, espeak_copy, follow_symlinks=False)

    # finally load the library copy and initialize it. 0x02 is
    # AUDIO_OUTPUT_SYNCHRONOUS in the espeak API
    self._library = ctypes.cdll.LoadLibrary(str(espeak_copy))
    try:
        if self._library.espeak_Initialize(0x02, 0, None, 0) <= 0:
            raise RuntimeError(  # pragma: nocover
                'failed to initialize espeak shared library')
    except AttributeError:  # pragma: nocover
        # the loaded file does not expose espeak_Initialize
        raise RuntimeError(
            'failed to load espeak library') from None

    # the path to the original one (the copy is considered an
    # implementation detail and is not exposed)
    self._library_path = library_path
|
||||
|
||||
def _delete_win32(self):  # pragma: nocover
    """Cleanup callback registered with atexit on Windows"""
    # Windows does not support static methods with ctypes libraries
    # (library == None) so we use a proxy method... The attributes are read
    # when the callback runs, not when it is registered.
    self._delete(self._library, self._tempdir)
|
||||
|
||||
@staticmethod
|
||||
def _delete(library, tempdir):
|
||||
try:
|
||||
# clean up the espeak library allocated memory
|
||||
library.espeak_Terminate()
|
||||
except AttributeError: # library not loaded
|
||||
pass
|
||||
|
||||
# on Windows it is required to unload the library or the .dll file
|
||||
# cannot be erased from the temporary directory
|
||||
if sys.platform == 'win32': # pragma: nocover
|
||||
# pylint: disable=import-outside-toplevel
|
||||
# pylint: disable=protected-access
|
||||
# pylint: disable=no-member
|
||||
import _ctypes
|
||||
_ctypes.FreeLibrary(library._handle)
|
||||
|
||||
# clean up the tempdir containing the copy of the library
|
||||
shutil.rmtree(tempdir)
|
||||
|
||||
@property
|
||||
def library_path(self) -> Path:
    """Absolute path to the espeak library being in use"""
    # path of the original library, not of the temporary copy being loaded
    return self._library_path
|
||||
|
||||
@staticmethod
|
||||
def _shared_library_path(library) -> Path:
|
||||
"""Returns the absolute path to `library`
|
||||
|
||||
This function is cross-platform and works for Linux, MacOS and Windows.
|
||||
Raises a RuntimeError if the library path cannot be retrieved
|
||||
|
||||
"""
|
||||
# pylint: disable=protected-access
|
||||
path = pathlib.Path(library._name).resolve()
|
||||
if path.is_file():
|
||||
return path
|
||||
|
||||
try:
|
||||
# Linux or MacOS only, ImportError on Windows
|
||||
return pathlib.Path(dlinfo.DLInfo(library).path).resolve()
|
||||
except (Exception, ImportError): # pragma: nocover
|
||||
raise RuntimeError(
|
||||
f'failed to retrieve the path to {library} library') from None
|
||||
|
||||
def info(self):
    """Bindings to espeak_Info

    Returns
    -------
    version, data_path: the value returned by espeak_Info (declared as a
      char pointer) and the value of the char pointer it has been given by
      reference, documented as the espeak version number and data path
      respectively

    """
    espeak_info = self._library.espeak_Info
    espeak_info.restype = ctypes.c_char_p

    # filled by espeak through the reference given to the call
    path_buffer = ctypes.c_char_p()
    version = espeak_info(ctypes.byref(path_buffer))
    return version, path_buffer.value
|
||||
|
||||
def list_voices(self, name):
    """Bindings to espeak_ListVoices

    Parameters
    ----------
    name (pointer to EspeakVoice.VoiceStruct or None): if specified, a
      filter on voices to be listed

    Returns
    -------
    voices: a pointer to pointers of EspeakVoice.VoiceStruct instances

    """
    voice_pointer = ctypes.POINTER(EspeakVoice.VoiceStruct)

    espeak_list_voices = self._library.espeak_ListVoices
    espeak_list_voices.argtypes = [voice_pointer]
    espeak_list_voices.restype = ctypes.POINTER(voice_pointer)
    return espeak_list_voices(name)
|
||||
|
||||
def set_voice_by_name(self, name) -> int:
    """Bindings to espeak_SetVoiceByName

    Parameters
    ----------
    name : the voice name to setup, passed to espeak as a char pointer

    Returns
    -------
    0 on success, non-zero integer on failure

    """
    espeak_set_voice = self._library.espeak_SetVoiceByName
    espeak_set_voice.argtypes = [ctypes.c_char_p]
    status = espeak_set_voice(name)
    return status
|
||||
|
||||
def get_current_voice(self):
    """Bindings to espeak_GetCurrentVoice

    Returns
    -------
    the EspeakVoice.VoiceStruct instance pointed to by the value returned by
    espeak_GetCurrentVoice

    """
    f_get_current_voice = self._library.espeak_GetCurrentVoice
    f_get_current_voice.restype = ctypes.POINTER(EspeakVoice.VoiceStruct)
    # NOTE(review): the returned pointer is dereferenced unconditionally.
    # This docstring used to promise None when no voice has been setup but
    # no NULL check is done here -- confirm what espeak returns in that
    # case.
    return f_get_current_voice().contents
|
||||
|
||||
def text_to_phonemes(self, text_ptr, text_mode, phonemes_mode):
    """Bindings to espeak_TextToPhonemes

    Parameters
    ----------
    text_ptr (pointer): the text to be phonemized, as a pointer to a
      pointer of chars
    text_mode (bits field): see espeak sources for details
    phonemes_mode (bits field): see espeak sources for details

    Returns
    -------
    an encoded string containing the computed phonemes

    """
    espeak_text_to_phonemes = self._library.espeak_TextToPhonemes
    espeak_text_to_phonemes.restype = ctypes.c_char_p

    # declared in the order text pointer, text mode, phonemes mode
    text_type = ctypes.POINTER(ctypes.c_char_p)
    espeak_text_to_phonemes.argtypes = [
        text_type, ctypes.c_int, ctypes.c_int]

    return espeak_text_to_phonemes(text_ptr, text_mode, phonemes_mode)
|
||||
|
||||
def set_phoneme_trace(self, mode, file_pointer):
    """Bindings to espeak_SetPhonemeTrace

    This method must be called before any call to synthetize()

    Parameters
    ----------
    mode (bits field): see espeak sources for details
    file_pointer (FILE*): a pointer to an opened file in which to output
      the phoneme trace

    """
    espeak_set_phoneme_trace = self._library.espeak_SetPhonemeTrace
    espeak_set_phoneme_trace.argtypes = [ctypes.c_int, ctypes.c_void_p]

    # the value returned by espeak is deliberately not forwarded
    espeak_set_phoneme_trace(mode, file_pointer)
|
||||
|
||||
def synthetize(self, text_ptr, size, mode):
    """Bindings on espeak_Synth

    The output phonemes are sent to the file specified by a call to
    set_phoneme_trace().

    Parameters
    ----------
    text_ptr (pointer) : a pointer to chars
    size (int) : number of chars in the text
    mode (bits field) : the `flags` argument of espeak_Synth, see espeak
      sources for details

    Returns
    -------
    0 on success, non-zero integer on failure

    """
    f_synthetize = self._library.espeak_Synth

    # espeak_Synth takes 8 parameters. The `flags` entry was missing from
    # this declaration, so the 8 values given below were shifted by one
    # with respect to the declared types and `mode` was converted as a
    # pointer to unsigned int instead of an unsigned int.
    f_synthetize.argtypes = [
        ctypes.c_void_p,  # text
        ctypes.c_size_t,  # size
        ctypes.c_uint,  # position
        ctypes.c_int,  # position_type
        ctypes.c_uint,  # end_position
        ctypes.c_uint,  # flags
        ctypes.POINTER(ctypes.c_uint),  # unique_identifier
        ctypes.c_void_p]  # user_data

    # start at position 0 with position_type 1 and end_position 0, neither
    # unique identifier nor user data are requested
    return f_synthetize(text_ptr, size, 0, 1, 0, mode, None, None)
|
||||
@@ -0,0 +1,113 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Base class of espeak backends for the phonemizer"""
|
||||
|
||||
import abc
|
||||
from logging import Logger
|
||||
from typing import Optional, Union, Pattern
|
||||
|
||||
from phonemizer.backend.base import BaseBackend
|
||||
from phonemizer.backend.espeak.wrapper import EspeakWrapper
|
||||
from phonemizer.logger import get_logger
|
||||
from phonemizer.punctuation import Punctuation
|
||||
from phonemizer.separator import Separator
|
||||
|
||||
|
||||
class BaseEspeakBackend(BaseBackend):
|
||||
"""Abstract espeak backend for the phonemizer
|
||||
|
||||
Base class of the concrete backends Espeak and EspeakMbrola. It provides
|
||||
facilities to find espeak library and read espeak version.
|
||||
|
||||
"""
|
||||
def __init__(self, language: str,
             punctuation_marks: Optional[Union[str, Pattern]] = None,
             preserve_punctuation: bool = False,
             logger: Optional[Logger] = None):
    """Initialize the base backend, then bind an espeak wrapper to it

    All the parameters are forwarded unchanged to BaseBackend.

    """
    super().__init__(
        language,
        punctuation_marks=punctuation_marks,
        preserve_punctuation=preserve_punctuation,
        logger=logger)

    # the wrapper instance used by this backend to talk to espeak
    self._espeak = EspeakWrapper()
    self.logger.debug('loaded %s', self._espeak.library_path)
|
||||
|
||||
|
||||
@classmethod
|
||||
def set_library(cls, library):
    """Sets the espeak backend to use `library`

    If this is not set, the backend uses the default espeak shared library
    from the system installation.

    Parameters
    ----------
    library (str or None) : the path to the espeak shared library to use as
      backend. Set `library` to None to restore the default.

    """
    # plain delegation: the setting is held by EspeakWrapper, not by `cls`
    EspeakWrapper.set_library(library)
|
||||
|
||||
@classmethod
|
||||
def library(cls):
    """Returns the espeak library used as backend

    The following precedence rule applies for library lookup:

    1. As specified by BaseEspeakBackend.set_library()
    2. Or as specified by the environment variable
       PHONEMIZER_ESPEAK_LIBRARY
    3. Or the default espeak library found on the system

    Raises
    ------
    RuntimeError if the espeak library cannot be found or if the
      environment variable PHONEMIZER_ESPEAK_LIBRARY is set to a
      non-readable file

    """
    # plain delegation: the lookup itself is implemented by EspeakWrapper
    return EspeakWrapper.library()
|
||||
|
||||
@classmethod
|
||||
def is_available(cls) -> bool:
    """Returns True if an EspeakWrapper can be instantiated

    Returns False when the instantiation raises a RuntimeError.

    """
    try:
        EspeakWrapper()
    except RuntimeError:  # pragma: nocover
        return False
    else:
        return True
|
||||
|
||||
@classmethod
|
||||
def is_espeak_ng(cls) -> bool:
    """Returns True if using espeak-ng, False otherwise"""
    # espeak-ng starts with version 1.49
    first_ng_version = (1, 49)
    return cls.version() >= first_ng_version
|
||||
|
||||
@classmethod
|
||||
def version(cls):
    """Espeak version as a tuple (major, minor, patch)

    Raises
    ------
    RuntimeError if BaseEspeakBackend.is_available() is False or if the
      version cannot be extracted for some reason.

    """
    # a new EspeakWrapper is instantiated on each call
    return EspeakWrapper().version
|
||||
|
||||
@abc.abstractmethod
|
||||
def _postprocess_line(self, line: str, num: int,
                      separator: Separator, strip: bool) -> str:
    """Postprocess a single line of espeak output

    Must be implemented in child classes.

    """
    # NOTE(review): EspeakBackend overrides this method with a
    # Tuple[str, bool] return annotation -- confirm which one is intended.
    pass
|
||||
@@ -0,0 +1,172 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Espeak backend for the phonemizer"""
|
||||
|
||||
import itertools
|
||||
import re
|
||||
from logging import Logger
|
||||
from typing import Optional, Tuple, List, Union, Pattern
|
||||
|
||||
from phonemizer.backend.espeak.base import BaseEspeakBackend
|
||||
from phonemizer.backend.espeak.language_switch import (
|
||||
get_language_switch_processor, LanguageSwitch, BaseLanguageSwitch)
|
||||
from phonemizer.backend.espeak.words_mismatch import (
|
||||
get_words_mismatch_processor, WordMismatch, BaseWordsMismatch)
|
||||
from phonemizer.backend.espeak.wrapper import EspeakWrapper
|
||||
from phonemizer.separator import Separator
|
||||
|
||||
|
||||
class EspeakBackend(BaseEspeakBackend):
|
||||
"""Espeak backend for the phonemizer"""
|
||||
# a regular expression to find phonemes stresses in espeak output
|
||||
_ESPEAK_STRESS_RE = re.compile(r"[ˈˌ'-]+")
|
||||
|
||||
# pylint: disable=too-many-arguments
|
||||
def __init__(self, language: str,
             punctuation_marks: Optional[Union[str, Pattern]] = None,
             preserve_punctuation: bool = False,
             with_stress: bool = False,
             tie: Union[bool, str] = False,
             language_switch: LanguageSwitch = 'keep-flags',
             words_mismatch: WordMismatch = 'ignore',
             logger: Optional[Logger] = None):
    """Setup the espeak voice and the output processing options

    `language`, `punctuation_marks`, `preserve_punctuation` and `logger`
    are forwarded to the parent class.

    Parameters
    ----------
    with_stress: bool
        When False (default) the stress marks are removed from the
        phonemized output.

    tie: bool or str
        False (default) to disable ties, True to use the default U+361 tie
        character, or a single character to be used as tie.

    language_switch: LanguageSwitch
        Selects the language switch processor, default to 'keep-flags'.

    words_mismatch: WordMismatch
        Selects the words mismatch processor, default to 'ignore'.

    Raises
    ------
    RuntimeError
        if `tie` is a string that is not exactly one character long.

    """
    super().__init__(
        language, punctuation_marks=punctuation_marks,
        preserve_punctuation=preserve_punctuation, logger=logger)

    # self._espeak is the wrapper created by the parent __init__
    self._espeak.set_voice(language)
    self._with_stress = with_stress
    self._tie = self._init_tie(tie)
    self._lang_switch: BaseLanguageSwitch = get_language_switch_processor(
        language_switch, self.logger, self.language)
    self._words_mismatch: BaseWordsMismatch = get_words_mismatch_processor(
        words_mismatch, self.logger)
|
||||
|
||||
@staticmethod
|
||||
def _init_tie(tie) -> Optional[str]:
|
||||
if not tie:
|
||||
return None
|
||||
|
||||
if tie is True: # default U+361 tie character
|
||||
return '͡'
|
||||
|
||||
# non default tie charcacter
|
||||
tie = str(tie)
|
||||
if len(tie) != 1:
|
||||
raise RuntimeError(
|
||||
f'explicit tie must be a single charcacter but is {tie}')
|
||||
return tie
|
||||
|
||||
@staticmethod
|
||||
def name() -> str:
    """The name of the backend"""
    return 'espeak'
|
||||
|
||||
@classmethod
|
||||
def supported_languages(cls):
    """Return a dict of language codes -> name supported by the backend

    The mapping is built from the `language` and `name` attributes of the
    voices returned by EspeakWrapper().available_voices().

    """
    return {
        voice.language: voice.name
        for voice in EspeakWrapper().available_voices()}
|
||||
|
||||
def _phonemize_aux(self, text, offset, separator, strip):
|
||||
if self._tie is not None and separator.phone:
|
||||
self.logger.warning(
|
||||
'cannot use ties AND phone separation, '
|
||||
'ignoring phone separator')
|
||||
|
||||
output = []
|
||||
lang_switches = []
|
||||
for num, line in enumerate(text, start=1):
|
||||
line = self._espeak.text_to_phonemes(line, self._tie)
|
||||
line, has_switch = self._postprocess_line(
|
||||
line, num, separator, strip)
|
||||
output.append(line)
|
||||
if has_switch:
|
||||
lang_switches.append(num + offset)
|
||||
|
||||
return output, lang_switches
|
||||
|
||||
def _process_stress(self, word):
|
||||
if self._with_stress:
|
||||
return word
|
||||
# remove the stresses on phonemes
|
||||
return re.sub(self._ESPEAK_STRESS_RE, '', word)
|
||||
|
||||
def _process_tie(self, word: str, separator: Separator):
|
||||
# NOTE a bug in espeak append ties to (en) flags so as (͡e͡n).
|
||||
# We do not correct it here.
|
||||
if self._tie is not None and self._tie != '͡':
|
||||
# replace default '͡' by the requested one
|
||||
return word.replace('͡', self._tie)
|
||||
return word.replace('_', separator.phone)
|
||||
|
||||
def _postprocess_line(self, line: str, num: int,
|
||||
separator: Separator, strip: bool) -> Tuple[str, bool]:
|
||||
# espeak can split an utterance into several lines because
|
||||
# of punctuation, here we merge the lines into a single one
|
||||
line = line.strip().replace('\n', ' ').replace(' ', ' ')
|
||||
|
||||
# due to a bug in espeak-ng, some additional separators can be
|
||||
# added at the end of a word. Here a quick fix to solve that
|
||||
# issue. See https://github.com/espeak-ng/espeak-ng/issues/694
|
||||
line = re.sub(r'_+', '_', line)
|
||||
line = re.sub(r'_ ', ' ', line)
|
||||
|
||||
line, has_switch = self._lang_switch.process(line)
|
||||
if not line:
|
||||
return '', has_switch
|
||||
|
||||
out_line = ''
|
||||
for word in line.split(' '):
|
||||
word = self._process_stress(word.strip())
|
||||
if not strip and self._tie is None:
|
||||
word += '_'
|
||||
word = self._process_tie(word, separator)
|
||||
out_line += word + separator.word
|
||||
|
||||
if strip and separator.word:
|
||||
# erase the last word separator from the line
|
||||
out_line = out_line[:-len(separator.word)]
|
||||
|
||||
return out_line, has_switch
|
||||
|
||||
def _phonemize_preprocess(self, text: List[str]) -> Tuple[Union[str, List[str]], List]:
    """Preprocesses the text and records its words count

    Delegates the preprocessing to the parent class, then stores the number
    of words of the preprocessed text in the words mismatch processor.
    Returns the preprocessed text and the punctuation marks as given by the
    parent class.

    """
    prepared, marks = super()._phonemize_preprocess(text)
    self._words_mismatch.count_text(prepared)
    return prepared, marks
|
||||
|
||||
def _phonemize_postprocess(self, phonemized, punctuation_marks, separator: Separator, strip: bool):
    """Postprocesses the phonemized text, handling switches and mismatches

    `phonemized` is indexed as a pair: the phonemized lines at index 0 and
    the language switches at index 1. The words count of the phonemized
    lines is recorded, the language switches are reported to the logger,
    the lines are postprocessed by the parent class and finally handed to
    the words mismatch processor, whose result is returned.

    """
    lines, lang_switches = phonemized[0], phonemized[1]

    self._words_mismatch.count_phonemized(lines, separator)
    self._lang_switch.warning(lang_switches)

    processed = super()._phonemize_postprocess(
        lines, punctuation_marks, separator, strip)
    return self._words_mismatch.process(processed)
|
||||
|
||||
@staticmethod
|
||||
def _flatten(phonemized) -> List:
|
||||
"""Specialization of BaseBackend._flatten for the espeak backend
|
||||
|
||||
From [([1, 2], ['a', 'b']), ([3],), ([4], ['c'])] to [[1, 2, 3, 4],
|
||||
['a', 'b', 'c']].
|
||||
|
||||
"""
|
||||
flattened = []
|
||||
for i in range(len(phonemized[0])):
|
||||
flattened.append(
|
||||
list(itertools.chain(
|
||||
c for chunk in phonemized for c in chunk[i])))
|
||||
return flattened
|
||||
+193
@@ -0,0 +1,193 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Manages language switches for the espeak backend
|
||||
|
||||
This module is used in phonemizer.backend.EspeakBackend and should be
|
||||
considered private.
|
||||
|
||||
It manages languages switches that occur during phonemization, where a part of
|
||||
a text is phonemized in a language different from the target language. For
|
||||
instance the sentence "j'aime le football" in French will be phonemized by
|
||||
espeak as "ʒɛm lə (en)fʊtbɔːl(fr)", "football" being pronounced as an English
word. This may cause two issues to end users. First, it introduces undesirable
(.) language switch flags. Second, it may introduce extra phones that are not
present in the target language phoneset.
|
||||
|
||||
This module implements 3 alternative solutions the user can choose when
|
||||
initializing the espeak backend:
|
||||
- 'keep-flags' preserves the language switch flags,
|
||||
- 'remove-flags' removes the flags (.) but preserves the words with alternative
|
||||
phoneset,
|
||||
- 'remove-utterance' removes the utterances where flags are detected.
|
||||
|
||||
"""
|
||||
|
||||
import abc
|
||||
import re
|
||||
from logging import Logger
|
||||
from typing import List, Tuple
|
||||
from typing_extensions import TypeAlias, Literal
|
||||
|
||||
# The language switch policies, these are the keys accepted as `mode` by
# get_language_switch_processor() defined below.
LanguageSwitch: TypeAlias = Literal['keep-flags', 'remove-flags', 'remove-utterance']
|
||||
|
||||
|
||||
def get_language_switch_processor(mode: LanguageSwitch, logger: Logger, language: str) -> 'BaseLanguageSwitch':
    """Builds the language switch processor implementing `mode`

    The `mode` can be one of the following:
    - 'keep-flags' to preserve the switch flags
    - 'remove-flags' to suppress the switch flags
    - 'remove-utterance' to suppress the entire utterance

    The processor is instanciated with the `logger` and `language`.

    Raises a RuntimeError if the `mode` is unknown.

    """
    available = {
        'keep-flags': KeepFlags,
        'remove-flags': RemoveFlags,
        'remove-utterance': RemoveUtterances}

    try:
        processor = available[mode](logger, language)
    except KeyError:
        valid_modes = ", ".join(available.keys())
        raise RuntimeError(
            f'mode "{mode}" invalid, must be in {valid_modes}'
        ) from None
    return processor
|
||||
|
||||
|
||||
class BaseLanguageSwitch(abc.ABC):
    """Abstract interface shared by the language switch processors

    Parameters
    ----------
    logger (logging.Logger) : the logger used to send warnings when language
      switches are detected.
    language (str) : the language code currently in use by the phonemizer,
      used to customize the content of the warnings

    """
    # Matches the language switch flags in espeak output. A flag is any text
    # enclosed in parentheses, for instance a switch from English to French
    # looks like "something (fr)quelque chose(en) another thing".
    _ESPEAK_FLAGS_RE = re.compile(r'\(.+?\)')

    def __init__(self, logger: Logger, language: str):
        self._logger = logger
        self._language = language

    @classmethod
    def is_language_switch(cls, utterance: str) -> bool:
        """Returns True if a language switch is present in the `utterance`"""
        found = cls._ESPEAK_FLAGS_RE.search(utterance)
        return found is not None

    @classmethod
    @abc.abstractmethod
    def process(cls, utterance: str) -> Tuple[str, bool]:
        """Detects and processes language switches according to the mode

        This method is called on each utterance as a phonemization
        post-processing step.

        Returns
        -------
        processed_utterance (str) : the utterance either preserved, deleted
          (as '') or with the switch flags removed
        has_switch (bool): True if a language switch flag is found in the
          `utterance` and False otherwise

        """

    @abc.abstractmethod
    def warning(self, switches: List[int]):
        """Sends warnings to the logger about the recorded language switches

        This method is called a single time at the very end of the
        phonemization process.

        Parameters
        ----------
        switches (list of int) : the line numbers where language switches
          have been detected during phonemization

        """
|
||||
|
||||
|
||||
class KeepFlags(BaseLanguageSwitch):
    """Leaves the utterances unchanged, language switch flags included"""

    @classmethod
    def process(cls, utterance: str) -> Tuple[str, bool]:
        """Returns the `utterance` as is, along with the switch detection"""
        has_switch = cls.is_language_switch(utterance)
        return utterance, has_switch

    def warning(self, switches: List[int]):
        """Warns on the lines with switches, if any, and on the policy used"""
        if switches:
            lines = ', '.join(str(switch) for switch in sorted(switches))
            self._logger.warning(
                '%s utterances containing language switches '
                'on lines %s', len(switches), lines)
            self._logger.warning(
                'extra phones may appear in the "%s" phoneset', self._language)
            self._logger.warning(
                'language switch flags have been kept '
                '(applying "keep-flags" policy)')
|
||||
|
||||
|
||||
class RemoveFlags(BaseLanguageSwitch):
    """Erases the language switch flags from the utterances"""

    @classmethod
    def process(cls, utterance: str) -> Tuple[str, bool]:
        """Returns the `utterance` without flags and the switch detection"""
        # remove all the (lang) flags in the current utterance, counting them
        # to know if a switch occurred
        cleaned, nflags = cls._ESPEAK_FLAGS_RE.subn('', utterance)
        return cleaned, nflags > 0

    def warning(self, switches: List[int]):
        """Warns on the lines with switches, if any, and on the policy used"""
        if switches:
            lines = ', '.join(str(switch) for switch in sorted(switches))
            self._logger.warning(
                '%s utterances containing language switches '
                'on lines %s', len(switches), lines)
            self._logger.warning(
                'extra phones may appear in the "%s" phoneset', self._language)
            self._logger.warning(
                'language switch flags have been removed '
                '(applying "remove-flags" policy)')
|
||||
|
||||
|
||||
class RemoveUtterances(BaseLanguageSwitch):
    """Drops any utterance in which a language switch flag is detected"""

    @classmethod
    def process(cls, utterance: str) -> Tuple[str, bool]:
        """Returns '' if the `utterance` has a switch, else returns it as is"""
        has_switch = cls.is_language_switch(utterance)
        # the entire utterance is dropped on language switch
        return ('' if has_switch else utterance), has_switch

    def warning(self, switches: List[int]):
        """Warns on the number of removed utterances, if any"""
        if switches:
            self._logger.warning(
                'removed %s utterances containing language switches '
                '(applying "remove-utterance" policy)', len(switches))
|
||||
@@ -0,0 +1,108 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Mbrola backend for the phonemizer"""
|
||||
|
||||
import pathlib
|
||||
import shutil
|
||||
import sys
|
||||
from logging import Logger
|
||||
from pathlib import Path
|
||||
from typing import Union, Optional, List, Dict
|
||||
|
||||
from phonemizer.backend.espeak.base import BaseEspeakBackend
|
||||
from phonemizer.backend.espeak.wrapper import EspeakWrapper
|
||||
from phonemizer.separator import Separator
|
||||
|
||||
|
||||
class EspeakMbrolaBackend(BaseEspeakBackend):
    """Espeak-mbrola backend for the phonemizer"""
    # this will be initialized once, at the first call to supported_languages()
    _supported_languages = None

    def __init__(self, language: str, logger: Optional[Logger] = None):
        super().__init__(language, logger=logger)
        self._espeak.set_voice(language)

    @staticmethod
    def name():
        """Returns the name of the backend"""
        return 'espeak-mbrola'

    @classmethod
    def is_available(cls) -> bool:
        """Returns True if the mbrola backend can be used

        This requires the espeak backend to be available, the `mbrola`
        program to be found in the PATH and espeak to be espeak-ng.

        """
        # explicit conversion: shutil.which() returns a path or None
        return bool(
            BaseEspeakBackend.is_available() and
            shutil.which('mbrola') and
            BaseEspeakBackend.is_espeak_ng())

    @classmethod
    def _all_supported_languages(cls):
        """Returns the mbrola voices known by espeak as {code: name}

        The code of a voice is its identifier with the 3 first characters
        removed.

        """
        # retrieve the mbrola voices. This voices must be installed separately.
        voices = EspeakWrapper().available_voices('mbrola')
        return {voice.identifier[3:]: voice.name for voice in voices}

    @classmethod
    def _is_language_installed(cls, language: str, data_path: Union[str, Path]) \
            -> bool:
        """Returns True if the required mbrola voice is installed

        Parameters
        ----------
        language (str) : the voice code, with its 'mb-' prefix
        data_path (str or Path) : the espeak data directory

        """
        # this is a reimplementation of LoadMbrolaTable from espeak
        # synth_mbrola.h sources
        voice = language[3:]  # remove mb- prefix

        # data_path may be given as a string, convert it before joining
        if (pathlib.Path(data_path) / 'mbrola' / voice).is_file():
            return True  # pragma: nocover

        if sys.platform != 'win32':
            candidates = [
                f'/usr/share/mbrola/{voice}',
                f'/usr/share/mbrola/{voice}/{voice}',
                f'/usr/share/mbrola/voices/{voice}']
            return any(
                pathlib.Path(candidate).is_file() for candidate in candidates)

        return False

    @classmethod
    def supported_languages(cls) -> Dict[str, str]:  # pragma: nocover
        """Returns the installed mbrola voices as a dict {code: name}

        The result is computed at the first call and stored at class level.

        """
        if cls._supported_languages is None:
            data_path = EspeakWrapper().data_path
            cls._supported_languages = {
                k: v for k, v in cls._all_supported_languages().items()
                if cls._is_language_installed(k, data_path)}
        return cls._supported_languages

    def _phonemize_aux(self, text: List[str], offset: int,
                       separator: Separator, strip: bool) -> List[str]:
        """Phonemizes each line of `text`, numbering lines from `offset` + 1"""
        return [
            self._postprocess_line(
                self._espeak.synthetize(line), offset + num, separator, strip)
            for num, line in enumerate(text, start=1)]

    def _postprocess_line(self, line: str, num: int,
                          separator: Separator, strip: bool) -> str:
        """Extracts the phonemes from the raw output of a synthesis

        The first tab-separated field of each non-blank row of `line` is read
        as a phoneme, the '_' entries are ignored. Phonemes are joined with
        the phone separator, which is also appended at the end unless `strip`
        is True.

        """
        # retrieve the phonemes with the correct SAMPA alphabet (but
        # without word separation)
        phonemes = (
            phn.split('\t')[0] for phn in line.split('\n') if phn.strip())
        phonemes = separator.phone.join(pho for pho in phonemes if pho != '_')

        if not strip:
            phonemes += separator.phone

        return phonemes
|
||||
@@ -0,0 +1,81 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Voice struct from Espeak API exposed to Python"""
|
||||
|
||||
import ctypes
|
||||
|
||||
|
||||
# This class could be a dataclass, but for compatibility with python-3.6 we
# don't use the dataclasses module.
|
||||
class EspeakVoice:
    """A helper class to expose voice structures within C and Python

    Parameters
    ----------
    name (str) : the voice name
    language (str) : the language code
    identifier (str) : the path to the voice file wrt espeak data path

    """

    def __init__(self, name: str = '', language: str = '', identifier: str = ''):
        self._name = name
        self._language = language
        self._identifier = identifier

    @property
    def name(self):
        """Voice name"""
        return self._name

    @property
    def language(self):
        """Language code"""
        return self._language

    @property
    def identifier(self):
        """Path to the voice file wrt espeak data path"""
        return self._identifier

    def __eq__(self, other: object):
        # let Python handle the comparison with unrelated types instead of
        # failing on a missing attribute
        if not isinstance(other, EspeakVoice):
            return NotImplemented
        return (
            self.name == other.name and
            self.language == other.language and
            self.identifier == other.identifier)

    def __hash__(self):
        return hash((self.name, self.language, self.identifier))

    class VoiceStruct(ctypes.Structure):  # pylint: disable=too-few-public-methods
        """A helper class to fetch voices information from the espeak library.

        The espeak_VOICE struct is defined in speak_lib.h from the espeak code.
        Here we use only name (voice name), languages (language code) and
        identifier (voice file) information.

        """
        _fields_ = [
            ('name', ctypes.c_char_p),
            ('languages', ctypes.c_char_p),
            ('identifier', ctypes.c_char_p)]

    def to_ctypes(self):
        """Converts the Voice instance to an espeak ctypes structure

        Each empty attribute is converted to None, the others are encoded in
        UTF8.

        """
        return self.VoiceStruct(
            self.name.encode('utf8') if self.name else None,
            self.language.encode('utf8') if self.language else None,
            self.identifier.encode('utf8') if self.identifier else None)

    @classmethod
    def from_ctypes(cls, struct: VoiceStruct):
        """Returns a Voice instance built from an espeak ctypes structure

        Each unset field of the structure is converted to an empty string.

        """
        return cls(
            name=(struct.name or b'').decode(),
            # discard a useless char prepended by espeak
            language=(struct.languages or b'0').decode()[1:],
            identifier=(struct.identifier or b'').decode())
|
||||
+152
@@ -0,0 +1,152 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Manages words count mismatches for the espeak backend"""
|
||||
|
||||
import abc
|
||||
import re
|
||||
from logging import Logger
|
||||
from typing import List, Tuple
|
||||
|
||||
from typing_extensions import TypeAlias, Literal, Union
|
||||
|
||||
from phonemizer.separator import Separator
|
||||
|
||||
|
||||
WordMismatch: TypeAlias = Literal["warn", "ignore"]
|
||||
|
||||
|
||||
def get_words_mismatch_processor(mode: WordMismatch, logger: Logger) -> 'BaseWordsMismatch':
    """Builds the word count mismatch processor implementing `mode`

    The `mode` can be one of the following:
    - `ignore` to ignore words mismatches
    - `warn` to display a warning on each mismatched utterance
    - `remove` to remove any utterance containing a words mismatch

    The processor is instanciated with the `logger`.

    Raises a RuntimeError if the `mode` is unknown.

    """
    available = {
        'ignore': Ignore,
        'warn': Warn,
        'remove': Remove}

    try:
        processor = available[mode](logger)
    except KeyError:
        valid_modes = ", ".join(available.keys())
        raise RuntimeError(
            f'mode {mode} invalid, must be in {valid_modes}'
        ) from None
    return processor
|
||||
|
||||
|
||||
class BaseWordsMismatch(abc.ABC):
    """The base class of all word count mismatch processors

    Parameters
    ----------
    logger (logging.Logger) : the logger used to send the warnings

    """
    # the default word separator used to count words: any run of whitespaces
    _RE_SPACES = re.compile(r'\s+')

    def __init__(self, logger: Logger):
        self._logger = logger
        # number of words per line, for the input text and phonemized output
        self._count_txt = []
        self._count_phn = []

    @classmethod
    def _count_words(
            cls,
            text: List[str],
            wordsep: Union[str, re.Pattern] = _RE_SPACES) -> List[int]:
        """Return the number of words contained in each line of `text`

        The `wordsep` is either a compiled regular expression or a plain
        string, which is then matched literally. Empty words are not counted.

        """
        if not isinstance(wordsep, re.Pattern):
            wordsep = re.escape(wordsep)

        return [
            len([w for w in re.split(wordsep, line.strip()) if w])
            for line in text]

    def _mismatched_lines(self) -> List[Tuple[int, int, int]]:
        """Returns a list of (num_line, nwords_input, nwords_output)

        Consider only the lines where nwords_input != nwords_output. The line
        numbers start at 0. Raises a RuntimeError if input and output do not
        have the same number of lines.

        """
        if len(self._count_txt) != len(self._count_phn):
            raise RuntimeError(  # pragma: nocover
                f'number of lines in input and output must be equal, '
                f'we have: input={len(self._count_txt)}, '
                f'output={len(self._count_phn)}')

        return [
            (n, t, p) for n, (t, p) in
            enumerate(zip(self._count_txt, self._count_phn))
            if t != p]

    def _resume(self, nmismatch: int, nlines: int):
        """Logs a high level undetailed warning

        Nothing is logged when `nmismatch` is 0. The percentage of mismatched
        lines is rounded to the nearest integer.

        """
        if nmismatch:
            # round after scaling to 100: scaling an already rounded ratio
            # brings back floating point noise (e.g. 7.000000000000001)
            percentage = float(round(100 * nmismatch / nlines))
            self._logger.warning(
                'words count mismatch on %s%% of the lines (%s/%s)',
                percentage, nmismatch, nlines)

    def count_text(self, text: List[str]):
        """Stores the number of words in each input line"""
        self._count_txt = self._count_words(text)

    def count_phonemized(self, text: List[str], separator: Separator):
        """Stores the number of words in each output line

        Words are delimited by the `separator.word` string.

        """
        self._count_phn = self._count_words(text, separator.word)

    @abc.abstractmethod
    def process(self, text: List[str]) -> List[str]:
        """Detects and process word count mismatches according to the mode

        This method is called at the very end of phonemization, during
        post-processing.

        """
|
||||
|
||||
|
||||
class Ignore(BaseWordsMismatch):
    """Only reports the global amount of word count mismatches"""

    def process(self, text: List[str]) -> List[str]:
        """Logs the mismatches summary and returns the `text` unchanged"""
        nmismatch = len(self._mismatched_lines())
        self._resume(nmismatch, len(text))
        return text
|
||||
|
||||
|
||||
class Warn(BaseWordsMismatch):
    """Sends a warning for each line with a word count mismatch"""

    def process(self, text: List[str]) -> List[str]:
        """Logs each mismatch then the summary, returns `text` unchanged"""
        mismatched = self._mismatched_lines()
        for index, expected, found in mismatched:
            # line numbers are reported starting at 1
            self._logger.warning(
                'words count mismatch on line %s '
                '(expected %s words but get %s)',
                index + 1, expected, found)

        self._resume(len(mismatched), len(text))
        return text
|
||||
|
||||
|
||||
class Remove(BaseWordsMismatch):
    """Blanks out each utterance with a word count mismatch"""

    def process(self, text: List[str]) -> List[str]:
        """Replaces in place the mismatched lines of `text` by ''

        Logs the mismatches summary and a removal warning, then returns the
        modified `text`.

        """
        indices = [index for index, _, _ in self._mismatched_lines()]
        self._resume(len(indices), len(text))
        self._logger.warning('removing the mismatched lines')

        for index in indices:
            text[index] = ''
        return text
|
||||
@@ -0,0 +1,370 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Wrapper on espeak-ng library"""
|
||||
|
||||
import ctypes
|
||||
import ctypes.util
|
||||
import functools
|
||||
import os
|
||||
import pathlib
|
||||
import sys
|
||||
import tempfile
|
||||
import weakref
|
||||
from typing import Tuple, Dict
|
||||
|
||||
from phonemizer.backend.espeak.api import EspeakAPI
|
||||
from phonemizer.backend.espeak.voice import EspeakVoice
|
||||
|
||||
|
||||
class EspeakWrapper:
|
||||
"""Wrapper on espeak shared library
|
||||
|
||||
The aim of this wrapper is not to be exhaustive but to encapsulate the
|
||||
espeak functions required for phonemization. It relies on a espeak shared
|
||||
library (*.so on Linux, *.dylib on Mac and *.dll on Windows) that must be
|
||||
installed on the system.
|
||||
|
||||
Use the function `EspeakWrapper.set_library()` before instanciation to
|
||||
customize the library to use.
|
||||
|
||||
Raises
|
||||
------
|
||||
RuntimeError if the espeak shared library cannot be loaded
|
||||
|
||||
"""
|
||||
# a static variable used to overload the default espeak library installed
|
||||
# on the system. The user can choose an alternative espeak library with
|
||||
# the method EspeakWrapper.set_library().
|
||||
_ESPEAK_LIBRARY = None
|
||||
|
||||
def __init__(self):
|
||||
# the following attributes are accessed through properties and are
|
||||
# lazily initialized
|
||||
self._version: Tuple[int, ...] = None
|
||||
self._data_path = None
|
||||
self._voice = None
|
||||
|
||||
# load the espeak API
|
||||
self._espeak = EspeakAPI(self.library())
|
||||
|
||||
# lazy loading of attributes only required for the synthetize method
|
||||
self._libc_ = None
|
||||
self._tempfile_ = None
|
||||
|
||||
@property
|
||||
def _libc(self):
|
||||
if self._libc_ is None:
|
||||
self._libc_ = (
|
||||
ctypes.windll.msvcrt if sys.platform == 'win32' else
|
||||
ctypes.cdll.LoadLibrary(ctypes.util.find_library('c')))
|
||||
return self._libc_
|
||||
|
||||
@property
|
||||
def _tempfile(self):
|
||||
if self._tempfile_ is None:
|
||||
# this will automatically removed at exit
|
||||
# pylint: disable=consider-using-with
|
||||
self._tempfile_ = tempfile.NamedTemporaryFile()
|
||||
weakref.finalize(self._tempfile_, self._tempfile_.close)
|
||||
return self._tempfile_
|
||||
|
||||
def __getstate__(self):
|
||||
"""For pickling, when phonemizing on multiple jobs"""
|
||||
return {
|
||||
'version': self._version,
|
||||
'data_path': self._data_path,
|
||||
'voice': self._voice}
|
||||
|
||||
def __setstate__(self, state: Dict):
|
||||
"""For unpickling, when phonemizing on multiple jobs"""
|
||||
self.__init__()
|
||||
self._version = state['version']
|
||||
self._data_path = state['data_path']
|
||||
self._voice = state['voice']
|
||||
if self._voice:
|
||||
if 'mb' in self._voice.identifier: # mbrola voice
|
||||
self.set_voice(self._voice.identifier[3:])
|
||||
else:
|
||||
self.set_voice(self._voice.language)
|
||||
|
||||
@classmethod
|
||||
def set_library(cls, library: str):
|
||||
"""Sets the espeak backend to use `library`
|
||||
|
||||
If this is not set, the backend uses the default espeak shared library
|
||||
from the system installation.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
library (str or None) : the path to the espeak shared library to use as
|
||||
backend. Set `library` to None to restore the default.
|
||||
|
||||
"""
|
||||
cls._ESPEAK_LIBRARY = library
|
||||
|
||||
@classmethod
|
||||
def library(cls):
|
||||
"""Returns the espeak library used as backend
|
||||
|
||||
The following precedence rule applies for library lookup:
|
||||
|
||||
1. As specified by BaseEspeakBackend.set_library()
|
||||
2. Or as specified by the environment variable
|
||||
PHONEMIZER_ESPEAK_LIBRARY
|
||||
3. Or the default espeak library found on the system
|
||||
|
||||
Raises
|
||||
------
|
||||
RuntimeError if the espeak library cannot be found or if the
|
||||
environment variable PHONEMIZER_ESPEAK_LIBRARY is set to a
|
||||
non-readable file
|
||||
|
||||
"""
|
||||
if cls._ESPEAK_LIBRARY:
|
||||
return cls._ESPEAK_LIBRARY
|
||||
|
||||
if 'PHONEMIZER_ESPEAK_LIBRARY' in os.environ:
|
||||
library = pathlib.Path(os.environ['PHONEMIZER_ESPEAK_LIBRARY'])
|
||||
if not (library.is_file() and os.access(library, os.R_OK)):
|
||||
raise RuntimeError( # pragma: nocover
|
||||
f'PHONEMIZER_ESPEAK_LIBRARY={library} '
|
||||
f'is not a readable file')
|
||||
return library.resolve()
|
||||
|
||||
library = (
|
||||
ctypes.util.find_library('espeak-ng') or
|
||||
ctypes.util.find_library('espeak'))
|
||||
if not library: # pragma: nocover
|
||||
raise RuntimeError(
|
||||
'failed to find espeak library')
|
||||
return library
|
||||
|
||||
def _fetch_version_and_path(self):
|
||||
"""Initializes version and dapa path from the espeak library"""
|
||||
version, data_path = self._espeak.info()
|
||||
|
||||
# pylint: disable=no-member
|
||||
self._data_path = pathlib.Path(data_path.decode())
|
||||
if not self._data_path.is_dir(): # pragma: nocover
|
||||
raise RuntimeError('failed to retrieve espeak data directory')
|
||||
|
||||
# espeak-1.48 appends the release date to version number, here we
|
||||
# simply ignore it
|
||||
version = version.decode().strip().split(' ')[0].replace('-dev', '')
|
||||
self._version = tuple(int(v) for v in version.split('.'))
|
||||
|
||||
@property
|
||||
def version(self) -> Tuple[int, int, int]:
|
||||
"""The espeak version as a tuple of integers (major, minor, patch)"""
|
||||
if self._version is None:
|
||||
self._fetch_version_and_path()
|
||||
return self._version
|
||||
|
||||
@property
|
||||
def library_path(self):
|
||||
"""The espeak library as a pathlib.Path instance"""
|
||||
return self._espeak.library_path
|
||||
|
||||
@property
|
||||
def data_path(self):
|
||||
"""The espeak data directory as a pathlib.Path instance"""
|
||||
if self._data_path is None:
|
||||
self._fetch_version_and_path()
|
||||
return self._data_path
|
||||
|
||||
@property
|
||||
def voice(self):
|
||||
"""The configured voice as an EspeakVoice instance
|
||||
|
||||
If `set_voice` has not been called, returns None
|
||||
|
||||
"""
|
||||
return self._voice
|
||||
|
||||
@functools.lru_cache(maxsize=None)
|
||||
def available_voices(self, name=None):
|
||||
"""Voices available for phonemization, as a list of `EspeakVoice`"""
|
||||
if name:
|
||||
name = EspeakVoice(language=name).to_ctypes()
|
||||
voices = self._espeak.list_voices(name or None)
|
||||
|
||||
index = 0
|
||||
available_voices = []
|
||||
# voices is an array to pointers, terminated by None
|
||||
while voices[index]:
|
||||
voice = voices[index].contents
|
||||
available_voices.append(EspeakVoice(
|
||||
name=os.fsdecode(voice.name).replace('_', ' '),
|
||||
language=os.fsdecode(voice.languages)[1:],
|
||||
identifier=os.fsdecode(voice.identifier)))
|
||||
index += 1
|
||||
return available_voices
|
||||
|
||||
def set_voice(self, voice_code):
|
||||
"""Setup the voice to use for phonemization
|
||||
|
||||
Parameters
|
||||
----------
|
||||
voice_code (str) : Must be a valid language code that is actually
|
||||
supported by espeak
|
||||
|
||||
Raises
|
||||
------
|
||||
RuntimeError if the required voice cannot be initialized
|
||||
|
||||
"""
|
||||
if 'mb' in voice_code:
|
||||
# this is an mbrola voice code. Select the voice by using
|
||||
# identifier in the format 'mb/{voice_code}'
|
||||
available = {
|
||||
voice.identifier[3:]: voice.identifier
|
||||
for voice in self.available_voices('mbrola')}
|
||||
else:
|
||||
# this are espeak voices. Select the voice using it's attached
|
||||
# language code. Consider only the first voice of a given code as
|
||||
# they are sorted by relevancy
|
||||
available = {}
|
||||
for voice in self.available_voices():
|
||||
if voice.language not in available:
|
||||
available[voice.language] = voice.identifier
|
||||
|
||||
try:
|
||||
voice_name = available[voice_code]
|
||||
except KeyError:
|
||||
raise RuntimeError(f'invalid voice code "{voice_code}"') from None
|
||||
|
||||
if self._espeak.set_voice_by_name(voice_name.encode('utf8')) != 0:
|
||||
raise RuntimeError( # pragma: nocover
|
||||
f'failed to load voice "{voice_code}"')
|
||||
|
||||
voice = self._get_voice()
|
||||
if not voice: # pragma: nocover
|
||||
raise RuntimeError(f'failed to load voice "{voice_code}"')
|
||||
self._voice = voice
|
||||
|
||||
def _get_voice(self):
|
||||
"""Returns the current voice used for phonemization
|
||||
|
||||
If no voice has been set up, returns None.
|
||||
|
||||
"""
|
||||
voice = self._espeak.get_current_voice()
|
||||
if voice.name:
|
||||
return EspeakVoice.from_ctypes(voice)
|
||||
return None # pragma: nocover
|
||||
|
||||
def text_to_phonemes(self, text: str, tie: bool = False) -> str:
|
||||
"""Translates a text into phonemes, must call set_voice() first.
|
||||
|
||||
This method is used by the Espeak backend. Wrapper on the
|
||||
espeak_TextToPhonemes function.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
text (str) : the text to phonemize
|
||||
|
||||
tie (bool, optional) : When True use a '͡' character between
|
||||
consecutive characters of a single phoneme. Else separate phoneme
|
||||
with '_'. This option requires espeak>=1.49. Default to False.
|
||||
|
||||
Returns
|
||||
-------
|
||||
phonemes (str) : the phonemes for the text encoded in IPA, with '_' as
|
||||
phonemes separator (excepted if ``tie`` is True) and ' ' as word
|
||||
separator.
|
||||
|
||||
"""
|
||||
if self.voice is None: # pragma: nocover
|
||||
raise RuntimeError('no voice specified')
|
||||
|
||||
if tie and self.version <= (1, 48, 3):
|
||||
raise RuntimeError( # pragma: nocover
|
||||
'tie option only compatible with espeak>=1.49')
|
||||
|
||||
# from Python string to C void** (a pointer to a pointer to chars)
|
||||
text_ptr = ctypes.pointer(ctypes.c_char_p(text.encode('utf8')))
|
||||
|
||||
# input text is encoded as UTF8
|
||||
text_mode = 1
|
||||
|
||||
# output phonemes in IPA and separated by _, or with a tie character if
|
||||
# required. See comments for the function espeak_TextToPhonemes in
|
||||
# speak_lib.h of the espeak sources for details.
|
||||
if self.version <= (1, 48, 3): # pragma: nocover
|
||||
phonemes_mode = 0x03 | 0x01 << 4
|
||||
elif tie:
|
||||
phonemes_mode = 0x02 | 0x01 << 7 | ord('͡') << 8
|
||||
else:
|
||||
phonemes_mode = ord('_') << 8 | 0x02
|
||||
|
||||
result = []
|
||||
while text_ptr.contents.value is not None:
|
||||
phonemes = self._espeak.text_to_phonemes(
|
||||
text_ptr, text_mode, phonemes_mode)
|
||||
if phonemes:
|
||||
result.append(phonemes.decode())
|
||||
return ' '.join(result)
|
||||
|
||||
def synthetize(self, text: str):
    """Translates a text into phonemes, must call set_voice() first.

    Only compatible with espeak>=1.49. This method is used by the
    EspeakMbrola backend. Wrapper on the espeak_Synthesize function.

    Parameters
    ----------
    text (str) : the text to phonemize

    Returns
    -------
    phonemes (str) : the phonemes for the text encoded in SAMPA, with '_'
      as phonemes separator and no word separation.

    Raises
    ------
    RuntimeError if the espeak version is older than 1.49, if no voice is
      set, if the trace file cannot be opened or if the synthesis fails.

    """
    if self.version < (1, 49):  # pragma: nocover
        raise RuntimeError('not compatible with espeak<=1.48')
    if self.voice is None:  # pragma: nocover
        raise RuntimeError('no voice specified')

    # Encode once: the very same bytes are sent to espeak and used to
    # compute the buffer size (which is a number of bytes, not characters).
    encoded = text.encode('utf8')

    # init libc fopen and fclose functions
    self._libc.fopen.argtypes = [ctypes.c_char_p, ctypes.c_char_p]
    self._libc.fopen.restype = ctypes.c_void_p
    self._libc.fclose.argtypes = [ctypes.c_void_p]
    self._libc.fclose.restype = ctypes.c_int

    # output phonemes in SAMPA and separated by _. Write the result to a
    # tempfile which is read back after phonemization (seems not possible
    # to redirect to stdout). See comments for the function
    # espeak_SetPhonemeTrace in speak_lib.h of the espeak sources for
    # details.
    self._tempfile.truncate(0)
    file_p = self._libc.fopen(
        self._tempfile.name.encode(),
        self._tempfile.mode.encode())
    if not file_p:  # pragma: nocover
        # a NULL stream must not be forwarded to espeak nor to fclose
        raise RuntimeError(
            f'failed to open the trace file {self._tempfile.name}')

    try:
        self._espeak.set_phoneme_trace(0x01 << 4 | ord('_') << 8, file_p)
        status = self._espeak.synthetize(
            ctypes.c_char_p(encoded),
            # size in bytes of the text, +1 for the terminating NUL
            ctypes.c_size_t(len(encoded) + 1),
            ctypes.c_uint(0x01))
    finally:
        # close the C stream even if espeak fails. Closing (and not only
        # flushing) is required for the trace to be readable from Python.
        self._libc.fclose(file_p)

    if status != 0:  # pragma: nocover
        raise RuntimeError('failed to synthetize')

    self._tempfile.seek(0)
    return self._tempfile.read().decode().strip()
|
||||
+15
@@ -0,0 +1,15 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonologizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonologizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonologizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Phonemizer module for festival backend implementation"""
|
||||
+334
@@ -0,0 +1,334 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Festival backend for the phonemizer"""
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import shlex
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from logging import Logger
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, List, IO, Union, Pattern
|
||||
|
||||
from phonemizer.backend.base import BaseBackend
|
||||
from phonemizer.backend.festival import lispy
|
||||
from phonemizer.separator import Separator
|
||||
from phonemizer.utils import get_package_resource, version_as_tuple
|
||||
|
||||
|
||||
class FestivalBackend(BaseBackend):
    """Festival backend for the phonemizer

    Phonemization is delegated to the `festival` program: the input text
    and a Scheme script are written to temporary files, festival is run on
    them in a subprocess and the Scheme expressions it prints are parsed
    back into phonemized utterances.

    """
    # a static variable used to overload the default festival binary installed
    # on the system. The user can choose an alternative festival binary with
    # the method FestivalBackend.set_executable().
    _FESTIVAL_EXECUTABLE = None

    def __init__(self, language: str,
                 punctuation_marks: Optional[Union[str, Pattern]] = None,
                 preserve_punctuation: bool = False,
                 logger: Optional[Logger] = None):
        super().__init__(
            language,
            punctuation_marks=punctuation_marks,
            preserve_punctuation=preserve_punctuation,
            logger=logger)

        self.logger.debug('festival executable is %s', self.executable())

        # the Scheme script to be sent to festival. It is a template in
        # which the path to the text to phonemize is inserted by _process()
        script_file = get_package_resource('festival/phonemize.scm')
        with open(script_file, 'r') as fscript:
            self._script = fscript.read()
        self.logger.debug('loaded %s', script_file)

    @staticmethod
    def name():
        """Returns the name of the backend, i.e. 'festival'"""
        return 'festival'

    @classmethod
    def set_executable(cls, executable: Optional[str]):
        """Sets the festival backend to use `executable`

        If this is not set, the backend uses the default festival executable
        from the system installation.

        Parameters
        ----------
        executable (str) : the path to the festival executable to use as
          backend. Set `executable` to None to restore the default.

        Raises
        ------
        RuntimeError if `executable` is not an executable file.

        """
        if executable is None:
            cls._FESTIVAL_EXECUTABLE = None
            return

        executable = pathlib.Path(executable)
        if not (executable.is_file() and os.access(executable, os.X_OK)):
            raise RuntimeError(
                f'{executable} is not an executable file')

        cls._FESTIVAL_EXECUTABLE = executable.resolve()

    @classmethod
    def executable(cls) -> Path:
        """Returns the absolute path to the festival executable used as backend

        The following precedence rule applies for executable lookup:

        1. As specified by FestivalBackend.set_executable()
        2. Or as specified by the environment variable
           PHONEMIZER_FESTIVAL_EXECUTABLE
        3. Or the default 'festival' binary found on the system with
           ``shutil.which('festival')``

        Raises
        ------
        RuntimeError
            if the festival executable cannot be found or if the
            environment variable PHONEMIZER_FESTIVAL_EXECUTABLE is set to a
            non-executable file

        """
        if cls._FESTIVAL_EXECUTABLE:
            return cls._FESTIVAL_EXECUTABLE

        if 'PHONEMIZER_FESTIVAL_EXECUTABLE' in os.environ:
            executable = pathlib.Path(os.environ[
                'PHONEMIZER_FESTIVAL_EXECUTABLE'])
            if not (
                    executable.is_file()
                    and os.access(executable, mode=os.X_OK)
            ):
                raise RuntimeError(
                    f'PHONEMIZER_FESTIVAL_EXECUTABLE={executable} '
                    f'is not an executable file')
            return executable.resolve()

        executable = shutil.which('festival')
        if not executable:  # pragma: nocover
            raise RuntimeError(
                'failed to find festival executable')
        return Path(executable).resolve()

    @classmethod
    def is_available(cls):
        """True if the festival executable is available, False otherwise"""
        try:
            cls.executable()
        except RuntimeError:  # pragma: nocover
            return False
        return True

    @classmethod
    def version(cls):
        """Festival version as a tuple of integers (major, minor, patch)

        Raises
        ------
        RuntimeError if FestivalBackend.is_available() is False or if the
          version cannot be extracted for some reason.

        """
        festival = cls.executable()

        # the full version string includes extra information we don't need
        long_version = subprocess.check_output(
            [festival, '--version']).decode('latin1').strip()

        # extract the version number with a regular expression
        festival_version_re = r'.* ([0-9\.]+[0-9]):'
        try:
            version = re.match(festival_version_re, long_version).group(1)
        except AttributeError:
            # re.match() returned None: the output has an unexpected format
            raise RuntimeError(
                f'cannot extract festival version from {festival}') from None

        return version_as_tuple(version)

    @staticmethod
    def supported_languages() -> Dict[str, str]:
        """A dictionary of language codes -> name supported by festival

        Actually only en-us (American English) is supported.

        """
        return {'en-us': 'english-us'}

    # pylint: disable=unused-argument
    def _phonemize_aux(self, text: List[str], offset: int,
                       separator: Separator, strip: bool) -> List[str]:
        """Return a phonemized version of `text` with festival

        This function is a wrapper on festival, a text to speech
        program, allowing simple phonemization of some English
        text. The US phoneset we use is the default one in festival,
        as described at http://www.festvox.org/bsv/c4711.html

        Any opening and closing parenthesis in `text` are removed, as
        they interfere with the Scheme expression syntax. Double quotes
        are removed as well because they denote utterances boundaries in
        festival.

        Parsing a ill-formed Scheme expression during post-processing
        (typically with unbalanced parenthesis) raises an IndexError.

        """
        text = self._preprocess(text)
        if not text:
            # nothing left to phonemize once the input is cleaned
            return []
        text = self._process(text)
        text = self._postprocess(text, separator, strip)
        return text

    @staticmethod
    def _double_quoted(line: str) -> str:
        """Return the string `line` surrounded by double quotes"""
        return '"' + line + '"'

    @staticmethod
    def _cleaned(line: str) -> str:
        """Remove 'forbidden' characters from the line"""
        # special case (very unlikely but causes a crash in festival)
        # where a line is only made of '
        if set(line) == set("'"):
            line = ''

        # remove forbidden characters (reserved for scheme, ie festival
        # scripting language)
        return line.replace('"', '').replace('(', '').replace(')', '').strip()

    @classmethod
    def _preprocess(cls, text: List[str]) -> str:
        """Returns the contents of `text` formatted for festival input

        Each line of `text` is cleaned from its forbidden characters and
        surrounded by double quotes. The returned result is a multiline
        string. Lines that are empty, before or after cleaning, are
        ignored.

        """
        cleaned_text = (
            cls._cleaned(line) for line in text if line != '')

        return '\n'.join(
            cls._double_quoted(line) for line in cleaned_text if line != '')

    def _process(self, text: str) -> str:
        """Return the raw phonemization of `text`

        This function delegates to festival the text analysis and
        syllabic structure extraction.

        Return a string containing the "SylStructure" relation tree of
        the text, as a scheme expression.

        """
        # Both temporary files are created with delete=False and closed
        # before festival runs, so that festival can open them by name.
        # They are explicitly removed in the `finally` clauses.
        with tempfile.NamedTemporaryFile('w+', delete=False) as data:
            try:
                # save the text as a tempfile
                data.write(text)
                data.close()

                # fix the path name for windows (backslashes are escaped
                # before being inserted in the Scheme script)
                name = data.name
                if sys.platform == 'win32':  # pragma: nocover
                    name = name.replace('\\', '\\\\')

                with tempfile.NamedTemporaryFile('w+', delete=False) as scm:
                    try:
                        scm.write(self._script.format(name))
                        scm.close()

                        # The command is built as a list of arguments, not
                        # as a string to be split again, so as to support
                        # paths including spaces.
                        cmd = [str(self.executable()), '-b', scm.name]
                        if self.logger:
                            self.logger.debug('running %s', ' '.join(cmd))

                        # redirect stderr to a tempfile and displaying it only
                        # on errors. Messages are something like: "UniSyn:
                        # using default diphone ax-ax for y-pau". This is
                        # related to wave synthesis (done by festival during
                        # phonemization).
                        with tempfile.TemporaryFile('w+') as fstderr:
                            return self._run_festival(cmd, fstderr)
                    finally:
                        os.remove(scm.name)
            finally:
                os.remove(data.name)

    @staticmethod
    def _run_festival(cmd: Union[str, List[str]], fstderr: IO) -> str:
        """Runs the festival command for phonemization

        Parameters
        ----------
        cmd (str or list of str) : the command to execute, preferably as a
          list of arguments. A command given as a single string is split
          with ``shlex.split(cmd, posix=False)``, which does not support
          paths including spaces.

        fstderr (file) : an opened file where to redirect the standard
          error of the command, it is read back on failure only.

        Returns the raw phonemized output (need to be postprocessed). Raises
        a RuntimeError if festival fails.

        """
        if isinstance(cmd, str):
            args = shlex.split(cmd, posix=False)
            command = cmd
        else:
            args = [str(arg) for arg in cmd]
            command = ' '.join(args)

        try:
            output = subprocess.check_output(args, stderr=fstderr)

            # festival seems to use latin1 and not utf8. Runs of spaces are
            # squeezed into a single one.
            return re.sub(' +', ' ', output.decode('latin1'))

        except subprocess.CalledProcessError as err:  # pragma: nocover
            fstderr.seek(0)
            raise RuntimeError(
                f'Command "{command}" returned exit status {err.returncode}, '
                f'output is:\n{fstderr.read()}') from None

    @staticmethod
    def _postprocess_syll(syll: List[str], separator: Separator,
                          strip: bool) -> str:
        """Parse a syllable from festival to phonemized output"""
        sep = separator.phone
        # the first element of `syll` is skipped, the phone name is read
        # from the head of each remaining element
        out = (phone[0][0].replace('"', '') for phone in syll[1:])
        out = sep.join(o for o in out if o != '')
        return out if strip else out + sep

    @classmethod
    def _postprocess_word(cls, word: List[List[str]], separator: Separator,
                          strip: bool) -> str:
        """Parse a word from festival to phonemized output"""
        sep = separator.syllable
        # the first element of `word` is skipped, the others are syllables
        out = sep.join(
            cls._postprocess_syll(syll, separator, strip)
            for syll in word[1:])
        return out if strip else out + sep

    @classmethod
    def _postprocess_line(cls, line: str, separator: Separator,
                          strip: bool) -> str:
        """Parse a line from festival to phonemized output"""
        sep = separator.word
        out = []
        for word in lispy.parse(line):
            word = cls._postprocess_word(word, separator, strip)
            # ignore the words phonemized as an empty string
            if word != '':
                out.append(word)
        out = sep.join(out)

        return out if strip else out + sep

    @classmethod
    def _postprocess(cls, tree: str, separator: Separator,
                     strip: bool) -> List[str]:
        """Conversion from festival syllable tree to desired format"""
        # one utterance per line, empty lines and empty trees are ignored
        return [cls._postprocess_line(line, separator, strip)
                for line in tree.split('\n')
                if line not in ['', '(nil nil nil)']]
|
||||
@@ -0,0 +1,66 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Parse a Scheme expression as a nested list
|
||||
|
||||
The main function of this module is lispy.parse, other ones should be
|
||||
considered private. This module is a dependency of the festival
|
||||
backend.
|
||||
|
||||
From http://www.norvig.com/lispy.html
|
||||
|
||||
"""
|
||||
from typing import List, Union
|
||||
|
||||
|
||||
def parse(program: str):
    """Convert a Scheme expression given as a string into a nested list

    The `program` is split into tokens, then the first complete expression
    found in those tokens is read and returned. Atoms are kept as strings.

    Raises an IndexError if the expression is not valid scheme
    (unbalanced parenthesis).

    >>> parse('(+ 2 (* 5 2))')
    ['+', '2', ['*', '5', '2']]

    """
    tokens = _tokenize(program)
    return _read_from_tokens(tokens)
|
||||
|
||||
|
||||
def _tokenize(chars: str) -> List[str]:
|
||||
"""Convert a string of characters into a list of tokens."""
|
||||
return chars.replace('(', ' ( ').replace(')', ' ) ').split()
|
||||
|
||||
|
||||
Expr = Union[str, List['Expr']]
|
||||
|
||||
|
||||
def _read_from_tokens(tokens: List[str]) -> Expr:
|
||||
"""Read an expression from a sequence of tokens"""
|
||||
if len(tokens) == 0: # pragma: nocover
|
||||
raise SyntaxError('unexpected EOF while reading')
|
||||
|
||||
token = tokens.pop(0)
|
||||
if token == '(':
|
||||
expr = []
|
||||
while tokens[0] != ')':
|
||||
expr.append(_read_from_tokens(tokens))
|
||||
tokens.pop(0) # pop off ')'
|
||||
return expr
|
||||
|
||||
if token == ')': # pragma: nocover
|
||||
raise SyntaxError('unexpected )')
|
||||
|
||||
return token
|
||||
@@ -0,0 +1,143 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Segments backend for the phonemizer"""
|
||||
|
||||
import pathlib
|
||||
from logging import Logger
|
||||
from typing import Optional, Dict, List, Union, Pattern
|
||||
|
||||
import segments
|
||||
|
||||
from phonemizer.backend.base import BaseBackend
|
||||
from phonemizer.separator import Separator
|
||||
from phonemizer.utils import get_package_resource, version_as_tuple
|
||||
|
||||
|
||||
class SegmentsBackend(BaseBackend):
    """Segments backends for the phonemizer

    The phonemization relies on a grapheme to phoneme mapping, loaded from
    a file and applied to the text by a `segments.Tokenizer`.

    The phonemize method will raise a ValueError when parsing an
    unknown morpheme.

    """

    def __init__(self, language: str,
                 punctuation_marks: Optional[Union[str, Pattern]] = None,
                 preserve_punctuation: bool = False,
                 logger: Optional[Logger] = None):
        # will be initialized in _init_language() from super().__init__()
        self._tokenizer: Optional[segments.Tokenizer] = None
        super().__init__(
            language,
            punctuation_marks=punctuation_marks,
            preserve_punctuation=preserve_punctuation,
            logger=logger)

    def _init_language(self, language):
        """Builds the tokenizer for `language` and returns the language code

        `language` is either the name of a bundled language or the path to
        a grapheme to phoneme file.

        """
        # load the grapheme to phoneme mapping
        profile = self._load_g2p_profile(language)
        self._tokenizer = segments.Tokenizer(profile=profile)

        # this is the language code
        return pathlib.Path(language).stem

    @staticmethod
    def name():
        """Returns the name of the backend, i.e. 'segments'"""
        return 'segments'

    @classmethod
    def version(cls):
        """Returns the version of the segments package as a tuple"""
        return version_as_tuple(segments.__version__)

    @classmethod
    def is_available(cls):
        """Always True, segments is imported along with this module"""
        return True

    @staticmethod
    def supported_languages():
        """Returns a dict of language: file supported by the segments backend

        The supported languages have a grapheme to phoneme conversion file
        bundled with phonemizer. Users can also use their own file as
        parameter of the phonemize() function.

        """
        # directory phonemizer/share/segments
        directory = get_package_resource('segments')

        # supported languages are files with the 'g2p' extension
        return {g2p.stem: g2p
                for g2p in directory.iterdir() if g2p.suffix == '.g2p'}

    @classmethod
    def is_supported_language(cls, language: str) -> bool:
        """True if `language` is a bundled language or a valid g2p file"""
        if pathlib.Path(language).is_file():
            # a user provided file is supported only if it can be loaded
            try:
                cls._load_g2p_profile(language)
                return True
            except RuntimeError:
                return False
        return language in cls.supported_languages()

    @classmethod
    def _load_g2p_profile(cls, language: str) -> segments.Profile:
        """Returns a segments profile from a `language`

        `language` is either the path to an existing grapheme to phoneme
        file or the name of a language bundled with phonemizer.

        Raises a RuntimeError if the file is not found or if one of its
        lines is not made of exactly 2 blank-separated fields.

        """
        # make sure the g2p file exists
        if not pathlib.Path(language).is_file():
            try:
                language = cls.supported_languages()[language]
            except KeyError:
                raise RuntimeError(
                    f'grapheme to phoneme file not found: '
                    f'{language}') from None

        # load the mapping grapheme -> phoneme from the file, make sure all
        # lines are well formatted
        g2p: Dict[str, str] = {}
        with open(language, 'r', encoding='utf8') as flang:
            for num, line in enumerate(flang):
                elts = line.strip().split()
                if len(elts) != 2:
                    raise RuntimeError(
                        'grapheme to phoneme file, line {} must have 2 rows '
                        'but have {}: {}'.format(num + 1, len(elts), language))
                g2p[elts[0]] = elts[1]

        # build the segments profile from the g2p mapping
        return segments.Profile(
            *[{'Grapheme': k, 'mapping': v} for k, v in g2p.items()])

    # pylint: disable=unused-argument
    def _phonemize_aux(self, text: List[str], offset: int,
                       separator: Separator, strip: bool) -> List[str]:
        """Returns the phonemized version of each utterance in `text`

        The separators handled here are the ones used in the tokenizer
        output: ' ' between phones and ' # ' between words. They are
        replaced by `separator.phone` and `separator.word`. When `strip` is
        False, separators are also appended at the end of each word and of
        each utterance.

        """
        # tokenize the input text per utterance
        phonemized = (
            self._tokenizer(line, column='mapping', errors='strict')
            for line in text)

        # the output of segments is always strip, so we need to add
        # token separation at the end when strip is False.
        if not strip:
            # add word separator at end of utterance
            phonemized = (p + ' # ' for p in phonemized)
            # add phoneme separator at end of word: the extra space put in
            # front of each word separator survives the replacement of
            # ' # ' below and is then turned into a phone separator
            phonemized = (p.replace(' # ', '  # ') for p in phonemized)

        # replace default separators by our custom ones
        phonemized = (p.replace(' # ', '#') for p in phonemized)
        phonemized = (p.replace(' ', separator.phone) for p in phonemized)
        phonemized = (p.replace('#', separator.word) for p in phonemized)

        # return the result as a list of utterances
        return list(phonemized)
|
||||
@@ -0,0 +1,63 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Logging facilities for the phonemizer"""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from logging import Logger
|
||||
|
||||
|
||||
def get_logger(verbosity: str = 'quiet', name: str = 'phonemizer') -> Logger:
    """Builds and returns the logger called `name`

    Any handler previously attached to that logger is discarded and
    replaced by a single one, formatting the messages as
    '[LEVEL] message'.

    Parameters
    ----------
    verbosity (str) : The level of verbosity, one of 'verbose' (debug, info
      and warning messages are sent to stderr), 'normal' (warnings only are
      sent to stderr) or 'quiet' (a null handler is attached, nothing is
      displayed).
    name (str) : The logger name, default to 'phonemizer'

    Raises
    ------
    RuntimeError if `verbosity` is not 'normal', 'verbose', or 'quiet'.

    """
    allowed = ('normal', 'verbose', 'quiet')
    if verbosity not in allowed:
        raise RuntimeError(
            f'verbosity is {verbosity} but must be in '
            f'{", ".join(allowed)}')

    log = logging.getLogger(name)

    # start from a clean state: drop the handlers from previous calls
    log.handlers = []

    # only the verbose mode lowers the threshold below warnings
    log.setLevel(logging.DEBUG if verbosity == 'verbose' else logging.WARNING)

    # messages go to stderr, excepted in quiet mode where they are dropped
    if verbosity == 'quiet':
        sink = logging.NullHandler()
    else:
        sink = logging.StreamHandler(sys.stderr)

    sink.setFormatter(logging.Formatter('[%(levelname)s] %(message)s'))
    log.addHandler(sink)
    return log
|
||||
@@ -0,0 +1,428 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Command-line phonemizer tool, have a 'phonemizer --help' to get in"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
|
||||
from phonemizer import phonemize, separator, version, logger, punctuation
|
||||
from phonemizer.backend import BACKENDS
|
||||
|
||||
|
||||
class CatchExceptions:  # pragma: nocover
    """Decorator turning common exceptions into a clean program exit

    The decorated function is called without arguments and its return
    value is discarded. If it raises one of IOError, ValueError, OSError,
    RuntimeError, AssertionError or KeyboardInterrupt, a short message is
    written on the standard error stream and the program exits with error
    code 1. Any other exception is propagated as is.

    Parameters
    ----------
    function :
        The function to wrap in a try/except block

    """
    def __init__(self, function):
        self.function = function

    def __call__(self):
        """Calls the wrapped function, exits on the handled exceptions"""
        try:
            self.function()
        except KeyboardInterrupt:
            self.exit('keyboard interruption, exiting')
        except (IOError, ValueError, OSError,
                RuntimeError, AssertionError) as err:
            self.exit(f'fatal error: {err}')

    @staticmethod
    def exit(msg):
        """Write `msg` on stderr and exit with error code 1"""
        # surrounding blanks are removed, the message ends with a newline
        sys.stderr.write(f'{msg.strip()}\n')
        sys.exit(1)
|
||||
|
||||
|
||||
def parse_args():
|
||||
"""Argument parser for the phonemization script"""
|
||||
parser = argparse.ArgumentParser(
|
||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||
description='''Multilingual text to phonemes converter
|
||||
|
||||
The 'phonemize' program allows simple phonemization of words and texts
|
||||
in many language using four backends: espeak, espeak-mbrola, festival
|
||||
and segments.
|
||||
|
||||
- espeak is a text-to-speech software supporting multiple languages
|
||||
and IPA (International Phonetic Alphabet) output. See
|
||||
http://espeak.sourceforge.net or
|
||||
https://github.com/espeak-ng/espeak-ng
|
||||
|
||||
- espeak-mbrola uses the SAMPA phonetic alphabet, it requires mbrola to be
|
||||
installed as well as additional mbrola voices. It does not support word or
|
||||
syllable tokenization. See
|
||||
https://github.com/espeak-ng/espeak-ng/blob/master/docs/mbrola.md
|
||||
|
||||
- festival is also a text-to-speech software. Currently only American
|
||||
English is supported and festival uses a custom phoneset
|
||||
(http://www.festvox.org/bsv/c4711.html), but festival is the only
|
||||
backend supporting tokenization at the syllable
|
||||
level. See http://www.cstr.ed.ac.uk/projects/festival
|
||||
|
||||
- segments is a Unicode tokenizer that build a phonemization from a
|
||||
grapheme to phoneme mapping provided as a file by the user. See
|
||||
https://github.com/cldf/segments.
|
||||
|
||||
See the '--list-languages' option below for details on the languages
|
||||
supported by each backend.
|
||||
|
||||
''',
|
||||
epilog='''
|
||||
Examples:
|
||||
|
||||
* Phonemize a US English text with espeak
|
||||
|
||||
$ echo 'hello world' | phonemize -l en-us -b espeak
|
||||
həloʊ wɜːld
|
||||
|
||||
* Phonemize a US English text with festival
|
||||
|
||||
$ echo 'hello world' | phonemize -l en-us -b festival
|
||||
hhaxlow werld
|
||||
|
||||
* Phonemize a Japanese text with segments
|
||||
|
||||
$ echo 'konnichiwa tsekai' | phonemize -l japanese -b segments
|
||||
konnitʃiwa t͡sekai
|
||||
|
||||
* Add a separator between phones
|
||||
|
||||
$ echo 'hello world' | phonemize -l en-us -b festival -p '-' --strip
|
||||
hh-ax-l-ow w-er-l-d
|
||||
|
||||
* Phonemize some French text file using espeak
|
||||
|
||||
$ phonemize -l fr-fr -b espeak text.txt -o phones.txt
|
||||
''')
|
||||
|
||||
# general arguments
|
||||
parser.add_argument(
|
||||
'-V', '--version',
|
||||
action='store_true',
|
||||
help='show version information and exit.')
|
||||
|
||||
group = parser.add_mutually_exclusive_group()
|
||||
group.add_argument(
|
||||
'-v', '--verbose',
|
||||
action='store_true',
|
||||
help='write all log messages to stderr '
|
||||
'(displays only warnings by default).')
|
||||
group.add_argument(
|
||||
'-q', '--quiet',
|
||||
action='store_true',
|
||||
help='do not display any log message, even warnings.')
|
||||
|
||||
parser.add_argument(
|
||||
'-j', '--njobs',
|
||||
type=int, metavar='<int>', default=1,
|
||||
help='number of parallel jobs, default is %(default)s.')
|
||||
|
||||
# input/output arguments
|
||||
group = parser.add_argument_group('input/output')
|
||||
group.add_argument(
|
||||
'input',
|
||||
default=sys.stdin, nargs='?', metavar='<file>',
|
||||
help='input text file to phonemize, if not specified read from stdin.')
|
||||
|
||||
group.add_argument(
|
||||
'-o', '--output',
|
||||
default=sys.stdout, metavar='<file>',
|
||||
help='output text file to write, if not specified write to stdout.')
|
||||
|
||||
group.add_argument(
|
||||
'--prepend-text',
|
||||
default=False, const=True, nargs='?', metavar='<str>',
|
||||
help='''prepend each line of the phonemized output text with its
|
||||
matching input text. If a string is specified as option value, use it
|
||||
as field separator, else use one of "|", "||", "|||", "||||" by
|
||||
selecting the first one that is not configured as a token separator
|
||||
(see -p/-s/-w options).''')
|
||||
|
||||
group.add_argument(
|
||||
'--preserve-empty-lines',
|
||||
action='store_true',
|
||||
help='''preserve the empty lines in the phonemized output, default is
|
||||
to remove them.''')
|
||||
|
||||
group = parser.add_argument_group('backends')
|
||||
group.add_argument(
|
||||
'-b', '--backend',
|
||||
metavar='<str>', default=None,
|
||||
choices=['espeak', 'espeak-mbrola', 'festival', 'segments'],
|
||||
help="""the phonemization backend, must be 'espeak', 'espeak-mbrola',
|
||||
'festival' or 'segments'. Default is 'espeak'.""")
|
||||
|
||||
group.add_argument(
|
||||
'-L', '--list-languages',
|
||||
action='store_true',
|
||||
help="""list available languages (and exit) for the specified backend,
|
||||
or for all backends if none selected.""")
|
||||
|
||||
group = parser.add_argument_group('language')
|
||||
group.add_argument(
|
||||
'-l', '--language',
|
||||
metavar='<str|file>', default='en-us',
|
||||
help='''the language code of the input text, use '--list-languages'
|
||||
for a list of supported languages. Default is %(default)s.''')
|
||||
|
||||
group = parser.add_argument_group('token separators')
|
||||
group.add_argument(
|
||||
'-p', '--phone-separator',
|
||||
metavar='<str>', default=separator.default_separator.phone,
|
||||
help='phone separator, default is "%(default)s".')
|
||||
|
||||
group.add_argument(
|
||||
'-w', '--word-separator',
|
||||
metavar='<str>', default=separator.default_separator.word,
|
||||
help='''word separator, not valid for espeak-mbrola backend,
|
||||
default is "%(default)s".''')
|
||||
|
||||
group.add_argument(
|
||||
'-s', '--syllable-separator',
|
||||
metavar='<str>', default=separator.default_separator.syllable,
|
||||
help='''syllable separator, only valid for festival backend,
|
||||
this option has no effect if another backend is used.
|
||||
Default is "%(default)s".''')
|
||||
|
||||
group.add_argument(
|
||||
'--strip',
|
||||
action='store_true',
|
||||
help='removes the end separators in phonemized tokens.')
|
||||
|
||||
group = parser.add_argument_group('specific to espeak backend')
|
||||
try:
|
||||
espeak_library = BACKENDS['espeak'].library()
|
||||
except RuntimeError: # pragma: nocover
|
||||
espeak_library = None
|
||||
|
||||
group.add_argument(
|
||||
'--espeak-library',
|
||||
default=None, type=str, metavar='<library>',
|
||||
help=f'''the path to the espeak shared library to use (*.so on Linux,
|
||||
*.dylib on Mac and *.dll on Windows, useful to overload the default
|
||||
espeak version installed on the system). Default to
|
||||
{espeak_library}. This path can also be specified
|
||||
using the PHONEMIZER_ESPEAK_LIBRARY environment variable.''')
|
||||
group.add_argument(
|
||||
'--tie',
|
||||
nargs='?', default=False, const=True, metavar='<chr>',
|
||||
help='''when the option is set, use a tie character within multi-letter
|
||||
phoneme names, default to U+361 (as in d͡ʒ), 'z' means ZWJ character,
|
||||
only compatible with espeak>1.48 and incompatible with the
|
||||
-p/--phone-separator option''')
|
||||
group.add_argument(
|
||||
'--with-stress',
|
||||
action='store_true',
|
||||
help='''when the option is set, the stresses on phonemes are present
|
||||
(stresses characters are ˈ'ˌ). By default stresses are removed.''')
|
||||
group.add_argument(
|
||||
'--language-switch',
|
||||
default='keep-flags',
|
||||
choices=['keep-flags', 'remove-flags', 'remove-utterance'],
|
||||
help="""espeak can pronounce some words in another language (typically
|
||||
English) when phonemizing a text. This option setups the policy to use
|
||||
when such a language switch occurs. Three values are available:
|
||||
'keep-flags' (the default), 'remove-flags' or 'remove-utterance'. The
|
||||
'keep-flags' policy keeps the language switching flags, for example
|
||||
(en) or (jp), in the output. The 'remove-flags' policy removes them and
|
||||
the 'remove-utterance' policy removes the whole line of text including
|
||||
a language switch.""")
|
||||
group.add_argument(
|
||||
'--words-mismatch',
|
||||
default='ignore', choices=['ignore', 'warn', 'remove'],
|
||||
help="""espeak can join two consecutive words or drop some words,
|
||||
yielding a word count mismatch between orthographic and phonemized
|
||||
text. This option setups the policy to use when such a words count
|
||||
mismatch occurs. Three values are available: 'ignore' (the default)
|
||||
which do nothing, 'warn' which issue a warning for each mismatched
|
||||
line, and 'remove' which remove the mismatched lines from the
|
||||
output.""")
|
||||
|
||||
group = parser.add_argument_group('specific to festival backend')
|
||||
try:
|
||||
festival_executable = BACKENDS['festival'].executable()
|
||||
except RuntimeError: # pragma: nocover
|
||||
festival_executable = None
|
||||
|
||||
group.add_argument(
|
||||
'--festival-executable',
|
||||
default=None, type=str, metavar='<executable>',
|
||||
help=f'''the path to the festival executable to use (useful to
|
||||
overload the default festival installed on the system). Default to
|
||||
{festival_executable}. This path can also be specified using the
|
||||
PHONEMIZER_FESTIVAL_EXECUTABLE environment variable.''')
|
||||
|
||||
group = parser.add_argument_group(
|
||||
'punctuation processing',
|
||||
description='not available for espeak-mbrola backend')
|
||||
group.add_argument(
|
||||
'--preserve-punctuation',
|
||||
action='store_true',
|
||||
help='''preserve the punctuation marks in the phonemized output,
|
||||
default is to remove them.''')
|
||||
group.add_argument(
|
||||
'--punctuation-marks',
|
||||
type=str, metavar='<str>',
|
||||
default=punctuation.Punctuation.default_marks(),
|
||||
help='''the marks to consider during punctuation processing (either
|
||||
for removal or preservation). Default is %(default)s.''')
|
||||
group.add_argument(
|
||||
'--punctuation-marks-is-regex',
|
||||
action='store_true',
|
||||
help="""interpret the '--punctuation-marks' parameter as a regex.
|
||||
Default is to interpret as a string.""")
|
||||
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def list_languages(args_backend):
    """Returns the available languages for the given `backend` as a str

    Parameters
    ----------
    args_backend: str or None
        Name of a backend registered in ``BACKENDS``. When empty or None, the
        languages of every registered backend are listed.

    Returns
    -------
    str
        For each backend, a header line followed by one tab-separated
        ``<code> -> <name>`` line per supported language, sorted. The
        sections of successive backends are separated by a newline.

    """
    backends = [args_backend] if args_backend else list(BACKENDS)

    sections = []
    for backend in backends:
        languages = sorted(BACKENDS[backend].supported_languages().items())
        sections.append(
            f'supported languages for {backend} are:\n'
            + '\n'.join(f'\t{code}\t->\t{name}' for code, name in languages))

    # return the text instead of printing it: the caller is in charge of the
    # display (printing here made `print(list_languages(...))` emit "None")
    return '\n'.join(sections)
|
||||
|
||||
|
||||
def get_logger(verbose, quiet):
    """Returns a logger configured from the verbose/quiet flags

    `verbose` takes precedence over `quiet`. When neither is set the
    'normal' verbosity is requested.

    """
    if verbose:
        level = 'verbose'
    else:
        level = 'quiet' if quiet else 'normal'
    return logger.get_logger(verbosity=level)
|
||||
|
||||
|
||||
def setup_stream(stream, mode):
    """Returns `stream` ready for use, opening it first if it is a filename

    A str is taken as a path and opened with the given `mode` using UTF-8
    encoding. Anything else is returned untouched.

    """
    if not isinstance(stream, str):
        # already a stream object, nothing to open
        return stream  # pragma: nocover

    # the caller is responsible for closing the returned file
    # pylint: disable=consider-using-with
    return open(stream, mode, encoding='utf8')
|
||||
|
||||
|
||||
@CatchExceptions
def main():
    """Phonemize a text from command-line arguments

    Parses the command line, then either prints the version or the supported
    languages and returns, or phonemizes the lines read from the input stream
    and writes the result to the output stream.

    The streams opened here from a filename are always closed before
    returning, including on error. Streams received as already opened objects
    are left untouched.

    Raises
    ------
    ValueError
        if the punctuation marks must be interpreted as a regular expression
        but do not compile.

    """
    args = parse_args()

    # setup a custom path to espeak and festival if required (this must be done
    # before generating the version message)
    if args.espeak_library:
        BACKENDS['espeak'].set_library(args.espeak_library)
    if args.festival_executable:
        BACKENDS['festival'].set_executable(args.festival_executable)

    # display version information and exit
    if args.version:
        print(version.version())
        return

    # list supported languages and exit
    if args.list_languages:
        print(list_languages(args.backend))
        return

    # set default backend as espeak if not specified
    args.backend = args.backend or 'espeak'

    # configure logging according to --verbose/--quiet options
    log = get_logger(args.verbose, args.quiet)

    # the streams opened here from a filename: they, and only they, are
    # closed on exit (required on windows to release the files)
    opened = []
    try:
        # configure input:output as a readable/writable streams
        streamin = setup_stream(args.input, 'r')
        if streamin is not args.input:
            opened.append(streamin)
        log.debug('reading from %s', streamin.name)

        streamout = setup_stream(args.output, 'w')
        if streamout is not args.output:
            opened.append(streamout)
        log.debug('writing to %s', streamout.name)

        # configure the separator for phonemes, syllables and words.
        if args.backend == 'espeak-mbrola':
            log.debug('using espeak-mbrola backend: ignoring word separator')
            sep = separator.Separator(
                phone=args.phone_separator,
                syllable=None,
                word=None)
        else:
            sep = separator.Separator(
                phone=args.phone_separator,
                syllable=args.syllable_separator,
                word=args.word_separator)
        log.debug('separator is %s', sep)

        if args.prepend_text:
            input_output_separator = sep.input_output_separator(
                args.prepend_text)
            log.debug(
                'prepend input text to output, separator is "%s"',
                input_output_separator)
        else:
            input_output_separator = False

        if args.punctuation_marks_is_regex:
            log.debug('punctuation marks is regex %s', args.punctuation_marks)
            try:
                args.punctuation_marks = re.compile(args.punctuation_marks)
            except re.error as err:
                # keep the original regex error as the cause
                raise ValueError(
                    "can't compile regex pattern from "
                    f"{args.punctuation_marks}") from err

        # phonemize the input text
        out = phonemize(
            streamin.readlines(),
            language=args.language,
            backend=args.backend,
            separator=sep,
            strip=args.strip,
            prepend_text=args.prepend_text,
            preserve_empty_lines=args.preserve_empty_lines,
            preserve_punctuation=args.preserve_punctuation,
            punctuation_marks=args.punctuation_marks,
            with_stress=args.with_stress,
            tie=args.tie,
            language_switch=args.language_switch,
            words_mismatch=args.words_mismatch,
            njobs=args.njobs,
            logger=log)

        # NOTE(review): files are opened in text mode, so writing os.linesep
        # may be translated again on windows ('\r\r\n') -- to be confirmed
        if out and input_output_separator:
            # each output item is a pair (input text, phonemized text)
            streamout.write(
                os.linesep.join(
                    f'{line[0]} {input_output_separator} {line[1]}'
                    for line in out)
                + os.linesep)
        elif out:
            streamout.write(os.linesep.join(out) + os.linesep)
    finally:
        for stream in opened:
            stream.close()


if __name__ == '__main__':  # pragma: nocover
    main()
|
||||
@@ -0,0 +1,328 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Provides the phonemize function
|
||||
|
||||
To use it in your own code, type:
|
||||
|
||||
from phonemizer import phonemize
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from logging import Logger
|
||||
from typing import Optional, Union, List, Pattern
|
||||
|
||||
from typing_extensions import Literal
|
||||
|
||||
from phonemizer.backend import BACKENDS
|
||||
from phonemizer.backend.base import BaseBackend
|
||||
from phonemizer.backend.espeak.language_switch import LanguageSwitch
|
||||
from phonemizer.backend.espeak.words_mismatch import WordMismatch
|
||||
from phonemizer.logger import get_logger
|
||||
from phonemizer.punctuation import Punctuation
|
||||
from phonemizer.separator import default_separator, Separator
|
||||
from phonemizer.utils import list2str, str2list
|
||||
|
||||
Backend = Literal['espeak', 'espeak-mbrola', 'festival', 'segments']
|
||||
|
||||
|
||||
def phonemize(  # pylint: disable=too-many-arguments
        text,
        language: str = 'en-us',
        backend: Backend = 'espeak',
        separator: Optional[Separator] = default_separator,
        strip: bool = False,
        prepend_text: bool = False,
        preserve_empty_lines: bool = False,
        preserve_punctuation: bool = False,
        punctuation_marks: Union[str, Pattern] = Punctuation.default_marks(),
        with_stress: bool = False,
        tie: Union[bool, str] = False,
        language_switch: LanguageSwitch = 'keep-flags',
        words_mismatch: WordMismatch = 'ignore',
        njobs: int = 1,
        logger: Logger = get_logger()):
    """Multilingual text to phonemes converter

    Return a phonemized version of an input `text`, given its `language` and a
    phonemization `backend`.

    Note
    ----

    To improve the processing speed it is better to minimize the calls to this
    function: providing the input text as a list and calling phonemize() a
    single time is much more efficient than calling it on each element of the
    list. Indeed the initialization of the phonemization backend can be
    expensive, especially for espeak.

    Do this:

    >>> text = [line1, line2, ...]
    >>> phonemize(text, ...)

    Not this:

    >>> for line in text:
    >>>     phonemize(line, ...)

    Parameters
    ----------

    text: str or list of str
        The text to be phonemized. Any empty line will
        be ignored. If ``text`` is an str, it can be multiline (lines being
        separated by ``\\n``). If ``text`` is a list, each element is
        considered as a separated line. Each line is considered as a text
        utterance.

    language: str
        The language code of the input text, must be supported by
        the backend. If ``backend`` is 'segments', the language can be a file
        with a grapheme to phoneme mapping.

    backend: str, optional
        The software backend to use for phonemization,
        must be 'festival' (US English only is supported, coded 'en-us'),
        'espeak', 'espeak-mbrola' or 'segments'.

    separator: Separator
        string separators between phonemes, syllables and
        words, default to separator.default_separator (also used when None is
        given). Syllable separator is considered only for the festival
        backend. Word separator is ignored by the 'espeak-mbrola' backend.
        Initialize it as follows:

        >>> from phonemizer.separator import Separator
        >>> separator = Separator(phone='-', word=' ')

    strip: bool, optional
        If True, don't output the last word and phone
        separators of a token, default to False.

    prepend_text: bool, optional
        When True, returns a pair (input utterance,
        phonemized utterance) for each line of the input text. When False,
        returns only the phonemized utterances. Default to False

    preserve_empty_lines: bool, optional
        When True, will keep the empty lines
        in the phonemized output. Default to False and remove all empty lines.

    preserve_punctuation: bool, optional
        When True, will keep the punctuation
        in the phonemized output. Not supported by the 'espeak-mbrola' backend.
        Default to False and remove all the punctuation.

    punctuation_marks: str or re.Pattern, optional
        The punctuation marks to consider when dealing with punctuation,
        either for removal or preservation. Can be defined as a string or
        regular expression. Default to Punctuation.default_marks().

    with_stress: bool, optional
        This option is only valid for the 'espeak'
        backend. When True the stresses on phonemes are present (stresses
        characters are ˈ'ˌ). When False stresses are removed. Default to False.

    tie: bool or char, optional
        This option is only valid for the 'espeak'
        backend with espeak>=1.49. It is incompatible with phone separator.
        When not False, use a tie character within multi-letter phoneme names.
        When True, the char 'U+361' is used (as in d͡ʒ), 'z' means ZWJ
        character, default to False.

    language_switch: str, optional
        Espeak can output some words in another
        language (typically English) when phonemizing a text. This option
        setups the policy to use when such a language switch occurs. Three
        values are available: 'keep-flags' (the default), 'remove-flags' or
        'remove-utterance'. The 'keep-flags' policy keeps the language
        switching flags, for example "(en) or (jp)", in the output. The
        'remove-flags' policy removes them and the 'remove-utterance' policy
        removes the whole line of text including a language switch. This
        option is only valid for the 'espeak' backend.

    words_mismatch: str, optional
        Espeak can join two consecutive words or
        drop some words, yielding a word count mismatch between orthographic
        and phonemized text. This option setups the policy to use when such a
        words count mismatch occurs. Three values are available: 'ignore' (the
        default) which does nothing, 'warn' which issues a warning for each
        mismatched line, and 'remove' which removes the mismatched lines from
        the output.

    njobs: int
        The number of parallel jobs to launch. The input text is split
        in ``njobs`` parts, phonemized on parallel instances of the backend
        and the outputs are finally collapsed.

    logger: logging.Logger
        the logging instance where to send messages. If
        not specified, use the default system logger.

    Returns
    -------
    phonemized text: str or list of str
        The input ``text`` phonemized for the
        given ``language`` and ``backend``. The returned value has the same
        type as the input text (either a list or a string), except if
        ``prepend_text`` is True where the output is forced as a list of
        pairs (input text, phonemized text).

    Raises
    ------
    RuntimeError
        if the ``backend`` is not valid or is valid but not installed,
        if the ``language`` is not supported by the ``backend``, if any
        incompatible options are used.

    """
    # ensure we are using a compatible Python version
    if sys.version_info < (3, 6):  # pragma: nocover
        # version_info holds integers: convert them before joining
        logger.error(
            'You are using python-%s which is unsupported by the phonemizer, '
            'please update to python>=3.6',
            '.'.join(str(part) for part in sys.version_info[:3]))

    # the separator is declared optional: fall back to the default one
    if separator is None:
        separator = default_separator

    # ensure the arguments are valid
    _check_arguments(
        backend, with_stress, tie, separator, language_switch, words_mismatch)

    # preserve_punctuation and word separator not valid for espeak-mbrola
    if backend == 'espeak-mbrola' and preserve_punctuation:
        logger.warning('espeak-mbrola backend cannot preserve punctuation')
    if backend == 'espeak-mbrola' and separator.word:
        logger.warning('espeak-mbrola backend cannot preserve word separation')

    # initialize the phonemization backend
    if backend == 'espeak':
        phonemizer = BACKENDS[backend](
            language,
            punctuation_marks=punctuation_marks,
            preserve_punctuation=preserve_punctuation,
            with_stress=with_stress,
            tie=tie,
            language_switch=language_switch,
            words_mismatch=words_mismatch,
            logger=logger)
    elif backend == 'espeak-mbrola':
        phonemizer = BACKENDS[backend](
            language,
            logger=logger)
    else:  # festival or segments
        phonemizer = BACKENDS[backend](
            language,
            punctuation_marks=punctuation_marks,
            preserve_punctuation=preserve_punctuation,
            logger=logger)

    # do the phonemization
    return _phonemize(
        phonemizer, text, separator, strip, njobs,
        prepend_text, preserve_empty_lines)
|
||||
|
||||
|
||||
def _check_arguments( # pylint: disable=too-many-arguments
|
||||
backend: Backend,
|
||||
with_stress: bool,
|
||||
tie: Union[bool, str],
|
||||
separator: Separator,
|
||||
language_switch: LanguageSwitch,
|
||||
words_mismatch: WordMismatch):
|
||||
"""Auxiliary function to phonemize()
|
||||
|
||||
Ensures the parameters are compatible with each other, raises a
|
||||
RuntimeError the first encountered error.
|
||||
|
||||
"""
|
||||
# ensure the backend is either espeak, festival or segments
|
||||
if backend not in ('espeak', 'espeak-mbrola', 'festival', 'segments'):
|
||||
raise RuntimeError(
|
||||
'{} is not a supported backend, choose in {}.'
|
||||
.format(backend, ', '.join(
|
||||
('espeak', 'espeak-mbrola', 'festival', 'segments'))))
|
||||
|
||||
# with_stress option only valid for espeak
|
||||
if with_stress and backend != 'espeak':
|
||||
raise RuntimeError(
|
||||
'the "with_stress" option is available for espeak backend only, '
|
||||
'but you are using {} backend'.format(backend))
|
||||
|
||||
# tie option only valid for espeak
|
||||
if tie and backend != 'espeak':
|
||||
raise RuntimeError(
|
||||
'the "tie" option is available for espeak backend only, '
|
||||
'but you are using {} backend'.format(backend))
|
||||
|
||||
# tie option incompatible with phone separator
|
||||
if tie and separator.phone:
|
||||
raise RuntimeError(
|
||||
'the "tie" option is incompatible with phone separator '
|
||||
f'(which is "{separator.phone}")')
|
||||
|
||||
# language_switch option only valid for espeak
|
||||
if language_switch != 'keep-flags' and backend != 'espeak':
|
||||
raise RuntimeError(
|
||||
'the "language_switch" option is available for espeak backend '
|
||||
'only, but you are using {} backend'.format(backend))
|
||||
|
||||
# words_mismatch option only valid for espeak
|
||||
if words_mismatch != 'ignore' and backend != 'espeak':
|
||||
raise RuntimeError(
|
||||
'the "words_mismatch" option is available for espeak backend '
|
||||
'only, but you are using {} backend'.format(backend))
|
||||
|
||||
|
||||
def _phonemize(  # pylint: disable=too-many-arguments
        backend: BaseBackend,
        text: Union[str, List[str]],
        separator: Separator,
        strip: bool,
        njobs: int,
        prepend_text: bool,
        preserve_empty_lines: bool):
    """Auxiliary function to phonemize()

    Runs the `backend` on the non-empty lines of `text` and formats the
    result: a list of (input line, phonemized line) pairs when `prepend_text`
    is True, else a str when `text` is a str, else a list of str.

    """
    # the output is a string only when the input is exactly a str
    output_as_str = type(text) is str

    # work on a list of lines, without their end of line characters
    lines = [line.strip(os.linesep) for line in str2list(text)]

    # positions of the blank lines, needed only when they must be restored
    blank_positions = []
    if preserve_empty_lines:
        blank_positions = [
            index for index, line in enumerate(lines) if not line.strip()]

    # the backend only sees the lines with some content
    lines = [line for line in lines if line.strip()]

    phonemized = []
    if lines:
        phonemized = backend.phonemize(
            lines, separator=separator, strip=strip, njobs=njobs)

    # put the blank lines back at their original position (in the input
    # lines as well when they are returned along the output)
    for index in blank_positions:
        if prepend_text:
            lines.insert(index, '')
        phonemized.insert(index, '')

    if prepend_text:
        return list(zip(lines, phonemized))
    return list2str(phonemized) if output_as_str else phonemized
|
||||
@@ -0,0 +1,220 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Implementation of punctuation processing"""
|
||||
|
||||
import collections
|
||||
import re
|
||||
from typing import List, Union, Tuple, Pattern
|
||||
|
||||
from phonemizer.utils import str2list
|
||||
from phonemizer.separator import Separator
|
||||
|
||||
# The punctuation marks considered by default.
_DEFAULT_MARKS = ';:,.!?¡¿—…"«»“”(){}[]'

# A punctuation mark found in a text: `index` is the line number, `mark` the
# matched text and `position` one of 'B' (begin of line), 'E' (end of line),
# 'I' (in the middle) or 'A' (alone on the line).
_MarkIndex = collections.namedtuple(
    '_mark_index', ['index', 'mark', 'position'])


class Punctuation:
    """Preserve or remove the punctuation during phonemization

    Backends behave differently with punctuation: festival and espeak ignore it
    and remove it silently whereas segments will raise an error. The
    Punctuation class solves that issue by "hiding" the punctuation to the
    phonemization backend and restoring it afterwards.

    Parameters
    ----------
    marks (str or re.Pattern) : The punctuation marks to consider for processing
        (either removal or preservation). If a string, each mark must be made of
        a single character. Default to Punctuation.default_marks().

    """

    def __init__(self, marks: Union[str, Pattern] = _DEFAULT_MARKS):
        self._marks: str = None  # noqa
        self._marks_re: Pattern[str] = None  # noqa
        self.marks = marks

    @staticmethod
    def default_marks():
        """Returns the default punctuation marks as a string"""
        return _DEFAULT_MARKS

    @property
    def marks(self):
        """The punctuation marks as a string

        Raises a ValueError if the marks have been defined from a regular
        expression.

        """
        if self._marks is not None:
            return self._marks
        raise ValueError('punctuation initialized from regex, cannot access marks as a string')

    @marks.setter
    def marks(self, value: Union[str, Pattern]):
        if isinstance(value, Pattern):
            # catch the pattern surrounded by zero or more spaces on either
            # side, keeping the flags the pattern was compiled with
            self._marks_re = re.compile(
                r'((' + value.pattern + r')|\s)+', value.flags)
            self._marks = None
        elif isinstance(value, str):
            self._marks = ''.join(set(value))

            if self._marks:
                # catching all the marks in one regular expression: zero or
                # more spaces + one or more marks + zero or more spaces.
                self._marks_re = re.compile(
                    fr'(\s*[{re.escape(self._marks)}]+\s*)+')
            else:
                # no mark at all: an empty character set is not a valid
                # regex, use a pattern that never matches instead
                self._marks_re = re.compile(r'(?!)')
        else:
            raise ValueError('punctuation marks must be defined as a string or re.Pattern')

    def remove(self, text: Union[str, List[str]]) -> Union[str, List[str]]:
        """Returns the `text` with all punctuation marks replaced by spaces

        The input `text` can be a string or a list and is returned with the
        same type and punctuation removed. Leading and trailing whitespace
        is stripped from each line.

        """
        def aux(line: str) -> str:
            return self._marks_re.sub(' ', line).strip()

        if isinstance(text, str):
            return aux(text)
        return [aux(line) for line in text]

    def preserve(self, text: Union[List[str], str]) -> Tuple[List[str], List[_MarkIndex]]:
        """Removes punctuation from `text`, allowing for further restoration

        This method returns the text as a list of punctuated chunks, along with
        a list of punctuation marks for further restoration:

            'hello, my world!' -> ['hello', 'my world'], [',', '!']

        The empty chunks are not returned.

        """
        text: List[str] = str2list(text)
        preserved_text = []
        preserved_marks = []

        for num, line in enumerate(text):
            chunks, marks = self._preserve_line(line, num)
            preserved_text.extend(chunks)
            preserved_marks.extend(marks)
        return [chunk for chunk in preserved_text if chunk], preserved_marks

    def _preserve_line(self, line: str, num: int) -> Tuple[List[str], List[_MarkIndex]]:
        """Auxiliary method for Punctuation.preserve()

        Returns the chunks of `line` found around the punctuation marks and
        the marks themselves, tagged with the line number `num`.

        """
        # empty matches (possible with some user defined regex) are no marks
        matches = [
            match for match in self._marks_re.finditer(line) if match.group()]
        if not matches:
            return [line], []

        # the line is made only of punctuation marks
        if len(matches) == 1 and matches[0].group() == line:
            return [], [_MarkIndex(num, line, 'A')]

        # build the list of mark indexes required to restore the punctuation
        # and split the line into sublines, each separated by a mark. The
        # split relies on the position of the matches, not on their text,
        # because a context dependent regex can match a mark whose text also
        # appears earlier in the line
        marks = []
        preserved_line = []
        cursor = 0
        for match in matches:
            # find the position of the punctuation mark in the utterance:
            # begin (B), end (E), in the middle (I) or alone (A)
            position = 'I'
            if match.start() == 0:
                position = 'B'
            elif match.end() == len(line):
                position = 'E'
            marks.append(_MarkIndex(num, match.group(), position))

            preserved_line.append(line[cursor:match.start()])
            cursor = match.end()

        # append any trailing text to the preserved line
        return preserved_line + [line[cursor:]], marks

    @classmethod
    def restore(cls, text: Union[str, List[str]],
                marks: List[_MarkIndex],
                sep: 'Separator',
                strip: bool) -> List[str]:
        """Restore punctuation in a text.

        This is the reverse operation of Punctuation.preserve(). It takes a
        list of punctuated chunks and a list of punctuation marks, as well as
        the separator and strip parameters used by phonemize. It returns the
        punctuated text as a list:

            ['hello', 'my world'], [',', '!'] -> ['hello, my world!']

        """
        # work on a copy: the chunks are modified in place below and the
        # list given by the caller must be left untouched
        text = list(str2list(text))
        punctuated_text = []
        pos = 0

        def closing(mark):
            # the separator ending a line closed by `mark`
            return '' if strip or mark.endswith(sep.word) else sep.word

        while text or marks:

            if not marks:
                for line in text:
                    # if strip is False, ensure the final word ends with a word separator
                    if not strip and sep.word and not line.endswith(sep.word):
                        line = line + sep.word
                    punctuated_text.append(line)
                text = []
            elif not text:
                # nothing has been phonemized, returns the marks alone, with internal
                # spaces replaced by the word separator (a literal replacement:
                # the separator must not be read as a regex template)
                punctuated_text.append(
                    ''.join(m.mark for m in marks).replace(' ', sep.word))
                marks = []

            else:
                current_mark = marks[0]
                if current_mark.index == pos:

                    # place the current mark here
                    marks = marks[1:]
                    # replace internal spaces in the current mark with the word separator
                    mark = current_mark.mark.replace(' ', sep.word)

                    # remove the word last separator from the current word
                    if sep.word and text[0].endswith(sep.word):
                        text[0] = text[0][:-len(sep.word)]

                    if current_mark.position == 'B':
                        text[0] = mark + text[0]
                    elif current_mark.position == 'E':
                        punctuated_text.append(text[0] + mark + closing(mark))
                        text = text[1:]
                        pos = pos + 1
                    elif current_mark.position == 'A':
                        punctuated_text.append(mark + closing(mark))
                        pos = pos + 1
                    else:
                        # position == 'I'
                        if len(text) == 1:  # pragma: nocover
                            # a corner case where the final part of an intermediate
                            # mark (I) has not been phonemized
                            text[0] = text[0] + mark
                        else:
                            # merge the two chunks surrounding the mark
                            first_word = text[0]
                            text = text[1:]
                            text[0] = first_word + mark + text[0]

                else:
                    # no mark on that line
                    punctuated_text.append(text[0])
                    text = text[1:]
                    pos = pos + 1

        return punctuated_text
|
||||
@@ -0,0 +1,118 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Provides the Separator tuple and its default value"""
|
||||
from typing import Optional, Union
|
||||
|
||||
|
||||
class Separator:
    """Defines phone, syllable and word boundary tokens

    Parameters
    ----------
    word: str, optional
        The token separating words, a single space by default.
    syllable: str, optional
        The token separating syllables. None or an empty string means no
        syllable separation.
    phone: str, optional
        The token separating phones. None or an empty string means no
        phone separation.

    Raises
    ------
    ValueError
        if two of the non-empty separators are identical

    """

    def __init__(self, word: str = ' ',
                 syllable: Optional[str] = None,
                 phone: Optional[str] = None):
        # check we have different separators, None and empty ones excluded
        defined = [sep for sep in (phone, syllable, word) if sep]
        if len(defined) != len(set(defined)):
            # the arguments follow the order of the placeholders in the
            # message: word, syllable, phone
            raise ValueError(
                'illegal separator with word="{}", syllable="{}" and '
                'phone="{}", must be all differents if not empty'
                .format(word, syllable, phone))

        # undefined separators are stored as empty strings
        self._phone = str(phone) if phone else ''
        self._syllable = str(syllable) if syllable else ''
        self._word = str(word) if word else ''

    def __eq__(self, other: object):
        # let Python fall back to its default comparison (i.e. "not equal")
        # instead of failing on objects which are not separators
        if not isinstance(other, Separator):
            return NotImplemented
        return (
            self.phone == other.phone
            and self.syllable == other.syllable
            and self.word == other.word)

    def __hash__(self):
        # defined along with __eq__ so that separators remain hashable
        return hash((self.phone, self.syllable, self.word))

    def __str__(self):
        return (
            f'(phone: "{self.phone}", '
            f'syllable: "{self.syllable}", '
            f'word: "{self.word}")')

    @property
    def phone(self):
        """Phones separator"""
        return self._phone

    @property
    def syllable(self):
        """Syllables separator"""
        return self._syllable

    @property
    def word(self):
        """Words separator"""
        return self._word

    def __contains__(self, value: str):
        """Returns True if the separator has `value` as token separation"""
        return value in {self.phone, self.syllable, self.word}

    def input_output_separator(self, field_separator: Union[str, bool]) \
            -> Union[str, bool]:
        """Returns a suitable input/output separator based on token separator

        The input/output separator split orthographic and phonetic texts when
        using the --prepend-text option from command-line.

        Parameters
        ----------
        field_separator: bool or str
            If str, ensures its value is not already defined as a token
            separator. If True choose one of "|", "||", "|||", "||||" (the
            first one that is not defined as a token separator)

        Returns
        -------
        The input/output separator, or False if ``field_separator`` is False
        (or any other falsy value, such as an empty string)

        Raises
        ------
        RuntimeError
            if ``field_separator`` is a str but is already registered as token
            separator, or if it is neither a bool nor a str

        """
        if not field_separator:
            return False

        if isinstance(field_separator, str):
            if field_separator in self:
                raise RuntimeError(
                    f'cannot prepend input with "{field_separator}" because '
                    f'it is already a token separator: {self}')
            return field_separator

        if field_separator is True:
            # grow the candidate until it differs from all token separators
            field_separator = '|'
            while field_separator in self:
                field_separator += '|'
            return field_separator

        # not a bool nor a str
        raise RuntimeError(
            'invalid input/output separator, must be bool or str but is '
            f'{field_separator}')
|
||||
|
||||
|
||||
# words are split on a single space, syllables and phones are not separated
default_separator = Separator(word=' ', syllable='', phone='')
"""The default separation characters for phonemes, syllables and words"""
|
||||
+30
@@ -0,0 +1,30 @@
|
||||
;; Copyright 2015-2021 Mathieu Bernard
|
||||
;;
|
||||
;; This file is part of phonemizer: you can redistribute it and/or
|
||||
;; modify it under the terms of the GNU General Public License as
|
||||
;; published by the Free Software Foundation, either version 3 of the
|
||||
;; License, or (at your option) any later version.
|
||||
;;
|
||||
;; Phonemizer is distributed in the hope that it will be useful, but
|
||||
;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;; General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
;; This script is executed by festival for English text phonemization.
|
||||
(define (phonemize line)
  "(phonemize LINE)
Extract the phonemes of the string LINE as a tree and write it to stdout."
  ;; Build the form (Utterance Text LINE) at run time and evaluate it. The
  ;; result is assigned with set! to the variable `utterance', which is not
  ;; local to this function.
  (set! utterance (eval (list 'Utterance 'Text line)))
  ;; Run the synthesis on the utterance (presumably this is what fills the
  ;; "SylStructure" relation printed below -- to be confirmed against the
  ;; Festival documentation).
  (utt.synth utterance)
  ;; Use of print instead of pprintf to have each utterance on one line
  (print (utt.relation_tree utterance "SylStructure")))
|
||||
|
||||
;; This double bracket has to be replaced by the name of the text file
|
||||
;; you want to read data from. To be parsed by festival as a unique
|
||||
;; utterance, each line of that file must begin and end with
|
||||
;; double-quotes.
|
||||
;; "{}" is a placeholder for the input file name, it is not a real path.
;; NOTE(review): the trailing `t' presumably makes `load' return the forms
;; read from the file without evaluating them -- confirm against the
;; Festival/SIOD documentation.
(set! lines (load "{}" t))
;; Call phonemize on each loaded element, one after the other.
(mapcar (lambda (line) (phonemize line)) lines)
|
||||
@@ -0,0 +1,27 @@
|
||||
a ʌ
|
||||
â aː
|
||||
b b
|
||||
ch tʃ
|
||||
d d
|
||||
e eː
|
||||
f f
|
||||
g g
|
||||
h h
|
||||
i ɪ
|
||||
î iː
|
||||
j dʒ
|
||||
k k
|
||||
kw kʷ
|
||||
l l
|
||||
m m
|
||||
n n
|
||||
o ʊ
|
||||
p p
|
||||
s s
|
||||
sh ʃ
|
||||
t t
|
||||
th θ
|
||||
u ʊ
|
||||
û o
|
||||
w w
|
||||
y j
|
||||
@@ -0,0 +1,27 @@
|
||||
a ʌ
|
||||
â aː
|
||||
b b
|
||||
ch tʃ
|
||||
d d
|
||||
e eː
|
||||
f f
|
||||
g g
|
||||
h h
|
||||
i ɪ
|
||||
î iː
|
||||
j dʒ
|
||||
k k
|
||||
kw kʷ
|
||||
l l
|
||||
m m
|
||||
n n
|
||||
o ʊ
|
||||
p p
|
||||
s s
|
||||
sh ʃ
|
||||
t t
|
||||
th θ
|
||||
u ʊ
|
||||
û o
|
||||
w w
|
||||
y j
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
a a
|
||||
g g
|
||||
h h
|
||||
i i
|
||||
j j
|
||||
k k
|
||||
l l
|
||||
ll ɬ
|
||||
m m
|
||||
n n
|
||||
ng ŋ
|
||||
nng ŋŋ
|
||||
p p
|
||||
q q
|
||||
r ʁ
|
||||
rng ɴ
|
||||
s s
|
||||
t t
|
||||
u u
|
||||
v v
|
||||
@@ -0,0 +1,35 @@
|
||||
a a
|
||||
aa aː
|
||||
b b
|
||||
by bʲ
|
||||
ch tʃ
|
||||
d d
|
||||
e e
|
||||
ee eː
|
||||
f ɸ
|
||||
g g
|
||||
gy gʲ
|
||||
h h
|
||||
hy ç
|
||||
i i
|
||||
j dʒ
|
||||
k k
|
||||
ky kʲ
|
||||
m m
|
||||
my mʲ
|
||||
n n
|
||||
ny ɲ
|
||||
o o
|
||||
oo oː
|
||||
p p
|
||||
py pʲ
|
||||
r r
|
||||
ry rʲ
|
||||
sh ʃ
|
||||
t t
|
||||
ts t͡s
|
||||
u ɯ
|
||||
uu ɯː
|
||||
w w
|
||||
y j
|
||||
z z
|
||||
@@ -0,0 +1,38 @@
|
||||
a a
|
||||
b b
|
||||
ch tʃʰ
|
||||
d d
|
||||
e e
|
||||
f f
|
||||
g χ
|
||||
h h
|
||||
hl ɬ
|
||||
i i
|
||||
j dʒ
|
||||
k k
|
||||
kg kx
|
||||
kh kʰ
|
||||
l l
|
||||
m m
|
||||
n n
|
||||
ng ŋ
|
||||
nq ǃ̃
|
||||
ny ɲ
|
||||
o o
|
||||
p t
|
||||
ph pʰ
|
||||
q ǃ
|
||||
qh ǃʰ
|
||||
r r
|
||||
s s
|
||||
sh ʃ
|
||||
t t
|
||||
th tʰ
|
||||
tj tʃ
|
||||
tl tɬ
|
||||
tlh tɬʰ
|
||||
ts t͡s
|
||||
tsh t͡sʰ
|
||||
u u
|
||||
w w
|
||||
y j
|
||||
@@ -0,0 +1,45 @@
|
||||
a a
|
||||
aa aː
|
||||
aʼ a̰
|
||||
aʼa a̰ː
|
||||
b b
|
||||
ch t̠͡ʃ
|
||||
chʼ t̠͡ʃʼ
|
||||
e e
|
||||
ee eː
|
||||
eʼ ḛ
|
||||
eʼe ḛː
|
||||
f f
|
||||
h h
|
||||
i i
|
||||
ii iː
|
||||
iʼ ḭ
|
||||
iʼi ḭː
|
||||
j x
|
||||
k k
|
||||
kʼ kʼ
|
||||
l l
|
||||
m m
|
||||
n n
|
||||
ñ n
|
||||
o o
|
||||
oo oː
|
||||
oʼ o̰
|
||||
oʼo o̰ː
|
||||
p pʼ
|
||||
pʼ pʼ
|
||||
qu k
|
||||
r r
|
||||
s s
|
||||
x ʃ
|
||||
t t
|
||||
ts t͡s
|
||||
tsʼ t͡sʼ
|
||||
tʼ tʼ
|
||||
u u
|
||||
uu uː
|
||||
uʼ ṵ
|
||||
uʼu ṵː
|
||||
w w
|
||||
y j
|
||||
z s
|
||||
@@ -0,0 +1,131 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Provides utility functions for the phonemizer"""
|
||||
|
||||
import os
|
||||
from numbers import Number
|
||||
from pathlib import Path
|
||||
from typing import Union, List, Tuple, Iterable
|
||||
|
||||
import importlib
|
||||
|
||||
|
||||
def cumsum(iterable: Iterable[Number]) -> List[Number]:
    """Returns the running totals of `iterable` as a list

    The i-th element of the result is the sum of the i+1 first values of
    `iterable`. An empty iterable gives an empty list.

    """
    running = 0
    return [running := running + term for term in iterable]
|
||||
|
||||
|
||||
def str2list(text: Union[str, List[str]]) -> List[str]:
    """Returns `text` as a list of lines

    A string is stripped from its leading and trailing line separators, then
    split on the platform line separator (`os.linesep`). Anything which is
    not a string is returned unchanged.

    """
    if not isinstance(text, str):
        return text

    trimmed = text.strip(os.linesep)
    return trimmed.split(os.linesep)
|
||||
|
||||
|
||||
def list2str(text: Union[str, List[str]]) -> str:
    """Returns `text` as a single string

    A string is returned unchanged, whereas the elements of a list of lines
    are joined with the platform line separator (`os.linesep`).

    """
    return text if isinstance(text, str) else os.linesep.join(text)
|
||||
|
||||
|
||||
def chunks(text: Union[str, List[str]], num: int) \
        -> Tuple[List[List[str]], List[int]]:
    """Splits a `text` in a maximum of `num` chunks of lines

    This method is useful when phonemizing a single text on multiple jobs.

    The input `text` can be a list of lines or a string, it is converted to
    a list of lines with `str2list`. The exact number of chunks returned is
    `m = min(num, number of lines)`. Only the m-1 first chunks have equal
    size, the last chunk takes all the remaining lines and can be longer.

    Parameters
    ----------
    text (str or list) : The text to divide in chunks

    num (int) : The number of chunks to build, must be a strictly positive
      integer.

    Returns
    -------
    chunks (list of list of str) : The chunked text, each chunk being a list
      of lines.

    offsets (list of int) : For each chunk, the index of its first line in
      the input text. Used to recover the line numbers in the input text wrt
      the chunks.

    """
    lines = str2list(text)
    nlines = len(lines)

    # at least one line per chunk, and never more chunks than lines
    size = int(max(1, nlines / num))
    nchunks = min(num, nlines)

    # the nchunks - 1 first chunks, all made of `size` lines
    text_chunks = []
    for index in range(nchunks - 1):
        start = index * size
        text_chunks.append(lines[start:start + size])

    # the last chunk takes all the remaining lines
    remainder = lines[(nchunks - 1) * size:]
    if remainder:
        text_chunks.append(remainder)

    # a chunk starts where the previous ones end, the first one starts at 0
    offsets = [0]
    offsets.extend(cumsum(len(chunk) for chunk in text_chunks[:-1]))
    return text_chunks, offsets
|
||||
|
||||
|
||||
def get_package_resource(path: str) -> Path:
    """Returns the absolute path to a phonemizer resource file or directory

    The packages resource are stored within the source tree in the
    'phonemizer/share' directory and, once the package is installed, are moved
    to another system directory (e.g. /share/phonemizer).

    Parameters
    ----------
    path (str) : the file or directory to get, must be relative to
      'phonemizer/share'.

    Raises
    ------
    ValueError if the required `path` is not found

    Returns
    -------
    The absolute path to the required resource as a `pathlib.Path`

    """
    # `import importlib` alone does not guarantee that the `resources`
    # submodule is loaded, so it is imported explicitly
    # pylint: disable=import-outside-toplevel
    from importlib import resources

    # check for the feature instead of catching AttributeError, so as not to
    # hide an unrelated AttributeError raised during the lookup
    files = getattr(resources, 'files', None)
    if files is not None:
        # new in python-3.9
        resource = files('phonemizer') / 'share' / path
    else:  # pragma: nocover
        with resources.path('phonemizer', 'share') as share:
            resource = share / path

    if not resource.exists():  # pragma: nocover
        raise ValueError(
            f'the requested resource does not exist: {resource}')

    return resource.resolve()
|
||||
|
||||
|
||||
def version_as_tuple(version: str) -> Tuple[int, ...]:
    """Converts a version string to a tuple of integers

    Any '-dev' in the version string is dropped before the conversion. For
    instance '1.2.3' gives (1, 2, 3) and '0.2-dev' gives (0, 2).

    """
    fields = version.replace('-dev', '').split('.')
    return tuple(map(int, fields))
|
||||
@@ -0,0 +1,67 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Phonemizer version description"""
|
||||
|
||||
import importlib
|
||||
|
||||
from phonemizer.backend import (
|
||||
EspeakBackend, EspeakMbrolaBackend, FestivalBackend, SegmentsBackend)
|
||||
|
||||
|
||||
def _version_as_str(vers):
|
||||
"""From (1, 49, 3) to '1.49.3'"""
|
||||
return '.'.join(str(v) for v in vers)
|
||||
|
||||
|
||||
def version():
    """Return version information for front and backends

    Returns
    -------
    A string whose first line is 'phonemizer-<version>'. It is followed by
    a line listing the available backends (with their version when the
    backend provides one) and a line listing the uninstalled backends. Each
    of those two lines is present only if its list is not empty.

    """
    # `import importlib` alone does not guarantee that the `metadata`
    # submodule is loaded, so it is imported explicitly
    # pylint: disable=import-outside-toplevel
    from importlib import metadata

    # version of the phonemizer
    _version = 'phonemizer-' + metadata.version('phonemizer')

    # for each backend, check if it is available or not. If so get its version
    available = []
    unavailable = []

    if EspeakBackend.is_available():
        available.append(
            'espeak-' + ('ng-' if EspeakBackend.is_espeak_ng() else '')
            + _version_as_str(EspeakBackend.version()))
    else:  # pragma: nocover
        unavailable.append('espeak')

    # no version is reported for the espeak-mbrola backend
    if EspeakMbrolaBackend.is_available():
        available.append('espeak-mbrola')
    else:  # pragma: nocover
        unavailable.append('espeak-mbrola')

    if FestivalBackend.is_available():
        available.append(
            'festival-' + _version_as_str(FestivalBackend.version()))
    else:  # pragma: nocover
        unavailable.append('festival')

    if SegmentsBackend.is_available():
        available.append(
            'segments-' + _version_as_str(SegmentsBackend.version()))
    else:  # pragma: nocover
        unavailable.append('segments')

    # summarizes the backends status in the final version string
    if available:
        _version += '\navailable backends: ' + ', '.join(available)
    if unavailable:  # pragma: nocover
        _version += '\nuninstalled backends: ' + ', '.join(unavailable)

    return _version
|
||||
Reference in New Issue
Block a user