2025-12-01

This commit is contained in:
2026-03-17 14:58:51 -06:00
parent 183e865f8b
commit 4b82b57113
6846 changed files with 954887 additions and 162606 deletions
@@ -0,0 +1,27 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Multilingual text to phonemes converter"""
# pylint: disable=unused-import
from .espeak.espeak import EspeakBackend
from .espeak.mbrola import EspeakMbrolaBackend
from .festival.festival import FestivalBackend
from .segments import SegmentsBackend
# Registry of the available backends: each key is the value returned by the
# backend class' own ``name()`` method, each value is the class itself.
BACKENDS = {
    backend_class.name(): backend_class
    for backend_class in (
        EspeakBackend,
        FestivalBackend,
        SegmentsBackend,
        EspeakMbrolaBackend,
    )
}
"""The different phonemization backends as a mapping (name, class)"""
@@ -0,0 +1,255 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Abstract base class for phonemization backends"""
import abc
import itertools
import re
from logging import Logger
from typing import Optional, List, Any, Dict, Tuple, Union, Pattern
import joblib
from phonemizer.logger import get_logger
from phonemizer.punctuation import Punctuation
from phonemizer.separator import Separator, default_separator
from phonemizer.utils import chunks
class BaseBackend(abc.ABC):
    """Abstract base class of all the phonemization backends

    Defines the interface shared by every backend. The main entry point is
    the `phonemize()` method.

    Parameters
    ----------
    language: str
        The language code of the input text, must be supported by the
        backend. If ``backend`` is 'segments', the language can be a file
        with a grapheme to phoneme mapping.

    preserve_punctuation: bool
        When True, will keep the punctuation in the phonemized output. Not
        supported by the 'espeak-mbrola' backend. Default to False and remove
        all the punctuation.

    punctuation_marks: str
        The punctuation marks to consider when dealing with punctuation,
        either for removal or preservation. Can be defined as a string or
        regular expression. Default to Punctuation.default_marks().

    logger: logging.Logger
        The logging instance where to send messages. If not specified, use
        the one returned by phonemizer.logger.get_logger().

    Raises
    ------
    RuntimeError
        if the backend is not available or if the `language` cannot be
        initialized.

    """
    def __init__(self, language: str,
                 punctuation_marks: Optional[Union[str, Pattern]] = None,
                 preserve_punctuation: bool = False,
                 logger: Optional[Logger] = None):
        # fall back to the package defaults for the unspecified arguments
        marks = (
            Punctuation.default_marks()
            if punctuation_marks is None else punctuation_marks)
        log = get_logger() if logger is None else logger

        # nothing can be done if the backend is missing from the system
        if not self.is_available():
            raise RuntimeError(  # pragma: nocover
                f'{self.name()} not installed on your system')

        self._logger = log
        backend_name = self.name()
        backend_version = '.'.join(str(part) for part in self.version())
        self._logger.info(
            'initializing backend %s-%s', backend_name, backend_version)

        # raises if the requested language is not supported by the backend
        self._language = self._init_language(language)

        # punctuation processing setup
        self._preserve_punctuation = preserve_punctuation
        self._punctuator = Punctuation(marks)

    @classmethod
    def _init_language(cls, language):
        """Checks `language` is supported by the backend and returns it

        This method may be overloaded in child classes (see Segments backend)

        Raises a RuntimeError if the `language` is not supported.

        """
        if cls.is_supported_language(language):
            return language

        raise RuntimeError(
            f'language "{language}" is not supported by the '
            f'{cls.name()} backend')

    @property
    def logger(self):
        """The logging.Logger instance messages are sent to"""
        return self._logger

    @property
    def language(self):
        """The language code in use for phonemization"""
        return self._language

    @staticmethod
    @abc.abstractmethod
    def name():
        """The name of the backend"""

    @classmethod
    @abc.abstractmethod
    def is_available(cls):
        """Returns True if the backend is installed, False otherwise"""

    @classmethod
    @abc.abstractmethod
    def version(cls):
        """Return the backend version as a tuple (major, minor, patch)"""

    @staticmethod
    @abc.abstractmethod
    def supported_languages() -> Dict[str, str]:
        """Return a dict of language codes -> name supported by the backend"""

    @classmethod
    def is_supported_language(cls, language: str):
        """Returns True if `language` is supported by the backend"""
        known_languages = cls.supported_languages()
        return language in known_languages

    def phonemize(self, text: List[str],
                  separator: Optional[Separator] = None,
                  strip: bool = False,
                  njobs: int = 1) -> List[str]:
        """Returns the `text` phonemized for the given language

        Parameters
        ----------
        text: list of str
            The text to be phonemized. Each string in the list is considered
            as a separated line. Each line is considered as a text utterance.
            Any empty utterance will be ignored.

        separator: Separator
            string separators between phonemes, syllables and words, default
            to separator.default_separator. Syllable separator is considered
            only for the festival backend. Word separator is ignored by the
            'espeak-mbrola' backend.

        strip: bool
            If True, don't output the last word and phone separators of a
            token, default to False.

        njobs : int
            The number of parallel jobs to launch. The input text is split in
            ``njobs`` parts, phonemized on parallel instances of the backend
            and the outputs are finally collapsed.

        Returns
        -------
        phonemized text: list of str
            The input ``text`` phonemized for the given ``language`` and
            ``backend``.

        Raises
        ------
        RuntimeError
            if something went wrong during the phonemization

        """
        # a bare str is rejected since phonemizer-3.0, tell the user
        if isinstance(text, str):
            raise RuntimeError(
                'input text to phonemize() is str but it must be list of str')

        separator = default_separator if separator is None else separator

        text, punctuation_marks = self._phonemize_preprocess(text)

        if njobs != 1:
            self.logger.info('running %s on %s jobs', self.name(), njobs)

            parallel = joblib.Parallel(n_jobs=njobs)
            jobs = (
                joblib.delayed(self._phonemize_aux)(
                    # chunk[0] is the text, chunk[1] is the offset
                    chunk[0], chunk[1], separator, strip)
                for chunk in zip(*chunks(text, njobs)))

            # one output per chunk, merged back into a single result
            phonemized = self._flatten(parallel(jobs))
        else:
            # single job: the whole text is processed at once, no offset
            phonemized = self._phonemize_aux(text, 0, separator, strip)

        return self._phonemize_postprocess(
            phonemized, punctuation_marks, separator, strip)

    @staticmethod
    def _flatten(phonemized: List[List[Any]]):
        """Flatten a list of lists into a single one

        From [[1, 2], [3], [4]] returns [1, 2, 3, 4]. This method is used to
        format the output as obtained using multiple jobs.

        """
        return list(itertools.chain.from_iterable(phonemized))

    @abc.abstractmethod
    def _phonemize_aux(self, text: List[str], offset: int, separator: Separator, strip: bool) -> List[str]:
        """The "concrete" phonemization method

        Must be implemented in child classes. `separator` and `strip`
        parameters are as given to the phonemize() method. `text` is as
        returned by _phonemize_preprocess(). `offset` is line number of the
        first line in `text` with respect to the original text (this is only
        useful when running on chunks in multiple jobs. When using a single
        job the offset is 0).

        """

    def _phonemize_preprocess(self, text: List[str]) -> Tuple[Union[str, List[str]], List]:
        """Preprocess the text before phonemization

        Removes the punctuation (keep trace of punctuation marks for further
        restoration if required by the `preserve_punctuation` option).

        """
        if not self._preserve_punctuation:
            # nothing to restore afterwards, hence the empty list of marks
            return self._punctuator.remove(text), []

        # a tuple (text, punctuation marks)
        return self._punctuator.preserve(text)

    def _phonemize_postprocess(self, phonemized: List[str],
                               punctuation_marks,
                               separator: Separator,
                               strip: bool):
        """Postprocess the raw phonemized output

        Restores the punctuation as needed.

        """
        if not self._preserve_punctuation:
            return phonemized

        return self._punctuator.restore(
            phonemized, punctuation_marks, separator, strip)
@@ -0,0 +1,15 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Phonemizer module for espeak backend implementation"""
@@ -0,0 +1,275 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Low-level bindings to the espeak API"""
import atexit
import ctypes
import pathlib
import shutil
import sys
import tempfile
import weakref
from ctypes import CDLL
from pathlib import Path
from typing import Union
from phonemizer.backend.espeak.voice import EspeakVoice
if sys.platform != 'win32':
# cause a crash on Windows
import dlinfo
class EspeakAPI:
    """Exposes the espeak API to the EspeakWrapper

    This class exposes only low-level bindings to the API and should not be
    used directly.

    Parameters
    ----------
    library (str or Path) : the espeak shared library to bind, as accepted
      by ``ctypes.cdll.LoadLibrary``

    Raises
    ------
    RuntimeError if the library cannot be loaded, if its path cannot be
      retrieved or if espeak fails to initialize

    """
    def __init__(self, library: Union[str, Path]):
        # set to None to avoid an AttributeError in _delete if the __init__
        # method raises, will be properly initialized below
        self._library = None

        # Because the library is not designed to be wrapped nor to be used in
        # multithreaded/multiprocess contexts (massive use of global variables)
        # we need a copy of the original library for each instance of the
        # wrapper... (see "man dlopen" on Linux/MacOS: we cannot load two times
        # the same library because a reference is then returned by dlopen). The
        # tweak is therefore to make a copy of the original library in a
        # different (temporary) directory.
        try:
            # load the original library only to retrieve its full path.
            # Forced as str as it is required on Windows.
            espeak: CDLL = ctypes.cdll.LoadLibrary(str(library))
            library_path = self._shared_library_path(espeak)
            del espeak
        except OSError as error:
            raise RuntimeError(
                f'failed to load espeak library: {str(error)}') from None

        # removed by _delete() once the instance is finalized
        self._tempdir = tempfile.mkdtemp()

        # properly exit when the wrapper object is destroyed (see
        # https://docs.python.org/3/library/weakref.html#comparing-finalizers-with-del-methods).
        # But... weakref implementation does not work on windows so we register
        # the cleanup with atexit. This means that, on Windows, all the
        # temporary directories created by EspeakAPI instances will remain on
        # disk until the Python process exit.
        if sys.platform == 'win32':  # pragma: nocover
            atexit.register(self._delete_win32)
        else:
            # NOTE(review): self._library is still None here, so the
            # finalizer is registered with library=None: _delete() then skips
            # espeak_Terminate() and only removes the temporary directory.
            # Confirm this is intended before changing it.
            weakref.finalize(self, self._delete, self._library, self._tempdir)

        espeak_copy = pathlib.Path(self._tempdir) / library_path.name
        shutil.copy(library_path, espeak_copy, follow_symlinks=False)

        # finally load the library copy and initialize it. 0x02 is
        # AUDIO_OUTPUT_SYNCHRONOUS in the espeak API
        self._library = ctypes.cdll.LoadLibrary(str(espeak_copy))
        try:
            if self._library.espeak_Initialize(0x02, 0, None, 0) <= 0:
                raise RuntimeError(  # pragma: nocover
                    'failed to initialize espeak shared library')
        except AttributeError:  # pragma: nocover
            # the loaded library does not export espeak_Initialize
            raise RuntimeError(
                'failed to load espeak library') from None

        # the path to the original one (the copy is considered an
        # implementation detail and is not exposed)
        self._library_path = library_path

    def _delete_win32(self):  # pragma: nocover
        # Windows does not support static methods with ctypes libraries
        # (library == None) so we use a proxy method...
        self._delete(self._library, self._tempdir)

    @staticmethod
    def _delete(library, tempdir):
        """Terminates espeak (when loaded) and removes `tempdir`

        Parameters
        ----------
        library : the loaded espeak library, or None if it has not been loaded
        tempdir (str) : the temporary directory hosting the library copy

        """
        try:
            # clean up the espeak library allocated memory
            library.espeak_Terminate()
        except AttributeError:  # library not loaded
            pass

        # on Windows it is required to unload the library or the .dll file
        # cannot be erased from the temporary directory
        if sys.platform == 'win32':  # pragma: nocover
            # pylint: disable=import-outside-toplevel
            # pylint: disable=protected-access
            # pylint: disable=no-member
            import _ctypes
            _ctypes.FreeLibrary(library._handle)

        # clean up the tempdir containing the copy of the library
        shutil.rmtree(tempdir)

    @property
    def library_path(self):
        """Absolute path to the espeak library being in use"""
        return self._library_path

    @staticmethod
    def _shared_library_path(library) -> Path:
        """Returns the absolute path to `library`

        This function is cross-platform and works for Linux, MacOS and Windows.
        Raises a RuntimeError if the library path cannot be retrieved

        """
        # pylint: disable=protected-access
        path = pathlib.Path(library._name).resolve()
        if path.is_file():
            return path

        try:
            # Linux or MacOS only: dlinfo is not imported on Windows, which
            # makes this line raise a NameError there
            return pathlib.Path(dlinfo.DLInfo(library).path).resolve()
        except Exception:  # pragma: nocover
            raise RuntimeError(
                f'failed to retrieve the path to {library} library') from None

    def info(self):
        """Bindings to espeak_Info

        Returns
        -------
        version, data_path: encoded strings containing the espeak version
            number and data path respectively

        """
        f_info = self._library.espeak_Info
        f_info.restype = ctypes.c_char_p
        # filled by espeak_Info through the reference given as argument
        data_path = ctypes.c_char_p()
        version = f_info(ctypes.byref(data_path))
        return version, data_path.value

    def list_voices(self, name):
        """Bindings to espeak_ListVoices

        Parameters
        ----------
        name (pointer to EspeakVoice.VoiceStruct or None): if specified, a
            filter on voices to be listed

        Returns
        -------
        voices: a pointer to pointers of EspeakVoice.VoiceStruct instances

        """
        f_list_voices = self._library.espeak_ListVoices
        f_list_voices.argtypes = [ctypes.POINTER(EspeakVoice.VoiceStruct)]
        f_list_voices.restype = ctypes.POINTER(
            ctypes.POINTER(EspeakVoice.VoiceStruct))
        return f_list_voices(name)

    def set_voice_by_name(self, name) -> int:
        """Bindings to espeak_SetVoiceByName

        Parameters
        ----------
        name (encoded str) : the voice name to setup, passed to espeak as a
            pointer to chars

        Returns
        -------
        0 on success, non-zero integer on failure

        """
        f_set_voice_by_name = self._library.espeak_SetVoiceByName
        f_set_voice_by_name.argtypes = [ctypes.c_char_p]
        return f_set_voice_by_name(name)

    def get_current_voice(self):
        """Bindings to espeak_GetCurrentVoice

        Returns
        -------
        the EspeakVoice.VoiceStruct instance pointed to by the value espeak
        returns. This method never returns None by itself: ctypes raises a
        ValueError when dereferencing a NULL pointer.

        """
        f_get_current_voice = self._library.espeak_GetCurrentVoice
        f_get_current_voice.restype = ctypes.POINTER(EspeakVoice.VoiceStruct)
        return f_get_current_voice().contents

    def text_to_phonemes(self, text_ptr, text_mode, phonemes_mode):
        """Bindings to espeak_TextToPhonemes

        Parameters
        ----------
        text_ptr (pointer): the text to be phonemized, as a pointer to a
            pointer of chars
        text_mode (bits field): see espeak sources for details
        phonemes_mode (bits field): see espeak sources for details

        Returns
        -------
        an encoded string containing the computed phonemes

        """
        f_text_to_phonemes = self._library.espeak_TextToPhonemes
        f_text_to_phonemes.restype = ctypes.c_char_p
        f_text_to_phonemes.argtypes = [
            ctypes.POINTER(ctypes.c_char_p),
            ctypes.c_int,
            ctypes.c_int]
        return f_text_to_phonemes(text_ptr, text_mode, phonemes_mode)

    def set_phoneme_trace(self, mode, file_pointer):
        """Bindings on espeak_SetPhonemeTrace

        This method must be called before any call to synthetize()

        Parameters
        ----------
        mode (bits field): see espeak sources for details
        file_pointer (FILE*): a pointer to an opened file in which to output
            the phoneme trace

        """
        f_set_phoneme_trace = self._library.espeak_SetPhonemeTrace
        f_set_phoneme_trace.argtypes = [
            ctypes.c_int,
            ctypes.c_void_p]
        f_set_phoneme_trace(mode, file_pointer)

    def synthetize(self, text_ptr, size, mode):
        """Bindings on espeak_Synth

        The output phonemes are sent to the file specified by a call to
        set_phoneme_trace().

        Parameters
        ----------
        text_ptr (pointer) : a pointer to chars
        size (int) : number of chars in the text
        mode (bits field) : the espeak_Synth flags, see espeak sources for
            details

        Returns
        -------
        0 on success, non-zero integer on failure

        """
        f_synthetize = self._library.espeak_Synth
        # espeak_Synth takes 8 parameters: one type per argument given below,
        # so that `mode` is converted as the unsigned int `flags` and not as
        # the `unique_identifier` pointer that follows it
        f_synthetize.argtypes = [
            ctypes.c_void_p,  # text
            ctypes.c_size_t,  # size
            ctypes.c_uint,  # position
            ctypes.c_int,  # position_type
            ctypes.c_uint,  # end_position
            ctypes.c_uint,  # flags
            ctypes.POINTER(ctypes.c_uint),  # unique_identifier
            ctypes.c_void_p]  # user_data
        return f_synthetize(text_ptr, size, 0, 1, 0, mode, None, None)
@@ -0,0 +1,113 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Base class of espeak backends for the phonemizer"""
import abc
from logging import Logger
from typing import Optional, Union, Pattern
from phonemizer.backend.base import BaseBackend
from phonemizer.backend.espeak.wrapper import EspeakWrapper
from phonemizer.logger import get_logger
from phonemizer.punctuation import Punctuation
from phonemizer.separator import Separator
class BaseEspeakBackend(BaseBackend):
    """Abstract espeak backend for the phonemizer

    Parent class of the concrete Espeak and EspeakMbrola backends. It gives
    access to the espeak library in use and to the espeak version.

    """
    def __init__(self, language: str,
                 punctuation_marks: Optional[Union[str, Pattern]] = None,
                 preserve_punctuation: bool = False,
                 logger: Optional[Logger] = None):
        super().__init__(
            language,
            punctuation_marks=punctuation_marks,
            preserve_punctuation=preserve_punctuation,
            logger=logger)

        # each backend instance owns its own wrapper on the espeak library
        wrapper = EspeakWrapper()
        self._espeak = wrapper
        self.logger.debug('loaded %s', wrapper.library_path)

    @classmethod
    def set_library(cls, library):
        """Sets the espeak backend to use `library`

        If this is not set, the backend uses the default espeak shared library
        from the system installation.

        Parameters
        ----------
        library (str or None) : the path to the espeak shared library to use as
          backend. Set `library` to None to restore the default.

        """
        EspeakWrapper.set_library(library)

    @classmethod
    def library(cls):
        """Returns the espeak library used as backend

        The following precedence rule applies for library lookup:

        1. As specified by BaseEspeakBackend.set_library()
        2. Or as specified by the environment variable
           PHONEMIZER_ESPEAK_LIBRARY
        3. Or the default espeak library found on the system

        Raises
        ------
        RuntimeError if the espeak library cannot be found or if the
          environment variable PHONEMIZER_ESPEAK_LIBRARY is set to a
          non-readable file

        """
        return EspeakWrapper.library()

    @classmethod
    def is_available(cls) -> bool:
        """Returns True if an EspeakWrapper can be instantiated"""
        try:
            EspeakWrapper()
        except RuntimeError:  # pragma: nocover
            available = False
        else:
            available = True
        return available

    @classmethod
    def is_espeak_ng(cls) -> bool:
        """Returns True if using espeak-ng, False otherwise"""
        # espeak-ng starts with version 1.49
        first_ng_version = (1, 49)
        return cls.version() >= first_ng_version

    @classmethod
    def version(cls):
        """Espeak version as a tuple (major, minor, patch)

        Raises
        ------
        RuntimeError if BaseEspeakBackend.is_available() is False or if the
          version cannot be extracted for some reason.

        """
        wrapper = EspeakWrapper()
        return wrapper.version

    @abc.abstractmethod
    def _postprocess_line(self, line: str, num: int,
                          separator: Separator, strip: bool) -> str:
        """Postprocesses a single phonemized `line`, see child classes"""
@@ -0,0 +1,172 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Espeak backend for the phonemizer"""
import itertools
import re
from logging import Logger
from typing import Optional, Tuple, List, Union, Pattern
from phonemizer.backend.espeak.base import BaseEspeakBackend
from phonemizer.backend.espeak.language_switch import (
get_language_switch_processor, LanguageSwitch, BaseLanguageSwitch)
from phonemizer.backend.espeak.words_mismatch import (
get_words_mismatch_processor, WordMismatch, BaseWordsMismatch)
from phonemizer.backend.espeak.wrapper import EspeakWrapper
from phonemizer.separator import Separator
class EspeakBackend(BaseEspeakBackend):
    """Espeak backend for the phonemizer

    Parameters
    ----------
    language, punctuation_marks, preserve_punctuation, logger:
        forwarded as is to BaseEspeakBackend
    with_stress: bool
        When True the stress marks are kept in the phonemized output, when
        False (default) they are removed.
    tie: bool or str
        When False (default) no tie is used. When True phones are tied with
        the default U+361 character. Any other value must be a single
        character, used as tie in place of the default one.
    language_switch: str
        The policy applied when espeak switches to another language, one of
        'keep-flags' (default), 'remove-flags' or 'remove-utterance'.
    words_mismatch: str
        The words mismatch policy, default to 'ignore'. See
        phonemizer.backend.espeak.words_mismatch for details.

    """
    # a regular expression to find phonemes stresses in espeak output
    _ESPEAK_STRESS_RE = re.compile(r"[ˈˌ'-]+")

    # pylint: disable=too-many-arguments
    def __init__(self, language: str,
                 punctuation_marks: Optional[Union[str, Pattern]] = None,
                 preserve_punctuation: bool = False,
                 with_stress: bool = False,
                 tie: Union[bool, str] = False,
                 language_switch: LanguageSwitch = 'keep-flags',
                 words_mismatch: WordMismatch = 'ignore',
                 logger: Optional[Logger] = None):
        super().__init__(
            language, punctuation_marks=punctuation_marks,
            preserve_punctuation=preserve_punctuation, logger=logger)

        self._espeak.set_voice(language)
        self._with_stress = with_stress
        self._tie = self._init_tie(tie)
        self._lang_switch: BaseLanguageSwitch = get_language_switch_processor(
            language_switch, self.logger, self.language)
        self._words_mismatch: BaseWordsMismatch = get_words_mismatch_processor(
            words_mismatch, self.logger)

    @staticmethod
    def _init_tie(tie) -> Optional[str]:
        """Returns the tie character to use, or None when ties are disabled

        Raises a RuntimeError if an explicit `tie` is not a single character.

        """
        if not tie:
            return None

        if tie is True:  # default U+361 tie character
            return '͡'

        # non default tie character
        tie = str(tie)
        if len(tie) != 1:
            raise RuntimeError(
                f'explicit tie must be a single charcacter but is {tie}')
        return tie

    @staticmethod
    def name():
        return 'espeak'

    @classmethod
    def supported_languages(cls):
        return {
            voice.language: voice.name
            for voice in EspeakWrapper().available_voices()}

    def _phonemize_aux(self, text, offset, separator, strip):
        """Phonemizes `text` line by line

        Returns a tuple (phonemized lines, language switches) where the
        language switches are the numbers (shifted by `offset`) of the lines
        in which a language switch has been detected.

        """
        if self._tie is not None and separator.phone:
            self.logger.warning(
                'cannot use ties AND phone separation, '
                'ignoring phone separator')

        output = []
        lang_switches = []
        for num, line in enumerate(text, start=1):
            line = self._espeak.text_to_phonemes(line, self._tie)
            line, has_switch = self._postprocess_line(
                line, num, separator, strip)
            output.append(line)
            if has_switch:
                lang_switches.append(num + offset)

        return output, lang_switches

    def _process_stress(self, word):
        """Removes the stress marks from `word` unless `with_stress` is set"""
        if self._with_stress:
            return word
        # remove the stresses on phonemes
        return self._ESPEAK_STRESS_RE.sub('', word)

    def _process_tie(self, word: str, separator: Separator):
        """Applies the custom tie, or the phone separator, to `word`"""
        # NOTE a bug in espeak append ties to (en) flags so as (͡e͡n).
        # We do not correct it here.
        if self._tie is not None and self._tie != '͡':
            # replace default '͡' by the requested one
            return word.replace('͡', self._tie)
        return word.replace('_', separator.phone)

    def _postprocess_line(self, line: str, num: int,
                          separator: Separator, strip: bool) -> Tuple[str, bool]:
        """Cleans up a raw espeak output `line` and applies the separators

        Returns a tuple (processed line, has_switch) where `has_switch` is
        True when a language switch has been detected in the line. The
        processed line is empty when the language switch processor dropped
        it.

        """
        # espeak can split an utterance into several lines because of
        # punctuation, here we merge the lines into a single one. Merging may
        # create runs of spaces: collapse them so that splitting on ' ' below
        # never yields empty words
        line = re.sub(r' {2,}', ' ', line.strip().replace('\n', ' '))

        # due to a bug in espeak-ng, some additional separators can be
        # added at the end of a word. Here a quick fix to solve that
        # issue. See https://github.com/espeak-ng/espeak-ng/issues/694
        line = re.sub(r'_+', '_', line)
        line = re.sub(r'_ ', ' ', line)

        line, has_switch = self._lang_switch.process(line)
        if not line:
            return '', has_switch

        out_line = ''
        for word in line.split(' '):
            word = self._process_stress(word.strip())
            if not strip and self._tie is None:
                # the trailing '_' is turned into a phone separator below
                word += '_'
            word = self._process_tie(word, separator)
            out_line += word + separator.word

        if strip and separator.word:
            # erase the last word separator from the line
            out_line = out_line[:-len(separator.word)]

        return out_line, has_switch

    def _phonemize_preprocess(self, text: List[str]) -> Tuple[Union[str, List[str]], List]:
        text, punctuation_marks = super()._phonemize_preprocess(text)
        # record the input words for the words mismatch detection
        self._words_mismatch.count_text(text)
        return text, punctuation_marks

    def _phonemize_postprocess(self, phonemized, punctuation_marks, separator: Separator, strip: bool):
        # `phonemized` is made of the phonemized lines followed by the
        # language switches, as returned by _phonemize_aux() or _flatten()
        text = phonemized[0]
        switches = phonemized[1]

        self._words_mismatch.count_phonemized(text, separator)
        self._lang_switch.warning(switches)

        phonemized = super()._phonemize_postprocess(text, punctuation_marks, separator, strip)
        return self._words_mismatch.process(phonemized)

    @staticmethod
    def _flatten(phonemized) -> List:
        """Specialization of BaseBackend._flatten for the espeak backend

        From [([1, 2], ['a', 'b']), ([3], []), ([4], ['c'])] to [[1, 2, 3, 4],
        ['a', 'b', 'c']]. Without any chunk to merge, returns [[], []].

        """
        if not phonemized:
            # same shape as a regular output: no line and no language switch
            return [[], []]

        return [
            list(itertools.chain.from_iterable(
                chunk[i] for chunk in phonemized))
            for i in range(len(phonemized[0]))]
@@ -0,0 +1,193 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Manages language switches for the espeak backend
This module is used in phonemizer.backend.EspeakBackend and should be
considered private.
It manages languages switches that occur during phonemization, where a part of
a text is phonemized in a language different from the target language. For
instance the sentence "j'aime le football" in French will be phonemized by
espeak as "ʒɛm lə (en)fʊtbɔːl(fr)", "football" be pronounced as an English
word. This may cause two issues to end users. First it introduces undesirable
(.) language switch flags. It may introduce extra phones that are not present
in the target language phoneset.
This module implements 3 alternative solutions the user can choose when
initializing the espeak backend:
- 'keep-flags' preserves the language switch flags,
- 'remove-flags' removes the flags (.) but preserves the words with alternative
phoneset,
- 'remove-utterance' removes the utterances where flags are detected.
"""
import abc
import re
from logging import Logger
from typing import List, Tuple
from typing_extensions import TypeAlias, Literal
# The language switch policies a user can request, these are the keys
# accepted as `mode` by get_language_switch_processor()
LanguageSwitch: TypeAlias = Literal['keep-flags', 'remove-flags', 'remove-utterance']
def get_language_switch_processor(mode: LanguageSwitch, logger: Logger, language: str) -> 'BaseLanguageSwitch':
    """Returns a language switch processor initialized from `mode`

    The `mode` can be one of the following:

    - 'keep-flags' to preserve the switch flags
    - 'remove-flags' to suppress the switch flags
    - 'remove-utterance' to suppress the entire utterance

    The `logger` and `language` are forwarded to the processor constructor.

    Raises a RuntimeError if the `mode` is unknown.

    """
    processors = {
        'keep-flags': KeepFlags,
        'remove-flags': RemoveFlags,
        'remove-utterance': RemoveUtterances}

    # only the lookup is guarded, so that a KeyError raised while building
    # the processor is never reported as an invalid `mode`
    try:
        processor = processors[mode]
    except KeyError:
        raise RuntimeError(
            f'mode "{mode}" invalid, must be in {", ".join(processors.keys())}'
        ) from None

    return processor(logger, language)
class BaseLanguageSwitch(abc.ABC):
    """The base class for language switch processors

    Parameters
    ----------
    logger (logging.Logger) : the logger instance warnings are sent to when
      language switches are detected.
    language (str) : the language code currently in use by the phonemizer,
      used to customize the content of warnings

    """
    # Matches the language switch flags in espeak output. Here is an example
    # of a switch from English to French:
    # "something (fr)quelque chose(en) another thing".
    _ESPEAK_FLAGS_RE = re.compile(r'\(.+?\)')

    def __init__(self, logger: Logger, language: str):
        self._language = language
        self._logger = logger

    @classmethod
    def is_language_switch(cls, utterance: str) -> bool:
        """Returns True if a language switch is present in the `utterance`"""
        return cls._ESPEAK_FLAGS_RE.search(utterance) is not None

    @classmethod
    @abc.abstractmethod
    def process(cls, utterance: str) -> Tuple[str, bool]:
        """Detects and processes language switches according to the mode

        Called on each utterance, as a post-processing step of the
        phonemization.

        Returns
        -------
        processed_utterance (str) : the utterance either preserved, deleted (as
          '') or with the switch removed
        has_switch (bool): True if a language switch flag is found in the
          `utterance` and False otherwise

        """

    @abc.abstractmethod
    def warning(self, switches: List[int]):
        """Sends warnings to the logger with recorded language switches

        Called only once, at the very end of the phonemization process.

        Parameters
        ----------
        switches (list of int) : the line numbers where language switches have
          been detected during phonemization

        """
class KeepFlags(BaseLanguageSwitch):
    """Preserves utterances even if language switch flags are present"""
    @classmethod
    def process(cls, utterance: str) -> Tuple[str, bool]:
        # the utterance is returned untouched, switches are only reported
        has_switch = cls.is_language_switch(utterance)
        return utterance, has_switch

    def warning(self, switches: List[int]):
        if switches:
            lines = ', '.join(str(line) for line in sorted(switches))
            self._logger.warning(
                '%s utterances containing language switches '
                'on lines %s', len(switches), lines)
            self._logger.warning(
                'extra phones may appear in the "%s" phoneset',
                self._language)
            self._logger.warning(
                'language switch flags have been kept '
                '(applying "keep-flags" policy)')
class RemoveFlags(BaseLanguageSwitch):
    """Removes the language switch flags when detected"""
    @classmethod
    def process(cls, utterance: str) -> Tuple[str, bool]:
        if not cls.is_language_switch(utterance):
            return utterance, False

        # strip every (lang) flag found in the current utterance
        return cls._ESPEAK_FLAGS_RE.sub('', utterance), True

    def warning(self, switches: List[int]):
        if switches:
            lines = ', '.join(str(line) for line in sorted(switches))
            self._logger.warning(
                '%s utterances containing language switches '
                'on lines %s', len(switches), lines)
            self._logger.warning(
                'extra phones may appear in the "%s" phoneset',
                self._language)
            self._logger.warning(
                'language switch flags have been removed '
                '(applying "remove-flags" policy)')
class RemoveUtterances(BaseLanguageSwitch):
    """Language switch policy that drops any utterance containing a flag

    A flagged utterance is replaced by an empty string, the other ones are
    returned unchanged.

    """
    @classmethod
    def process(cls, utterance: str) -> Tuple[str, bool]:
        has_switch = cls.is_language_switch(utterance)
        # a flagged utterance is entirely discarded
        return ('' if has_switch else utterance), has_switch

    def warning(self, switches: List[int]):
        # stay silent when no language switch has been recorded
        if switches:
            self._logger.warning(
                'removed %s utterances containing language switches '
                '(applying "remove-utterance" policy)', len(switches))
@@ -0,0 +1,108 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Mbrola backend for the phonemizer"""
import pathlib
import shutil
import sys
from logging import Logger
from pathlib import Path
from typing import Union, Optional, List, Dict
from phonemizer.backend.espeak.base import BaseEspeakBackend
from phonemizer.backend.espeak.wrapper import EspeakWrapper
from phonemizer.separator import Separator
class EspeakMbrolaBackend(BaseEspeakBackend):
    """Espeak-mbrola backend for the phonemizer"""
    # this will be initialized once, at the first call to supported_languages()
    _supported_languages = None

    def __init__(self, language: str, logger: Optional[Logger] = None):
        """Initializes the backend and selects the mbrola voice `language`"""
        super().__init__(language, logger=logger)
        self._espeak.set_voice(language)

    @staticmethod
    def name():
        """The name of the backend, as exposed to the user"""
        return 'espeak-mbrola'

    @classmethod
    def is_available(cls) -> bool:
        """Mbrola backend is available for espeak>=1.49

        Returns True when the espeak backend is available, the `mbrola`
        program is found in the PATH and the espeak library is espeak-ng.

        """
        # the and-chain evaluates to None or to a path when mbrola is
        # missing/found, convert it to the advertised boolean
        return bool(
            BaseEspeakBackend.is_available() and
            shutil.which('mbrola') and
            BaseEspeakBackend.is_espeak_ng())

    @classmethod
    def _all_supported_languages(cls):
        """Returns the mbrola voices known by espeak as a dict (code, name)"""
        # retrieve the mbrola voices. This voices must be installed separately.
        voices = EspeakWrapper().available_voices('mbrola')
        # the 3 first characters of the identifier are stripped to get the
        # voice code
        return {voice.identifier[3:]: voice.name for voice in voices}

    @classmethod
    def _is_language_installed(cls, language: str, data_path: Union[str, Path]) \
            -> bool:
        """Returns True if the required mbrola voice is installed

        Parameters
        ----------
        language (str) : the voice code prefixed by 'mb-'
        data_path (str or Path) : the espeak data directory

        """
        # this is a reimplementation of LoadMbrolaTable from espeak
        # synth_mbrola.h sources
        voice = language[3:]  # remove mb- prefix

        # convert first so as to accept a str as well as a Path
        if (pathlib.Path(data_path) / 'mbrola' / voice).is_file():
            return True  # pragma: nocover

        if sys.platform != 'win32':
            candidates = (
                f'/usr/share/mbrola/{voice}',
                f'/usr/share/mbrola/{voice}/{voice}',
                f'/usr/share/mbrola/voices/{voice}')
            return any(
                pathlib.Path(candidate).is_file() for candidate in candidates)

        return False

    @classmethod
    def supported_languages(cls) -> Dict[str, str]:  # pragma: nocover
        """Returns the list of installed mbrola voices"""
        if cls._supported_languages is None:
            data_path = EspeakWrapper().data_path
            # computed once and stored at class level
            cls._supported_languages = {
                k: v for k, v in cls._all_supported_languages().items()
                if cls._is_language_installed(k, data_path)}
        return cls._supported_languages

    def _phonemize_aux(self, text: List[str], offset: int,
                       separator: Separator, strip: bool) -> List[str]:
        """Phonemizes each line of `text`, returns one output per line"""
        output = []
        for num, line in enumerate(text, start=1):
            line = self._espeak.synthetize(line)
            line = self._postprocess_line(line, offset + num, separator, strip)
            output.append(line)
        return output

    def _postprocess_line(self, line: str, num: int,
                          separator: Separator, strip: bool) -> str:
        """Converts the raw output for one line into a string of phonemes

        The phoneme is the first tab-separated field of each non-empty row
        of `line`. The '_' entries are discarded and the remaining ones are
        joined with the phone separator, which is also appended at the end
        when `strip` is False. The `num` parameter is not used here.

        """
        # retrieve the phonemes with the correct SAMPA alphabet (but
        # without word separation)
        phonemes = (
            phn.split('\t')[0] for phn in line.split('\n') if phn.strip())
        phonemes = separator.phone.join(pho for pho in phonemes if pho != '_')

        if not strip:
            phonemes += separator.phone
        return phonemes
@@ -0,0 +1,81 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Voice struct from Espeak API exposed to Python"""
import ctypes
# This class could be a dataclass but, for compatibility with python-3.6, we
# don't use the dataclasses module.
class EspeakVoice:
    """A helper class to expose voice structures within C and Python

    A voice is an immutable (name, language, identifier) triplet. Two voices
    are equal, and share the same hash, when their three fields are equal.

    Parameters
    ----------
    name (str) : the voice name
    language (str) : the language code
    identifier (str) : the path to the voice file wrt espeak data path

    """
    def __init__(self, name: str = '', language: str = '', identifier: str = ''):
        self._name = name
        self._language = language
        self._identifier = identifier

    @property
    def name(self):
        """Voice name"""
        return self._name

    @property
    def language(self):
        """Language code"""
        return self._language

    @property
    def identifier(self):
        """Path to the voice file wrt espeak data path"""
        return self._identifier

    def __eq__(self, other: object):
        # let Python fall back to its default handling when `other` is not a
        # voice, instead of failing on a missing attribute
        if not isinstance(other, EspeakVoice):
            return NotImplemented
        return (
            self.name == other.name and
            self.language == other.language and
            self.identifier == other.identifier)

    def __hash__(self):
        return hash((self.name, self.language, self.identifier))

    def __repr__(self):
        return (
            f'{type(self).__name__}(name={self.name!r}, '
            f'language={self.language!r}, identifier={self.identifier!r})')

    class VoiceStruct(ctypes.Structure):  # pylint: disable=too-few-public-methods
        """A helper class to fetch voices information from the espeak library.

        The espeak_VOICE struct is defined in speak_lib.h from the espeak code.
        Here we use only name (voice name), languages (language code) and
        identifier (voice file) information.

        """
        _fields_ = [
            ('name', ctypes.c_char_p),
            ('languages', ctypes.c_char_p),
            ('identifier', ctypes.c_char_p)]

    def to_ctypes(self):
        """Converts the Voice instance to an espeak ctypes structure

        Each field is encoded as utf8, an empty field is converted to a NULL
        pointer.

        """
        return self.VoiceStruct(
            self.name.encode('utf8') if self.name else None,
            self.language.encode('utf8') if self.language else None,
            self.identifier.encode('utf8') if self.identifier else None)

    @classmethod
    def from_ctypes(cls, struct: VoiceStruct):
        """Returns a Voice instance built from an espeak ctypes structure

        A NULL field is converted to an empty string.

        """
        return cls(
            name=(struct.name or b'').decode(),
            # discard a useless char prepended by espeak
            language=(struct.languages or b'0').decode()[1:],
            identifier=(struct.identifier or b'').decode())
@@ -0,0 +1,152 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Manages words count mismatches for the espeak backend"""
import abc
import re
from logging import Logger
from typing import List, Tuple
from typing_extensions import TypeAlias, Literal, Union
from phonemizer.separator import Separator
WordMismatch: TypeAlias = Literal["warn", "ignore"]
def get_words_mismatch_processor(mode: WordMismatch, logger: Logger) -> 'BaseWordsMismatch':
    """Returns a word count mismatch processor according to `mode`

    The `mode` can be one of the following:
    - `ignore` to ignore words mismatches
    - `warn` to display a warning on each mismatched utterance
    - `remove` to remove any utterance containing a words mismatch

    Parameters
    ----------
    mode (str) : the name of the processor to instantiate
    logger (Logger) : the logger given to the processor

    Raises a RuntimeError if the `mode` is unknown.

    """
    processors = {
        'ignore': Ignore,
        'warn': Warn,
        'remove': Remove}

    # only the lookup is guarded, so that an error raised while building the
    # processor is never reported as an invalid mode
    try:
        processor = processors[mode]
    except KeyError:
        raise RuntimeError(
            f'mode {mode} invalid, must be in {", ".join(processors.keys())}'
        ) from None

    return processor(logger)
class BaseWordsMismatch(abc.ABC):
    """The base class of all word count mismatch processors

    A processor first records the number of words of each input line
    (`count_text`) and of each phonemized line (`count_phonemized`), then
    `process` is called on the phonemized text to deal with the lines for
    which the two counts differ.

    """
    _RE_SPACES = re.compile(r'\s+')

    def __init__(self, logger: Logger):
        self._logger = logger
        # number of words per line, for input and phonemized text respectively
        self._count_txt = []
        self._count_phn = []

    @classmethod
    def _count_words(
            cls,
            text: List[str],
            wordsep: Union[str, re.Pattern] = _RE_SPACES) -> List[int]:
        """Return the number of words contained in each line of `text`

        The `wordsep` is either a compiled regular expression or a string, in
        which case it is matched literally. Empty words are not counted.

        """
        if not isinstance(wordsep, re.Pattern):
            # a string separator must not be interpreted as a pattern
            wordsep = re.escape(wordsep)

        return [
            len([w for w in re.split(wordsep, line.strip()) if w])
            for line in text]

    def _mismatched_lines(self) -> List[Tuple[int, int, int]]:
        """Returns a list of (num_line, nwords_input, nwords_output)

        Consider only the lines where nwords_input != nwords_output. The line
        numbers start at 0. Raises a RuntimeError if input and output do not
        have the same number of lines.

        """
        if len(self._count_txt) != len(self._count_phn):
            raise RuntimeError(  # pragma: nocover
                f'number of lines in input and output must be equal, '
                f'we have: input={len(self._count_txt)}, '
                f'output={len(self._count_phn)}')

        return [
            (n, t, p) for n, (t, p) in
            enumerate(zip(self._count_txt, self._count_phn))
            if t != p]

    def _resume(self, nmismatch: int, nlines: int):
        """Logs a high level undetailed warning

        Nothing is logged when `nmismatch` is 0.

        """
        if nmismatch:
            # round the percentage itself: scaling an already rounded ratio
            # brings back floating point noise (0.29 * 100 is not 29.0)
            self._logger.warning(
                'words count mismatch on %s%% of the lines (%s/%s)',
                round(100 * nmismatch / nlines, 0), nmismatch, nlines)

    def count_text(self, text: List[str]):
        """Stores the number of words in each input line"""
        self._count_txt = self._count_words(text)

    def count_phonemized(self, text: List[str], separator: 'Separator'):
        """Stores the number of words in each output line

        The words are delimited by the word separator of `separator`.

        """
        self._count_phn = self._count_words(text, separator.word)

    @abc.abstractmethod
    def process(self, text: List[str]) -> List[str]:
        """Detects and process word count mismatches according to the mode

        This method is called at the very end of phonemization, during
        post-processing.

        """
class Ignore(BaseWordsMismatch):
    """Processor reporting the mismatches with a single summary warning"""
    def process(self, text: List[str]) -> List[str]:
        # the text is returned as it is, only the summary is logged
        nmismatch = len(self._mismatched_lines())
        self._resume(nmismatch, len(text))
        return text
class Warn(BaseWordsMismatch):
    """Processor logging one warning per mismatched line, then a summary"""
    def process(self, text: List[str]) -> List[str]:
        mismatched = self._mismatched_lines()

        # line indices are 0-based, they are reported starting at 1
        for index, expected, observed in mismatched:
            self._logger.warning(
                'words count mismatch on line %s '
                '(expected %s words but get %s)',
                index + 1, expected, observed)

        self._resume(len(mismatched), len(text))
        return text
class Remove(BaseWordsMismatch):
    """Removes any utterance containing a word count mismatch"""
    def process(self, text: List[str]) -> List[str]:
        """Replaces each mismatched line of `text` by an empty string

        The `text` is modified in place and returned, its number of lines is
        preserved. Nothing is logged when there is no mismatch.

        """
        mismatch = [line[0] for line in self._mismatched_lines()]
        self._resume(len(mismatch), len(text))

        # do not announce a removal when there is nothing to remove
        if mismatch:
            self._logger.warning('removing the mismatched lines')
            for index in mismatch:
                text[index] = ''

        return text
@@ -0,0 +1,370 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Wrapper on espeak-ng library"""
import ctypes
import ctypes.util
import functools
import os
import pathlib
import sys
import tempfile
import weakref
from typing import Tuple, Dict
from phonemizer.backend.espeak.api import EspeakAPI
from phonemizer.backend.espeak.voice import EspeakVoice
class EspeakWrapper:
    """Wrapper on espeak shared library

    The aim of this wrapper is not to be exhaustive but to encapsulate the
    espeak functions required for phonemization. It relies on a espeak shared
    library (*.so on Linux, *.dylib on Mac and *.dll on Windows) that must be
    installed on the system.

    Use the function `EspeakWrapper.set_library()` before instantiation to
    customize the library to use.

    Raises
    ------
    RuntimeError if the espeak shared library cannot be loaded

    """
    # a static variable used to overload the default espeak library installed
    # on the system. The user can choose an alternative espeak library with
    # the method EspeakWrapper.set_library().
    _ESPEAK_LIBRARY = None

    def __init__(self):
        # the following attributes are accessed through properties and are
        # lazily initialized
        self._version: Tuple[int, ...] = None
        self._data_path = None
        self._voice = None

        # the voices already listed, indexed by the requested name. The cache
        # is owned by the instance and is released along with it.
        self._voices_cache: Dict = {}

        # load the espeak API
        self._espeak = EspeakAPI(self.library())

        # lazy loading of attributes only required for the synthetize method
        self._libc_ = None
        self._tempfile_ = None

    @property
    def _libc(self):
        """The C standard library, loaded on first access"""
        if self._libc_ is None:
            self._libc_ = (
                ctypes.windll.msvcrt if sys.platform == 'win32' else
                ctypes.cdll.LoadLibrary(ctypes.util.find_library('c')))
        return self._libc_

    @property
    def _tempfile(self):
        """The temporary file used by `synthetize`, created on first access"""
        if self._tempfile_ is None:
            # this will be automatically removed at exit
            # pylint: disable=consider-using-with
            self._tempfile_ = tempfile.NamedTemporaryFile()
            weakref.finalize(self._tempfile_, self._tempfile_.close)
        return self._tempfile_

    def __getstate__(self):
        """For pickling, when phonemizing on multiple jobs"""
        return {
            'version': self._version,
            'data_path': self._data_path,
            'voice': self._voice}

    def __setstate__(self, state: Dict):
        """For unpickling, when phonemizing on multiple jobs"""
        # the espeak library is loaded again, then the voice is restored
        self.__init__()
        self._version = state['version']
        self._data_path = state['data_path']
        self._voice = state['voice']
        if self._voice:
            if 'mb' in self._voice.identifier:  # mbrola voice
                self.set_voice(self._voice.identifier[3:])
            else:
                self.set_voice(self._voice.language)

    @classmethod
    def set_library(cls, library: str):
        """Sets the espeak backend to use `library`

        If this is not set, the backend uses the default espeak shared library
        from the system installation.

        Parameters
        ----------
        library (str or None) : the path to the espeak shared library to use as
          backend. Set `library` to None to restore the default.

        """
        cls._ESPEAK_LIBRARY = library

    @classmethod
    def library(cls):
        """Returns the espeak library used as backend

        The following precedence rule applies for library lookup:

        1. As specified by BaseEspeakBackend.set_library()
        2. Or as specified by the environment variable
           PHONEMIZER_ESPEAK_LIBRARY
        3. Or the default espeak library found on the system

        Raises
        ------
        RuntimeError if the espeak library cannot be found or if the
          environment variable PHONEMIZER_ESPEAK_LIBRARY is set to a
          non-readable file

        """
        if cls._ESPEAK_LIBRARY:
            return cls._ESPEAK_LIBRARY

        if 'PHONEMIZER_ESPEAK_LIBRARY' in os.environ:
            library = pathlib.Path(os.environ['PHONEMIZER_ESPEAK_LIBRARY'])
            if not (library.is_file() and os.access(library, os.R_OK)):
                raise RuntimeError(  # pragma: nocover
                    f'PHONEMIZER_ESPEAK_LIBRARY={library} '
                    f'is not a readable file')
            return library.resolve()

        library = (
            ctypes.util.find_library('espeak-ng') or
            ctypes.util.find_library('espeak'))
        if not library:  # pragma: nocover
            raise RuntimeError(
                'failed to find espeak library')
        return library

    def _fetch_version_and_path(self):
        """Initializes version and data path from the espeak library"""
        version, data_path = self._espeak.info()

        # pylint: disable=no-member
        self._data_path = pathlib.Path(data_path.decode())
        if not self._data_path.is_dir():  # pragma: nocover
            raise RuntimeError('failed to retrieve espeak data directory')

        # espeak-1.48 appends the release date to version number, here we
        # simply ignore it
        version = version.decode().strip().split(' ')[0].replace('-dev', '')
        self._version = tuple(int(v) for v in version.split('.'))

    @property
    def version(self) -> Tuple[int, int, int]:
        """The espeak version as a tuple of integers (major, minor, patch)"""
        if self._version is None:
            self._fetch_version_and_path()
        return self._version

    @property
    def library_path(self):
        """The espeak library as a pathlib.Path instance"""
        return self._espeak.library_path

    @property
    def data_path(self):
        """The espeak data directory as a pathlib.Path instance"""
        if self._data_path is None:
            self._fetch_version_and_path()
        return self._data_path

    @property
    def voice(self):
        """The configured voice as an EspeakVoice instance

        If `set_voice` has not been called, returns None

        """
        return self._voice

    def available_voices(self, name=None):
        """Voices available for phonemization, as a list of `EspeakVoice`

        The espeak library is requested once per instance and per `name`, the
        result is then cached. A new list is returned on each call.

        """
        # a cache owned by the instance: functools.lru_cache on a method
        # stores `self` in a global cache and so never releases the instances
        if name not in self._voices_cache:
            self._voices_cache[name] = self._list_voices(name)
        return list(self._voices_cache[name])

    def _list_voices(self, name):
        """Requests the voices matching `name` from the espeak library"""
        if name:
            name = EspeakVoice(language=name).to_ctypes()
        voices = self._espeak.list_voices(name or None)

        index = 0
        available_voices = []
        # voices is an array to pointers, terminated by None
        while voices[index]:
            voice = voices[index].contents
            available_voices.append(EspeakVoice(
                name=os.fsdecode(voice.name).replace('_', ' '),
                # the first character of the languages field is discarded
                language=os.fsdecode(voice.languages)[1:],
                identifier=os.fsdecode(voice.identifier)))
            index += 1
        return available_voices

    def set_voice(self, voice_code):
        """Setup the voice to use for phonemization

        Parameters
        ----------
        voice_code (str) : Must be a valid language code that is actually
          supported by espeak

        Raises
        ------
        RuntimeError if the required voice cannot be initialized

        """
        if 'mb' in voice_code:
            # this is an mbrola voice code. Select the voice by using
            # identifier in the format 'mb/{voice_code}'
            available = {
                voice.identifier[3:]: voice.identifier
                for voice in self.available_voices('mbrola')}
        else:
            # these are espeak voices. Select the voice using its attached
            # language code. Consider only the first voice of a given code as
            # they are sorted by relevancy
            available = {}
            for voice in self.available_voices():
                if voice.language not in available:
                    available[voice.language] = voice.identifier

        try:
            voice_name = available[voice_code]
        except KeyError:
            raise RuntimeError(f'invalid voice code "{voice_code}"') from None

        if self._espeak.set_voice_by_name(voice_name.encode('utf8')) != 0:
            raise RuntimeError(  # pragma: nocover
                f'failed to load voice "{voice_code}"')

        voice = self._get_voice()
        if not voice:  # pragma: nocover
            raise RuntimeError(f'failed to load voice "{voice_code}"')
        self._voice = voice

    def _get_voice(self):
        """Returns the current voice used for phonemization

        If no voice has been set up, returns None.

        """
        voice = self._espeak.get_current_voice()
        if voice.name:
            return EspeakVoice.from_ctypes(voice)
        return None  # pragma: nocover

    def text_to_phonemes(self, text: str, tie: bool = False) -> str:
        """Translates a text into phonemes, must call set_voice() first.

        This method is used by the Espeak backend. Wrapper on the
        espeak_TextToPhonemes function.

        Parameters
        ----------
        text (str) : the text to phonemize

        tie (bool, optional) : When True use a '͡' character between
          consecutive characters of a single phoneme. Else separate phoneme
          with '_'. This option requires espeak>=1.49. Default to False.

        Returns
        -------
        phonemes (str) : the phonemes for the text encoded in IPA, with '_' as
          phonemes separator (excepted if ``tie`` is True) and ' ' as word
          separator.

        Raises
        ------
        RuntimeError if no voice has been set up or if ``tie`` is requested
          with an espeak version which does not support it

        """
        if self.voice is None:  # pragma: nocover
            raise RuntimeError('no voice specified')

        if tie and self.version <= (1, 48, 3):
            raise RuntimeError(  # pragma: nocover
                'tie option only compatible with espeak>=1.49')

        # from Python string to C void** (a pointer to a pointer to chars)
        text_ptr = ctypes.pointer(ctypes.c_char_p(text.encode('utf8')))

        # input text is encoded as UTF8
        text_mode = 1

        # output phonemes in IPA and separated by _, or with a tie character if
        # required. See comments for the function espeak_TextToPhonemes in
        # speak_lib.h of the espeak sources for details.
        if self.version <= (1, 48, 3):  # pragma: nocover
            phonemes_mode = 0x03 | 0x01 << 4
        elif tie:
            phonemes_mode = 0x02 | 0x01 << 7 | ord('͡') << 8
        else:
            phonemes_mode = ord('_') << 8 | 0x02

        # the loop ends once the pointed value has been set to NULL
        result = []
        while text_ptr.contents.value is not None:
            phonemes = self._espeak.text_to_phonemes(
                text_ptr, text_mode, phonemes_mode)
            if phonemes:
                result.append(phonemes.decode())
        return ' '.join(result)

    def synthetize(self, text: str):
        """Translates a text into phonemes, must call set_voice() first.

        Only compatible with espeak>=1.49. This method is used by the
        EspeakMbrola backend. Wrapper on the espeak_Synthesize function.

        Parameters
        ----------
        text (str) : the text to phonemize

        Returns
        -------
        phonemes (str) : the phonemes for the text encoded in SAMPA, with '_'
          as phonemes separator and no word separation.

        Raises
        ------
        RuntimeError if the espeak version is not supported, if no voice has
          been set up, if the temporary file cannot be opened or if the
          synthesis fails

        """
        if self.version < (1, 49):  # pragma: nocover
            raise RuntimeError('not compatible with espeak<=1.48')
        if self.voice is None:  # pragma: nocover
            raise RuntimeError('no voice specified')

        # init libc fopen and fclose functions
        self._libc.fopen.argtypes = [ctypes.c_char_p, ctypes.c_char_p]
        self._libc.fopen.restype = ctypes.c_void_p
        self._libc.fclose.argtypes = [ctypes.c_void_p]
        self._libc.fclose.restype = ctypes.c_int

        # output phonemes in SAMPA and separated by _. Write the result to a
        # tempfile which is read back after phonemization (seems not possible
        # to redirect to stdout). See comments for the function
        # espeak_SetPhonemeTrace in speak_lib.h of the espeak sources for
        # details.
        self._tempfile.truncate(0)
        file_p = self._libc.fopen(
            self._tempfile.name.encode(),
            self._tempfile.mode.encode())
        if not file_p:  # pragma: nocover
            # fopen returned NULL, do not give an invalid stream to espeak
            raise RuntimeError('failed to synthetize')

        # the size given to espeak is the one of the encoded buffer, in
        # bytes: it is greater than len(text) on non-ASCII text
        encoded = text.encode('utf8')
        try:
            self._espeak.set_phoneme_trace(0x01 << 4 | ord('_') << 8, file_p)
            status = self._espeak.synthetize(
                ctypes.c_char_p(encoded),
                ctypes.c_size_t(len(encoded) + 1),
                ctypes.c_uint(0x01))
        finally:
            # because flush does not work... The stream is closed even if
            # the call to espeak fails.
            self._libc.fclose(file_p)

        if status != 0:  # pragma: nocover
            raise RuntimeError('failed to synthetize')

        self._tempfile.seek(0)
        phonemized = self._tempfile.read().decode().strip()
        return phonemized
@@ -0,0 +1,15 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonologizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonologizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonologizer. If not, see <http://www.gnu.org/licenses/>.
"""Phonemizer module for festival backend implementation"""
@@ -0,0 +1,334 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Festival backend for the phonemizer"""
import os
import pathlib
import re
import shlex
import shutil
import subprocess
import sys
import tempfile
from logging import Logger
from pathlib import Path
from typing import Optional, Dict, List, IO, Union, Pattern
from phonemizer.backend.base import BaseBackend
from phonemizer.backend.festival import lispy
from phonemizer.separator import Separator
from phonemizer.utils import get_package_resource, version_as_tuple
class FestivalBackend(BaseBackend):
"""Festival backend for the phonemizer"""
# a static variable used to overload the default festival binary installed
# on the system. The user can choose an alternative festival binary with
# the method FestivalBackend.set_executable().
_FESTIVAL_EXECUTABLE = None
def __init__(self, language: str,
punctuation_marks: Optional[Union[str, Pattern]] = None,
preserve_punctuation: bool = False,
logger: Optional[Logger] = None):
super().__init__(
language,
punctuation_marks=punctuation_marks,
preserve_punctuation=preserve_punctuation,
logger=logger)
self.logger.debug('festival executable is %s', self.executable())
# the Scheme script to be send to festival
script_file = get_package_resource('festival/phonemize.scm')
with open(script_file, 'r') as fscript:
self._script = fscript.read()
self.logger.debug('loaded %s', script_file)
@staticmethod
def name():
return 'festival'
@classmethod
def set_executable(cls, executable: str):
"""Sets the festival backend to use `executable`
If this is not set, the backend uses the default festival executable
from the system installation.
Parameters
----------
executable (str) : the path to the festival executable to use as
backend. Set `executable` to None to restore the default.
Raises
------
RuntimeError if `executable` is not an executable file.
"""
if executable is None:
cls._FESTIVAL_EXECUTABLE = None
return
executable = pathlib.Path(executable)
if not (executable.is_file() and os.access(executable, os.X_OK)):
raise RuntimeError(
f'{executable} is not an executable file')
cls._FESTIVAL_EXECUTABLE = executable.resolve()
@classmethod
def executable(cls) -> Path:
"""Returns the absolute path to the festival executable used as backend
The following precedence rule applies for executable lookup:
1. As specified by FestivalBackend.set_executable()
2. Or as specified by the environment variable
PHONEMIZER_FESTIVAL_EXECUTABLE
3. Or the default 'festival' binary found on the system with ``shutil.which('festival')``
Raises
------
RuntimeError
if the festival executable cannot be found or if the
environment variable PHONEMIZER_FESTIVAL_EXECUTABLE is set to a
non-executable file
"""
if cls._FESTIVAL_EXECUTABLE:
return cls._FESTIVAL_EXECUTABLE
if 'PHONEMIZER_FESTIVAL_EXECUTABLE' in os.environ:
executable = pathlib.Path(os.environ[
'PHONEMIZER_FESTIVAL_EXECUTABLE'])
if not (
executable.is_file()
and os.access(executable, mode=os.X_OK)
):
raise RuntimeError(
f'PHONEMIZER_FESTIVAL_EXECUTABLE={executable} '
f'is not an executable file')
return executable.resolve()
executable = shutil.which('festival')
if not executable: # pragma: nocover
raise RuntimeError(
'failed to find festival executable')
return Path(executable).resolve()
@classmethod
def is_available(cls):
"""True if the festival executable is available, False otherwise"""
try:
cls.executable()
except RuntimeError: # pragma: nocover
return False
return True
@classmethod
def version(cls):
"""Festival version as a tupe of integers (major, minor, patch)
Raises
------
RuntimeError if FestivalBackend.is_available() is False or if the
version cannot be extracted for some reason.
"""
festival = cls.executable()
# the full version version string includes extra information
# we don't need
long_version = subprocess.check_output(
[festival, '--version']).decode('latin1').strip()
# extract the version number with a regular expression
festival_version_re = r'.* ([0-9\.]+[0-9]):'
try:
version = re.match(festival_version_re, long_version).group(1)
except AttributeError:
raise RuntimeError(
f'cannot extract festival version from {festival}') from None
return version_as_tuple(version)
@staticmethod
def supported_languages() -> Dict[str, str]:
"""A dictionnary of language codes -> name supported by festival
Actually only en-us (American English) is supported.
"""
return {'en-us': 'english-us'}
# pylint: disable=unused-argument
def _phonemize_aux(self, text: List[str], offset: int, separator: Separator, strip: bool) -> List[str]:
"""Return a phonemized version of `text` with festival
This function is a wrapper on festival, a text to speech
program, allowing simple phonemization of some English
text. The US phoneset we use is the default one in festival,
as described at http://www.festvox.org/bsv/c4711.html
Any opening and closing parenthesis in `text` are removed, as
they interfer with the Scheme expression syntax. Moreover
double quotes are replaced by simple quotes because double
quotes denotes utterances boundaries in festival.
Parsing a ill-formed Scheme expression during post-processing
(typically with unbalanced parenthesis) raises an IndexError.
"""
text = self._preprocess(text)
if len(text) == 0:
return []
text = self._process(text)
text = self._postprocess(text, separator, strip)
return text
@staticmethod
def _double_quoted(line: str) -> str:
"""Return the string `line` surrounded by double quotes"""
return '"' + line + '"'
@staticmethod
def _cleaned(line: str):
"""Remove 'forbidden' characters from the line"""
# special case (very unlikely but causes a crash in festival)
# where a line is only made of '
if set(line) == set("'"):
line = ''
# remove forbidden characters (reserved for scheme, ie festival
# scripting language)
return line.replace('"', '').replace('(', '').replace(')', '').strip()
@classmethod
def _preprocess(cls, text: List[str]):
"""Returns the contents of `text` formatted for festival input
This function adds double quotes to begining and end of each
line in text, if not already presents. The returned result is
a multiline string. Empty lines in inputs are ignored.
"""
cleaned_text = (
cls._cleaned(line) for line in text if line != '')
return '\n'.join(
cls._double_quoted(line) for line in cleaned_text if line != '')
def _process(self, text: str):
"""Return the raw phonemization of `text`
This function delegates to festival the text analysis and
syllabic structure extraction.
Return a string containing the "SylStructure" relation tree of
the text, as a scheme expression.
"""
with tempfile.NamedTemporaryFile('w+', delete=False) as data:
try:
# save the text as a tempfile
data.write(text)
data.close()
# fix the path name for windows
name = data.name
if sys.platform == 'win32': # pragma: nocover
name = name.replace('\\', '\\\\')
with tempfile.NamedTemporaryFile('w+', delete=False) as scm:
try:
scm.write(self._script.format(name))
scm.close()
cmd = f'{self.executable()} -b {scm.name}'
if self.logger:
self.logger.debug('running %s', cmd)
# redirect stderr to a tempfile and displaying it only
# on errors. Messages are something like: "UniSyn:
# using default diphone ax-ax for y-pau". This is
# related to wave synthesis (done by festival during
# phonemization).
with tempfile.TemporaryFile('w+') as fstderr:
return self._run_festival(cmd, fstderr)
finally:
os.remove(scm.name)
finally:
os.remove(data.name)
@staticmethod
def _run_festival(cmd: str, fstderr: IO) -> str:
"""Runs the festival command for phonemization
Returns the raw phonemized output (need to be postprocesses). Raises a
RuntimeError if festival fails.
"""
try:
output = subprocess.check_output(
shlex.split(cmd, posix=False), stderr=fstderr)
# festival seems to use latin1 and not utf8
return re.sub(' +', ' ', output.decode('latin1'))
except subprocess.CalledProcessError as err: # pragma: nocover
fstderr.seek(0)
raise RuntimeError(
f'Command "{cmd}" returned exit status {err.returncode}, '
f'output is:\n{fstderr.read()}') from None
@staticmethod
def _postprocess_syll(syll: List[str], separator: Separator, strip: bool) -> str:
"""Parse a syllable from festival to phonemized output"""
sep = separator.phone
out = (phone[0][0].replace('"', '') for phone in syll[1:])
out = sep.join(o for o in out if o != '')
return out if strip else out + sep
@classmethod
def _postprocess_word(cls, word: List[List[str]], separator: Separator, strip: bool) -> str:
"""Parse a word from festival to phonemized output"""
sep = separator.syllable
out = sep.join(
cls._postprocess_syll(syll, separator, strip)
for syll in word[1:])
return out if strip else out + sep
@classmethod
def _postprocess_line(cls, line: str, separator, strip: bool) -> str:
    """Convert a line of festival output into a phonemized utterance

    The `line` is parsed as a Scheme expression whose elements are words,
    each one converted by `_postprocess_word`. Words converted to an empty
    string are discarded, the others are joined with `separator.word`,
    which is also appended at the end of the line unless `strip` is True.

    """
    word_sep = separator.word
    words = (
        cls._postprocess_word(parsed, separator, strip)
        for parsed in lispy.parse(line))

    joined = word_sep.join(word for word in words if word != '')
    if strip:
        return joined
    return joined + word_sep
@classmethod
def _postprocess(cls, tree: str, separator: Separator, strip: bool) -> List[str]:
    """Convert the festival syllable tree into phonemized utterances

    The `tree` is processed line by line with `_postprocess_line`. Empty
    lines and lines made of the `(nil nil nil)` expression are ignored.
    Returns one string per remaining line.

    """
    ignored = ('', '(nil nil nil)')

    utterances = []
    for line in tree.split('\n'):
        if line in ignored:
            continue
        utterances.append(cls._postprocess_line(line, separator, strip))
    return utterances
@@ -0,0 +1,66 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Parse a Scheme expression as a nested list
The main function of this module is lispy.parse, other ones should be
considered private. This module is a dependency of the festival
backend.
From http://www.norvig.com/lispy.html
"""
from typing import List, Union
def parse(program: str):
    """Read a Scheme expression from a string

    The `program` is split into tokens and the first complete expression
    is returned as a nested list of strings (or as a single string when
    the program starts with an atom). Tokens following that first
    expression are ignored.

    Raises an IndexError if the expression is not valid scheme (an opened
    parenthesis is never closed) and a SyntaxError if the program contains
    no token or starts with a closing parenthesis.

    >>> parse('(+ 2 (* 5 2))')
    ['+', '2', ['*', '5', '2']]

    """
    tokens = _tokenize(program)
    return _read_from_tokens(tokens)
def _tokenize(chars: str) -> List[str]:
"""Convert a string of characters into a list of tokens."""
return chars.replace('(', ' ( ').replace(')', ' ) ').split()
Expr = Union[str, List['Expr']]
def _read_from_tokens(tokens: List[str]) -> Expr:
"""Read an expression from a sequence of tokens"""
if len(tokens) == 0: # pragma: nocover
raise SyntaxError('unexpected EOF while reading')
token = tokens.pop(0)
if token == '(':
expr = []
while tokens[0] != ')':
expr.append(_read_from_tokens(tokens))
tokens.pop(0) # pop off ')'
return expr
if token == ')': # pragma: nocover
raise SyntaxError('unexpected )')
return token
@@ -0,0 +1,143 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Segments backend for the phonemizer"""
import pathlib
from logging import Logger
from typing import Optional, Dict, List, Union, Pattern
import segments
from phonemizer.backend.base import BaseBackend
from phonemizer.separator import Separator
from phonemizer.utils import get_package_resource, version_as_tuple
class SegmentsBackend(BaseBackend):
    """Segments backend for the phonemizer

    The phonemization relies on a grapheme to phoneme mapping loaded from
    a file with one `grapheme phoneme` pair per line. The `language` is
    either the name of a mapping bundled with phonemizer (see
    `supported_languages()`) or the path to such a file.

    The phonemize method will raise a ValueError when parsing an
    unknown morpheme.

    """
    def __init__(self, language: str,
                 punctuation_marks: Optional[Union[str, Pattern]] = None,
                 preserve_punctuation: bool = False,
                 logger: Optional[Logger] = None):
        # will be initialized in _init_language() from super().__init__(),
        # so it must be declared BEFORE calling the parent constructor
        self._tokenizer: Optional[segments.Tokenizer] = None
        super().__init__(
            language,
            punctuation_marks=punctuation_marks,
            preserve_punctuation=preserve_punctuation,
            logger=logger)

    def _init_language(self, language):
        """Builds the tokenizer for `language` and returns the language code

        The language code is the name of the grapheme to phoneme file
        without its directory and extension.

        """
        # load the grapheme to phoneme mapping
        profile = self._load_g2p_profile(language)
        self._tokenizer = segments.Tokenizer(profile=profile)

        # this is the language code
        return pathlib.Path(language).stem

    @staticmethod
    def name():
        """Returns the name of the backend"""
        return 'segments'

    @classmethod
    def version(cls):
        """Returns the version of the segments module as a tuple"""
        return version_as_tuple(segments.__version__)

    @classmethod
    def is_available(cls):
        """Returns True, the segments module being imported with this file"""
        return True

    @staticmethod
    def supported_languages():
        """Returns a dict of language: file supported by the segments backend

        The supported languages have a grapheme to phoneme conversion file
        bundled with phonemizer. Users can also use their own file as
        parameter of the phonemize() function.

        """
        # directory phonemizer/share/segments
        directory = get_package_resource('segments')

        # supported languages are files with the 'g2p' extension
        return {g2p.stem: g2p
                for g2p in directory.iterdir() if g2p.suffix == '.g2p'}

    @classmethod
    def is_supported_language(cls, language: str) -> bool:
        """Returns True if `language` can be used with this backend

        A `language` which is the path to an existing file is supported if
        that file is a valid grapheme to phoneme mapping, otherwise it must
        be one of the `supported_languages()`.

        """
        if not pathlib.Path(language).is_file():
            return language in cls.supported_languages()

        try:
            cls._load_g2p_profile(language)
        except RuntimeError:
            return False
        return True

    @classmethod
    def _load_g2p_profile(cls, language: str) -> segments.Profile:
        """Returns a segments profile from a `language`

        The `language` is either the path to an existing grapheme to
        phoneme file or the name of one of the `supported_languages()`.

        Raises a RuntimeError if no file is found for `language` or if a
        line of the file does not have exactly two whitespace separated
        fields.

        """
        # make sure the g2p file exists
        if not pathlib.Path(language).is_file():
            try:
                language = cls.supported_languages()[language]
            except KeyError:
                raise RuntimeError(
                    f'grapheme to phoneme file not found: '
                    f'{language}') from None

        # load the mapping grapheme -> phoneme from the file, make sure all
        # lines are well formatted
        g2p: Dict[str, str] = {}
        with open(language, 'r', encoding='utf8') as flang:
            for num, line in enumerate(flang, start=1):
                elts = line.strip().split()
                if len(elts) != 2:
                    raise RuntimeError(
                        f'grapheme to phoneme file, line {num} must have '
                        f'2 rows but have {len(elts)}: {language}')

                g2p[elts[0]] = elts[1]

        # build the segments profile from the g2p mapping
        return segments.Profile(
            *[{'Grapheme': k, 'mapping': v} for k, v in g2p.items()])

    # pylint: disable=unused-argument
    def _phonemize_aux(self, text: List[str], offset: int, separator: Separator, strip: bool) -> List[str]:
        """Phonemizes each utterance of `text` with the segments tokenizer

        In the tokenizer output the phones are separated by a space and the
        words by ' # '. Those default separators are replaced by
        `separator.phone` and `separator.word`. When `strip` is False a
        phone separator is added at the end of each word and a word
        separator at the end of each utterance. The `offset` is unused.

        Returns the phonemized utterances as a list, in the order of `text`.

        """
        # tokenize the input text per utterance
        phonemized = (
            self._tokenizer(line, column='mapping', errors='strict')
            for line in text)

        # the output of segments is always strip, so we need to add
        # token separation at the end when strip is False.
        if not strip:
            # add word separator at end of utterance
            phonemized = (p + ' # ' for p in phonemized)
            # add phoneme separator at end of word: the word boundary is
            # prefixed with an extra space which survives the ' # ' -> '#'
            # substitution below and is then turned into a phone separator
            phonemized = (p.replace(' # ', '  # ') for p in phonemized)

        # replace default separators by our custom ones
        phonemized = (p.replace(' # ', '#') for p in phonemized)
        phonemized = (p.replace(' ', separator.phone) for p in phonemized)
        phonemized = (p.replace('#', separator.word) for p in phonemized)

        # return the result as a list of utterances
        return list(phonemized)