2025-12-01

This commit is contained in:
2026-03-17 14:58:51 -06:00
parent 183e865f8b
commit 4b82b57113
6846 changed files with 954887 additions and 162606 deletions
@@ -0,0 +1,21 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Multilingual text to phones converter"""
from .phonemize import phonemize # pylint: disable=unused-import
# version of the phonemizer package, as a dot-separated string
__version__ = '3.3.0'
"""Phonemizer version"""
@@ -0,0 +1,27 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Multilingual text to phonemes converter"""
# pylint: disable=unused-import
from .espeak.espeak import EspeakBackend
from .espeak.mbrola import EspeakMbrolaBackend
from .festival.festival import FestivalBackend
from .segments import SegmentsBackend
# each backend class is registered under the name it reports through name()
BACKENDS = {
    backend.name(): backend
    for backend in (
        EspeakBackend,
        FestivalBackend,
        SegmentsBackend,
        EspeakMbrolaBackend,
    )
}
"""The different phonemization backends as a mapping (name, class)"""
@@ -0,0 +1,255 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Abstract base class for phonemization backends"""
import abc
import itertools
import re
from logging import Logger
from typing import Optional, List, Any, Dict, Tuple, Union, Pattern
import joblib
from phonemizer.logger import get_logger
from phonemizer.punctuation import Punctuation
from phonemizer.separator import Separator, default_separator
from phonemizer.utils import chunks
class BaseBackend(abc.ABC):
    """Abstract base class of all the phonemization backends

    Defines the interface shared by every backend. Child classes must
    implement `name()`, `is_available()`, `version()`,
    `supported_languages()` and `_phonemize_aux()`. The central method is
    `phonemize()`.

    Parameters
    ----------
    language: str
        The language code of the input text, must be supported by the
        backend. If ``backend`` is 'segments', the language can be a file
        with a grapheme to phoneme mapping.

    preserve_punctuation: bool
        When True, will keep the punctuation in the phonemized output. Not
        supported by the 'espeak-mbrola' backend. Default to False and remove
        all the punctuation.

    punctuation_marks: str
        The punctuation marks to consider when dealing with punctuation,
        either for removal or preservation. Can be defined as a string or
        regular expression. Default to Punctuation.default_marks().

    logger: logging.Logger
        The logging instance where to send messages. If not specified, use
        the logger returned by get_logger().

    Raises
    ------
    RuntimeError
        If the backend is not available or if the `language` cannot be
        initialized.

    """
    def __init__(self, language: str,
                 punctuation_marks: Optional[Union[str, Pattern]] = None,
                 preserve_punctuation: bool = False,
                 logger: Optional[Logger] = None):
        # fall back to the default marks and logger when not provided
        marks = (
            Punctuation.default_marks()
            if punctuation_marks is None else punctuation_marks)
        log = get_logger() if logger is None else logger

        # the backend must be installed on the system
        if not self.is_available():
            raise RuntimeError(  # pragma: nocover
                f'{self.name()} not installed on your system')

        self._logger = log
        backend_name = self.name()
        backend_version = '.'.join(str(number) for number in self.version())
        self._logger.info(
            'initializing backend %s-%s', backend_name, backend_version)

        # the requested language must be supported by the backend
        self._language = self._init_language(language)

        # punctuation is either removed or preserved by the punctuator
        self._preserve_punctuation = preserve_punctuation
        self._punctuator = Punctuation(marks)

    @classmethod
    def _init_language(cls, language):
        """Returns `language` once checked it is supported by the backend

        This method may be overloaded in child classes (see Segments backend)

        Raises
        ------
        RuntimeError
            If `language` is not supported by the backend.

        """
        if cls.is_supported_language(language):
            return language

        raise RuntimeError(
            f'language "{language}" is not supported by the '
            f'{cls.name()} backend')

    @property
    def logger(self):
        """The logging.Logger instance receiving the backend messages"""
        return self._logger

    @property
    def language(self):
        """The language code used by the backend for phonemization"""
        return self._language

    @staticmethod
    @abc.abstractmethod
    def name():
        """The name of the backend"""

    @classmethod
    @abc.abstractmethod
    def is_available(cls):
        """Returns True if the backend is installed, False otherwise"""

    @classmethod
    @abc.abstractmethod
    def version(cls):
        """Return the backend version as a tuple (major, minor, patch)"""

    @staticmethod
    @abc.abstractmethod
    def supported_languages() -> Dict[str, str]:
        """Return a dict of language codes -> name supported by the backend"""

    @classmethod
    def is_supported_language(cls, language: str):
        """Returns True if `language` is one of the supported languages"""
        return language in cls.supported_languages()

    def phonemize(self, text: List[str],
                  separator: Optional[Separator] = None,
                  strip: bool = False,
                  njobs: int = 1) -> List[str]:
        """Returns the `text` phonemized for the given language

        Parameters
        ----------
        text: list of str
            The text to be phonemized. Each string in the list is considered
            as a separated line. Each line is considered as a text utterance.
            Any empty utterance will be ignored.

        separator: Separator
            String separators between phonemes, syllables and words, default
            to separator.default_separator. Syllable separator is considered
            only for the festival backend. Word separator is ignored by the
            'espeak-mbrola' backend.

        strip: bool
            If True, don't output the last word and phone separators of a
            token, default to False.

        njobs : int
            The number of parallel jobs to launch. The input text is split in
            ``njobs`` parts, phonemized on parallel instances of the backend
            and the outputs are finally collapsed.

        Returns
        -------
        phonemized text: list of str
            The input ``text`` phonemized for the given ``language`` and
            ``backend``.

        Raises
        ------
        RuntimeError
            If `text` is a str instead of a list of str, or if something went
            wrong during the phonemization.

        """
        if isinstance(text, str):
            # a str was accepted before phonemizer-3.0, fail explicitly
            raise RuntimeError(
                'input text to phonemize() is str but it must be list of str')

        if separator is None:
            separator = default_separator

        text, punctuation_marks = self._phonemize_preprocess(text)

        if njobs == 1:
            # a single job processes the whole text, starting at line 0
            phonemized = self._phonemize_aux(text, 0, separator, strip)
        else:
            self.logger.info('running %s on %s jobs', self.name(), njobs)

            # one job per chunk: chunk[0] is the text of the chunk and
            # chunk[1] the offset of its first line in the whole text
            jobs = (
                joblib.delayed(self._phonemize_aux)(
                    chunk[0], chunk[1], separator, strip)
                for chunk in zip(*chunks(text, njobs)))
            per_chunk = joblib.Parallel(n_jobs=njobs)(jobs)

            # merge the outputs of all the chunks
            phonemized = self._flatten(per_chunk)

        return self._phonemize_postprocess(
            phonemized, punctuation_marks, separator, strip)

    @staticmethod
    def _flatten(phonemized: List[List[Any]]):
        """Merges a list of lists into a single list

        From [[1, 2], [3], [4]] returns [1, 2, 3, 4]. Used to collapse the
        per-chunk outputs obtained when running multiple jobs.

        """
        return list(itertools.chain.from_iterable(phonemized))

    @abc.abstractmethod
    def _phonemize_aux(self, text: List[str], offset: int, separator: Separator, strip: bool) -> List[str]:
        """The "concrete" phonemization method

        Must be implemented in child classes. `separator` and `strip`
        parameters are as given to the phonemize() method. `text` is as
        returned by _phonemize_preprocess(). `offset` is the line number of
        the first line in `text` with respect to the original text (this is
        only useful when running on chunks in multiple jobs. When using a
        single job the offset is 0).

        """

    def _phonemize_preprocess(self, text: List[str]) -> Tuple[Union[str, List[str]], List]:
        """Preprocess the text before phonemization

        Removes the punctuation. When the `preserve_punctuation` option is
        set, the punctuation marks are returned along with the text for
        further restoration, otherwise an empty list of marks is returned.

        """
        if not self._preserve_punctuation:
            return self._punctuator.remove(text), []

        # the punctuator returns a tuple (text, punctuation marks)
        return self._punctuator.preserve(text)

    def _phonemize_postprocess(self, phonemized: List[str],
                               punctuation_marks,
                               separator: Separator,
                               strip: bool):
        """Postprocess the raw phonemized output

        Restores the punctuation when the `preserve_punctuation` option is
        set, returns `phonemized` unchanged otherwise.

        """
        if not self._preserve_punctuation:
            return phonemized

        return self._punctuator.restore(
            phonemized, punctuation_marks, separator, strip)
@@ -0,0 +1,15 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Phonemizer module for espeak backend implementation"""
@@ -0,0 +1,275 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Low-level bindings to the espeak API"""
import atexit
import ctypes
import pathlib
import shutil
import sys
import tempfile
import weakref
from ctypes import CDLL
from pathlib import Path
from typing import Union
from phonemizer.backend.espeak.voice import EspeakVoice
if sys.platform != 'win32':
# cause a crash on Windows
import dlinfo
class EspeakAPI:
    """Exposes the espeak API to the EspeakWrapper

    This class exposes only low-level bindings to the API and should not be
    used directly.

    Parameters
    ----------
    library (str or Path) : the espeak shared library to load

    Raises
    ------
    RuntimeError if the espeak library cannot be loaded or initialized

    """
    def __init__(self, library: Union[str, Path]):
        # set to None to avoid an AttributeError in _delete if the __init__
        # method raises, will be properly initialized below
        self._library = None

        # Because the library is not designed to be wrapped nor to be used in
        # multithreaded/multiprocess contexts (massive use of global variables)
        # we need a copy of the original library for each instance of the
        # wrapper... (see "man dlopen" on Linux/MacOS: we cannot load two times
        # the same library because a reference is then returned by dlopen). The
        # tweak is therefore to make a copy of the original library in a
        # different (temporary) directory.
        try:
            # load the original library in order to retrieve its full path.
            # Forced as str as it is required on Windows.
            espeak: CDLL = ctypes.cdll.LoadLibrary(str(library))
            library_path = self._shared_library_path(espeak)
            del espeak
        except OSError as error:
            raise RuntimeError(
                f'failed to load espeak library: {str(error)}') from None

        # hosts the private copy of the library, destroyed by _delete()
        self._tempdir = tempfile.mkdtemp()
        try:
            espeak_copy = pathlib.Path(self._tempdir) / library_path.name
            shutil.copy(library_path, espeak_copy, follow_symlinks=False)
            self._library = ctypes.cdll.LoadLibrary(str(espeak_copy))
        except BaseException:
            # no cleanup is registered yet: do not leak the directory
            shutil.rmtree(self._tempdir, ignore_errors=True)
            raise

        # properly exit when the wrapper object is destroyed (see
        # https://docs.python.org/3/library/weakref.html#comparing-finalizers-with-del-methods).
        # But... weakref implementation does not work on windows so we register
        # the cleanup with atexit. This means that, on Windows, all the
        # temporary directories created by EspeakAPI instances will remain on
        # disk until the Python process exit.
        #
        # The cleanup is registered only once the copy is loaded: the
        # finalizer arguments are bound at registration time, so registering
        # it earlier would bind library=None and espeak_Terminate() would
        # never be called.
        if sys.platform == 'win32':  # pragma: nocover
            atexit.register(self._delete_win32)
        else:
            weakref.finalize(self, self._delete, self._library, self._tempdir)

        # finally initialize the library copy. 0x02 is
        # AUDIO_OUTPUT_SYNCHRONOUS in the espeak API
        try:
            if self._library.espeak_Initialize(0x02, 0, None, 0) <= 0:
                raise RuntimeError(  # pragma: nocover
                    'failed to initialize espeak shared library')
        except AttributeError:  # pragma: nocover
            # the loaded library does not expose the espeak API
            raise RuntimeError(
                'failed to load espeak library') from None

        # the path to the original one (the copy is considered an
        # implementation detail and is not exposed)
        self._library_path = library_path

    def _delete_win32(self):  # pragma: nocover
        # Windows does not support static methods with ctypes libraries
        # (library == None) so we use a proxy method...
        self._delete(self._library, self._tempdir)

    @staticmethod
    def _delete(library, tempdir):
        """Terminates espeak and removes the library copy in `tempdir`

        Parameters
        ----------
        library : the loaded library copy, or None if it has not been loaded
        tempdir (str) : the temporary directory containing the library copy

        """
        try:
            # clean up the espeak library allocated memory
            library.espeak_Terminate()
        except AttributeError:  # library not loaded
            pass

        # on Windows it is required to unload the library or the .dll file
        # cannot be erased from the temporary directory
        if sys.platform == 'win32' and library is not None:  # pragma: nocover
            # pylint: disable=import-outside-toplevel
            # pylint: disable=protected-access
            # pylint: disable=no-member
            import _ctypes
            _ctypes.FreeLibrary(library._handle)

        # clean up the tempdir containing the copy of the library
        shutil.rmtree(tempdir)

    @property
    def library_path(self):
        """Absolute path to the espeak library being in use"""
        return self._library_path

    @staticmethod
    def _shared_library_path(library) -> Path:
        """Returns the absolute path to `library`

        This function is cross-platform and works for Linux, MacOS and Windows.
        Raises a RuntimeError if the library path cannot be retrieved

        """
        # pylint: disable=protected-access
        path = pathlib.Path(library._name).resolve()
        if path.is_file():
            return path

        try:
            # dlinfo is imported on Linux and MacOS only: on Windows the name
            # is undefined and the lookup fails with a NameError
            return pathlib.Path(dlinfo.DLInfo(library).path).resolve()
        except Exception:  # pragma: nocover
            raise RuntimeError(
                f'failed to retrieve the path to {library} library') from None

    def info(self):
        """Bindings to espeak_Info

        Returns
        -------
        version, data_path: encoded strings containing the espeak version
            number and data path respectively

        """
        f_info = self._library.espeak_Info
        f_info.restype = ctypes.c_char_p
        # filled by espeak_Info with the path to the espeak data
        data_path = ctypes.c_char_p()
        version = f_info(ctypes.byref(data_path))
        return version, data_path.value

    def list_voices(self, name):
        """Bindings to espeak_ListVoices

        Parameters
        ----------
        name (str or None): if specified, a filter on voices to be listed

        Returns
        -------
        voices: a pointer to pointers of EspeakVoice.VoiceStruct instances

        """
        f_list_voices = self._library.espeak_ListVoices
        f_list_voices.argtypes = [ctypes.POINTER(EspeakVoice.VoiceStruct)]
        f_list_voices.restype = ctypes.POINTER(
            ctypes.POINTER(EspeakVoice.VoiceStruct))
        return f_list_voices(name)

    def set_voice_by_name(self, name) -> int:
        """Bindings to espeak_SetVoiceByName

        Parameters
        ----------
        name (str) : the voice name to setup

        Returns
        -------
        0 on success, non-zero integer on failure

        """
        f_set_voice_by_name = self._library.espeak_SetVoiceByName
        f_set_voice_by_name.argtypes = [ctypes.c_char_p]
        return f_set_voice_by_name(name)

    def get_current_voice(self):
        """Bindings to espeak_GetCurrentVoice

        Returns
        -------
        a EspeakVoice.VoiceStruct instance or None if no voice has been setup

        """
        f_get_current_voice = self._library.espeak_GetCurrentVoice
        f_get_current_voice.restype = ctypes.POINTER(EspeakVoice.VoiceStruct)

        voice = f_get_current_voice()
        # a NULL pointer is false, accessing its contents raises ValueError
        if not voice:
            return None
        return voice.contents

    def text_to_phonemes(self, text_ptr, text_mode, phonemes_mode):
        """Bindings to espeak_TextToPhonemes

        Parameters
        ----------
        text_ptr (pointer): the text to be phonemized, as a pointer to a
            pointer of chars
        text_mode (bits field): see espeak sources for details
        phonemes_mode (bits field): see espeak sources for details

        Returns
        -------
        an encoded string containing the computed phonemes

        """
        f_text_to_phonemes = self._library.espeak_TextToPhonemes
        f_text_to_phonemes.restype = ctypes.c_char_p
        f_text_to_phonemes.argtypes = [
            ctypes.POINTER(ctypes.c_char_p),
            ctypes.c_int,
            ctypes.c_int]
        return f_text_to_phonemes(text_ptr, text_mode, phonemes_mode)

    def set_phoneme_trace(self, mode, file_pointer):
        """Bindings on espeak_SetPhonemeTrace

        This method must be called before any call to synthetize()

        Parameters
        ----------
        mode (bits field): see espeak sources for details
        file_pointer (FILE*): a pointer to an opened file in which to output
            the phoneme trace

        """
        f_set_phoneme_trace = self._library.espeak_SetPhonemeTrace
        f_set_phoneme_trace.argtypes = [
            ctypes.c_int,
            ctypes.c_void_p]
        f_set_phoneme_trace(mode, file_pointer)

    def synthetize(self, text_ptr, size, mode):
        """Bindings on espeak_Synth

        The output phonemes are sent to the file specified by a call to
        set_phoneme_trace().

        Parameters
        ----------
        text_ptr (pointer) : a pointer to chars
        size (int) : number of chars in the text
        mode (bits field) : forwarded as the `flags` argument of
            espeak_Synth, see espeak sources for details

        Returns
        -------
        0 on success, non-zero integer on failure

        """
        f_synthetize = self._library.espeak_Synth
        # espeak_Synth takes 8 arguments, one type is declared for each
        f_synthetize.argtypes = [
            ctypes.c_void_p,  # text
            ctypes.c_size_t,  # size
            ctypes.c_uint,  # position
            ctypes.c_int,  # position_type
            ctypes.c_uint,  # end_position
            ctypes.c_uint,  # flags
            ctypes.POINTER(ctypes.c_uint),  # unique_identifier
            ctypes.c_void_p]  # user_data
        return f_synthetize(text_ptr, size, 0, 1, 0, mode, None, None)
@@ -0,0 +1,113 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Base class of espeak backends for the phonemizer"""
import abc
from logging import Logger
from typing import Optional, Union, Pattern
from phonemizer.backend.base import BaseBackend
from phonemizer.backend.espeak.wrapper import EspeakWrapper
from phonemizer.logger import get_logger
from phonemizer.punctuation import Punctuation
from phonemizer.separator import Separator
class BaseEspeakBackend(BaseBackend):
    """Abstract espeak backend for the phonemizer

    Common ground of the concrete backends Espeak and EspeakMbrola. It
    provides facilities to find the espeak library and read the espeak
    version, both delegated to EspeakWrapper.

    """
    def __init__(self, language: str,
                 punctuation_marks: Optional[Union[str, Pattern]] = None,
                 preserve_punctuation: bool = False,
                 logger: Optional[Logger] = None):
        super().__init__(
            language,
            punctuation_marks=punctuation_marks,
            preserve_punctuation=preserve_punctuation,
            logger=logger)

        # the wrapper used by this backend instance to talk to espeak
        wrapper = EspeakWrapper()
        self._espeak = wrapper
        self.logger.debug('loaded %s', wrapper.library_path)

    @classmethod
    def set_library(cls, library):
        """Sets the espeak backend to use `library`

        If this is not set, the backend uses the default espeak shared library
        from the system installation.

        Parameters
        ----------
        library (str or None) : the path to the espeak shared library to use as
            backend. Set `library` to None to restore the default.

        """
        EspeakWrapper.set_library(library)

    @classmethod
    def library(cls):
        """Returns the espeak library used as backend

        The following precedence rule applies for library lookup:

        1. As specified by BaseEspeakBackend.set_library()
        2. Or as specified by the environment variable
           PHONEMIZER_ESPEAK_LIBRARY
        3. Or the default espeak library found on the system

        Raises
        ------
        RuntimeError if the espeak library cannot be found or if the
            environment variable PHONEMIZER_ESPEAK_LIBRARY is set to a
            non-readable file

        """
        return EspeakWrapper.library()

    @classmethod
    def is_available(cls) -> bool:
        # espeak is available when a wrapper can be instantiated
        try:
            EspeakWrapper()
        except RuntimeError:  # pragma: nocover
            available = False
        else:
            available = True
        return available

    @classmethod
    def is_espeak_ng(cls) -> bool:
        """Returns True if using espeak-ng, False otherwise"""
        # espeak-ng starts with version 1.49
        first_espeak_ng_version = (1, 49)
        return cls.version() >= first_espeak_ng_version

    @classmethod
    def version(cls):
        """Espeak version as a tuple (major, minor, patch)

        Raises
        ------
        RuntimeError if BaseEspeakBackend.is_available() is False or if the
            version cannot be extracted for some reason.

        """
        wrapper = EspeakWrapper()
        return wrapper.version

    @abc.abstractmethod
    def _postprocess_line(self, line: str, num: int,
                          separator: Separator, strip: bool) -> str:
        """Postprocesses a phonemized `line`, implemented in child classes"""
@@ -0,0 +1,172 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Espeak backend for the phonemizer"""
import itertools
import re
from logging import Logger
from typing import Optional, Tuple, List, Union, Pattern
from phonemizer.backend.espeak.base import BaseEspeakBackend
from phonemizer.backend.espeak.language_switch import (
get_language_switch_processor, LanguageSwitch, BaseLanguageSwitch)
from phonemizer.backend.espeak.words_mismatch import (
get_words_mismatch_processor, WordMismatch, BaseWordsMismatch)
from phonemizer.backend.espeak.wrapper import EspeakWrapper
from phonemizer.separator import Separator
class EspeakBackend(BaseEspeakBackend):
    """Espeak backend for the phonemizer

    Parameters
    ----------
    language, punctuation_marks, preserve_punctuation, logger:
        Forwarded to the parent class constructor.

    with_stress: bool
        When False (default) the stress marks are removed from the phonemes.

    tie: bool or str
        When False (default) no tie is used. When True the default U+361 tie
        character is used. Otherwise the single character to use as tie.

    language_switch: str
        The policy applied on language switches, one of 'keep-flags',
        'remove-flags' or 'remove-utterance'.

    words_mismatch: str
        The words mismatch processor to use, default to 'ignore'.

    Raises
    ------
    RuntimeError
        If `tie` is a string which is not a single character.

    """
    # a regular expression to find phonemes stresses in espeak output
    _ESPEAK_STRESS_RE = re.compile(r"[ˈˌ'-]+")

    # pylint: disable=too-many-arguments
    def __init__(self, language: str,
                 punctuation_marks: Optional[Union[str, Pattern]] = None,
                 preserve_punctuation: bool = False,
                 with_stress: bool = False,
                 tie: Union[bool, str] = False,
                 language_switch: LanguageSwitch = 'keep-flags',
                 words_mismatch: WordMismatch = 'ignore',
                 logger: Optional[Logger] = None):
        super().__init__(
            language, punctuation_marks=punctuation_marks,
            preserve_punctuation=preserve_punctuation, logger=logger)

        self._espeak.set_voice(language)
        self._with_stress = with_stress
        self._tie = self._init_tie(tie)
        self._lang_switch: BaseLanguageSwitch = get_language_switch_processor(
            language_switch, self.logger, self.language)
        self._words_mismatch: BaseWordsMismatch = get_words_mismatch_processor(
            words_mismatch, self.logger)

    @staticmethod
    def _init_tie(tie) -> Optional[str]:
        """Returns the tie character to use, or None if ties are disabled"""
        if not tie:
            return None

        if tie is True:  # default U+361 tie character
            return '\u0361'

        # non default tie character
        tie = str(tie)
        if len(tie) != 1:
            raise RuntimeError(
                f'explicit tie must be a single charcacter but is {tie}')
        return tie

    @staticmethod
    def name():
        return 'espeak'

    @classmethod
    def supported_languages(cls):
        return {
            voice.language: voice.name
            for voice in EspeakWrapper().available_voices()}

    def _phonemize_aux(self, text, offset, separator, strip):
        """Phonemizes each line of `text`

        Returns a tuple (phonemized lines, numbers of the lines where a
        language switch has been detected). Line numbers start at 1 and are
        shifted by `offset`.

        """
        if self._tie is not None and separator.phone:
            self.logger.warning(
                'cannot use ties AND phone separation, '
                'ignoring phone separator')

        output = []
        lang_switches = []
        for num, line in enumerate(text, start=1):
            line = self._espeak.text_to_phonemes(line, self._tie)
            line, has_switch = self._postprocess_line(
                line, num, separator, strip)
            output.append(line)
            if has_switch:
                lang_switches.append(num + offset)

        return output, lang_switches

    def _process_stress(self, word):
        """Removes the stress marks from `word` unless stresses are kept"""
        if self._with_stress:
            return word

        # remove the stresses on phonemes
        return self._ESPEAK_STRESS_RE.sub('', word)

    def _process_tie(self, word: str, separator: Separator):
        """Replaces the default tie or the '_' phone separator in `word`"""
        # NOTE a bug in espeak appends ties to language flags such as (en).
        # We do not correct it here.
        if self._tie is not None and self._tie != '\u0361':
            # replace default U+361 tie by the requested one
            return word.replace('\u0361', self._tie)
        return word.replace('_', separator.phone)

    def _postprocess_line(self, line: str, num: int,
                          separator: Separator, strip: bool) -> Tuple[str, bool]:
        """Formats a line output by espeak

        Returns a tuple (formatted line, has_switch), where has_switch is
        True when the language switch processor detected a switch in the
        line. The line is '' when left empty by that processor.

        """
        # espeak can split an utterance into several lines because
        # of punctuation, here we merge the lines into a single one
        line = line.strip().replace('\n', ' ')

        # merging can leave several consecutive spaces, collapse them or
        # empty words would come out of the split on spaces below
        line = re.sub(r' {2,}', ' ', line)

        # due to a bug in espeak-ng, some additional separators can be
        # added at the end of a word. Here a quick fix to solve that
        # issue. See https://github.com/espeak-ng/espeak-ng/issues/694
        line = re.sub(r'_+', '_', line)
        line = re.sub(r'_ ', ' ', line)

        line, has_switch = self._lang_switch.process(line)
        if not line:
            return '', has_switch

        # a trailing phone separator is added to each word, except when
        # stripping or when using ties
        append_phone_separator = not strip and self._tie is None

        words = []
        for word in line.split(' '):
            word = self._process_stress(word.strip())
            if append_phone_separator:
                word += '_'
            words.append(self._process_tie(word, separator))

        if strip:
            # no word separator after the last word of the line
            return separator.word.join(words), has_switch
        return ''.join(word + separator.word for word in words), has_switch

    def _phonemize_preprocess(self, text: List[str]) -> Tuple[Union[str, List[str]], List]:
        text, punctuation_marks = super()._phonemize_preprocess(text)
        self._words_mismatch.count_text(text)
        return text, punctuation_marks

    def _phonemize_postprocess(self, phonemized, punctuation_marks, separator: Separator, strip: bool):
        # phonemized is a pair (phonemized lines, language switches)
        text = phonemized[0]
        switches = phonemized[1]

        self._words_mismatch.count_phonemized(text, separator)
        self._lang_switch.warning(switches)

        phonemized = super()._phonemize_postprocess(text, punctuation_marks, separator, strip)
        return self._words_mismatch.process(phonemized)

    @staticmethod
    def _flatten(phonemized) -> List:
        """Specialization of BaseBackend._flatten for the espeak backend

        From [([1, 2], ['a', 'b']), ([3], []), ([4], ['c'])] to [[1, 2, 3, 4],
        ['a', 'b', 'c']]. Without any chunk, returns a pair of empty lists.

        """
        if not phonemized:
            # no chunk at all: empty phonemized text and no language switch
            return [[], []]

        return [
            list(itertools.chain.from_iterable(
                chunk[index] for chunk in phonemized))
            for index in range(len(phonemized[0]))]
@@ -0,0 +1,193 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Manages language switches for the espeak backend
This module is used in phonemizer.backend.EspeakBackend and should be
considered private.
It manages languages switches that occur during phonemization, where a part of
a text is phonemized in a language different from the target language. For
instance the sentence "j'aime le football" in French will be phonemized by
espeak as "ʒɛm lə (en)fʊtbɔːl(fr)", "football" being pronounced as an English
word. This may cause two issues to end users. First, it introduces undesirable
(.) language switch flags. Second, it may introduce extra phones that are not
present in the target language phoneset.
This module implements 3 alternative solutions the user can choose when
initializing the espeak backend:
- 'keep-flags' preserves the language switch flags,
- 'remove-flags' removes the flags (.) but preserves the words with alternative
phoneset,
- 'remove-utterance' removes the utterances where flags are detected.
"""
import abc
import re
from logging import Logger
from typing import List, Tuple
from typing_extensions import TypeAlias, Literal
# the language switch policies accepted by get_language_switch_processor()
LanguageSwitch: TypeAlias = Literal['keep-flags', 'remove-flags', 'remove-utterance']
def get_language_switch_processor(mode: LanguageSwitch, logger: Logger, language: str) -> 'BaseLanguageSwitch':
    """Builds the language switch processor matching `mode`

    The `mode` can be one of the following:

    - 'keep-flags' to preserve the switch flags
    - 'remove-flags' to suppress the switch flags
    - 'remove-utterance' to suppress the entire utterance

    The processor is instantiated with `logger` and `language`.

    Raises a RuntimeError if the `mode` is unknown.

    """
    processors = {
        'keep-flags': KeepFlags,
        'remove-flags': RemoveFlags,
        'remove-utterance': RemoveUtterances}

    try:
        processor_class = processors[mode]
    except KeyError:
        raise RuntimeError(
            f'mode "{mode}" invalid, must be in {", ".join(processors)}'
        ) from None

    return processor_class(logger, language)
class BaseLanguageSwitch(abc.ABC):
    """The base class for language switch processors

    Parameters
    ----------
    logger (logging.Logger) : a logger instance to send warnings when language
        switches are detected.
    language (str) : the language code currently in use by the phonemizer, to
        customize warning content

    """
    # a regular expression to find language switch flags in espeak output,
    # Switches have the following form (here a switch from English to French):
    # "something (fr)quelque chose(en) another thing".
    _ESPEAK_FLAGS_RE = re.compile(r'\(.+?\)')

    def __init__(self, logger: Logger, language: str):
        self._logger = logger
        self._language = language

    @classmethod
    def is_language_switch(cls, utterance: str) -> bool:
        """Returns True if a language switch flag is found in `utterance`"""
        return cls._ESPEAK_FLAGS_RE.search(utterance) is not None

    @classmethod
    @abc.abstractmethod
    def process(cls, utterance: str) -> Tuple[str, bool]:
        """Detects and processes language switches according to the mode

        This method is called on each utterance as a phonemization
        post-processing step.

        Returns
        -------
        processed_utterance (str) : the utterance either preserved, deleted (as
            '') or with the switch removed
        has_switch (bool): True if a language switch flag is found in the
            `utterance` and False otherwise

        """

    @abc.abstractmethod
    def warning(self, switches: List[int]):
        """Sends warnings to the logger with recorded language switches

        This method is called a single time at the very end of the
        phonemization process.

        Parameters
        ----------
        switches (list of int) : the line numbers where language switches have
            been detected during phonemization

        """
class KeepFlags(BaseLanguageSwitch):
    """Preserves utterances even if language switch flags are present"""
    @classmethod
    def process(cls, utterance: str) -> Tuple[str, bool]:
        # the utterance is left untouched, switches are only reported
        has_switch = cls.is_language_switch(utterance)
        return utterance, has_switch

    def warning(self, switches: List[int]):
        # nothing to report when no switch has been detected
        if not switches:
            return

        lines = ', '.join(map(str, sorted(switches)))
        self._logger.warning(
            '%s utterances containing language switches '
            'on lines %s', len(switches), lines)
        self._logger.warning(
            'extra phones may appear in the "%s" phoneset', self._language)
        self._logger.warning(
            'language switch flags have been kept '
            '(applying "keep-flags" policy)')
class RemoveFlags(BaseLanguageSwitch):
    """Strips the language switch flags from the utterances"""

    @classmethod
    def process(cls, utterance: str) -> Tuple[str, bool]:
        if not cls.is_language_switch(utterance):
            return utterance, False

        # strip every (lang) flag found in the current utterance
        stripped = cls._ESPEAK_FLAGS_RE.sub('', utterance)
        return stripped, True

    def warning(self, switches: List[int]):
        # nothing to report when no switch has been recorded
        if switches:
            lines = ', '.join(str(num) for num in sorted(switches))
            self._logger.warning(
                '%s utterances containing language switches '
                'on lines %s', len(switches), lines)
            self._logger.warning(
                'extra phones may appear in the "%s" phoneset',
                self._language)
            self._logger.warning(
                'language switch flags have been removed '
                '(applying "remove-flags" policy)')
class RemoveUtterances(BaseLanguageSwitch):
    """Drops any utterance in which a language switch flag is detected"""

    @classmethod
    def process(cls, utterance: str) -> Tuple[str, bool]:
        has_switch = cls.is_language_switch(utterance)
        # a flagged utterance is replaced by an empty string
        return ('', True) if has_switch else (utterance, False)

    def warning(self, switches: List[int]):
        # nothing to report when no switch has been recorded
        if switches:
            self._logger.warning(
                'removed %s utterances containing language switches '
                '(applying "remove-utterance" policy)', len(switches))
@@ -0,0 +1,108 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Mbrola backend for the phonemizer"""
import pathlib
import shutil
import sys
from logging import Logger
from pathlib import Path
from typing import Union, Optional, List, Dict
from phonemizer.backend.espeak.base import BaseEspeakBackend
from phonemizer.backend.espeak.wrapper import EspeakWrapper
from phonemizer.separator import Separator
class EspeakMbrolaBackend(BaseEspeakBackend):
    """Espeak-mbrola backend for the phonemizer"""

    # this will be initialized once, at the first call to supported_languages()
    _supported_languages = None

    def __init__(self, language: str, logger: Optional[Logger] = None):
        """Initializes the backend and selects the voice `language` in espeak"""
        super().__init__(language, logger=logger)
        self._espeak.set_voice(language)

    @staticmethod
    def name():
        """The name of the backend"""
        return 'espeak-mbrola'

    @classmethod
    def is_available(cls) -> bool:
        """Mbrola backend is available for espeak>=1.49

        Returns True only when the espeak backend is available, a `mbrola`
        executable is found in the PATH and
        `BaseEspeakBackend.is_espeak_ng()` holds.

        """
        # shutil.which() returns a path or None, so the raw `and` chain may
        # evaluate to a string or None: coerce it to the annotated bool
        return bool(
            BaseEspeakBackend.is_available() and
            shutil.which('mbrola') and
            BaseEspeakBackend.is_espeak_ng())

    @classmethod
    def _all_supported_languages(cls):
        """Returns the mbrola voices known by espeak as {code: voice name}"""
        # retrieve the mbrola voices. These voices must be installed
        # separately.
        voices = EspeakWrapper().available_voices('mbrola')
        # the code is the voice identifier without its first 3 characters
        return {voice.identifier[3:]: voice.name for voice in voices}

    @classmethod
    def _is_language_installed(cls, language: str, data_path: Union[str, Path]) \
            -> bool:
        """Returns True if the required mbrola voice is installed

        Parameters
        ----------
        language (str) : the voice code, starting with a 'mb-' prefix
        data_path (str or Path) : the espeak data directory

        """
        # this is a reimplementation of LoadMbrolaTable from espeak
        # synth_mbrola.h sources
        voice = language[3:]  # remove mb- prefix

        # convert `data_path` first: it may be a plain string, on which the
        # `/` operator is not defined
        if (pathlib.Path(data_path) / 'mbrola' / voice).is_file():
            return True  # pragma: nocover

        if sys.platform != 'win32':
            candidates = (
                f'/usr/share/mbrola/{voice}',
                f'/usr/share/mbrola/{voice}/{voice}',
                f'/usr/share/mbrola/voices/{voice}')
            return any(
                pathlib.Path(candidate).is_file() for candidate in candidates)

        return False

    @classmethod
    def supported_languages(cls) -> Dict[str, str]:  # pragma: nocover
        """Returns the list of installed mbrola voices"""
        # the installed voices are looked up once and cached at class level
        if cls._supported_languages is None:
            data_path = EspeakWrapper().data_path
            cls._supported_languages = {
                k: v for k, v in cls._all_supported_languages().items()
                if cls._is_language_installed(k, data_path)}
        return cls._supported_languages

    def _phonemize_aux(self, text: List[str], offset: int,
                       separator: Separator, strip: bool) -> List[str]:
        """Phonemizes each line of `text`, returns one output line per input"""
        output = []
        for num, line in enumerate(text, start=1):
            line = self._espeak.synthetize(line)
            line = self._postprocess_line(line, offset + num, separator, strip)
            output.append(line)
        return output

    def _postprocess_line(self, line: str, num: int,
                          separator: Separator, strip: bool) -> str:
        """Extracts the phonemes from the raw espeak output for one line

        The first tab-separated field of each non-blank row of `line` is a
        phoneme. The '_' phonemes are discarded and the remaining ones are
        joined with the phone separator. The `num` argument is not used here.

        """
        # retrieve the phonemes with the correct SAMPA alphabet (but
        # without word separation)
        phonemes = (
            phn.split('\t')[0] for phn in line.split('\n') if phn.strip())
        phonemes = separator.phone.join(pho for pho in phonemes if pho != '_')

        if not strip:
            phonemes += separator.phone
        return phonemes
@@ -0,0 +1,81 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Voice struct from Espeak API exposed to Python"""
import ctypes
# This class could be a dataclass but, for compatibility with python-3.6, we
# don't use the dataclasses module.
class EspeakVoice:
    """A helper class to expose voice structures within C and Python

    A voice is described by its name, language code and identifier, exposed
    as read-only properties. Two voices are equal, and share the same hash,
    when those three fields are equal.

    """
    def __init__(self, name: str = '', language: str = '', identifier: str = ''):
        self._name = name
        self._language = language
        self._identifier = identifier

    @property
    def name(self):
        """Voice name"""
        return self._name

    @property
    def language(self):
        """Language code"""
        return self._language

    @property
    def identifier(self):
        """Path to the voice file wrt espeak data path"""
        return self._identifier

    def __eq__(self, other: object):
        # let Python fall back to its default comparison instead of raising
        # an AttributeError when `other` is not a voice
        if not isinstance(other, EspeakVoice):
            return NotImplemented
        return (
            self.name == other.name and
            self.language == other.language and
            self.identifier == other.identifier)

    def __hash__(self):
        # consistent with __eq__: built from the same three fields
        return hash((self.name, self.language, self.identifier))

    class VoiceStruct(ctypes.Structure):  # pylint: disable=too-few-public-methods
        """A helper class to fetch voices information from the espeak library.

        The espeak_VOICE struct is defined in speak_lib.h from the espeak code.
        Here we use only name (voice name), languages (language code) and
        identifier (voice file) information.

        """
        _fields_ = [
            ('name', ctypes.c_char_p),
            ('languages', ctypes.c_char_p),
            ('identifier', ctypes.c_char_p)]

    def to_ctypes(self):
        """Converts the Voice instance to an espeak ctypes structure

        Each non-empty field is encoded as UTF-8, an empty field is exported
        as None.

        """
        return self.VoiceStruct(
            self.name.encode('utf8') if self.name else None,
            self.language.encode('utf8') if self.language else None,
            self.identifier.encode('utf8') if self.identifier else None)

    @classmethod
    def from_ctypes(cls, struct: VoiceStruct):
        """Returns a Voice instance built from an espeak ctypes structure

        A field left unset in `struct` is imported as an empty string.

        """
        return cls(
            name=(struct.name or b'').decode(),
            # discard a useless char prepended by espeak
            language=(struct.languages or b'0').decode()[1:],
            identifier=(struct.identifier or b'').decode())
@@ -0,0 +1,152 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Manages words count mismatches for the espeak backend"""
import abc
import re
from logging import Logger
from typing import List, Tuple
from typing_extensions import TypeAlias, Literal, Union
from phonemizer.separator import Separator
WordMismatch: TypeAlias = Literal["warn", "ignore"]
def get_words_mismatch_processor(mode: WordMismatch, logger: Logger) -> 'BaseWordsMismatch':
    """Returns a word count mismatch processor according to `mode`

    The `mode` can be one of the following:
    - `ignore` to ignore words mismatches
    - `warn` to display a warning on each mismatched utterance
    - `remove` to remove any utterance containing a words mismatch

    Raises a RuntimeError if the `mode` is unknown.

    """
    processors = {
        'ignore': Ignore,
        'warn': Warn,
        'remove': Remove}

    # only the lookup is guarded, so as a KeyError raised while building the
    # processor is never reported as an invalid mode
    try:
        processor = processors[mode]
    except KeyError:
        raise RuntimeError(
            f'mode {mode} invalid, must be in {", ".join(processors.keys())}'
        ) from None

    return processor(logger)
class BaseWordsMismatch(abc.ABC):
    """The base class of all word count mismatch processors

    The expected usage is to call `count_text()` on the input text, then
    `count_phonemized()` on the phonemized output and finally `process()`.

    """
    _RE_SPACES = re.compile(r'\s+')

    def __init__(self, logger: Logger):
        self._logger = logger
        # number of words in each line of the input and phonemized texts
        self._count_txt = []
        self._count_phn = []

    @classmethod
    def _count_words(
            cls,
            text: List[str],
            wordsep: Union[str, re.Pattern] = _RE_SPACES) -> List[int]:
        """Return the number of words contained in each line of `text`

        The `wordsep` is either a compiled regular expression or a raw string
        (escaped before use). Empty words are not counted.

        """
        if not isinstance(wordsep, re.Pattern):
            wordsep = re.escape(wordsep)

        return [
            len([w for w in re.split(wordsep, line.strip()) if w])
            for line in text]

    def _mismatched_lines(self) -> List[Tuple[int, int, int]]:
        """Returns a list of (num_line, nwords_input, nwords_output)

        Consider only the lines where nwords_input != nwords_output. The
        `num_line` index starts at 0. Raises a RuntimeError if input and
        output do not have the same number of lines.

        """
        if len(self._count_txt) != len(self._count_phn):
            raise RuntimeError(  # pragma: nocover
                f'number of lines in input and output must be equal, '
                f'we have: input={len(self._count_txt)}, '
                f'output={len(self._count_phn)}')

        return [
            (n, t, p) for n, (t, p) in
            enumerate(zip(self._count_txt, self._count_phn))
            if t != p]

    def _resume(self, nmismatch: int, nlines: int):
        """Logs a high level undetailed warning

        Nothing is logged when `nmismatch` is 0.

        """
        if nmismatch:
            # the ratio is rounded to a whole percent. The outer round()
            # discards the float noise introduced by the multiplication
            # (0.29 * 100 is 28.999999999999996) while keeping a float
            percent = round(round(nmismatch / nlines, 2) * 100, 1)
            self._logger.warning(
                'words count mismatch on %s%% of the lines (%s/%s)',
                percent, nmismatch, nlines)

    def count_text(self, text: List[str]):
        """Stores the number of words in each input line"""
        self._count_txt = self._count_words(text)

    def count_phonemized(self, text: List[str], separator: 'Separator'):
        """Stores the number of words in each output line"""
        self._count_phn = self._count_words(text, separator.word)

    @abc.abstractmethod
    def process(self, text: List[str]) -> List[str]:
        """Detects and process word count mismatches according to the mode

        This method is called at the very end of phonemization, during
        post-processing.

        """
class Ignore(BaseWordsMismatch):
    """Leaves the text untouched and only logs the global mismatch summary"""

    def process(self, text: List[str]) -> List[str]:
        nmismatch = len(self._mismatched_lines())
        self._resume(nmismatch, len(text))
        return text
class Warn(BaseWordsMismatch):
    """Logs a warning for each mismatched line, the text is left untouched"""

    def process(self, text: List[str]) -> List[str]:
        mismatched = self._mismatched_lines()

        # line indices start at 0 but are reported starting at 1
        for index, expected, observed in mismatched:
            self._logger.warning(
                'words count mismatch on line %s '
                '(expected %s words but get %s)',
                index + 1, expected, observed)

        self._resume(len(mismatched), len(text))
        return text
class Remove(BaseWordsMismatch):
    """Empties any utterance containing a word count mismatch"""

    def process(self, text: List[str]) -> List[str]:
        indices = [num for num, _, _ in self._mismatched_lines()]

        self._resume(len(indices), len(text))
        self._logger.warning('removing the mismatched lines')

        # the mismatched lines are emptied in place, in the `text` list
        for index in indices:
            text[index] = ''
        return text
@@ -0,0 +1,370 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Wrapper on espeak-ng library"""
import ctypes
import ctypes.util
import functools
import os
import pathlib
import sys
import tempfile
import weakref
from typing import Tuple, Dict
from phonemizer.backend.espeak.api import EspeakAPI
from phonemizer.backend.espeak.voice import EspeakVoice
class EspeakWrapper:
    """Wrapper on espeak shared library

    The aim of this wrapper is not to be exhaustive but to encapsulate the
    espeak functions required for phonemization. It relies on a espeak shared
    library (*.so on Linux, *.dylib on Mac and *.dll on Windows) that must be
    installed on the system.

    Use the function `EspeakWrapper.set_library()` before instanciation to
    customize the library to use.

    Raises
    ------
    RuntimeError if the espeak shared library cannot be loaded

    """
    # a static variable used to overload the default espeak library installed
    # on the system. The user can choose an alternative espeak library with
    # the method EspeakWrapper.set_library().
    _ESPEAK_LIBRARY = None

    def __init__(self):
        # the following attributes are accessed through properties and are
        # lazily initialized
        self._version: Tuple[int, ...] = None
        self._data_path = None
        self._voice = None

        # load the espeak API
        self._espeak = EspeakAPI(self.library())

        # lazy loading of attributes only required for the synthetize method
        self._libc_ = None
        self._tempfile_ = None

    @property
    def _libc(self):
        """The C standard library, loaded on first access"""
        if self._libc_ is None:
            self._libc_ = (
                ctypes.windll.msvcrt if sys.platform == 'win32' else
                ctypes.cdll.LoadLibrary(ctypes.util.find_library('c')))
        return self._libc_

    @property
    def _tempfile(self):
        """The temporary file receiving espeak output, created on first access"""
        if self._tempfile_ is None:
            # this will automatically removed at exit
            # pylint: disable=consider-using-with
            self._tempfile_ = tempfile.NamedTemporaryFile()
            weakref.finalize(self._tempfile_, self._tempfile_.close)
        return self._tempfile_

    def __getstate__(self):
        """For pickling, when phonemizing on multiple jobs"""
        return {
            'version': self._version,
            'data_path': self._data_path,
            'voice': self._voice}

    def __setstate__(self, state: Dict):
        """For unpickling, when phonemizing on multiple jobs"""
        # reload the espeak library, then restore the pickled attributes
        self.__init__()
        self._version = state['version']
        self._data_path = state['data_path']
        self._voice = state['voice']

        # the voice must be set up again in the freshly loaded library
        if self._voice:
            if 'mb' in self._voice.identifier:  # mbrola voice
                self.set_voice(self._voice.identifier[3:])
            else:
                self.set_voice(self._voice.language)

    @classmethod
    def set_library(cls, library: str):
        """Sets the espeak backend to use `library`

        If this is not set, the backend uses the default espeak shared library
        from the system installation.

        Parameters
        ----------
        library (str or None) : the path to the espeak shared library to use as
          backend. Set `library` to None to restore the default.

        """
        cls._ESPEAK_LIBRARY = library

    @classmethod
    def library(cls):
        """Returns the espeak library used as backend

        The following precedence rule applies for library lookup:

        1. As specified by EspeakWrapper.set_library()
        2. Or as specified by the environment variable
           PHONEMIZER_ESPEAK_LIBRARY
        3. Or the default espeak library found on the system

        Raises
        ------
        RuntimeError if the espeak library cannot be found or if the
          environment variable PHONEMIZER_ESPEAK_LIBRARY is set to a
          non-readable file

        """
        if cls._ESPEAK_LIBRARY:
            return cls._ESPEAK_LIBRARY

        if 'PHONEMIZER_ESPEAK_LIBRARY' in os.environ:
            library = pathlib.Path(os.environ['PHONEMIZER_ESPEAK_LIBRARY'])
            if not (library.is_file() and os.access(library, os.R_OK)):
                raise RuntimeError(  # pragma: nocover
                    f'PHONEMIZER_ESPEAK_LIBRARY={library} '
                    f'is not a readable file')
            return library.resolve()

        # espeak-ng is looked up first, espeak is the fallback
        library = (
            ctypes.util.find_library('espeak-ng') or
            ctypes.util.find_library('espeak'))
        if not library:  # pragma: nocover
            raise RuntimeError(
                'failed to find espeak library')
        return library

    def _fetch_version_and_path(self):
        """Initializes version and data path from the espeak library"""
        version, data_path = self._espeak.info()

        # pylint: disable=no-member
        self._data_path = pathlib.Path(data_path.decode())
        if not self._data_path.is_dir():  # pragma: nocover
            raise RuntimeError('failed to retrieve espeak data directory')

        # espeak-1.48 appends the release date to version number, here we
        # simply ignore it
        version = version.decode().strip().split(' ')[0].replace('-dev', '')
        self._version = tuple(int(v) for v in version.split('.'))

    @property
    def version(self) -> Tuple[int, int, int]:
        """The espeak version as a tuple of integers (major, minor, patch)"""
        if self._version is None:
            self._fetch_version_and_path()
        return self._version

    @property
    def library_path(self):
        """The espeak library as a pathlib.Path instance"""
        return self._espeak.library_path

    @property
    def data_path(self):
        """The espeak data directory as a pathlib.Path instance"""
        if self._data_path is None:
            self._fetch_version_and_path()
        return self._data_path

    @property
    def voice(self):
        """The configured voice as an EspeakVoice instance

        If `set_voice` has not been called, returns None

        """
        return self._voice

    # NOTE(review): a lru_cache on a method keys on `self` and keeps every
    # instance alive for the lifetime of the cache (ruff B019). Left as is
    # because callers may rely on the `cache_clear`/`cache_info` attributes.
    @functools.lru_cache(maxsize=None)
    def available_voices(self, name=None):
        """Voices available for phonemization, as a list of `EspeakVoice`

        When `name` is specified, it is forwarded to espeak as the language
        field of a voice specification, in order to filter the voices.

        """
        if name:
            name = EspeakVoice(language=name).to_ctypes()
        voices = self._espeak.list_voices(name or None)

        index = 0
        available_voices = []
        # voices is an array to pointers, terminated by None
        while voices[index]:
            voice = voices[index].contents
            available_voices.append(EspeakVoice(
                name=os.fsdecode(voice.name).replace('_', ' '),
                language=os.fsdecode(voice.languages)[1:],
                identifier=os.fsdecode(voice.identifier)))
            index += 1
        return available_voices

    def set_voice(self, voice_code):
        """Setup the voice to use for phonemization

        Parameters
        ----------
        voice_code (str) : Must be a valid language code that is actually
          supported by espeak

        Raises
        ------
        RuntimeError if the required voice cannot be initialized

        """
        if 'mb' in voice_code:
            # this is an mbrola voice code. Select the voice by using
            # identifier in the format 'mb/{voice_code}'
            available = {
                voice.identifier[3:]: voice.identifier
                for voice in self.available_voices('mbrola')}
        else:
            # this are espeak voices. Select the voice using it's attached
            # language code. Consider only the first voice of a given code as
            # they are sorted by relevancy
            available = {}
            for voice in self.available_voices():
                if voice.language not in available:
                    available[voice.language] = voice.identifier

        try:
            voice_name = available[voice_code]
        except KeyError:
            raise RuntimeError(f'invalid voice code "{voice_code}"') from None

        if self._espeak.set_voice_by_name(voice_name.encode('utf8')) != 0:
            raise RuntimeError(  # pragma: nocover
                f'failed to load voice "{voice_code}"')

        voice = self._get_voice()
        if not voice:  # pragma: nocover
            raise RuntimeError(f'failed to load voice "{voice_code}"')
        self._voice = voice

    def _get_voice(self):
        """Returns the current voice used for phonemization

        If no voice has been set up, returns None.

        """
        voice = self._espeak.get_current_voice()
        if voice.name:
            return EspeakVoice.from_ctypes(voice)
        return None  # pragma: nocover

    def text_to_phonemes(self, text: str, tie: bool = False) -> str:
        """Translates a text into phonemes, must call set_voice() first.

        This method is used by the Espeak backend. Wrapper on the
        espeak_TextToPhonemes function.

        Parameters
        ----------
        text (str) : the text to phonemize

        tie (bool, optional) : When True use a '͡' character between
          consecutive characters of a single phoneme. Else separate phoneme
          with '_'. This option requires espeak>=1.49. Default to False.

        Returns
        -------
        phonemes (str) : the phonemes for the text encoded in IPA, with '_' as
          phonemes separator (excepted if ``tie`` is True) and ' ' as word
          separator.

        """
        if self.voice is None:  # pragma: nocover
            raise RuntimeError('no voice specified')

        if tie and self.version <= (1, 48, 3):
            raise RuntimeError(  # pragma: nocover
                'tie option only compatible with espeak>=1.49')

        # from Python string to C void** (a pointer to a pointer to chars)
        text_ptr = ctypes.pointer(ctypes.c_char_p(text.encode('utf8')))

        # input text is encoded as UTF8
        text_mode = 1

        # output phonemes in IPA and separated by _, or with a tie character if
        # required. See comments for the function espeak_TextToPhonemes in
        # speak_lib.h of the espeak sources for details.
        if self.version <= (1, 48, 3):  # pragma: nocover
            phonemes_mode = 0x03 | 0x01 << 4
        elif tie:
            phonemes_mode = 0x02 | 0x01 << 7 | ord('͡') << 8
        else:
            phonemes_mode = ord('_') << 8 | 0x02

        # each call processes a part of the text, the loop stops once the
        # pointed value is None
        result = []
        while text_ptr.contents.value is not None:
            phonemes = self._espeak.text_to_phonemes(
                text_ptr, text_mode, phonemes_mode)
            if phonemes:
                result.append(phonemes.decode())
        return ' '.join(result)

    def synthetize(self, text: str):
        """Translates a text into phonemes, must call set_voice() first.

        Only compatible with espeak>=1.49. This method is used by the
        EspeakMbrola backend. Wrapper on the espeak_Synthesize function.

        Parameters
        ----------
        text (str) : the text to phonemize

        Returns
        -------
        phonemes (str) : the phonemes for the text encoded in SAMPA, with '_'
          as phonemes separator and no word separation.

        Raises
        ------
        RuntimeError if the espeak version is too old, if no voice is
          specified, if the temporary output file cannot be opened or if the
          synthesis fails

        """
        if self.version < (1, 49):  # pragma: nocover
            raise RuntimeError('not compatible with espeak<=1.48')
        if self.voice is None:  # pragma: nocover
            raise RuntimeError('no voice specified')

        # init libc fopen and fclose functions
        self._libc.fopen.argtypes = [ctypes.c_char_p, ctypes.c_char_p]
        self._libc.fopen.restype = ctypes.c_void_p
        self._libc.fclose.argtypes = [ctypes.c_void_p]
        self._libc.fclose.restype = ctypes.c_int

        # output phonemes in SAMPA and separated by _. Write the result to a
        # tempfile which is read back after phonemization (seems not possible
        # to redirect to stdout). See comments for the function
        # espeak_SetPhonemeTrace in speak_lib.h of the espeak sources for
        # details.
        self._tempfile.truncate(0)
        file_p = self._libc.fopen(
            self._tempfile.name.encode(),
            self._tempfile.mode.encode())
        # fopen returns NULL on failure, which must not be sent to fclose
        if not file_p:  # pragma: nocover
            raise RuntimeError(
                f'failed to open temporary file "{self._tempfile.name}"')

        # the size sent to espeak is expressed in bytes: it must be computed
        # on the encoded text, which is longer than `text` as soon as it
        # contains non-ASCII characters
        encoded = text.encode('utf8')
        try:
            self._espeak.set_phoneme_trace(0x01 << 4 | ord('_') << 8, file_p)
            status = self._espeak.synthetize(
                ctypes.c_char_p(encoded),
                ctypes.c_size_t(len(encoded) + 1),
                ctypes.c_uint(0x01))
        finally:
            self._libc.fclose(file_p)  # because flush does not work...

        if status != 0:  # pragma: nocover
            raise RuntimeError('failed to synthetize')

        self._tempfile.seek(0)
        phonemized = self._tempfile.read().decode().strip()
        return phonemized
@@ -0,0 +1,15 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonologizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonologizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonologizer. If not, see <http://www.gnu.org/licenses/>.
"""Phonemizer module for festival backend implementation"""
@@ -0,0 +1,334 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Festival backend for the phonemizer"""
import os
import pathlib
import re
import shlex
import shutil
import subprocess
import sys
import tempfile
from logging import Logger
from pathlib import Path
from typing import Optional, Dict, List, IO, Union, Pattern
from phonemizer.backend.base import BaseBackend
from phonemizer.backend.festival import lispy
from phonemizer.separator import Separator
from phonemizer.utils import get_package_resource, version_as_tuple
class FestivalBackend(BaseBackend):
"""Festival backend for the phonemizer"""
# a static variable used to overload the default festival binary installed
# on the system. The user can choose an alternative festival binary with
# the method FestivalBackend.set_executable().
_FESTIVAL_EXECUTABLE = None
def __init__(self, language: str,
punctuation_marks: Optional[Union[str, Pattern]] = None,
preserve_punctuation: bool = False,
logger: Optional[Logger] = None):
super().__init__(
language,
punctuation_marks=punctuation_marks,
preserve_punctuation=preserve_punctuation,
logger=logger)
self.logger.debug('festival executable is %s', self.executable())
# the Scheme script to be send to festival
script_file = get_package_resource('festival/phonemize.scm')
with open(script_file, 'r') as fscript:
self._script = fscript.read()
self.logger.debug('loaded %s', script_file)
@staticmethod
def name():
return 'festival'
@classmethod
def set_executable(cls, executable: str):
"""Sets the festival backend to use `executable`
If this is not set, the backend uses the default festival executable
from the system installation.
Parameters
----------
executable (str) : the path to the festival executable to use as
backend. Set `executable` to None to restore the default.
Raises
------
RuntimeError if `executable` is not an executable file.
"""
if executable is None:
cls._FESTIVAL_EXECUTABLE = None
return
executable = pathlib.Path(executable)
if not (executable.is_file() and os.access(executable, os.X_OK)):
raise RuntimeError(
f'{executable} is not an executable file')
cls._FESTIVAL_EXECUTABLE = executable.resolve()
@classmethod
def executable(cls) -> Path:
"""Returns the absolute path to the festival executable used as backend
The following precedence rule applies for executable lookup:
1. As specified by FestivalBackend.set_executable()
2. Or as specified by the environment variable
PHONEMIZER_FESTIVAL_EXECUTABLE
3. Or the default 'festival' binary found on the system with ``shutil.which('festival')``
Raises
------
RuntimeError
if the festival executable cannot be found or if the
environment variable PHONEMIZER_FESTIVAL_EXECUTABLE is set to a
non-executable file
"""
if cls._FESTIVAL_EXECUTABLE:
return cls._FESTIVAL_EXECUTABLE
if 'PHONEMIZER_FESTIVAL_EXECUTABLE' in os.environ:
executable = pathlib.Path(os.environ[
'PHONEMIZER_FESTIVAL_EXECUTABLE'])
if not (
executable.is_file()
and os.access(executable, mode=os.X_OK)
):
raise RuntimeError(
f'PHONEMIZER_FESTIVAL_EXECUTABLE={executable} '
f'is not an executable file')
return executable.resolve()
executable = shutil.which('festival')
if not executable: # pragma: nocover
raise RuntimeError(
'failed to find festival executable')
return Path(executable).resolve()
@classmethod
def is_available(cls):
"""True if the festival executable is available, False otherwise"""
try:
cls.executable()
except RuntimeError: # pragma: nocover
return False
return True
@classmethod
def version(cls):
"""Festival version as a tupe of integers (major, minor, patch)
Raises
------
RuntimeError if FestivalBackend.is_available() is False or if the
version cannot be extracted for some reason.
"""
festival = cls.executable()
# the full version version string includes extra information
# we don't need
long_version = subprocess.check_output(
[festival, '--version']).decode('latin1').strip()
# extract the version number with a regular expression
festival_version_re = r'.* ([0-9\.]+[0-9]):'
try:
version = re.match(festival_version_re, long_version).group(1)
except AttributeError:
raise RuntimeError(
f'cannot extract festival version from {festival}') from None
return version_as_tuple(version)
@staticmethod
def supported_languages() -> Dict[str, str]:
"""A dictionnary of language codes -> name supported by festival
Actually only en-us (American English) is supported.
"""
return {'en-us': 'english-us'}
# pylint: disable=unused-argument
def _phonemize_aux(self, text: List[str], offset: int, separator: Separator, strip: bool) -> List[str]:
"""Return a phonemized version of `text` with festival
This function is a wrapper on festival, a text to speech
program, allowing simple phonemization of some English
text. The US phoneset we use is the default one in festival,
as described at http://www.festvox.org/bsv/c4711.html
Any opening and closing parenthesis in `text` are removed, as
they interfer with the Scheme expression syntax. Moreover
double quotes are replaced by simple quotes because double
quotes denotes utterances boundaries in festival.
Parsing a ill-formed Scheme expression during post-processing
(typically with unbalanced parenthesis) raises an IndexError.
"""
text = self._preprocess(text)
if len(text) == 0:
return []
text = self._process(text)
text = self._postprocess(text, separator, strip)
return text
@staticmethod
def _double_quoted(line: str) -> str:
"""Return the string `line` surrounded by double quotes"""
return '"' + line + '"'
@staticmethod
def _cleaned(line: str):
"""Remove 'forbidden' characters from the line"""
# special case (very unlikely but causes a crash in festival)
# where a line is only made of '
if set(line) == set("'"):
line = ''
# remove forbidden characters (reserved for scheme, ie festival
# scripting language)
return line.replace('"', '').replace('(', '').replace(')', '').strip()
@classmethod
def _preprocess(cls, text: List[str]):
"""Returns the contents of `text` formatted for festival input
This function adds double quotes to begining and end of each
line in text, if not already presents. The returned result is
a multiline string. Empty lines in inputs are ignored.
"""
cleaned_text = (
cls._cleaned(line) for line in text if line != '')
return '\n'.join(
cls._double_quoted(line) for line in cleaned_text if line != '')
def _process(self, text: str):
"""Return the raw phonemization of `text`
This function delegates to festival the text analysis and
syllabic structure extraction.
Return a string containing the "SylStructure" relation tree of
the text, as a scheme expression.
"""
with tempfile.NamedTemporaryFile('w+', delete=False) as data:
try:
# save the text as a tempfile
data.write(text)
data.close()
# fix the path name for windows
name = data.name
if sys.platform == 'win32': # pragma: nocover
name = name.replace('\\', '\\\\')
with tempfile.NamedTemporaryFile('w+', delete=False) as scm:
try:
scm.write(self._script.format(name))
scm.close()
cmd = f'{self.executable()} -b {scm.name}'
if self.logger:
self.logger.debug('running %s', cmd)
# redirect stderr to a tempfile and displaying it only
# on errors. Messages are something like: "UniSyn:
# using default diphone ax-ax for y-pau". This is
# related to wave synthesis (done by festival during
# phonemization).
with tempfile.TemporaryFile('w+') as fstderr:
return self._run_festival(cmd, fstderr)
finally:
os.remove(scm.name)
finally:
os.remove(data.name)
@staticmethod
def _run_festival(cmd: str, fstderr: IO) -> str:
"""Runs the festival command for phonemization
Returns the raw phonemized output (need to be postprocesses). Raises a
RuntimeError if festival fails.
"""
try:
output = subprocess.check_output(
shlex.split(cmd, posix=False), stderr=fstderr)
# festival seems to use latin1 and not utf8
return re.sub(' +', ' ', output.decode('latin1'))
except subprocess.CalledProcessError as err: # pragma: nocover
fstderr.seek(0)
raise RuntimeError(
f'Command "{cmd}" returned exit status {err.returncode}, '
f'output is:\n{fstderr.read()}') from None
@staticmethod
def _postprocess_syll(syll: List[str], separator: Separator, strip: bool) -> str:
    """Convert a syllable parsed from festival to a phonemized string

    The first element of `syll` is skipped. For each following element,
    the phone label is read from ``element[0][0]`` and its double quotes
    are removed; empty labels are dropped. The labels are joined with the
    phone separator, which is also appended at the end unless `strip` is
    True.

    """
    phone_sep = separator.phone

    labels = []
    for node in syll[1:]:
        label = node[0][0].replace('"', '')
        if label != '':
            labels.append(label)

    syllable = phone_sep.join(labels)
    if strip:
        return syllable
    return syllable + phone_sep
@classmethod
def _postprocess_word(cls, word: List[List[str]], separator: Separator, strip: bool) -> str:
    """Convert a word parsed from festival to a phonemized string

    The first element of `word` is skipped, each following element is a
    syllable converted with `_postprocess_syll`. The syllables are joined
    with the syllable separator, which is also appended at the end unless
    `strip` is True.

    """
    syllable_sep = separator.syllable

    syllables = [
        cls._postprocess_syll(node, separator, strip) for node in word[1:]]

    phonemized = syllable_sep.join(syllables)
    if strip:
        return phonemized
    return phonemized + syllable_sep
@classmethod
def _postprocess_line(cls, line: str, separator, strip: bool) -> str:
    """Convert a line output by festival to a phonemized string

    The `line` is a scheme expression parsed with `lispy.parse`, each
    element of the parsed expression being a word converted with
    `_postprocess_word`. Words phonemized as an empty string are dropped.
    The remaining words are joined with the word separator, which is also
    appended at the end unless `strip` is True.

    """
    word_sep = separator.word

    converted = (
        cls._postprocess_word(node, separator, strip)
        for node in lispy.parse(line))
    words = [word for word in converted if word != '']

    phonemized = word_sep.join(words)
    if strip:
        return phonemized
    return phonemized + word_sep
@classmethod
def _postprocess(cls, tree: str, separator: Separator, strip: bool) -> List[str]:
    """Convert the syllable tree output by festival to the desired format

    The `tree` is split on newlines and each line is converted with
    `_postprocess_line`. Empty lines and lines equal to '(nil nil nil)'
    are ignored.

    Returns the list of phonemized lines.

    """
    ignored = ('', '(nil nil nil)')

    phonemized = []
    for line in tree.split('\n'):
        if line in ignored:
            continue
        phonemized.append(cls._postprocess_line(line, separator, strip))
    return phonemized
@@ -0,0 +1,66 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Parse a Scheme expression as a nested list
The main function of this module is lispy.parse, other ones should be
considered private. This module is a dependency of the festival
backend.
From http://www.norvig.com/lispy.html
"""
from typing import List, Union
def parse(program: str):
    """Read a Scheme expression from a string

    The `program` is first split into tokens, the first complete
    expression found in those tokens is then returned as a nested list
    (or as a single string if the expression is a single atom).

    Raises an IndexError if the expression is not valid scheme
    (unbalanced parenthesis). Raises a SyntaxError if `program` contains
    no token or starts with a closing parenthesis.

    >>> parse('(+ 2 (* 5 2))')
    ['+', '2', ['*', '5', '2']]

    """
    tokens = _tokenize(program)
    expression = _read_from_tokens(tokens)
    return expression
def _tokenize(chars: str) -> List[str]:
"""Convert a string of characters into a list of tokens."""
return chars.replace('(', ' ( ').replace(')', ' ) ').split()
Expr = Union[str, List['Expr']]
def _read_from_tokens(tokens: List[str]) -> Expr:
"""Read an expression from a sequence of tokens"""
if len(tokens) == 0: # pragma: nocover
raise SyntaxError('unexpected EOF while reading')
token = tokens.pop(0)
if token == '(':
expr = []
while tokens[0] != ')':
expr.append(_read_from_tokens(tokens))
tokens.pop(0) # pop off ')'
return expr
if token == ')': # pragma: nocover
raise SyntaxError('unexpected )')
return token
@@ -0,0 +1,143 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Segments backend for the phonemizer"""
import pathlib
from logging import Logger
from typing import Optional, Dict, List, Union, Pattern
import segments
from phonemizer.backend.base import BaseBackend
from phonemizer.separator import Separator
from phonemizer.utils import get_package_resource, version_as_tuple
class SegmentsBackend(BaseBackend):
    """Segments backend for the phonemizer

    The phonemization is built from a grapheme to phoneme mapping (a g2p
    file) applied to the text by a ``segments.Tokenizer``.

    The phonemize method will raise a ValueError when parsing an
    unknown morpheme.

    Parameters
    ----------
    language : str
        Either one of the keys of `supported_languages()` or the path to
        a g2p file. Each line of that file must be made of two whitespace
        separated fields: the grapheme and its phoneme.
    punctuation_marks : str or re.Pattern, optional
        Forwarded as is to the base backend.
    preserve_punctuation : bool, optional
        Forwarded as is to the base backend.
    logger : logging.Logger, optional
        Forwarded as is to the base backend.

    """
    def __init__(self, language: str,
                 punctuation_marks: Optional[Union[str, Pattern]] = None,
                 preserve_punctuation: bool = False,
                 logger: Optional[Logger] = None):
        # will be initialized in _init_language() from super().__init__()
        self._tokenizer: Optional[segments.Tokenizer] = None
        super().__init__(
            language,
            punctuation_marks=punctuation_marks,
            preserve_punctuation=preserve_punctuation,
            logger=logger)

    def _init_language(self, language):
        """Setup the tokenizer for `language` and return the language code"""
        # load the grapheme to phoneme mapping
        profile = self._load_g2p_profile(language)
        self._tokenizer = segments.Tokenizer(profile=profile)

        # this is the language code
        return pathlib.Path(language).stem

    @staticmethod
    def name():
        """Returns the name of the backend, which is 'segments'"""
        return 'segments'

    @classmethod
    def version(cls):
        """Returns the version of the segments module as a tuple"""
        return version_as_tuple(segments.__version__)

    @classmethod
    def is_available(cls):
        """Always returns True"""
        return True

    @staticmethod
    def supported_languages():
        """Returns a dict of language: file supported by the segments backend

        The supported languages have a grapheme to phoneme conversion file
        bundled with phonemizer. Users can also use their own file as
        parameter of the phonemize() function.

        """
        # directory phonemizer/share/segments
        directory = get_package_resource('segments')

        # supported languages are files with the 'g2p' extension
        return {g2p.stem: g2p
                for g2p in directory.iterdir() if g2p.suffix == '.g2p'}

    @classmethod
    def is_supported_language(cls, language: str) -> bool:
        """Returns True if `language` can be used with this backend

        When `language` is an existing file, it is supported if it can be
        loaded as a g2p profile. Otherwise it must be one of the keys of
        `supported_languages()`.

        """
        if pathlib.Path(language).is_file():
            try:
                cls._load_g2p_profile(language)
                return True
            except RuntimeError:
                return False
        return language in cls.supported_languages()

    @classmethod
    def _load_g2p_profile(cls, language: str) -> segments.Profile:
        """Returns a segments profile from a `language`

        Raises a RuntimeError if `language` is neither an existing file
        nor a supported language, or if a line of the g2p file is not
        made of exactly two whitespace separated fields.

        """
        # make sure the g2p file exists
        if not pathlib.Path(language).is_file():
            try:
                language = cls.supported_languages()[language]
            except KeyError:
                raise RuntimeError(
                    f'grapheme to phoneme file not found: '
                    f'{language}') from None

        # load the mapping grapheme -> phoneme from the file, make sure all
        # lines are well formatted
        g2p: Dict[str, str] = {}
        with open(language, 'r', encoding='utf8') as flang:
            for num, line in enumerate(flang):
                elts = line.strip().split()
                if len(elts) != 2:
                    raise RuntimeError(
                        'grapheme to phoneme file, line {} must have 2 rows '
                        'but have {}: {}'.format(num + 1, len(elts), language))

                g2p[elts[0]] = elts[1]

        # build the segments profile from the g2p mapping
        return segments.Profile(
            *[{'Grapheme': k, 'mapping': v} for k, v in g2p.items()])

    # pylint: disable=unused-argument
    def _phonemize_aux(self, text: List[str], offset: int, separator: Separator, strip: bool) -> List[str]:
        """Phonemize each line of `text` and apply the custom separators

        In the tokenizer output the phones are separated by a space and
        the words by ' # '. Those default separators are replaced by
        `separator.phone` and `separator.word`. The `offset` argument is
        not used.

        """
        # tokenize the input text per utterance
        phonemized = (
            self._tokenizer(line, column='mapping', errors='strict')
            for line in text)

        # the output of segments is always strip, so we need to add
        # token separation at the end when strip is False.
        if not strip:
            # add word separator at end of utterance
            phonemized = (p + ' # ' for p in phonemized)
            # add phoneme separator at end of word: one more space before
            # each word marker, that space is left in place when the ' # '
            # markers are replaced below and so becomes a phone separator.
            # The extra space is spelled out so it cannot be lost by a
            # whitespace normalization of this file.
            phonemized = (
                p.replace(' # ', ' ' + ' # ') for p in phonemized)

        # replace default separators by our custom ones
        phonemized = (p.replace(' # ', '#') for p in phonemized)
        phonemized = (p.replace(' ', separator.phone) for p in phonemized)
        phonemized = (p.replace('#', separator.word) for p in phonemized)

        # return the result as a list of utterances
        return list(phonemized)
@@ -0,0 +1,63 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Logging facilities for the phonemizer"""
import logging
import sys
from logging import Logger
def get_logger(verbosity: str = 'quiet', name: str = 'phonemizer') -> Logger:
    """Returns a configured logging.Logger instance

    Any handler already attached to the logger called `name` is removed
    and replaced by a single one: a handler writing on the standard error
    stream (stderr), or a null handler when `verbosity` is 'quiet'.

    Parameters
    ----------
    verbosity (str) : The level of verbosity, must be 'verbose' (logger
      level set to DEBUG), 'normal' (logger level set to WARNING) or 'quiet'
      (do not display anything).

    name (str) : The logger name, default to 'phonemizer'

    Raises
    ------
    RuntimeError if `verbosity` is not 'normal', 'verbose', or 'quiet'.

    """
    # make sure the verbosity argument is valid
    valid_verbosity = ('normal', 'verbose', 'quiet')
    if verbosity not in valid_verbosity:
        raise RuntimeError(
            f'verbosity is {verbosity} but must be in '
            f'{", ".join(valid_verbosity)}')

    # messages go to stderr, or nowhere when quiet
    if verbosity == 'quiet':
        handler = logging.NullHandler()
    else:
        handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(logging.Formatter('[%(levelname)s] %(message)s'))

    logger = logging.getLogger(name)
    if verbosity == 'verbose':
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.WARNING)

    # drop the handlers from any previous configuration of that logger
    logger.handlers = []
    logger.addHandler(handler)
    return logger
@@ -0,0 +1,428 @@
#!/usr/bin/env python
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Command-line phonemizer tool, run 'phonemize --help' for usage details"""
import argparse
import os
import sys
import re
from phonemizer import phonemize, separator, version, logger, punctuation
from phonemizer.backend import BACKENDS
class CatchExceptions:  # pragma: nocover
    """Decorator wrapping a function in a try/except block

    When an exception occurs, display a user friendly message on
    standard error before exiting with error code 1.

    The detected exceptions are ValueError, OSError, RuntimeError,
    AssertionError and KeyboardInterrupt.

    Parameters
    ----------
    function :
        The function to wrap in a try/except block

    """
    def __init__(self, function):
        self.function = function

    def __call__(self, *args, **kwargs):
        """Executes the wrapped function and catch common exceptions

        The arguments are forwarded to the wrapped function and its
        result is returned.

        """
        try:
            return self.function(*args, **kwargs)
        except (ValueError, OSError,
                RuntimeError, AssertionError) as err:
            # IOError is an alias of OSError, no need to list it
            self.exit('fatal error: {}'.format(err))
        except KeyboardInterrupt:
            self.exit('keyboard interruption, exiting')

    @staticmethod
    def exit(msg):
        """Write `msg` on stderr and exit with error code 1"""
        sys.stderr.write(msg.strip() + '\n')
        sys.exit(1)
def parse_args() -> argparse.Namespace:
    """Argument parser for the phonemization script

    Declares the command-line interface (general options, input/output,
    backend and language selection, token separators, options specific to
    the espeak and festival backends, punctuation processing) and parses
    the arguments of the current process.

    Returns
    -------
    argparse.Namespace
        The value returned by ``parser.parse_args()``, with one attribute
        per option declared below.

    """
    # RawDescriptionHelpFormatter displays the description and epilog as
    # they are written here, without rewrapping them
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''Multilingual text to phonemes converter
The 'phonemize' program allows simple phonemization of words and texts
in many language using four backends: espeak, espeak-mbrola, festival
and segments.
- espeak is a text-to-speech software supporting multiple languages
and IPA (International Phonetic Alphabet) output. See
http://espeak.sourceforge.net or
https://github.com/espeak-ng/espeak-ng
- espeak-mbrola uses the SAMPA phonetic alphabet, it requires mbrola to be
installed as well as additional mbrola voices. It does not support word or
syllable tokenization. See
https://github.com/espeak-ng/espeak-ng/blob/master/docs/mbrola.md
- festival is also a text-to-speech software. Currently only American
English is supported and festival uses a custom phoneset
(http://www.festvox.org/bsv/c4711.html), but festival is the only
backend supporting tokenization at the syllable
level. See http://www.cstr.ed.ac.uk/projects/festival
- segments is a Unicode tokenizer that build a phonemization from a
grapheme to phoneme mapping provided as a file by the user. See
https://github.com/cldf/segments.
See the '--list-languages' option below for details on the languages
supported by each backend.
''',
        epilog='''
Examples:
* Phonemize a US English text with espeak
$ echo 'hello world' | phonemize -l en-us -b espeak
həloʊ wɜːld
* Phonemize a US English text with festival
$ echo 'hello world' | phonemize -l en-us -b festival
hhaxlow werld
* Phonemize a Japanese text with segments
$ echo 'konnichiwa tsekai' | phonemize -l japanese -b segments
konnitʃiwa t͡sekai
* Add a separator between phones
$ echo 'hello world' | phonemize -l en-us -b festival -p '-' --strip
hh-ax-l-ow w-er-l-d
* Phonemize some French text file using espeak
$ phonemize -l fr-fr -b espeak text.txt -o phones.txt
''')

    # general arguments
    parser.add_argument(
        '-V', '--version',
        action='store_true',
        help='show version information and exit.')

    # --verbose and --quiet cannot be used together
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='write all log messages to stderr '
        '(displays only warnings by default).')
    group.add_argument(
        '-q', '--quiet',
        action='store_true',
        help='do not display any log message, even warnings.')

    parser.add_argument(
        '-j', '--njobs',
        type=int, metavar='<int>', default=1,
        help='number of parallel jobs, default is %(default)s.')

    # input/output arguments
    group = parser.add_argument_group('input/output')
    # the defaults of input and output are the stdin/stdout stream objects,
    # whereas a value given on the command line is a str (a filename)
    group.add_argument(
        'input',
        default=sys.stdin, nargs='?', metavar='<file>',
        help='input text file to phonemize, if not specified read from stdin.')
    group.add_argument(
        '-o', '--output',
        default=sys.stdout, metavar='<file>',
        help='output text file to write, if not specified write to stdout.')

    # the value is False when the option is absent, True when the option is
    # used without value, or the string given as value
    group.add_argument(
        '--prepend-text',
        default=False, const=True, nargs='?', metavar='<str>',
        help='''prepend each line of the phonemized output text with its
matching input text. If a string is specified as option value, use it
as field separator, else use one of "|", "||", "|||", "||||" by
selecting the first one that is not configured as a token separator
(see -p/-s/-w options).''')
    group.add_argument(
        '--preserve-empty-lines',
        action='store_true',
        help='''preserve the empty lines in the phonemized output, default is
to remove them.''')

    group = parser.add_argument_group('backends')
    # the default is None (and not 'espeak') so as to detect that no backend
    # has been selected on the command line
    group.add_argument(
        '-b', '--backend',
        metavar='<str>', default=None,
        choices=['espeak', 'espeak-mbrola', 'festival', 'segments'],
        help="""the phonemization backend, must be 'espeak', 'espeak-mbrola',
'festival' or 'segments'. Default is 'espeak'.""")
    group.add_argument(
        '-L', '--list-languages',
        action='store_true',
        help="""list available languages (and exit) for the specified backend,
or for all backends if none selected.""")

    group = parser.add_argument_group('language')
    group.add_argument(
        '-l', '--language',
        metavar='<str|file>', default='en-us',
        help='''the language code of the input text, use '--list-languages'
for a list of supported languages. Default is %(default)s.''')

    group = parser.add_argument_group('token separators')
    group.add_argument(
        '-p', '--phone-separator',
        metavar='<str>', default=separator.default_separator.phone,
        help='phone separator, default is "%(default)s".')
    group.add_argument(
        '-w', '--word-separator',
        metavar='<str>', default=separator.default_separator.word,
        help='''word separator, not valid for espeak-mbrola backend,
default is "%(default)s".''')
    group.add_argument(
        '-s', '--syllable-separator',
        metavar='<str>', default=separator.default_separator.syllable,
        help='''syllable separator, only valid for festival backend,
this option has no effect if another backend is used.
Default is "%(default)s".''')
    group.add_argument(
        '--strip',
        action='store_true',
        help='removes the end separators in phonemized tokens.')

    group = parser.add_argument_group('specific to espeak backend')
    # the library path is only displayed in the help message below (the
    # option itself defaults to None), it is shown as None when the lookup
    # raises a RuntimeError
    try:
        espeak_library = BACKENDS['espeak'].library()
    except RuntimeError:  # pragma: nocover
        espeak_library = None
    group.add_argument(
        '--espeak-library',
        default=None, type=str, metavar='<library>',
        help=f'''the path to the espeak shared library to use (*.so on Linux,
*.dylib on Mac and *.dll on Windows, useful to overload the default
espeak version installed on the system). Default to
{espeak_library}. This path can also be specified
using the PHONEMIZER_ESPEAK_LIBRARY environment variable.''')

    # the value is False when the option is absent, True when the option is
    # used without value, or the character given as value
    group.add_argument(
        '--tie',
        nargs='?', default=False, const=True, metavar='<chr>',
        help='''when the option is set, use a tie character within multi-letter
phoneme names, default to U+361 (as in d͡ʒ), 'z' means ZWJ character,
only compatible with espeak>1.48 and incompatible with the
-p/--phone-separator option''')
    group.add_argument(
        '--with-stress',
        action='store_true',
        help='''when the option is set, the stresses on phonemes are present
(stresses characters are ˈ'ˌ). By default stresses are removed.''')
    group.add_argument(
        '--language-switch',
        default='keep-flags',
        choices=['keep-flags', 'remove-flags', 'remove-utterance'],
        help="""espeak can pronounce some words in another language (typically
English) when phonemizing a text. This option setups the policy to use
when such a language switch occurs. Three values are available:
'keep-flags' (the default), 'remove-flags' or 'remove-utterance'. The
'keep-flags' policy keeps the language switching flags, for example
(en) or (jp), in the output. The 'remove-flags' policy removes them and
the 'remove-utterance' policy removes the whole line of text including
a language switch.""")
    group.add_argument(
        '--words-mismatch',
        default='ignore', choices=['ignore', 'warn', 'remove'],
        help="""espeak can join two consecutive words or drop some words,
yielding a word count mismatch between orthographic and phonemized
text. This option setups the policy to use when such a words count
mismatch occurs. Three values are available: 'ignore' (the default)
which do nothing, 'warn' which issue a warning for each mismatched
line, and 'remove' which remove the mismatched lines from the
output.""")

    group = parser.add_argument_group('specific to festival backend')
    # as for espeak above, the executable path is only displayed in the help
    # message
    try:
        festival_executable = BACKENDS['festival'].executable()
    except RuntimeError:  # pragma: nocover
        festival_executable = None
    group.add_argument(
        '--festival-executable',
        default=None, type=str, metavar='<executable>',
        help=f'''the path to the festival executable to use (useful to
overload the default festival installed on the system). Default to
{festival_executable}. This path can also be specified using the
PHONEMIZER_FESTIVAL_EXECUTABLE environment variable.''')

    group = parser.add_argument_group(
        'punctuation processing',
        description='not available for espeak-mbrola backend')
    group.add_argument(
        '--preserve-punctuation',
        action='store_true',
        help='''preserve the punctuation marks in the phonemized output,
default is to remove them.''')
    group.add_argument(
        '--punctuation-marks',
        type=str, metavar='<str>',
        default=punctuation.Punctuation.default_marks(),
        help='''the marks to consider during punctuation processing (either
for removal or preservation). Default is %(default)s.''')
    group.add_argument(
        '--punctuation-marks-is-regex',
        action='store_true',
        help="""interpret the '--punctuation-marks' parameter as a regex.
Default is to interpret as a string.""")

    return parser.parse_args()
def list_languages(args_backend):
    """Returns the available languages for the given `backend` as a str

    Parameters
    ----------
    args_backend : str or None
        The name of a backend, as a key of BACKENDS. If None (or empty),
        the languages are listed for all the backends in BACKENDS.

    Returns
    -------
    str
        For each backend, a header line followed by one line per
        supported language, formatted as 'language -> value' (fields
        separated by tabulations) and sorted by language.

    """
    backends = [args_backend] if args_backend else list(BACKENDS)

    # the message is returned, not printed: displaying it is up to the caller
    return '\n'.join(
        f'supported languages for {backend} are:\n' +
        '\n'.join(f'\t{k}\t->\t{v}' for k, v in sorted(
            BACKENDS[backend].supported_languages().items()))
        for backend in backends)
def get_logger(verbose, quiet):
    """Returns a logger configured from the --verbose/--quiet options

    The verbosity is 'verbose' if `verbose` is true, else 'quiet' if
    `quiet` is true, else 'normal'.

    """
    if verbose:
        return logger.get_logger(verbosity='verbose')
    if quiet:
        return logger.get_logger(verbosity='quiet')
    return logger.get_logger(verbosity='normal')
def setup_stream(stream, mode):
    """Returns `stream` as a file object

    If `stream` is a str, it is considered as a filename and opened with
    the given `mode` and the utf8 encoding. Anything else is returned
    unchanged.

    """
    if not isinstance(stream, str):
        return stream  # pragma: nocover

    # the file is returned open, closing it is up to the caller
    # pylint: disable=consider-using-with
    return open(stream, mode, encoding='utf8')
@CatchExceptions
def main():
    """Phonemize a text from command-line arguments

    Parses the command line, then either displays the version or the
    supported languages and returns, or phonemizes the input text and
    writes the result, one utterance per line, to the output stream.

    The input and output files opened here are closed before returning,
    including on errors. The standard streams are left open.

    Raises
    ------
    ValueError
        If --punctuation-marks-is-regex is set and the punctuation marks
        cannot be compiled as a regular expression.

    """
    args = parse_args()

    # setup a custom path to espeak and festival if required (this must be done
    # before generating the version message)
    if args.espeak_library:
        BACKENDS['espeak'].set_library(args.espeak_library)
    if args.festival_executable:
        BACKENDS['festival'].set_executable(args.festival_executable)

    # display version information and exit
    if args.version:
        print(version.version())
        return

    # list supported languages and exit
    if args.list_languages:
        print(list_languages(args.backend))
        return

    # set default backend as espeak if not specified
    args.backend = args.backend or 'espeak'

    # configure logging according to --verbose/--quiet options
    log = get_logger(args.verbose, args.quiet)

    # the streams opened here (as opposed to stdin/stdout received as
    # argument defaults), to be closed when done
    opened = []
    try:
        # configure input:output as a readable/writable streams
        streamin = setup_stream(args.input, 'r')
        if streamin is not args.input:
            opened.append(streamin)
        log.debug('reading from %s', streamin.name)

        streamout = setup_stream(args.output, 'w')
        if streamout is not args.output:
            opened.append(streamout)
        log.debug('writing to %s', streamout.name)

        # configure the separator for phonemes, syllables and words.
        if args.backend == 'espeak-mbrola':
            log.debug('using espeak-mbrola backend: ignoring word separator')
            sep = separator.Separator(
                phone=args.phone_separator,
                syllable=None,
                word=None)
        else:
            sep = separator.Separator(
                phone=args.phone_separator,
                syllable=args.syllable_separator,
                word=args.word_separator)
        log.debug('separator is %s', sep)

        if args.prepend_text:
            input_output_separator = sep.input_output_separator(
                args.prepend_text)
            log.debug(
                'prepend input text to output, separator is "%s"',
                input_output_separator)
        else:
            input_output_separator = False

        if args.punctuation_marks_is_regex:
            try:
                log.debug(
                    'punctuation marks is regex %s', args.punctuation_marks)
                args.punctuation_marks = re.compile(args.punctuation_marks)
            except re.error as err:
                # the open streams are closed by the finally clause below
                raise ValueError(
                    "can't compile regex pattern from "
                    f"{args.punctuation_marks}") from err

        # phonemize the input text
        out = phonemize(
            streamin.readlines(),
            language=args.language,
            backend=args.backend,
            separator=sep,
            strip=args.strip,
            prepend_text=args.prepend_text,
            preserve_empty_lines=args.preserve_empty_lines,
            preserve_punctuation=args.preserve_punctuation,
            punctuation_marks=args.punctuation_marks,
            with_stress=args.with_stress,
            tie=args.tie,
            language_switch=args.language_switch,
            words_mismatch=args.words_mismatch,
            njobs=args.njobs,
            logger=log)

        # the streams are in text mode and so already translate '\n' to the
        # line separator of the platform: writing os.linesep here would
        # yield '\r\r\n' on Windows
        if out and input_output_separator:
            streamout.write(
                '\n'.join(
                    f'{line[0]} {input_output_separator} {line[1]}'
                    for line in out)
                + '\n')
        elif out:
            streamout.write('\n'.join(out) + '\n')
    finally:
        for stream in reversed(opened):
            stream.close()


if __name__ == '__main__':  # pragma: nocover
    main()
@@ -0,0 +1,328 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Provides the phonemize function
To use it in your own code, type:
from phonemizer import phonemize
"""
import os
import sys
from logging import Logger
from typing import Optional, Union, List, Pattern
from typing_extensions import Literal
from phonemizer.backend import BACKENDS
from phonemizer.backend.base import BaseBackend
from phonemizer.backend.espeak.language_switch import LanguageSwitch
from phonemizer.backend.espeak.words_mismatch import WordMismatch
from phonemizer.logger import get_logger
from phonemizer.punctuation import Punctuation
from phonemizer.separator import default_separator, Separator
from phonemizer.utils import list2str, str2list
# the names of the available phonemization backends
Backend = Literal['espeak', 'espeak-mbrola', 'festival', 'segments']


def phonemize(  # pylint: disable=too-many-arguments
        text,
        language: str = 'en-us',
        backend: Backend = 'espeak',
        separator: Optional[Separator] = default_separator,
        strip: bool = False,
        prepend_text: bool = False,
        preserve_empty_lines: bool = False,
        preserve_punctuation: bool = False,
        punctuation_marks: Union[str, Pattern] = Punctuation.default_marks(),
        with_stress: bool = False,
        tie: Union[bool, str] = False,
        language_switch: LanguageSwitch = 'keep-flags',
        words_mismatch: WordMismatch = 'ignore',
        njobs: int = 1,
        logger: Logger = get_logger()):
    """Multilingual text to phonemes converter

    Return a phonemized version of an input `text`, given its `language` and a
    phonemization `backend`.

    Note
    ----
    To improve the processing speed it is better to minimize the calls to this
    function: provide the input text as a list and call phonemize() a single
    time is much more efficient than calling it on each element of the list.
    Indeed the initialization of the phonemization backend can be expensive,
    especially for espeak. In one example,

    Do this:

    >>> text = [line1, line2, ...]
    >>> phonemize(text, ...)

    Not this:

    >>> for line in text:
    >>>     phonemize(line, ...)

    Parameters
    ----------
    text: str or list of str
        The text to be phonemized. Any empty line will
        be ignored. If ``text`` is an str, it can be multiline (lines being
        separated by ``\\n``). If ``text`` is a list, each element is
        considered as a separated line. Each line is considered as a text
        utterance.

    language: str
        The language code of the input text, must be supported by
        the backend. If ``backend`` is 'segments', the language can be a file
        with a grapheme to phoneme mapping.

    backend: str, optional
        The software backend to use for phonemization,
        must be 'festival' (US English only is supported, coded 'en-us'),
        'espeak', 'espeak-mbrola' or 'segments'.

    separator: Separator
        string separators between phonemes, syllables and
        words, default to separator.default_separator. Syllable separator is
        considered only for the festival backend. Word separator is ignored by
        the 'espeak-mbrola' backend. Initialize it as follows:

        >>> from phonemizer.separator import Separator
        >>> separator = Separator(phone='-', word=' ')

    strip: bool, optional
        If True, don't output the last word and phone
        separators of a token, default to False.

    prepend_text: bool, optional
        When True, returns a pair (input utterance,
        phonemized utterance) for each line of the input text. When False,
        returns only the phonemized utterances. Default to False

    preserve_empty_lines: bool, optional
        When True, will keep the empty lines
        in the phonemized output. Default to False and remove all empty lines.

    preserve_punctuation: bool, optional
        When True, will keep the punctuation
        in the phonemized output. Not supported by the 'espeak-mbrola' backend.
        Default to False and remove all the punctuation.

    punctuation_marks: str or re.Pattern, optional
        The punctuation marks to consider when dealing with punctuation,
        either for removal or preservation. Can be defined as a string or
        regular expression. Default to Punctuation.default_marks().

    with_stress: bool, optional
        This option is only valid for the 'espeak'
        backend. When True the stresses on phonemes are present (stresses
        characters are ˈ'ˌ). When False stresses are removed. Default to False.

    tie: bool or char, optional
        This option is only valid for the 'espeak'
        backend with espeak>=1.49. It is incompatible with phone separator.
        When not False, use a tie character within multi-letter phoneme names.
        When True, the char 'U+361' is used (as in d͡ʒ), 'z' means ZWJ
        character, default to False.

    language_switch: str, optional
        Espeak can output some words in another
        language (typically English) when phonemizing a text. This option
        setups the policy to use when such a language switch occurs. Three
        values are available : 'keep-flags' (the default), 'remove-flags' or
        'remove-utterance'. The 'keep-flags' policy keeps the language
        switching flags, for example "(en) or (jp)", in the output. The
        'remove-flags' policy removes them and the 'remove-utterance' policy
        removes the whole line of text including a language switch. This
        option is only valid for the 'espeak' backend.

    words_mismatch: str, optional
        Espeak can join two consecutive words or
        drop some words, yielding a word count mismatch between orthographic
        and phonemized text. This option setups the policy to use when such a
        words count mismatch occurs. Three values are available: 'ignore' (the
        default) which do nothing, 'warn' which issue a warning for each
        mismatched line, and 'remove' which remove the mismatched lines from
        the output.

    njobs: int
        The number of parallel jobs to launch. The input text is split
        in ``njobs`` parts, phonemized on parallel instances of the backend
        and the outputs are finally collapsed.

    logger: logging.Logger
        the logging instance where to send messages. If
        not specified, use the logger returned by get_logger().

    Returns
    -------
    phonemized text: str or list of str
        The input ``text`` phonemized for the
        given ``language`` and ``backend``. The returned value has the same
        type of the input text (either a list or a string), excepted if
        ``prepend_text`` is True where the output is forced as a list of pairs
        (input_text, phonemized text).

    Raises
    ------
    RuntimeError
        if the ``backend`` is not valid or is valid but not installed,
        if the ``language`` is not supported by the ``backend``, if any
        incompatible options are used.

    """
    # ensure we are using a compatible Python version
    if sys.version_info < (3, 6):  # pragma: nocover
        # the fields of sys.version_info are integers and must be converted
        # to str before being joined
        logger.error(
            'Your are using python-%s which is unsupported by the phonemizer, '
            'please update to python>=3.6',
            '.'.join(str(part) for part in sys.version_info[:3]))

    # ensure the arguments are valid
    _check_arguments(
        backend, with_stress, tie, separator, language_switch, words_mismatch)

    # preserve_punctuation and word separator not valid for espeak-mbrola
    if backend == 'espeak-mbrola' and preserve_punctuation:
        logger.warning('espeak-mbrola backend cannot preserve punctuation')
    if backend == 'espeak-mbrola' and separator.word:
        logger.warning('espeak-mbrola backend cannot preserve word separation')

    # initialize the phonemization backend
    if backend == 'espeak':
        phonemizer = BACKENDS[backend](
            language,
            punctuation_marks=punctuation_marks,
            preserve_punctuation=preserve_punctuation,
            with_stress=with_stress,
            tie=tie,
            language_switch=language_switch,
            words_mismatch=words_mismatch,
            logger=logger)
    elif backend == 'espeak-mbrola':
        phonemizer = BACKENDS[backend](
            language,
            logger=logger)
    else:  # festival or segments
        phonemizer = BACKENDS[backend](
            language,
            punctuation_marks=punctuation_marks,
            preserve_punctuation=preserve_punctuation,
            logger=logger)

    # do the phonemization
    return _phonemize(phonemizer, text, separator, strip, njobs, prepend_text, preserve_empty_lines)
def _check_arguments(  # pylint: disable=too-many-arguments
        backend: Backend,
        with_stress: bool,
        tie: Union[bool, str],
        separator: Separator,
        language_switch: LanguageSwitch,
        words_mismatch: WordMismatch):
    """Auxiliary function to phonemize()

    Validates the backend name and checks the options are compatible with it
    and with each other. The first failing check raises a RuntimeError, the
    function returns None when everything is valid.

    """
    # ensure the backend is a known one
    supported = ('espeak', 'espeak-mbrola', 'festival', 'segments')
    if backend not in supported:
        raise RuntimeError(
            '{} is not a supported backend, choose in {}.'
            .format(backend, ', '.join(supported)))

    # the options below are accepted by the espeak backend only, reject them
    # as soon as they are requested for another backend
    if backend != 'espeak':
        if with_stress:
            raise RuntimeError(
                'the "with_stress" option is available for espeak backend only, '
                'but you are using {} backend'.format(backend))

        if tie:
            raise RuntimeError(
                'the "tie" option is available for espeak backend only, '
                'but you are using {} backend'.format(backend))

        if language_switch != 'keep-flags':
            raise RuntimeError(
                'the "language_switch" option is available for espeak backend '
                'only, but you are using {} backend'.format(backend))

        if words_mismatch != 'ignore':
            raise RuntimeError(
                'the "words_mismatch" option is available for espeak backend '
                'only, but you are using {} backend'.format(backend))

    # a tie character and a phone separator cannot be used together
    if tie and separator.phone:
        raise RuntimeError(
            'the "tie" option is incompatible with phone separator '
            f'(which is "{separator.phone}")')
def _phonemize(  # pylint: disable=too-many-arguments
        backend: BaseBackend,
        text: Union[str, List[str]],
        separator: Separator,
        strip: bool,
        njobs: int,
        prepend_text: bool,
        preserve_empty_lines: bool):
    """Auxiliary function to phonemize()

    Does the phonemization with an already initialized `backend` and formats
    the result. Any error raised by the backend is propagated to the caller.

    Parameters
    ----------
    backend: the initialized backend, its phonemize() method is called with
        the non-empty lines of `text`.
    text: the text to phonemize, as a string or a list of lines.
    separator, strip, njobs: forwarded as is to backend.phonemize().
    prepend_text: if True, return a list of (input line, phonemized line)
        pairs instead of the phonemized text alone.
    preserve_empty_lines: if True, the empty lines of `text` are restored as
        empty strings in the output, otherwise they are dropped.

    Returns
    -------
    A list of pairs if `prepend_text` is True. Otherwise a string if `text`
    is a string, or a list of phonemized lines if `text` is a list.

    """
    # remember the text type for output (either list or string). Use
    # isinstance() as str2list() does, so both always agree on what a string is
    return_str = isinstance(text, str)

    # force the text as a list
    lines = [line.strip(os.linesep) for line in str2list(text)]

    # if preserving empty lines, note the index of each empty line
    empty_lines = []
    if preserve_empty_lines:
        empty_lines = [n for n, line in enumerate(lines) if not line.strip()]

    # ignore empty lines
    lines = [line for line in lines if line.strip()]

    # phonemize the text, the backend is not called when there is nothing left
    phonemized = []
    if lines:
        phonemized = backend.phonemize(
            lines, separator=separator, strip=strip, njobs=njobs)

    # reinsert the empty lines at their original index. Indexes are in
    # increasing order so each insertion lands at its final position
    for index in empty_lines:
        if prepend_text:
            lines.insert(index, '')
        phonemized.insert(index, '')

    # at that point, the phonemized text is a list of str. Format it as
    # expected by the parameters
    if prepend_text:
        return list(zip(lines, phonemized))
    if return_str:
        return list2str(phonemized)
    return phonemized
@@ -0,0 +1,220 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Implementation of punctuation processing"""
import collections
import re
from typing import List, Union, Tuple, Pattern
from phonemizer.utils import str2list
from phonemizer.separator import Separator
# The punctuation marks considered by default.
_DEFAULT_MARKS = ';:,.!?¡¿—…"«»“”(){}[]'
# Record describing one run of punctuation found in a line: `index` is the
# line number, `mark` the matched text and `position` one of 'B' (begin),
# 'E' (end), 'I' (in the middle) or 'A' (alone), as set by
# Punctuation._preserve_line().
_MarkIndex = collections.namedtuple(
    '_mark_index', ['index', 'mark', 'position'])
class Punctuation:
    """Preserve or remove the punctuation during phonemization

    Backends behave differently with punctuation: festival and espeak ignore it
    and remove it silently whereas segments will raise an error. The
    Punctuation class solves that issue by "hiding" the punctuation to the
    phonemization backend and restoring it afterwards.

    Parameters
    ----------
    marks (str or re.Pattern) : The punctuation marks to consider for processing
        (either removal or preservation). If a string, each mark must be made of
        a single character. Default to Punctuation.default_marks().

    """

    def __init__(self, marks: Union[str, Pattern] = _DEFAULT_MARKS):
        self._marks: Union[str, None] = None
        self._marks_re: Union[Pattern, None] = None
        # assign through the property setter, which validates `marks` and
        # compiles the regular expression used by all the methods
        self.marks = marks

    @staticmethod
    def default_marks():
        """Returns the default punctuation marks as a string"""
        return _DEFAULT_MARKS

    @property
    def marks(self):
        """The punctuation marks as a string

        Raises a ValueError if the marks have been defined from a re.Pattern,
        because they are not available as a string in that case.

        """
        if self._marks:
            return self._marks
        raise ValueError('punctuation initialized from regex, cannot access marks as a string')

    @marks.setter
    def marks(self, value: Union[str, Pattern]):
        if isinstance(value, Pattern):
            # catch the pattern surrounded by zero or more spaces on either side
            self._marks_re = re.compile(r'((' + value.pattern + r')|\s)+')
            self._marks = None
        elif isinstance(value, str):
            # remove duplicated marks while keeping them in a reproducible
            # order (a set would reorder them differently on each run)
            self._marks = ''.join(dict.fromkeys(value))

            # catching all the marks in one regular expression: zero or more spaces
            # + one or more marks + zero or more spaces.
            self._marks_re = re.compile(fr'(\s*[{re.escape(self._marks)}]+\s*)+')
        else:
            raise ValueError('punctuation marks must be defined as a string or re.Pattern')

    def remove(self, text: Union[str, List[str]]) -> Union[str, List[str]]:
        """Returns the `text` with all punctuation marks replaced by spaces

        The input `text` can be a string or a list and is returned with the
        same type and punctuation removed. Leading and trailing whitespaces
        are stripped from each processed string.

        """
        def aux(line: str) -> str:
            return self._marks_re.sub(' ', line).strip()

        if isinstance(text, str):
            return aux(text)
        return [aux(line) for line in text]

    def preserve(self, text: Union[List[str], str]) -> Tuple[List[str], List[_MarkIndex]]:
        """Removes punctuation from `text`, allowing for further restoration

        This method returns the text as a list of punctuated chunks, along with
        a list of punctuation marks for further restoration:

            'hello, my world!' -> ['hello', 'my world'], [',', '!']

        Empty chunks are dropped from the returned list. Each mark is a
        _MarkIndex storing the line number, the matched text and its position
        in the line.

        """
        preserved_text = []
        preserved_marks = []
        for num, line in enumerate(str2list(text)):
            line_chunks, line_marks = self._preserve_line(line, num)
            preserved_text += line_chunks
            preserved_marks += line_marks
        return [chunk for chunk in preserved_text if chunk], preserved_marks

    def _preserve_line(self, line: str, num: int) -> Tuple[List[str], List[_MarkIndex]]:
        """Auxiliary method for Punctuation.preserve()

        Splits a single `line` (with index `num` in the text) into the chunks
        separated by punctuation and the list of marks found in it.

        """
        matches = list(self._marks_re.finditer(line))
        if not matches:
            return [line], []

        # the line is made only of punctuation marks
        if len(matches) == 1 and matches[0].group() == line:
            return [], [_MarkIndex(num, line, 'A')]

        # build the list of mark indexes required to restore the punctuation
        marks = []
        last = len(matches) - 1
        for rank, match in enumerate(matches):
            found = match.group()
            # find the position of the punctuation mark in the utterance:
            # begin (B), end (E), in the middle (I) or alone (A)
            position = 'I'
            if rank == 0 and line.startswith(found):
                position = 'B'
            elif rank == last and line.endswith(found):
                position = 'E'
            marks.append(_MarkIndex(num, found, position))

        # split the line into sublines, each separated by a punctuation mark:
        # cut at the first occurrence of each mark and go on with what follows
        preserved_line = []
        for mark in marks:
            prefix, _, line = line.partition(mark.mark)
            preserved_line.append(prefix)

        # append any trailing text to the preserved line
        return preserved_line + [line], marks

    @classmethod
    def restore(cls, text: Union[str, List[str]],
                marks: List[_MarkIndex],
                sep: Separator,
                strip: bool) -> List[str]:
        """Restore punctuation in a text.

        This is the reverse operation of Punctuation.preserve(). It takes a
        list of punctuated chunks and a list of punctuation marks, as well as
        the separator and strip parameters used by phonemize. It returns the
        punctuated text as a list:

            ['hello', 'my world'], [',', '!'] -> ['hello, my world!']

        The `text` and `marks` arguments are not modified.

        """
        # work on private queues: the input lists are left untouched and
        # consuming the first element is done in constant time
        chunks = collections.deque(str2list(text))
        pending = collections.deque(marks or ())
        word_sep = sep.word

        punctuated_text = []
        pos = 0
        while chunks or pending:
            if not pending:
                for line in chunks:
                    # if strip is False, ensure the final word ends with a word separator
                    if not strip and word_sep and not line.endswith(word_sep):
                        line = line + word_sep
                    punctuated_text.append(line)
                chunks.clear()
            elif not chunks:
                # nothing has been phonemized, returns the marks alone, with internal
                # spaces replaced by the word separator. The replacement is
                # literal: the separator must not be read as a regex template
                punctuated_text.append(
                    ''.join(m.mark for m in pending).replace(' ', word_sep))
                pending.clear()
            elif pending[0].index != pos:
                # the next mark belongs to a following line, the current
                # chunk is complete
                punctuated_text.append(chunks.popleft())
                pos = pos + 1
            else:
                # place the current mark here
                current_mark = pending.popleft()

                # replace internal spaces in the current mark with the word separator
                mark = current_mark.mark.replace(' ', word_sep)

                # remove the word last separator from the current word
                if word_sep and chunks[0].endswith(word_sep):
                    chunks[0] = chunks[0][:-len(word_sep)]

                # word separator closing a line, unless stripped or already
                # provided by the mark itself
                closing = '' if strip or mark.endswith(word_sep) else word_sep

                if current_mark.position == 'B':
                    chunks[0] = mark + chunks[0]
                elif current_mark.position == 'E':
                    punctuated_text.append(chunks.popleft() + mark + closing)
                    pos = pos + 1
                elif current_mark.position == 'A':
                    punctuated_text.append(mark + closing)
                    pos = pos + 1
                else:
                    # position == 'I'
                    if len(chunks) == 1:  # pragma: nocover
                        # a corner case where the final part of an intermediate
                        # mark (I) has not been phonemized
                        chunks[0] = chunks[0] + mark
                    else:
                        first_word = chunks.popleft()
                        chunks[0] = first_word + mark + chunks[0]

        return punctuated_text
@@ -0,0 +1,118 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Provides the Separator tuple and its default value"""
from typing import Optional, Union
class Separator:
    """Defines phone, syllable and word boundary tokens

    Parameters
    ----------
    word, syllable, phone (str or None) : the token inserted at each word,
        syllable and phone boundary. None or an empty string means no
        separator at that level. Non-empty separators must all be different.

    Raises
    ------
    ValueError if two non-empty separators are identical

    """

    def __init__(self, word: str = ' ',
                 syllable: Optional[str] = None,
                 phone: Optional[str] = None):
        # check we have different separators, None and empty ones excluded
        tokens = [sep for sep in (phone, syllable, word) if sep]
        if len(tokens) != len(set(tokens)):
            # values are given in the order the message names them
            raise ValueError(
                'illegal separator with word="{}", syllable="{}" and '
                'phone="{}", must be all differents if not empty'
                .format(word, syllable, phone))

        self._phone = str(phone) if phone else ''
        self._syllable = str(syllable) if syllable else ''
        self._word = str(word) if word else ''

    def __eq__(self, other: 'Separator'):
        try:
            return (
                self.phone == other.phone
                and self.syllable == other.syllable
                and self.word == other.word)
        except AttributeError:
            # `other` is not a separator, let Python try the reflected
            # comparison and finally answer False instead of crashing
            return NotImplemented

    def __hash__(self):
        # consistent with __eq__: equal separators have the same hash
        return hash((self.phone, self.syllable, self.word))

    def __str__(self):
        return (
            f'(phone: "{self.phone}", '
            f'syllable: "{self.syllable}", '
            f'word: "{self.word}")')

    @property
    def phone(self):
        """Phones separator"""
        return self._phone

    @property
    def syllable(self):
        """Syllables separator"""
        return self._syllable

    @property
    def word(self):
        """Words separator"""
        return self._word

    def __contains__(self, value: str):
        """Returns True if the separator has `value` as token separation"""
        return value in {self.phone, self.syllable, self.word}

    def input_output_separator(self, field_separator: Union[str, bool]) \
            -> Union[str, bool]:
        """Returns a suitable input/output separator based on token separator

        The input/output separator split orthographic and phonetic texts when
        using the --prepend-text option from command-line.

        Parameters
        ----------
        field_separator: bool or str
            If str, ensures its value is not already defined as a token
            separator. If True choose one of "|", "||", "|||", "||||" (the
            first one that is not defined as a token separator)

        Returns
        -------
        The input/output separator, or False if ``field_separator`` is False
        (or any other false value, such as an empty string)

        Raises
        ------
        RuntimeError
            if ``field_separator`` is a str but is already registered as
            token separator, or if it is neither a bool nor a str

        """
        if not field_separator:
            return False

        if isinstance(field_separator, str):
            if field_separator in self:
                raise RuntimeError(
                    f'cannot prepend input with "{field_separator}" because '
                    f'it is already a token separator: {self}')
            return field_separator

        if field_separator is True:
            # add pipes until the candidate is not a token separator
            field_separator = '|'
            while field_separator in self:
                field_separator += '|'
            return field_separator

        # not a bool nor a str
        raise RuntimeError(
            'invalid input/output separator, must be bool or str but is '
            f'{field_separator}')
# Module-level Separator instance: words are separated by a single space,
# phones and syllables have no separator.
default_separator = Separator(phone='', syllable='', word=' ')
"""The default separation characters for phonemes, syllables and words"""
@@ -0,0 +1,30 @@
;; Copyright 2015-2021 Mathieu Bernard
;;
;; This file is part of phonemizer: you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.
;;
;; Phonemizer is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
;; This script is executed by festival for English text phonemization.
;; Phonemize a single line of text: build a Text utterance from LINE,
;; process it and print its "SylStructure" relation tree.
(define (phonemize line)
"(phonemize LINE)
Extract the phonemes of the string LINE as a tree and write it to stdout."
;; build a Text utterance from LINE and store it in `utterance`
(set! utterance (eval (list 'Utterance 'Text line)))
;; NOTE(review): presumably runs the festival synthesis modules that fill
;; the relations read below -- confirm against the festival manual
(utt.synth utterance)
;; Use of print instead of pprintf to have each utterance on one line
(print (utt.relation_tree utterance "SylStructure")))
;; The "{}" placeholder below has to be replaced by the name of the text
;; file to read data from. To be parsed by festival as a single
;; utterance, each line of that file must begin and end with
;; double-quotes.
;; NOTE(review): the `t` argument presumably asks `load` to return the
;; expressions of the file without evaluating them -- confirm against the
;; festival manual
(set! lines (load "{}" t))
;; phonemize each loaded line, in order
(mapcar (lambda (line) (phonemize line)) lines)
@@ -0,0 +1,27 @@
a ʌ
â aː
b b
ch tʃ
d d
e eː
f f
g g
h h
i ɪ
î iː
j dʒ
k k
kw kʷ
l l
m m
n n
o ʊ
p p
s s
sh ʃ
t t
th θ
u ʊ
û o
w w
y j
@@ -0,0 +1,27 @@
a ʌ
â aː
b b
ch tʃ
d d
e eː
f f
g g
h h
i ɪ
î iː
j dʒ
k k
kw kʷ
l l
m m
n n
o ʊ
p p
s s
sh ʃ
t t
th θ
u ʊ
û o
w w
y j
@@ -0,0 +1,20 @@
a a
g g
h h
i i
j j
k k
l l
ll ɬ
m m
n n
ng ŋ
nng ŋŋ
p p
q q
r ʁ
rng ɴ
s s
t t
u u
v v
@@ -0,0 +1,35 @@
a a
aa aː
b b
by bʲ
ch tʃ
d d
e e
ee eː
f ɸ
g g
gy gʲ
h h
hy ç
i i
j dʒ
k k
ky kʲ
m m
my mʲ
n n
ny ɲ
o o
oo oː
p p
py pʲ
r r
ry rʲ
sh ʃ
t t
ts t͡s
u ɯ
uu ɯː
w w
y j
z z
@@ -0,0 +1,38 @@
a a
b b
ch tʃʰ
d d
e e
f f
g χ
h h
hl ɬ
i i
j dʒ
k k
kg kx
kh kʰ
l l
m m
n n
ng ŋ
nq ǃ̃
ny ɲ
o o
p t
ph pʰ
q ǃ
qh ǃʰ
r r
s s
sh ʃ
t t
th tʰ
tj tʃ
tl tɬ
tlh tɬʰ
ts t͡s
tsh t͡sʰ
u u
w w
y j
@@ -0,0 +1,45 @@
a a
aa aː
aʼ
aʼa a̰ː
b b
ch t̠͡ʃ
chʼ t̠͡ʃʼ
e e
ee eː
eʼ
eʼe ḛː
f f
h h
i i
ii iː
iʼ
iʼi ḭː
j x
k k
kʼ kʼ
l l
m m
n n
ñ n
o o
oo oː
oʼ
oʼo o̰ː
p pʼ
pʼ pʼ
qu k
r r
s s
x ʃ
t t
ts t͡s
tsʼ t͡sʼ
tʼ tʼ
u u
uu uː
uʼ
uʼu ṵː
w w
y j
z s
@@ -0,0 +1,131 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Provides utility functions for the phonemizer"""
import os
from numbers import Number
from pathlib import Path
from typing import Union, List, Tuple, Iterable
import importlib
def cumsum(iterable: Iterable[Number]) -> List[Number]:
    """Returns the running totals of the `iterable` as a list

    The element at index i of the result is the sum of the i+1 first values
    of `iterable`. An empty input gives an empty list.

    """
    def running_totals():
        total = 0
        for term in iterable:
            total += term
            yield total

    return list(running_totals())
def str2list(text: Union[str, List[str]]) -> List[str]:
    """Returns the `text` as a list of lines

    A string is stripped from its leading and trailing line separators, then
    split on ``os.linesep``. Any other input is returned unchanged.

    """
    if not isinstance(text, str):
        return text

    newline = os.linesep
    return text.strip(newline).split(newline)
def list2str(text: Union[str, List[str]]) -> str:
    """Returns the lines in `text` joined as a single string

    The lines are joined with ``os.linesep``. A string is returned unchanged.

    """
    return text if isinstance(text, str) else os.linesep.join(text)
def chunks(text: Union[str, List[str]], num: int) \
        -> Tuple[List[List[str]], List[int]]:
    """Splits a `text` into at most `num` chunks of consecutive lines

    This function is useful when phonemizing a single text on multiple jobs.

    The number of chunks returned is at most `min(num, len(str2list(text)))`.
    All the chunks but the last have the same size, the last chunk takes all
    the remaining lines and can be longer.

    Parameters
    ----------
    text (str or list) : The text to divide in chunks, a string is first
        converted to a list of lines with str2list()

    num (int) : The number of chunks to build, must be a strictly positive
        integer.

    Returns
    -------
    chunks (list of list of str) : The chunked text, each chunk being a list
        of lines.

    offsets (list of int) : The index in the input text of the first line of
        each chunk, used to recover the line numbers in the input text from
        the chunks.

    """
    lines: List[str] = str2list(text)
    nlines = len(lines)

    size = int(max(1, nlines / num))  # noqa
    nchunks = min(num, nlines)

    # the nchunks - 1 first chunks have exactly `size` lines each...
    text_chunks = []
    for rank in range(nchunks - 1):
        start = rank * size
        text_chunks.append(lines[start:start + size])

    # ... and the last one takes all the remaining lines
    remainder = lines[(nchunks - 1) * size:]
    if remainder:
        text_chunks.append(remainder)

    # a chunk starts where all the previous ones end
    offsets = [0]
    offsets.extend(cumsum(len(chunk) for chunk in text_chunks[:-1]))
    return text_chunks, offsets
def get_package_resource(path: str) -> Path:
    """Returns the absolute path to a phonemizer resource file or directory

    The packages resource are stored within the source tree in the
    'phonemizer/share' directory and, once the package is installed, are moved
    to another system directory (e.g. /share/phonemizer).

    Parameters
    ----------
    path (str) : the file or directory to get, must be relative to
        'phonemizer/share'.

    Raises
    ------
    ValueError if the required `path` is not found

    Returns
    -------
    The absolute path to the required resource as a `pathlib.Path`

    """
    # `import importlib` alone does not load the `resources` submodule: it
    # must be imported explicitly or its access can raise an AttributeError
    import importlib.resources  # pylint: disable=import-outside-toplevel

    try:
        # new in python-3.9
        resource = importlib.resources.files('phonemizer') / 'share' / path
    except AttributeError:  # pragma: nocover
        # files() is not available, fall back to the older path() API
        with importlib.resources.path('phonemizer', 'share') as share:
            resource = share / path

    if not resource.exists():  # pragma: nocover
        raise ValueError(f'the requested resource does not exist: {resource}')

    return resource.resolve()
def version_as_tuple(version: str) -> Tuple[int, ...]:
    """Converts a version string to a tuple of integers

    Any '-dev' in the version string is ignored. For instance '1.2.3' gives
    (1, 2, 3) and '0.2-dev' gives (0, 2).

    """
    fields = version.replace('-dev', '').split('.')
    return tuple(map(int, fields))
@@ -0,0 +1,67 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Phonemizer version description"""
import importlib
from phonemizer.backend import (
EspeakBackend, EspeakMbrolaBackend, FestivalBackend, SegmentsBackend)
def _version_as_str(vers):
    """Joins the fields of a version with dots, from (1, 49, 3) to '1.49.3'"""
    return '.'.join(map(str, vers))
def version():
    """Return version information for front and backends

    The returned string starts with 'phonemizer-<version>'. It is followed by
    a line listing the available backends (with their version, excepted for
    espeak-mbrola) and a line listing the uninstalled ones, each line being
    omitted when its list is empty.

    """
    # `import importlib` alone does not load the `metadata` submodule: it
    # must be imported explicitly or its access can raise an AttributeError
    import importlib.metadata  # pylint: disable=import-outside-toplevel

    # version of the phonemizer
    _version = 'phonemizer-' + importlib.metadata.version('phonemizer')

    # for each backend, check if it is available or not. If so get its version
    available = []
    unavailable = []

    if EspeakBackend.is_available():
        available.append(
            'espeak-' + ('ng-' if EspeakBackend.is_espeak_ng() else '')
            + _version_as_str(EspeakBackend.version()))
    else:  # pragma: nocover
        unavailable.append('espeak')

    if EspeakMbrolaBackend.is_available():
        available.append('espeak-mbrola')
    else:  # pragma: nocover
        unavailable.append('espeak-mbrola')

    if FestivalBackend.is_available():
        available.append(
            'festival-' + _version_as_str(FestivalBackend.version()))
    else:  # pragma: nocover
        unavailable.append('festival')

    if SegmentsBackend.is_available():
        available.append(
            'segments-' + _version_as_str(SegmentsBackend.version()))
    else:  # pragma: nocover
        unavailable.append('segments')

    # sums up the backends status in the final version string
    if available:
        _version += '\navailable backends: ' + ', '.join(available)
    if unavailable:  # pragma: nocover
        _version += '\nuninstalled backends: ' + ', '.join(unavailable)

    return _version