#!/usr/bin/env python
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <https://www.gnu.org/licenses/>.
"""Command-line phonemizer tool, run 'phonemize --help' for usage details"""
import argparse
import os
import sys
import re
from phonemizer import phonemize, separator, version, logger, punctuation
from phonemizer.backend import BACKENDS
class CatchExceptions:  # pragma: nocover
    """Decorator wrapping a function in a try/except block

    When an exception occurs, display a user friendly message on
    standard error before exiting with error code 1.

    The detected exceptions are ValueError, OSError, RuntimeError,
    AssertionError and KeyboardInterrupt. Any other exception is propagated
    to the caller unchanged.

    Parameters
    ----------
    function :
        The function to wrap in a try/except block. It is called without
        any argument.

    """
    def __init__(self, function):
        self.function = function

    def __call__(self):
        """Executes the wrapped function and catch common exceptions

        Returns
        -------
        The value returned by the wrapped function when it completes without
        raising one of the detected exceptions.

        Raises
        ------
        SystemExit
            With exit code 1, when one of the detected exceptions occurs.

        """
        try:
            return self.function()
        # IOError is not listed: in Python 3 it is an alias of OSError
        except (ValueError, OSError, RuntimeError, AssertionError) as err:
            self.exit(f'fatal error: {err}')
        except KeyboardInterrupt:
            self.exit('keyboard interruption, exiting')

    @staticmethod
    def exit(msg):
        """Write `msg` on stderr and exit with error code 1"""
        sys.stderr.write(msg.strip() + '\n')
        sys.exit(1)
def parse_args() -> argparse.Namespace:
    """Argument parser for the phonemization script

    Declares every command-line option of the tool, grouped by topic
    (general, input/output, backends, language, token separators, espeak
    specific, festival specific and punctuation processing), then parses
    the arguments of the current process.

    Returns
    -------
    args : argparse.Namespace
        The parsed options, as returned by `parser.parse_args()`.

    """
    # RawDescriptionHelpFormatter displays the description and the epilog as
    # they are written below instead of re-wrapping them
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''Multilingual text to phonemes converter
The 'phonemize' program allows simple phonemization of words and texts
in many language using four backends: espeak, espeak-mbrola, festival
and segments.
- espeak is a text-to-speech software supporting multiple languages
and IPA (International Phonetic Alphabet) output. See
http://espeak.sourceforge.net or
https://github.com/espeak-ng/espeak-ng
- espeak-mbrola uses the SAMPA phonetic alphabet, it requires mbrola to be
installed as well as additional mbrola voices. It does not support word or
syllable tokenization. See
https://github.com/espeak-ng/espeak-ng/blob/master/docs/mbrola.md
- festival is also a text-to-speech software. Currently only American
English is supported and festival uses a custom phoneset
(http://www.festvox.org/bsv/c4711.html), but festival is the only
backend supporting tokenization at the syllable
level. See http://www.cstr.ed.ac.uk/projects/festival
- segments is a Unicode tokenizer that build a phonemization from a
grapheme to phoneme mapping provided as a file by the user. See
https://github.com/cldf/segments.
See the '--list-languages' option below for details on the languages
supported by each backend.
''',
        epilog='''
Examples:
* Phonemize a US English text with espeak
$ echo 'hello world' | phonemize -l en-us -b espeak
həloʊ wɜːld
* Phonemize a US English text with festival
$ echo 'hello world' | phonemize -l en-us -b festival
hhaxlow werld
* Phonemize a Japanese text with segments
$ echo 'konnichiwa tsekai' | phonemize -l japanese -b segments
konnitʃiwa t͡sekai
* Add a separator between phones
$ echo 'hello world' | phonemize -l en-us -b festival -p '-' --strip
hh-ax-l-ow w-er-l-d
* Phonemize some French text file using espeak
$ phonemize -l fr-fr -b espeak text.txt -o phones.txt
''')

    # general arguments
    parser.add_argument(
        '-V', '--version',
        action='store_true',
        help='show version information and exit.')

    # --verbose and --quiet cannot be given together
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='write all log messages to stderr '
        '(displays only warnings by default).')

    group.add_argument(
        '-q', '--quiet',
        action='store_true',
        help='do not display any log message, even warnings.')

    parser.add_argument(
        '-j', '--njobs',
        type=int, metavar='', default=1,
        help='number of parallel jobs, default is %(default)s.')

    # input/output arguments
    group = parser.add_argument_group('input/output')
    # when omitted, input and output default to the stdin/stdout stream
    # objects, otherwise they are file names given as str
    group.add_argument(
        'input',
        default=sys.stdin, nargs='?', metavar='',
        help='input text file to phonemize, if not specified read from stdin.')

    group.add_argument(
        '-o', '--output',
        default=sys.stdout, metavar='',
        help='output text file to write, if not specified write to stdout.')

    # the value is False when the option is absent, True when it is given
    # without a value, or the string given as value
    group.add_argument(
        '--prepend-text',
        default=False, const=True, nargs='?', metavar='',
        help='''prepend each line of the phonemized output text with its
matching input text. If a string is specified as option value, use it
as field separator, else use one of "|", "||", "|||", "||||" by
selecting the first one that is not configured as a token separator
(see -p/-s/-w options).''')

    group.add_argument(
        '--preserve-empty-lines',
        action='store_true',
        help='''preserve the empty lines in the phonemized output, default is
to remove them.''')

    # backend selection
    group = parser.add_argument_group('backends')
    # the default is None and not 'espeak' so that --list-languages can
    # tell that no backend was selected, main() substitutes 'espeak' later
    group.add_argument(
        '-b', '--backend',
        metavar='', default=None,
        choices=['espeak', 'espeak-mbrola', 'festival', 'segments'],
        help="""the phonemization backend, must be 'espeak', 'espeak-mbrola',
'festival' or 'segments'. Default is 'espeak'.""")

    group.add_argument(
        '-L', '--list-languages',
        action='store_true',
        help="""list available languages (and exit) for the specified backend,
or for all backends if none selected.""")

    # language of the input text
    group = parser.add_argument_group('language')
    group.add_argument(
        '-l', '--language',
        metavar='', default='en-us',
        help='''the language code of the input text, use '--list-languages'
for a list of supported languages. Default is %(default)s.''')

    # separators, the defaults come from separator.default_separator
    group = parser.add_argument_group('token separators')
    group.add_argument(
        '-p', '--phone-separator',
        metavar='', default=separator.default_separator.phone,
        help='phone separator, default is "%(default)s".')

    group.add_argument(
        '-w', '--word-separator',
        metavar='', default=separator.default_separator.word,
        help='''word separator, not valid for espeak-mbrola backend,
default is "%(default)s".''')

    group.add_argument(
        '-s', '--syllable-separator',
        metavar='', default=separator.default_separator.syllable,
        help='''syllable separator, only valid for festival backend,
this option has no effect if another backend is used.
Default is "%(default)s".''')

    group.add_argument(
        '--strip',
        action='store_true',
        help='removes the end separators in phonemized tokens.')

    group = parser.add_argument_group('specific to espeak backend')

    # the library path is only displayed in the help message below, fall
    # back to None when the backend raises a RuntimeError
    try:
        espeak_library = BACKENDS['espeak'].library()
    except RuntimeError:  # pragma: nocover
        espeak_library = None
    group.add_argument(
        '--espeak-library',
        default=None, type=str, metavar='',
        help=f'''the path to the espeak shared library to use (*.so on Linux,
*.dylib on Mac and *.dll on Windows, useful to overload the default
espeak version installed on the system). Default to
{espeak_library}. This path can also be specified
using the PHONEMIZER_ESPEAK_LIBRARY environment variable.''')

    # the value is False when the option is absent, True when it is given
    # without a value, or the string given as value
    group.add_argument(
        '--tie',
        nargs='?', default=False, const=True, metavar='',
        help='''when the option is set, use a tie character within multi-letter
phoneme names, default to U+361 (as in d͡ʒ), 'z' means ZWJ character,
only compatible with espeak>1.48 and incompatible with the
-p/--phone-separator option''')

    group.add_argument(
        '--with-stress',
        action='store_true',
        help='''when the option is set, the stresses on phonemes are present
(stresses characters are ˈ'ˌ). By default stresses are removed.''')

    group.add_argument(
        '--language-switch',
        default='keep-flags',
        choices=['keep-flags', 'remove-flags', 'remove-utterance'],
        help="""espeak can pronounce some words in another language (typically
English) when phonemizing a text. This option setups the policy to use
when such a language switch occurs. Three values are available:
'keep-flags' (the default), 'remove-flags' or 'remove-utterance'. The
'keep-flags' policy keeps the language switching flags, for example
(en) or (jp), in the output. The 'remove-flags' policy removes them and
the 'remove-utterance' policy removes the whole line of text including
a language switch.""")

    group.add_argument(
        '--words-mismatch',
        default='ignore', choices=['ignore', 'warn', 'remove'],
        help="""espeak can join two consecutive words or drop some words,
yielding a word count mismatch between orthographic and phonemized
text. This option setups the policy to use when such a words count
mismatch occurs. Three values are available: 'ignore' (the default)
which do nothing, 'warn' which issue a warning for each mismatched
line, and 'remove' which remove the mismatched lines from the
output.""")

    group = parser.add_argument_group('specific to festival backend')

    # the executable path is only displayed in the help message below, fall
    # back to None when the backend raises a RuntimeError
    try:
        festival_executable = BACKENDS['festival'].executable()
    except RuntimeError:  # pragma: nocover
        festival_executable = None
    group.add_argument(
        '--festival-executable',
        default=None, type=str, metavar='',
        help=f'''the path to the festival executable to use (useful to
overload the default festival installed on the system). Default to
{festival_executable}. This path can also be specified using the
PHONEMIZER_FESTIVAL_EXECUTABLE environment variable.''')

    group = parser.add_argument_group(
        'punctuation processing',
        description='not available for espeak-mbrola backend')

    group.add_argument(
        '--preserve-punctuation',
        action='store_true',
        help='''preserve the punctuation marks in the phonemized output,
default is to remove them.''')

    group.add_argument(
        '--punctuation-marks',
        type=str, metavar='',
        default=punctuation.Punctuation.default_marks(),
        help='''the marks to consider during punctuation processing (either
for removal or preservation). Default is %(default)s.''')

    group.add_argument(
        '--punctuation-marks-is-regex',
        action='store_true',
        help="""interpret the '--punctuation-marks' parameter as a regex.
Default is to interpret as a string.""")

    # called without argument, parse_args() reads the process command line
    return parser.parse_args()
def list_languages(args_backend):
    """Returns the available languages for the given `backend` as a str

    Parameters
    ----------
    args_backend : str or None
        The name of the backend to list the languages for, must be a key
        of `BACKENDS`. When empty or None, the languages of all the
        backends in `BACKENDS` are listed.

    Returns
    -------
    listing : str
        For each backend, a header line followed by one line per supported
        language formatted as '<tab>code<tab>-><tab>name', sorted by
        language code. The text has no trailing newline, it is meant to
        be given to `print` by the caller.

    """
    backends = [args_backend] if args_backend else list(BACKENDS)

    # the text is returned and not printed here: the caller prints the
    # returned value, printing here as well would display a spurious "None"
    return '\n'.join(
        f'supported languages for {backend} are:\n' + '\n'.join(
            f'\t{code}\t->\t{name}' for code, name in sorted(
                BACKENDS[backend].supported_languages().items()))
        for backend in backends)
def get_logger(verbose, quiet):
    """Returns a logger configured from the --verbose/--quiet flags

    The verbosity given to `logger.get_logger` is 'verbose' when `verbose`
    is true, else 'quiet' when `quiet` is true, else 'normal'.

    """
    # verbose is tested first, it wins when both flags are set
    level = 'verbose' if verbose else ('quiet' if quiet else 'normal')
    return logger.get_logger(verbosity=level)
def setup_stream(stream, mode):
    """Returns a stream from `stream`, opening it if it is a filename

    When `stream` is a str, it is opened as an utf8 text file in the given
    `mode` and the caller is responsible for closing it. Any other object
    is returned unchanged.

    """
    if not isinstance(stream, str):
        return stream  # pragma: nocover

    # pylint: disable=consider-using-with
    return open(stream, mode, encoding='utf8')
@CatchExceptions
def main():
    """Phonemize a text from command-line arguments

    Parses the command line, then either displays the version, lists the
    supported languages, or phonemizes the input text and writes the result
    to the output stream.

    Raises
    ------
    ValueError
        If --punctuation-marks-is-regex is set and the punctuation marks
        are not a valid regular expression.

    """
    args = parse_args()

    # setup a custom path to espeak and festival if required (this must be done
    # before generating the version message)
    if args.espeak_library:
        BACKENDS['espeak'].set_library(args.espeak_library)
    if args.festival_executable:
        BACKENDS['festival'].set_executable(args.festival_executable)

    # display version information and exit
    if args.version:
        print(version.version())
        return

    # list supported languages and exit
    if args.list_languages:
        print(list_languages(args.backend))
        return

    # set default backend as espeak if not specified
    args.backend = args.backend or 'espeak'

    # configure logging according to --verbose/--quiet options
    log = get_logger(args.verbose, args.quiet)

    # the files opened from a filename, closed in the finally clause below.
    # Streams received as objects (stdin/stdout by default) are left open.
    opened = []
    try:
        # configure input:output as a readable/writable streams
        streamin = setup_stream(args.input, 'r')
        if isinstance(args.input, str):
            opened.append(streamin)
        log.debug('reading from %s', streamin.name)

        streamout = setup_stream(args.output, 'w')
        if isinstance(args.output, str):
            opened.append(streamout)
        log.debug('writing to %s', streamout.name)

        # configure the separator for phonemes, syllables and words.
        if args.backend == 'espeak-mbrola':
            log.debug('using espeak-mbrola backend: ignoring word separator')
            sep = separator.Separator(
                phone=args.phone_separator,
                syllable=None,
                word=None)
        else:
            sep = separator.Separator(
                phone=args.phone_separator,
                syllable=args.syllable_separator,
                word=args.word_separator)
        log.debug('separator is %s', sep)

        if args.prepend_text:
            input_output_separator = sep.input_output_separator(
                args.prepend_text)
            log.debug(
                'prepend input text to output, separator is "%s"',
                input_output_separator)
        else:
            input_output_separator = False

        if args.punctuation_marks_is_regex:
            log.debug('punctuation marks is regex %s', args.punctuation_marks)
            try:
                args.punctuation_marks = re.compile(args.punctuation_marks)
            except re.error as err:
                # the open streams are closed by the finally clause below
                raise ValueError(
                    "can't compile regex pattern from "
                    f"{args.punctuation_marks}") from err

        # phonemize the input text
        out = phonemize(
            streamin.readlines(),
            language=args.language,
            backend=args.backend,
            separator=sep,
            strip=args.strip,
            prepend_text=args.prepend_text,
            preserve_empty_lines=args.preserve_empty_lines,
            preserve_punctuation=args.preserve_punctuation,
            punctuation_marks=args.punctuation_marks,
            with_stress=args.with_stress,
            tie=args.tie,
            language_switch=args.language_switch,
            words_mismatch=args.words_mismatch,
            njobs=args.njobs,
            logger=log)

        if out:
            if input_output_separator:
                # each item is indexed as (input text, phonemized text)
                lines = (
                    f'{line[0]} {input_output_separator} {line[1]}'
                    for line in out)
            else:
                lines = out

            # write '\n' and not os.linesep: the streams are in text mode
            # and already translate '\n' to the platform line ending, so
            # os.linesep would be written as '\r\r\n' on Windows
            streamout.write('\n'.join(lines) + '\n')
    finally:
        for stream in opened:
            stream.close()
# run the command-line tool only when this file is executed as a script,
# not when it is imported as a module
if __name__ == '__main__':  # pragma: nocover
    main()