2025-12-01

This commit is contained in:
2026-03-17 14:58:51 -06:00
parent 183e865f8b
commit 4b82b57113
6846 changed files with 954887 additions and 162606 deletions
@@ -0,0 +1,225 @@
# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Test of the espeak backend"""
# pylint: disable=missing-docstring
# pylint: disable=redefined-outer-name
import os
import shutil
import pytest
from phonemizer.backend import EspeakBackend
from phonemizer.backend.espeak.wrapper import EspeakWrapper
from phonemizer.separator import Separator, default_separator
def test_bad_text():
backend = EspeakBackend('en-us')
text = 'hello world'
with pytest.raises(RuntimeError) as err:
backend.phonemize(text, default_separator, True)
assert 'input text to phonemize() is str' in str(err)
assert backend.phonemize(
[text], default_separator, True) == ['həloʊ wɜːld']
def test_english():
backend = EspeakBackend('en-us')
text = ['hello world', 'goodbye', 'third line', 'yet another']
out = backend.phonemize(text, default_separator, True)
assert out == ['həloʊ wɜːld', 'ɡʊdbaɪ', 'θɜːd laɪn', 'jɛt ɐnʌðɚ']
def test_stress():
backend = EspeakBackend('en-us', with_stress=False)
assert backend.phonemize(
['hello world'], default_separator, True) == ['həloʊ wɜːld']
backend = EspeakBackend('en-us', with_stress=True)
assert backend.phonemize(
['hello world'], default_separator, True) == ['həlˈoʊ wˈɜːld']
def test_french():
backend = EspeakBackend('fr-fr')
text = ['bonjour le monde']
sep = Separator(word=';eword ', syllable=None, phone=' ')
expected = ['b ɔ̃ ʒ u ʁ ;eword l ə ;eword m ɔ̃ d ;eword ']
out = backend.phonemize(text, sep, False)
assert out == expected
@pytest.mark.skipif(
(
not EspeakBackend.is_espeak_ng() or
# Arabic is not supported by the Windows msi installer from espeak-ng
# github release
not EspeakBackend.is_supported_language('ar')),
reason='Arabic is not supported')
def test_arabic():
backend = EspeakBackend('ar')
text = ['السلام عليكم']
sep = Separator()
# Arabic seems to have changed starting at espeak-ng-1.49.3
if EspeakBackend.version() >= (1, 49, 3):
expected = ['ʔassalaːm ʕliːkm ']
else:
expected = ['ʔassalaam ʕaliijkum ']
out = backend.phonemize(text, sep, False)
assert out == expected
# see https://github.com/bootphon/phonemizer/issues/31
def test_phone_separator_simple():
text = ['The lion and the tiger ran']
sep = Separator(phone='_')
backend = EspeakBackend('en-us')
output = backend.phonemize(text, separator=sep, strip=True)
expected = ['ð_ə l_aɪə_n æ_n_d ð_ə t_aɪ_ɡ_ɚ ɹ_æ_n']
assert expected == output
output = backend.phonemize(text, separator=sep, strip=False)
expected = ['ð_ə_ l_aɪə_n_ æ_n_d_ ð_ə_ t_aɪ_ɡ_ɚ_ ɹ_æ_n_ ']
assert expected == output
@pytest.mark.parametrize(
'text, expected',
(('the hello but the', 'ð_ə h_ə_l_oʊ b_ʌ_t ð_ə'),
# ('Here there and everywhere', 'h_ɪɹ ð_ɛɹ æ_n_d ɛ_v_ɹ_ɪ_w_ɛɹ'),
# ('He was hungry and tired.', 'h_iː w_ʌ_z h_ʌ_ŋ_ɡ_ɹ_i æ_n_d t_aɪɚ_d'),
('He was hungry but tired.', 'h_iː w_ʌ_z h_ʌ_ŋ_ɡ_ɹ_i b_ʌ_t t_aɪɚ_d')))
def test_phone_separator(text, expected):
sep = Separator(phone='_')
backend = EspeakBackend('en-us')
output = backend.phonemize([text], separator=sep, strip=True)[0]
assert output == expected
@pytest.mark.skipif(
'PHONEMIZER_ESPEAK_LIBRARY' in os.environ,
reason='cannot modify environment')
def test_path_good():
espeak = EspeakBackend.library()
try:
EspeakBackend.set_library(None)
assert espeak == EspeakBackend.library()
library = EspeakWrapper().library_path
EspeakBackend.set_library(library)
test_english()
# restore the espeak path to default
finally:
EspeakBackend.set_library(None)
@pytest.mark.skipif(
'PHONEMIZER_ESPEAK_LIBRARY' in os.environ,
reason='cannot modify environment')
def test_path_bad():
try:
# corrupt the default espeak path, try to use python executable instead
binary = shutil.which('python')
EspeakBackend.set_library(binary)
with pytest.raises(RuntimeError):
EspeakBackend('en-us')
with pytest.raises(RuntimeError):
EspeakBackend.version()
EspeakBackend.set_library(__file__)
with pytest.raises(RuntimeError):
EspeakBackend('en-us')
# restore the espeak path to default
finally:
EspeakBackend.set_library(None)
@pytest.mark.skipif(
'PHONEMIZER_ESPEAK_LIBRARY' in os.environ,
reason='cannot modify environment')
def test_path_venv():
try:
os.environ['PHONEMIZER_ESPEAK_LIBRARY'] = (
shutil.which('python'))
with pytest.raises(RuntimeError):
EspeakBackend('en-us').phonemize(['hello'])
with pytest.raises(RuntimeError):
EspeakBackend.version()
os.environ['PHONEMIZER_ESPEAK_LIBRARY'] = __file__
with pytest.raises(RuntimeError):
EspeakBackend.version()
finally:
try:
del os.environ['PHONEMIZER_ESPEAK_LIBRARY']
except KeyError:
pass
@pytest.mark.skipif(
not EspeakBackend.is_espeak_ng(),
reason='tie only compatible with espeak-ng')
@pytest.mark.parametrize(
'tie, expected', [
(False, 'dʒ_æ_k_i_ tʃ_æ_n_ '),
(True, 'd͡ʒæki t͡ʃæn '),
('8', 'd8ʒæki t8ʃæn ')])
def test_tie_simple(caplog, tie, expected):
backend = EspeakBackend('en-us', tie=tie)
assert backend.phonemize(
['Jackie Chan'],
separator=Separator(word=' ', phone='_'))[0] == expected
if tie:
messages = [msg[2] for msg in caplog.record_tuples]
assert (
'cannot use ties AND phone separation, ignoring phone separator'
in messages)
@pytest.mark.skipif(
not EspeakBackend.is_espeak_ng(),
reason='tie only compatible with espeak-ng')
def test_tie_utf8():
# NOTE this is a bug in espeak to append ties on (en) language switch
# flags. For now phonemizer does not fix it.
backend = EspeakBackend('fr-fr', tie=True)
# used to be 'bɔ̃͡ʒuʁ '
assert backend.phonemize(['bonjour']) == ['bɔ̃ʒuʁ ']
# used to be 'ty ɛm lə (͡e͡n͡)fʊtbɔ͡ːl(͡f͡r͡)'
assert backend.phonemize(
['tu aimes le football']) == ['ty ɛm lə (͡e͡n)fʊtbɔːl(͡f͡r) ']
assert backend.phonemize(
['bonjour apple']) == ['bɔ̃ʒuʁ (͡e͡n)apə͡l(͡f͡r) ']
@pytest.mark.skipif(
not EspeakBackend.is_espeak_ng(),
reason='tie only compatible with espeak-ng')
def test_tie_bad():
with pytest.raises(RuntimeError):
EspeakBackend('en-us', tie='abc')