# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Test of the espeak backend"""

# pylint: disable=missing-docstring
# pylint: disable=redefined-outer-name

import os
import shutil

import pytest

from phonemizer.backend import EspeakBackend
from phonemizer.backend.espeak.wrapper import EspeakWrapper
from phonemizer.separator import Separator, default_separator


def test_bad_text():
    """phonemize() rejects a bare str but accepts a list of str."""
    backend = EspeakBackend('en-us')
    text = 'hello world'

    with pytest.raises(RuntimeError) as err:
        backend.phonemize(text, default_separator, True)
    # Checked outside the `with` block (a statement placed after the raising
    # call inside the block would never run) and against the exception
    # itself rather than the ExceptionInfo wrapper.
    assert 'input text to phonemize() is str' in str(err.value)

    assert backend.phonemize(
        [text], default_separator, True) == ['həloʊ wɜːld']


def test_english():
    """Phonemize several English lines with the default separator."""
    backend = EspeakBackend('en-us')
    text = ['hello world', 'goodbye', 'third line', 'yet another']
    out = backend.phonemize(text, default_separator, True)
    assert out == ['həloʊ wɜːld', 'ɡʊdbaɪ', 'θɜːd laɪn', 'jɛt ɐnʌðɚ']


def test_stress():
    """Stress marks appear in the output only when with_stress=True."""
    backend = EspeakBackend('en-us', with_stress=False)
    assert backend.phonemize(
        ['hello world'], default_separator, True) == ['həloʊ wɜːld']

    backend = EspeakBackend('en-us', with_stress=True)
    assert backend.phonemize(
        ['hello world'], default_separator, True) == ['həlˈoʊ wˈɜːld']


def test_french():
    """Phonemize French with custom word and phone separators, unstripped."""
    backend = EspeakBackend('fr-fr')
    text = ['bonjour le monde']
    sep = Separator(word=';eword ', syllable=None, phone=' ')
    expected = ['b ɔ̃ ʒ u ʁ ;eword l ə ;eword m ɔ̃ d ;eword ']
    out = backend.phonemize(text, sep, False)
    assert out == expected


@pytest.mark.skipif(
    (
        not EspeakBackend.is_espeak_ng() or
        # Arabic is not supported by the Windows msi installer from espeak-ng
        # github release
        not EspeakBackend.is_supported_language('ar')),
    reason='Arabic is not supported')
def test_arabic():
    """Phonemize Arabic; the expected output depends on the espeak version."""
    backend = EspeakBackend('ar')
    text = ['السلام عليكم']
    sep = Separator()

    # Arabic seems to have changed starting at espeak-ng-1.49.3
    if EspeakBackend.version() >= (1, 49, 3):
        expected = ['ʔassalaːm ʕliːkm ']
    else:
        expected = ['ʔassalaam ʕaliijkum ']

    out = backend.phonemize(text, sep, False)
    assert out == expected


# see https://github.com/bootphon/phonemizer/issues/31
def test_phone_separator_simple():
    """The phone separator is inserted between phones, with and without strip."""
    text = ['The lion and the tiger ran']
    sep = Separator(phone='_')
    backend = EspeakBackend('en-us')

    output = backend.phonemize(text, separator=sep, strip=True)
    expected = ['ð_ə l_aɪə_n æ_n_d ð_ə t_aɪ_ɡ_ɚ ɹ_æ_n']
    assert expected == output

    # without stripping, each word keeps its trailing phone separator
    output = backend.phonemize(text, separator=sep, strip=False)
    expected = ['ð_ə_ l_aɪə_n_ æ_n_d_ ð_ə_ t_aɪ_ɡ_ɚ_ ɹ_æ_n_ ']
    assert expected == output


@pytest.mark.parametrize(
    'text, expected',
    (('the hello but the', 'ð_ə h_ə_l_oʊ b_ʌ_t ð_ə'),
     # ('Here there and everywhere', 'h_ɪɹ ð_ɛɹ æ_n_d ɛ_v_ɹ_ɪ_w_ɛɹ'),
     # ('He was hungry and tired.', 'h_iː w_ʌ_z h_ʌ_ŋ_ɡ_ɹ_i æ_n_d t_aɪɚ_d'),
     ('He was hungry but tired.', 'h_iː w_ʌ_z h_ʌ_ŋ_ɡ_ɹ_i b_ʌ_t t_aɪɚ_d')))
def test_phone_separator(text, expected):
    """Check the phone separator on individual English sentences."""
    sep = Separator(phone='_')
    backend = EspeakBackend('en-us')
    output = backend.phonemize([text], separator=sep, strip=True)[0]
    assert output == expected


@pytest.mark.skipif(
    'PHONEMIZER_ESPEAK_LIBRARY' in os.environ,
    reason='cannot modify environment')
def test_path_good():
    """Setting the espeak library to a valid path keeps the backend working."""
    espeak = EspeakBackend.library()
    try:
        # resetting to the default must not change the library in use
        EspeakBackend.set_library(None)
        assert espeak == EspeakBackend.library()

        # explicitly point the backend at the library the wrapper resolved
        library = EspeakWrapper().library_path
        EspeakBackend.set_library(library)

        test_english()

    # restore the espeak path to default
    finally:
        EspeakBackend.set_library(None)
def _python_executable():
    """Return the path of a Python interpreter, used as a bogus espeak library.

    Prefers the ``python`` command found on PATH and falls back to the
    running interpreter: ``shutil.which`` returns None when no ``python``
    command is installed, and None cannot stand in for a corrupted path
    (``set_library(None)`` restores the default library, and ``os.environ``
    rejects non-str values).
    """
    import sys  # local import: only needed for this fallback
    return shutil.which('python') or sys.executable


@pytest.mark.skipif(
    'PHONEMIZER_ESPEAK_LIBRARY' in os.environ,
    reason='cannot modify environment')
def test_path_bad():
    """A library path that is not an espeak library raises RuntimeError."""
    try:
        # corrupt the default espeak path, try to use python executable instead
        binary = _python_executable()
        EspeakBackend.set_library(binary)

        with pytest.raises(RuntimeError):
            EspeakBackend('en-us')
        with pytest.raises(RuntimeError):
            EspeakBackend.version()

        # a plain text file (this test module) is not a valid library either
        EspeakBackend.set_library(__file__)
        with pytest.raises(RuntimeError):
            EspeakBackend('en-us')

    # restore the espeak path to default
    finally:
        EspeakBackend.set_library(None)


@pytest.mark.skipif(
    'PHONEMIZER_ESPEAK_LIBRARY' in os.environ,
    reason='cannot modify environment')
def test_path_venv():
    """A bad PHONEMIZER_ESPEAK_LIBRARY environment variable raises RuntimeError."""
    try:
        os.environ['PHONEMIZER_ESPEAK_LIBRARY'] = _python_executable()
        with pytest.raises(RuntimeError):
            EspeakBackend('en-us').phonemize(['hello'])
        with pytest.raises(RuntimeError):
            EspeakBackend.version()

        os.environ['PHONEMIZER_ESPEAK_LIBRARY'] = __file__
        with pytest.raises(RuntimeError):
            EspeakBackend.version()

    finally:
        # always remove the variable, even if it was never set
        os.environ.pop('PHONEMIZER_ESPEAK_LIBRARY', None)


@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='tie only compatible with espeak-ng')
@pytest.mark.parametrize(
    'tie, expected', [
        (False, 'dʒ_æ_k_i_ tʃ_æ_n_ '),
        (True, 'd͡ʒæki t͡ʃæn '),
        ('8', 'd8ʒæki t8ʃæn ')])
def test_tie_simple(caplog, tie, expected):
    """With a tie enabled the phone separator is ignored and a message logged."""
    backend = EspeakBackend('en-us', tie=tie)
    assert backend.phonemize(
        ['Jackie Chan'],
        separator=Separator(word=' ', phone='_'))[0] == expected

    if tie:
        # third field of each record tuple is the log message
        messages = [msg[2] for msg in caplog.record_tuples]
        assert (
            'cannot use ties AND phone separation, ignoring phone separator'
            in messages)


@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='tie only compatible with espeak-ng')
def test_tie_utf8():
    """Check tie output on French text, including language switch flags."""
    # NOTE this is a bug in espeak to append ties on (en) language switch
    # flags. For now phonemizer does not fix it.
    backend = EspeakBackend('fr-fr', tie=True)

    # used to be 'bɔ̃͡ʒuʁ '
    assert backend.phonemize(['bonjour']) == ['bɔ̃ʒuʁ ']

    # used to be 'ty ɛm lə (͡e͡n͡)fʊtbɔ͡ːl(͡f͡r͡)'
    assert backend.phonemize(
        ['tu aimes le football']) == ['ty ɛm lə (͡e͡n)fʊtbɔːl(͡f͡r) ']

    assert backend.phonemize(
        ['bonjour apple']) == ['bɔ̃ʒuʁ (͡e͡n)apə͡l(͡f͡r) ']


@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='tie only compatible with espeak-ng')
def test_tie_bad():
    """The tie value 'abc' is rejected with RuntimeError."""
    with pytest.raises(RuntimeError):
        EspeakBackend('en-us', tie='abc')