2025-12-01
This commit is contained in:
@@ -0,0 +1,225 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Test of the espeak backend"""
|
||||
|
||||
# pylint: disable=missing-docstring
|
||||
# pylint: disable=redefined-outer-name
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import pytest
|
||||
|
||||
from phonemizer.backend import EspeakBackend
|
||||
from phonemizer.backend.espeak.wrapper import EspeakWrapper
|
||||
from phonemizer.separator import Separator, default_separator
|
||||
|
||||
|
||||
def test_bad_text():
    """A bare str input must raise, the same text wrapped in a list works."""
    backend = EspeakBackend('en-us')
    text = 'hello world'

    with pytest.raises(RuntimeError) as err:
        backend.phonemize(text, default_separator, True)
    # check the message of the raised exception itself: str() of the pytest
    # ExceptionInfo wrapper is only a repr and may escape the message
    assert 'input text to phonemize() is str' in str(err.value)

    assert backend.phonemize(
        [text], default_separator, True) == ['həloʊ wɜːld']
|
||||
|
||||
|
||||
def test_english():
    """Phonemize a few English utterances with the default separator."""
    utterances = ['hello world', 'goodbye', 'third line', 'yet another']
    expected = ['həloʊ wɜːld', 'ɡʊdbaɪ', 'θɜːd laɪn', 'jɛt ɐnʌðɚ']

    phonemized = EspeakBackend('en-us').phonemize(
        utterances, default_separator, True)
    assert phonemized == expected
|
||||
|
||||
|
||||
def test_stress():
    """Compare the output of backends built with and without with_stress."""
    unstressed = EspeakBackend('en-us', with_stress=False)
    result = unstressed.phonemize(['hello world'], default_separator, True)
    assert result == ['həloʊ wɜːld']

    stressed = EspeakBackend('en-us', with_stress=True)
    result = stressed.phonemize(['hello world'], default_separator, True)
    assert result == ['həlˈoʊ wˈɜːld']
|
||||
|
||||
|
||||
def test_french():
    """Phonemize a French utterance with custom word and phone separators."""
    backend = EspeakBackend('fr-fr')
    separator = Separator(word=';eword ', syllable=None, phone=' ')

    phonemized = backend.phonemize(['bonjour le monde'], separator, False)
    assert phonemized == ['b ɔ̃ ʒ u ʁ ;eword l ə ;eword m ɔ̃ d ;eword ']
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    # Arabic is not supported by the Windows msi installer from the
    # espeak-ng github release, hence the explicit language check
    not (EspeakBackend.is_espeak_ng()
         and EspeakBackend.is_supported_language('ar')),
    reason='Arabic is not supported')
def test_arabic():
    """Phonemize an Arabic utterance, expectation depends on the version."""
    backend = EspeakBackend('ar')
    utterance = ['السلام عليكم']
    separator = Separator()

    # Arabic seems to have changed starting at espeak-ng-1.49.3
    recent = EspeakBackend.version() >= (1, 49, 3)
    expected = ['ʔassalaːm ʕliːkm '] if recent else ['ʔassalaam ʕaliijkum ']

    assert backend.phonemize(utterance, separator, False) == expected
|
||||
|
||||
|
||||
# see https://github.com/bootphon/phonemizer/issues/31
|
||||
def test_phone_separator_simple():
    """Check the '_' phone separator, with strip enabled then disabled."""
    utterance = ['The lion and the tiger ran']
    separator = Separator(phone='_')
    backend = EspeakBackend('en-us')

    stripped = backend.phonemize(utterance, separator=separator, strip=True)
    assert ['ð_ə l_aɪə_n æ_n_d ð_ə t_aɪ_ɡ_ɚ ɹ_æ_n'] == stripped

    unstripped = backend.phonemize(
        utterance, separator=separator, strip=False)
    assert ['ð_ə_ l_aɪə_n_ æ_n_d_ ð_ə_ t_aɪ_ɡ_ɚ_ ɹ_æ_n_ '] == unstripped
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'text, expected',
    (
        ('the hello but the', 'ð_ə h_ə_l_oʊ b_ʌ_t ð_ə'),
        # disabled cases, kept for reference:
        # ('Here there and everywhere', 'h_ɪɹ ð_ɛɹ æ_n_d ɛ_v_ɹ_ɪ_w_ɛɹ'),
        # ('He was hungry and tired.', 'h_iː w_ʌ_z h_ʌ_ŋ_ɡ_ɹ_i æ_n_d t_aɪɚ_d'),
        ('He was hungry but tired.', 'h_iː w_ʌ_z h_ʌ_ŋ_ɡ_ɹ_i b_ʌ_t t_aɪɚ_d'),
    ))
def test_phone_separator(text, expected):
    """Phonemize a single utterance with '_' between phones, stripped."""
    backend = EspeakBackend('en-us')
    phonemized = backend.phonemize(
        [text], separator=Separator(phone='_'), strip=True)
    assert phonemized[0] == expected
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    'PHONEMIZER_ESPEAK_LIBRARY' in os.environ,
    reason='cannot modify environment')
def test_path_good():
    """Set the espeak library explicitly and check phonemization works."""
    default_library = EspeakBackend.library()
    try:
        # setting None must give back the library found by default
        EspeakBackend.set_library(None)
        assert default_library == EspeakBackend.library()

        # explicitly point the backend to the library used by the wrapper
        EspeakBackend.set_library(EspeakWrapper().library_path)

        test_english()
    finally:
        # restore the espeak path to default
        EspeakBackend.set_library(None)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    'PHONEMIZER_ESPEAK_LIBRARY' in os.environ,
    reason='cannot modify environment')
def test_path_bad():
    """An invalid espeak library must raise RuntimeError when used."""
    try:
        # corrupt the default espeak path, try to use python executable instead
        EspeakBackend.set_library(shutil.which('python'))

        with pytest.raises(RuntimeError):
            EspeakBackend('en-us')
        with pytest.raises(RuntimeError):
            EspeakBackend.version()

        # same with this very file as the library
        EspeakBackend.set_library(__file__)
        with pytest.raises(RuntimeError):
            EspeakBackend('en-us')
    finally:
        # restore the espeak path to default
        EspeakBackend.set_library(None)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    'PHONEMIZER_ESPEAK_LIBRARY' in os.environ,
    reason='cannot modify environment')
def test_path_venv():
    """An invalid library given by environment variable must raise."""
    variable = 'PHONEMIZER_ESPEAK_LIBRARY'
    try:
        os.environ[variable] = shutil.which('python')
        with pytest.raises(RuntimeError):
            EspeakBackend('en-us').phonemize(['hello'])
        with pytest.raises(RuntimeError):
            EspeakBackend.version()

        os.environ[variable] = __file__
        with pytest.raises(RuntimeError):
            EspeakBackend.version()
    finally:
        # remove the variable, tolerating it being already absent
        os.environ.pop(variable, None)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='tie only compatible with espeak-ng')
@pytest.mark.parametrize(
    'tie, expected',
    [
        (False, 'dʒ_æ_k_i_ tʃ_æ_n_ '),
        (True, 'd͡ʒæki t͡ʃæn '),
        ('8', 'd8ʒæki t8ʃæn '),
    ])
def test_tie_simple(caplog, tie, expected):
    """Check the tie option output and the warning logged when it is set."""
    backend = EspeakBackend('en-us', tie=tie)
    separator = Separator(word=' ', phone='_')
    phonemized = backend.phonemize(['Jackie Chan'], separator=separator)
    assert phonemized[0] == expected

    if not tie:
        return

    # a tie together with a phone separator must have been reported
    logged = [record[2] for record in caplog.record_tuples]
    assert (
        'cannot use ties AND phone separation, ignoring phone separator'
        in logged)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='tie only compatible with espeak-ng')
def test_tie_utf8():
    """Check ties on French text, including language switch flags."""
    # NOTE this is a bug in espeak to append ties on (en) language switch
    # flags. For now phonemizer does not fix it.
    backend = EspeakBackend('fr-fr', tie=True)

    cases = (
        # used to be 'bɔ̃͡ʒuʁ '
        ('bonjour', 'bɔ̃ʒuʁ '),
        # used to be 'ty ɛm lə (͡e͡n͡)fʊtbɔ͡ːl(͡f͡r͡)'
        ('tu aimes le football', 'ty ɛm lə (͡e͡n)fʊtbɔːl(͡f͡r) '),
        ('bonjour apple', 'bɔ̃ʒuʁ (͡e͡n)apə͡l(͡f͡r) '),
    )
    for utterance, expected in cases:
        assert backend.phonemize([utterance]) == [expected]
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='tie only compatible with espeak-ng')
def test_tie_bad():
    """Building a backend with tie='abc' must raise RuntimeError."""
    invalid_tie = 'abc'
    with pytest.raises(RuntimeError):
        EspeakBackend('en-us', tie=invalid_tie)
|
||||
@@ -0,0 +1,142 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Test of the espeak backend language switch processing"""
|
||||
|
||||
# pylint: disable=missing-docstring
|
||||
# pylint: disable=redefined-outer-name
|
||||
|
||||
import pytest
|
||||
|
||||
from phonemizer.backend import EspeakBackend
|
||||
from phonemizer.separator import Separator
|
||||
|
||||
|
||||
@pytest.fixture
def langswitch_text():
    """French utterances used as input by the language switch tests."""
    utterances = (
        "j'aime l'anglais",
        "j'aime le football",
        "football",
        "surtout le real madrid",
        "n'utilise pas google",
    )
    return list(utterances)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='language switch only exists for espeak-ng')
@pytest.mark.parametrize('njobs', [1, 3])
def test_language_switch_keep_flags(caplog, langswitch_text, njobs):
    """With the 'keep-flags' policy the (en)/(fr) flags stay in the output."""
    backend = EspeakBackend('fr-fr', language_switch='keep-flags')
    phonemized = backend.phonemize(
        langswitch_text, separator=Separator(), strip=True, njobs=njobs)

    expected = [
        'ʒɛm lɑ̃ɡlɛ',
        'ʒɛm lə (en)fʊtbɔːl(fr)',
        '(en)fʊtbɔːl(fr)',
        'syʁtu lə (en)ɹiəl(fr) madʁid',
        'nytiliz pa (en)ɡuːɡəl(fr)',
    ]
    assert phonemized == expected

    logged = [record[2] for record in caplog.record_tuples]
    for message in (
            '4 utterances containing language switches on lines 2, 3, 4, 5',
            'language switch flags have been kept '
            '(applying "keep-flags" policy)'):
        assert message in logged
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='language switch only exists for espeak-ng')
@pytest.mark.parametrize('njobs', [1, 3])
def test_language_switch_default(caplog, langswitch_text, njobs):
    """Without an explicit policy the language switch flags are kept."""
    # default behavior is to keep the flags
    backend = EspeakBackend('fr-fr')
    phonemized = backend.phonemize(
        langswitch_text, separator=Separator(), strip=True, njobs=njobs)

    expected = [
        'ʒɛm lɑ̃ɡlɛ',
        'ʒɛm lə (en)fʊtbɔːl(fr)',
        '(en)fʊtbɔːl(fr)',
        'syʁtu lə (en)ɹiəl(fr) madʁid',
        'nytiliz pa (en)ɡuːɡəl(fr)',
    ]
    assert phonemized == expected

    logged = [record[2] for record in caplog.record_tuples]
    for message in (
            '4 utterances containing language switches on lines 2, 3, 4, 5',
            'language switch flags have been kept '
            '(applying "keep-flags" policy)'):
        assert message in logged
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='language switch only exists for espeak-ng')
@pytest.mark.parametrize('njobs', [1, 3])
def test_language_switch_remove_flags(caplog, langswitch_text, njobs):
    """With the 'remove-flags' policy the flags are dropped from output."""
    backend = EspeakBackend('fr-fr', language_switch='remove-flags')
    phonemized = backend.phonemize(
        langswitch_text, separator=Separator(), strip=True, njobs=njobs)

    expected = [
        'ʒɛm lɑ̃ɡlɛ',
        'ʒɛm lə fʊtbɔːl',
        'fʊtbɔːl',
        'syʁtu lə ɹiəl madʁid',
        'nytiliz pa ɡuːɡəl',
    ]
    assert phonemized == expected

    logged = [record[2] for record in caplog.record_tuples]
    for message in (
            '4 utterances containing language switches on lines 2, 3, 4, 5',
            'language switch flags have been removed '
            '(applying "remove-flags" policy)'):
        assert message in logged
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='language switch only exists for espeak-ng')
@pytest.mark.parametrize('njobs', [1, 3])
def test_language_switch_remove_utterance(caplog, langswitch_text, njobs):
    """With 'remove-utterance' the switched utterances come out empty."""
    backend = EspeakBackend('fr-fr', language_switch='remove-utterance')
    phonemized = backend.phonemize(
        langswitch_text, separator=Separator(), strip=True, njobs=njobs)
    assert phonemized == ['ʒɛm lɑ̃ɡlɛ', '', '', '', '']

    logged = [record[2] for record in caplog.record_tuples]
    expected_message = (
        'removed 4 utterances containing language switches '
        '(applying "remove-utterance" policy)')
    assert expected_message in logged

    # an unknown policy name is rejected at construction
    with pytest.raises(RuntimeError):
        EspeakBackend('fr-fr', language_switch='foo')
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='language switch only exists for espeak-ng')
@pytest.mark.parametrize(
    'policy', ('keep-flags', 'remove-flags', 'remove-utterance'))
def test_no_switch(policy, caplog):
    """Text without language switch logs nothing, whatever the policy."""
    utterances = ["j'aime l'anglais", "tu parles le français"]
    backend = EspeakBackend('fr-fr', language_switch=policy)

    phonemized = backend.phonemize(
        utterances, separator=Separator(), strip=True)
    assert phonemized == ['ʒɛm lɑ̃ɡlɛ', 'ty paʁl lə fʁɑ̃sɛ']

    logged = [record[2] for record in caplog.record_tuples]
    assert not logged
|
||||
@@ -0,0 +1,79 @@
|
||||
"""Tests of the phonemizer.backend.espeak.words_mismatch module"""
|
||||
|
||||
# pylint: disable=missing-docstring
|
||||
# pylint: disable=redefined-outer-name
|
||||
|
||||
import pytest
|
||||
|
||||
import re
|
||||
|
||||
from phonemizer import phonemize
|
||||
from phonemizer.backend.espeak.words_mismatch import Ignore
|
||||
from phonemizer.separator import Separator, default_separator
|
||||
|
||||
|
||||
@pytest.fixture
def text():
    """English utterances used as input by the words mismatch tests."""
    utterances = ("How are you?", "I have been busy", "I won't have time")
    return list(utterances)
|
||||
|
||||
|
||||
def test_count_words():
    """Check Ignore._count_words on a few hand-written inputs."""
    # pylint: disable=protected-access
    def count_words(phn):
        return Ignore._count_words(phn, wordsep=default_separator.word)

    cases = (
        ([''], [0]),
        (['a'], [1]),
        (['aaa'], [1]),
        ([' aaa '], [1]),
        ([' a a \taa '], [3]),
    )
    for phonemized, expected in cases:
        assert count_words(phonemized) == expected
|
||||
|
||||
|
||||
def test_bad():
    """Invalid words_mismatch usages must raise RuntimeError."""
    invalid_options = (
        {'words_mismatch': 'foo'},
        {'backend': 'festival', 'words_mismatch': 'remove'},
    )
    for options in invalid_options:
        with pytest.raises(RuntimeError):
            phonemize('', **options)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('mode', ['ignore', 'warn', 'remove'])
def test_mismatch(caplog, text, mode):
    """Check output and logged messages for each words_mismatch mode."""
    phn = phonemize(
        text, backend='espeak', language='en-us', words_mismatch=mode)
    logged = [record[2] for record in caplog.record_tuples]

    summary = 'words count mismatch on 67.0% of the lines (2/3)'
    complete = ['haʊ ɑːɹ juː ', 'aɪ hɐvbɪn bɪzi ', 'aɪ woʊntɐv taɪm ']

    if mode == 'ignore':
        assert phn == complete
        assert len(logged) == 1
        assert summary in logged
    elif mode == 'remove':
        assert phn == ['haʊ ɑːɹ juː ', '', '']
        assert len(logged) == 2
        assert summary in logged
        assert 'removing the mismatched lines' in logged
    elif mode == 'warn':
        assert phn == complete
        assert len(logged) == 3
        for line in (2, 3):
            assert (
                f'words count mismatch on line {line} '
                '(expected 4 words but get 3)'
                in logged)
        assert summary in logged
|
||||
|
||||
|
||||
# from https://github.com/bootphon/phonemizer/issues/169
|
||||
def test_custom_separator(caplog):
    """A custom word separator must not log any words mismatch message."""
    separator = Separator(word='|', phone=' ')
    phn = phonemize(
        'try',
        backend='espeak',
        language='en-us',
        separator=separator,
        words_mismatch='warn')
    assert phn == 't ɹ aɪ |'

    logged = [record[2] for record in caplog.record_tuples]
    assert len(logged) == 0
|
||||
@@ -0,0 +1,134 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Test of the EspeakWrapper class"""
|
||||
|
||||
# pylint: disable=missing-docstring
|
||||
# pylint: disable=redefined-outer-name
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
import pickle
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from phonemizer.backend.espeak.wrapper import EspeakWrapper
|
||||
from phonemizer.backend import EspeakMbrolaBackend
|
||||
|
||||
|
||||
@pytest.fixture
def wrapper():
    """Provide a new EspeakWrapper instance to the requesting test."""
    instance = EspeakWrapper()
    return instance
|
||||
|
||||
|
||||
def test_basic(wrapper):
    """Check the version and the paths exposed by the wrapper."""
    assert wrapper.version >= (1, 48)

    library = wrapper.library_path
    assert 'espeak' in str(library)
    assert os.path.isabs(library)

    # data_path must be an absolute path as well: not None, no raise
    assert os.path.isabs(wrapper.data_path)
|
||||
|
||||
|
||||
def test_available_voices(wrapper):
    """Default voices exist and are disjoint from the mbrola voices."""
    espeak_voices = set(wrapper.available_voices())
    assert espeak_voices

    # can be empty if no mbrola voice installed (occurs only on Windows, at
    # least within the github CI pipeline)
    mbrola_voices = set(wrapper.available_voices('mbrola'))
    if mbrola_voices:
        assert not espeak_voices.intersection(mbrola_voices)
|
||||
|
||||
|
||||
def test_set_get_voice(wrapper):
    """Set several voices on the wrapper and read them back.

    Invalid voice codes must raise a RuntimeError whose message mentions
    the invalid code.

    """
    assert wrapper.voice is None

    with pytest.raises(RuntimeError) as err:
        wrapper.set_voice('')
    # check the exception message itself (err.value): str() of the pytest
    # ExceptionInfo wrapper is only a repr and may escape the message
    assert 'invalid voice code ""' in str(err.value)

    wrapper.set_voice('fr-fr')
    assert wrapper.voice.language == 'fr-fr'
    assert wrapper.voice.name in (
        'French (France)',  # >1.48.3
        'french')  # older espeak

    wrapper.set_voice('en-us')
    assert wrapper.voice.language == 'en-us'
    assert wrapper.voice.name in (
        'English (America)',  # >1.48.3
        'english-us')  # older espeak

    # no mbrola voices available on Windows by default (at least on the github
    # CI pipeline)
    if sys.platform != 'win32':
        wrapper.set_voice('mb-af1')
        assert wrapper.voice.language == 'af'
        assert wrapper.voice.name == 'afrikaans-mbrola-1'

    with pytest.raises(RuntimeError) as err:
        wrapper.set_voice('some non existant voice code')
    assert 'invalid voice code' in str(err.value)
|
||||
|
||||
|
||||
def _test_pickle(voice):
    """Pickle a wrapper set to `voice` and compare it with its copy."""
    # the wrapper is pickled when using espeak backend on multiple jobs
    original = EspeakWrapper()
    original.set_voice(voice)

    restored = pickle.loads(pickle.dumps(original))

    assert original.version == restored.version
    assert original.library_path == restored.library_path
    assert original.data_path == restored.data_path
    assert original.voice == restored.voice
|
||||
|
||||
|
||||
def test_pickle_en_us():
    """Run the pickle round-trip check with the 'en-us' voice."""
    voice = 'en-us'
    _test_pickle(voice)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not (EspeakMbrolaBackend.is_available()
         and EspeakMbrolaBackend.is_supported_language('mb-fr1')),
    reason='mbrola or mb-fr1 voice not installed')
def test_pickle_mb_fr1():
    """Run the pickle round-trip check with the 'mb-fr1' mbrola voice."""
    voice = 'mb-fr1'
    _test_pickle(voice)
|
||||
|
||||
|
||||
def test_twice():
    """Two wrappers share paths and version but keep independent voices."""
    first = EspeakWrapper()
    second = EspeakWrapper()

    for attribute in ('data_path', 'version', 'library_path'):
        assert getattr(first, attribute) == getattr(second, attribute)

    first.set_voice('fr-fr')
    assert first.voice.language == 'fr-fr'

    # changing the voice of the second wrapper must not alter the first one
    second.set_voice('en-us')
    assert second.voice.language == 'en-us'
    assert first.voice.language == 'fr-fr'

    # pylint: disable=protected-access
    assert first._espeak._tempdir != second._espeak._tempdir
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == 'win32', reason='not supported on Windows')
def test_deletion():
    """The wrapper's temporary directory is gone once it is deleted."""
    # pylint: disable=protected-access
    instance = EspeakWrapper()
    tempdir = pathlib.Path(instance._espeak._tempdir)

    del instance
    assert not tempdir.exists()
|
||||
@@ -0,0 +1,119 @@
|
||||
# Copyright 2015-2021 Thomas Schatz, Xuan Nga Cao, Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Test of the festival backend"""
|
||||
|
||||
# pylint: disable=missing-docstring
|
||||
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
|
||||
import pytest
|
||||
|
||||
from phonemizer.separator import Separator
|
||||
from phonemizer.backend import FestivalBackend
|
||||
|
||||
|
||||
def _test(text, separator=Separator(
        word=' ', syllable='|', phone='-')):
    """Return `text` phonemized by an 'en-us' festival backend.

    Calls the protected `_phonemize_aux` method of the backend directly,
    with the given `separator`.

    """
    # pylint: disable=protected-access
    return FestivalBackend('en-us')._phonemize_aux(text, 0, separator, True)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    FestivalBackend.version() <= (2, 1),
    reason='festival-2.1 gives different results than further versions '
    'for syllable boundaries')
def test_hello():
    """Phonemize 'hello world' as one utterance, then as two."""
    cases = (
        (['hello world'], ['hh-ax|l-ow w-er-l-d']),
        (['hello', 'world'], ['hh-ax|l-ow', 'w-er-l-d']),
    )
    for utterances, expected in cases:
        assert _test(utterances) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize('text', ['', ' ', ' ', '(', '()', '"', "'"])
def test_bad_input(text):
    """Empty, blank or punctuation-only input gives an empty result."""
    phonemized = _test(text)
    assert phonemized == []
|
||||
|
||||
|
||||
def test_quote():
    """Check how quotes inside an utterance affect the phonemization."""
    cases = (
        ("it's", 'ih-t-s'),
        ("its", 'ih-t-s'),
        ("it s", 'ih-t eh-s'),
        ('it "s', 'ih-t eh-s'),
    )
    for utterance, expected in cases:
        assert _test([utterance]) == [expected]
|
||||
|
||||
|
||||
def test_im():
    """Compare "I'm" and "Im" with empty syllable and phone separators."""
    sep = Separator(word=' ', syllable='', phone='')

    with_quote = _test(["I'm looking for an image"], sep)
    assert with_quote == ['aym luhkaxng faor axn ihmaxjh']

    without_quote = _test(["Im looking for an image"], sep)
    assert without_quote == ['ihm luhkaxng faor axn ihmaxjh']
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not shutil.which('festival'), reason='festival not in PATH')
def test_path_good():
    """Set the festival executable explicitly and read it back."""
    try:
        festival = shutil.which('festival')
        FestivalBackend.set_executable(festival)

        backend = FestivalBackend('en-us')
        assert backend.executable() == pathlib.Path(festival)
    finally:
        # restore the festival path to default
        FestivalBackend.set_executable(None)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    'PHONEMIZER_FESTIVAL_EXECUTABLE' in os.environ,
    reason='environment variable precedence')
def test_path_bad():
    """An invalid festival executable must raise RuntimeError."""
    try:
        # corrupt the default festival path, use the python executable
        # instead
        FestivalBackend.set_executable(shutil.which('python'))

        with pytest.raises(RuntimeError):
            FestivalBackend('en-us').phonemize(['hello'])
        with pytest.raises(RuntimeError):
            FestivalBackend.version()

        # this very file is rejected as soon as it is set
        with pytest.raises(RuntimeError):
            FestivalBackend.set_executable(__file__)
    finally:
        # restore the festival path to default
        FestivalBackend.set_executable(None)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    'PHONEMIZER_FESTIVAL_EXECUTABLE' in os.environ,
    reason='cannot modify environment')
def test_path_venv():
    """An invalid executable given by environment variable must raise."""
    variable = 'PHONEMIZER_FESTIVAL_EXECUTABLE'
    try:
        os.environ[variable] = shutil.which('python')
        with pytest.raises(RuntimeError):
            FestivalBackend('en-us').phonemize(['hello'])
        with pytest.raises(RuntimeError):
            FestivalBackend.version()

        os.environ[variable] = __file__
        with pytest.raises(RuntimeError):
            FestivalBackend.version()
    finally:
        # remove the variable, tolerating it being already absent
        os.environ.pop(variable, None)
|
||||
@@ -0,0 +1,14 @@
|
||||
"""Tests to import the phonemize function"""
|
||||
|
||||
# pylint: disable=missing-docstring
|
||||
# pylint: disable=import-outside-toplevel
|
||||
|
||||
|
||||
def test_relative():
    """Import phonemize from the package top level and call it."""
    from phonemizer import phonemize

    phonemized = phonemize('a')
    assert phonemized == 'eɪ '
|
||||
|
||||
|
||||
def test_absolute():
    """Import phonemize from the phonemizer.phonemize module and call it."""
    from phonemizer.phonemize import phonemize

    phonemized = phonemize('a')
    assert phonemized == 'eɪ '
|
||||
@@ -0,0 +1,158 @@
|
||||
# Copyright 2015-2021 Thomas Schatz, Xuan Nga Cao, Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Test of the command line interface"""
|
||||
|
||||
# pylint: disable=missing-docstring
|
||||
|
||||
import os
|
||||
import pathlib
|
||||
import tempfile
|
||||
import shlex
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from phonemizer.backend import EspeakMbrolaBackend, EspeakBackend
|
||||
from phonemizer import main, backend, logger
|
||||
|
||||
|
||||
def _test(text, expected_output, args=''):
    """Run the command line on `text` and check the output it writes.

    Writes `text` to a temporary input file, calls `main.main()` with that
    file as input, a temporary file as output and the extra options in
    `args` (a string split with `shlex.split`), then compares the content
    of the output file with `expected_output`.

    The process-wide `sys.argv` is restored on exit, so the temporary
    arguments do not leak into the tests executed afterwards.

    """
    saved_argv = sys.argv
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            input_file = pathlib.Path(tmpdir) / 'input.txt'
            output_file = pathlib.Path(tmpdir) / 'output.txt'
            with open(input_file, 'wb') as finput:
                finput.write(text.encode('utf8'))

            # the first item stands for the program name
            sys.argv = ['unused', f'{input_file}', '-o', f'{output_file}']
            if args:
                sys.argv += shlex.split(args)
            main.main()

            with open(output_file, 'rb') as foutput:
                output = foutput.read().decode()

            # silly fix for windows
            assert output.replace('\r', '').strip(os.linesep) \
                == expected_output.replace('\r', '')
    finally:
        sys.argv = saved_argv
|
||||
|
||||
|
||||
def test_help():
    """The -h option must terminate the program with SystemExit."""
    saved_argv = sys.argv
    try:
        sys.argv = ['foo', '-h']
        with pytest.raises(SystemExit):
            main.main()
    finally:
        # do not leak the modified arguments into the following tests
        sys.argv = saved_argv
|
||||
|
||||
|
||||
def test_version():
    """The --version option must run without raising."""
    saved_argv = sys.argv
    try:
        sys.argv = ['foo', '--version']
        main.main()
    finally:
        # do not leak the modified arguments into the following tests
        sys.argv = saved_argv
|
||||
|
||||
|
||||
def test_list_languages():
    """The --list-languages option must run without raising."""
    saved_argv = sys.argv
    try:
        sys.argv = ['foo', '--list-languages']
        main.main()
    finally:
        # do not leak the modified arguments into the following tests
        sys.argv = saved_argv
|
||||
|
||||
|
||||
def test_readme():
    """Run a series of command line invocations and check their output."""
    cases = (
        ('hello world', 'həloʊ wɜːld ', '--verbose'),
        ('hello world', 'həloʊ wɜːld ', '--quiet'),
        ('hello world', 'hello world | həloʊ wɜːld ', '--prepend-text'),
        ('hello world', 'hhaxlow werld', '-b festival --strip'),
        ('bonjour le monde', 'bɔ̃ʒuʁ lə mɔ̃d ', '-l fr-fr'),
        ('bonjour le monde',
         'b ɔ̃ ʒ u ʁ ;eword l ə ;eword m ɔ̃ d ;eword ',
         '-l fr-fr -p " " -w ";eword "'),
    )
    for text, expected, options in cases:
        _test(text, expected, options)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    # version() is compared with a tuple, as in the festival backend tests:
    # a membership test of the string '2.1' against it can never be true,
    # so the skip would never apply
    backend.FestivalBackend.version() <= (2, 1),
    reason='festival-2.1 gives different results than further versions '
    'for syllable boundaries')
def test_readme_festival_syll():
    """Check the festival output with phone, syllable and word separators."""
    _test('hello world',
          'hh ax ;esyll l ow ;esyll ;eword w er l d ;esyll ;eword ',
          "-p ' ' -s ';esyll ' -w ';eword ' -b festival -l en-us")
|
||||
|
||||
|
||||
@pytest.mark.parametrize('njobs', [1, 6])
def test_njobs(njobs):
    """The output must be the same whatever the number of jobs."""
    input_lines = [
        'hello world',
        'goodbye',
        'third line',
        'yet another',
    ]
    expected_lines = [
        'h-ə-l-oʊ w-ɜː-l-d',
        'ɡ-ʊ-d-b-aɪ',
        'θ-ɜː-d l-aɪ-n',
        'j-ɛ-t ɐ-n-ʌ-ð-ɚ',
    ]
    options = f'--strip -j {njobs} -l en-us -b espeak -p "-" -s "|" -w " "'

    _test(
        os.linesep.join(input_lines),
        os.linesep.join(expected_lines),
        options)
|
||||
|
||||
|
||||
def test_unicode():
    """Run the segments backend on a yucatec word with non-ASCII chars."""
    options = '-l yucatec -b segments'
    _test('untuʼule', 'untṵːle ', options)
|
||||
|
||||
|
||||
def test_logger():
    """get_logger must raise RuntimeError when verbosity is 1."""
    invalid_verbosity = 1
    with pytest.raises(RuntimeError):
        logger.get_logger(verbosity=invalid_verbosity)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not (EspeakMbrolaBackend.is_available()
         and EspeakMbrolaBackend.is_supported_language('mb-fr1')),
    reason='mbrola or mb-fr1 voice not installed')
def test_espeak_mbrola():
    """Run the command line with the espeak-mbrola backend and mb-fr1."""
    options = '-b espeak-mbrola -l mb-fr1 -p" " --preserve-punctuation'
    _test('coucou toi!', 'k u k u t w a ', options)
|
||||
|
||||
|
||||
def test_espeak_path():
    """Give the espeak library to the command line with --espeak-library."""
    library = pathlib.Path(backend.EspeakBackend.library())
    if sys.platform == 'win32':
        # escape backslashes and spaces: the option string goes through
        # shlex.split in _test
        library = str(library).replace('\\', '\\\\').replace(' ', '\\ ')

    option = f'--espeak-library={library}'
    _test('hello world', 'həloʊ wɜːld ', option)
|
||||
|
||||
|
||||
def test_festival_path():
    """Give the festival binary with --festival-executable."""
    executable = pathlib.Path(backend.FestivalBackend.executable())
    if sys.platform == 'win32':
        # escape backslashes and spaces: the option string goes through
        # shlex.split in _test
        executable = str(executable).replace(
            '\\', '\\\\').replace(' ', '\\ ')

    options = f'--festival-executable={executable} -b festival'
    _test('hello world', 'hhaxlow werld ', options)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'args, expected',
    [
        # default punctuation handling
        ('',
         'həloʊ wɜːld θɹiː ziəɹoʊziəɹoʊ ziəɹoʊ ɔːɹ tuː fɪfti həloʊ '),
        # default punctuation marks, preserved
        ('--preserve-punctuation',
         'həloʊ, ,wɜːld? θɹiː,ziəɹoʊziəɹoʊ ziəɹoʊ, ɔːɹ tuː.fɪfti. ¿həloʊ? '),
        # punctuation marks given as a regular expression
        ('--preserve-punctuation '
         '--punctuation-marks-is-regex '
         '--punctuation-marks "[^a-zA-ZÀ-ÖØ-öø-ÿ0-9\'\\-]"',
         'həloʊ, ,wɜːld? ‡ θɹiː,ziəɹoʊziəɹoʊ ziəɹoʊ, ɔːɹ tuː.fɪfti. ¿həloʊ? '),
        ('--preserve-punctuation '
         '--punctuation-marks-is-regex '
         '--punctuation-marks "[;:\\!?¡¿—…\\\"«»“”]|[,.](?!\\d)"',
         'həloʊ, ,wɜːld? θɹiː θaʊzənd, ɔːɹ tuː pɔɪnt faɪv ziəɹoʊ. ¿həloʊ? '),
    ])
def test_punctuation_is_regex(args, expected):
    """Check the punctuation options of the command line on one sentence."""
    print(args)
    sentence = "hello, ,world? ‡ 3,000, or 2.50. ¿hello?"
    _test(sentence, expected, args)
|
||||
|
||||
|
||||
def test_invalid_punctuation_regex():
    """An invalid punctuation regex must terminate with SystemExit."""
    options = '--punctuation-marks-is-regex --punctuation-marks "[*,"'
    with pytest.raises(SystemExit):
        _test('hello world', None, options)
|
||||
@@ -0,0 +1,109 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Test of the espeak-mbrola backend"""
|
||||
|
||||
# pylint: disable=missing-docstring
|
||||
# pylint: disable=redefined-outer-name
|
||||
|
||||
import pytest
|
||||
|
||||
from phonemizer.backend import EspeakMbrolaBackend
|
||||
from phonemizer.separator import Separator
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
def backend():
    """Session scoped espeak-mbrola backend built for the 'mb-fr1' voice."""
    mbrola = EspeakMbrolaBackend('mb-fr1')
    return mbrola
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not (EspeakMbrolaBackend.is_available()
         and EspeakMbrolaBackend.is_supported_language('mb-fr1')),
    reason='mbrola or mb-fr1 voice not installed')
@pytest.mark.parametrize(
    'text, expected',
    [
        # plosives
        ('pont', 'po~'),
        ('bon', 'bo~'),
        ('temps', 'ta~'),
        ('dans', 'da~'),
        ('quand', 'ka~'),
        ('gant', 'ga~'),
        # fricatives
        ('femme', 'fam'),
        ('vent', 'va~'),
        ('sans', 'sa~'),
        ('champ', 'Sa~'),
        ('gens', 'Za~'),
        ('ion', 'jo~'),
        # nasals
        ('mont', 'mo~'),
        ('nom', 'no~'),
        ('oignon', 'onjo~'),
        ('ping', 'piN'),
        # liquid glides
        ('long', 'lo~'),
        ('rond', 'Ro~'),
        ('coin', 'kwe~'),
        ('juin', 'Zye~'),
        ('pierre', 'pjER'),
        # vowels
        ('si', 'si'),
        ('ses', 'se'),
        ('seize', 'sEz'),
        ('patte', 'pat'),
        ('pâte', 'pat'),
        ('comme', 'kOm'),
        ('gros', 'gRo'),
        ('doux', 'du'),
        ('du', 'dy'),
        ('deux', 'd2'),
        ('neuf', 'n9f'),
        ('justement', 'Zystma~'),
        ('vin', 've~'),
        ('vent', 'va~'),
        ('bon', 'bo~'),
        ('brun', 'bR9~'),
    ])
def test_sampa_fr(backend, text, expected):
    """Phonemize a single French word, without any phone separator."""
    no_phone_separator = Separator(phone='')
    phonemized = backend.phonemize(
        [text], strip=True, separator=no_phone_separator)
    assert expected == phonemized[0]
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not (EspeakMbrolaBackend.is_available()
         and EspeakMbrolaBackend.is_supported_language('mb-fr1')),
    reason='mbrola or mb-fr1 voice not installed')
def test_french_sampa(backend):
    """Phonemize a French sentence with and without stripping."""
    sentence = ['bonjour le monde']
    separator = Separator(word=None, phone=' ')

    # strip=False keeps the trailing phone separator
    unstripped = backend.phonemize(sentence, separator=separator, strip=False)
    assert unstripped == ['b o~ Z u R l @ m o~ d ']

    stripped = backend.phonemize(sentence, separator=separator, strip=True)
    assert stripped == ['b o~ Z u R l @ m o~ d']

    # an empty line and a lone quote are both expected to give an empty output
    for degenerate in ([''], ['"']):
        assert backend.phonemize(
            degenerate, separator=separator, strip=True) == ['']
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not EspeakMbrolaBackend.is_available(),
    reason='mbrola not installed')
def test_mbrola_bad_language():
    """An unknown voice name must be reported as not supported."""
    supported = EspeakMbrolaBackend.is_supported_language('foo-bar')
    assert not supported
|
||||
@@ -0,0 +1,291 @@
|
||||
# Copyright 2015-2021 Thomas Schatz, Xuan Nga Cao, Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Test of the phonemizer.phonemize function"""
|
||||
|
||||
# pylint: disable=missing-docstring
|
||||
|
||||
import os
|
||||
import pytest
|
||||
|
||||
from phonemizer.phonemize import phonemize
|
||||
from phonemizer.separator import Separator
|
||||
from phonemizer.backend import EspeakBackend, EspeakMbrolaBackend
|
||||
|
||||
|
||||
def test_bad_backend():
    """Invalid backend names or backend/option pairs raise RuntimeError."""
    rejected = (
        # backends that do not exist
        {'backend': 'fetiv'},
        {'backend': 'foo'},
        # the tie option requested on other backends than espeak
        {'tie': True, 'backend': 'festival'},
        {'tie': True, 'backend': 'mbrola'},
        {'tie': True, 'backend': 'segments'},
    )
    for options in rejected:
        with pytest.raises(RuntimeError):
            phonemize('', **options)

    # the tie option on espeak along with a custom separator
    with pytest.raises(RuntimeError):
        phonemize(
            '', tie=True, backend='espeak',
            separator=Separator(' ', None, '-'))
|
||||
|
||||
|
||||
def test_bad_language():
    """A language rejected by the selected backend raises RuntimeError."""
    rejected = (
        ('fr-fr', 'festival'),
        ('ffr', 'espeak'),
        ('/path/to/nonexisting/file', 'segments'),
        ('creep', 'segments'),
    )
    for language, backend in rejected:
        with pytest.raises(RuntimeError):
            phonemize('', language=language, backend=backend)
|
||||
|
||||
|
||||
def test_text_type():
    """The output of phonemize() follows the type of its input text."""
    as_list = ['one two', 'three', 'four five']
    as_str = os.linesep.join(as_list)
    options = {'language': 'en-us', 'backend': 'espeak', 'strip': True}

    phn_list = phonemize(as_list, **options)
    phn_str = phonemize(as_str, **options)

    # with prepend_text each output item holds the input text at index 0
    # and its phonemization at index 1
    prepended = phonemize(as_str, prepend_text=True, **options)
    prepended_text = [item[0] for item in prepended]
    prepended_phn = [item[1] for item in prepended]

    assert isinstance(phn_list, list)
    assert isinstance(phn_str, str)
    assert os.linesep.join(phn_list) == phn_str
    assert os.linesep.join(prepended_phn) == phn_str
    assert prepended_text == as_list
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='language switch only exists for espeak-ng')
def test_lang_switch():
    """With 'remove-utterance' the line with an English word comes out empty."""
    utterances = ['bonjour apple', 'bonjour toi']
    options = {
        'language': 'fr-fr',
        'backend': 'espeak',
        'prepend_text': True,
        'language_switch': 'remove-utterance'}

    phonemized = phonemize(utterances, **options)
    assert phonemized == [
        ('bonjour apple', ''),
        ('bonjour toi', 'bɔ̃ʒuʁ twa ')]
|
||||
|
||||
|
||||
@pytest.mark.parametrize('njobs', [2, 4])
def test_espeak(njobs):
    """Run espeak on several jobs, for list, one-line and multi-line input."""
    lines = ['one two', 'three', 'four five']
    options = {'language': 'en-us', 'backend': 'espeak', 'njobs': njobs}

    # input as a list of lines
    from_list = phonemize(lines, strip=True, **options)
    assert from_list == ['wʌn tuː', 'θɹiː', 'foːɹ faɪv']

    # input as a single line
    from_line = phonemize(' '.join(lines), strip=False, **options)
    assert from_line == ' '.join(['wʌn tuː', 'θɹiː', 'foːɹ faɪv '])

    # input as a multi-line string
    from_lines = phonemize(os.linesep.join(lines), strip=False, **options)
    assert from_lines == os.linesep.join(['wʌn tuː ', 'θɹiː ', 'foːɹ faɪv '])
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    not (EspeakMbrolaBackend.is_available()
         and EspeakMbrolaBackend.is_supported_language('mb-fr1')),
    reason='mbrola or mb-fr1 voice not installed')
@pytest.mark.parametrize('njobs', [2, 4])
def test_espeak_mbrola(caplog, njobs):
    """Run espeak-mbrola on several jobs and check the logged messages."""
    lines = ['un deux', 'trois', 'quatre cinq']
    options = {
        'language': 'mb-fr1',
        'backend': 'espeak-mbrola',
        'njobs': njobs,
        'preserve_punctuation': True}

    phonemized = phonemize(lines, **options)
    assert phonemized == ['9~d2', 'tRwa', 'katRse~k']

    # the message is the third field of each captured record tuple
    logged = [record[2] for record in caplog.record_tuples]
    for message in (
            'espeak-mbrola backend cannot preserve punctuation',
            'espeak-mbrola backend cannot preserve word separation'):
        assert message in logged
|
||||
|
||||
|
||||
@pytest.mark.parametrize('njobs', [2, 4])
def test_festival(njobs):
    """Run festival on several jobs, for list, one-line and multi-line input."""
    lines = ['one two', 'three', 'four five']
    options = {'language': 'en-us', 'backend': 'festival', 'njobs': njobs}

    # input as a list of lines
    from_list = phonemize(lines, strip=False, **options)
    assert from_list == ['wahn tuw ', 'thriy ', 'faor fayv ']

    # input as a single line
    from_line = phonemize(' '.join(lines), strip=True, **options)
    assert from_line == ' '.join(['wahn tuw', 'thriy', 'faor fayv'])

    # input as a multi-line string
    from_lines = phonemize(os.linesep.join(lines), strip=True, **options)
    assert from_lines == os.linesep.join(['wahn tuw', 'thriy', 'faor fayv'])
|
||||
|
||||
|
||||
def test_festival_bad():
    """Options valid for espeak only are rejected by the festival backend."""
    # cannot use options valid for espeak only
    lines = ['one two', 'three', 'four five']
    espeak_only = (
        {'with_stress': True},
        {'language_switch': 'remove-flags'},
    )

    for option in espeak_only:
        with pytest.raises(RuntimeError):
            phonemize(lines, language='en-us', backend='festival', **option)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('njobs', [2, 4])
def test_segments(njobs):
    """Run segments on several jobs, for list, one-line and multi-line input."""
    # one two three four five in Maya Yucatec
    lines = ['untuʼuleʼ kaʼapʼeʼel', 'oʼoxpʼeʼel', 'kantuʼuloʼon chincho']
    options = {'language': 'yucatec', 'backend': 'segments', 'njobs': njobs}

    # input as a list of lines
    from_list = phonemize(lines, strip=False, **options)
    assert from_list == [
        'untṵːlḛ ka̰ːpʼḛːl ', 'o̰ːʃpʼḛːl ', 'kantṵːlo̰ːn t̠͡ʃint̠͡ʃo ']

    # input as a single line
    from_line = phonemize(' '.join(lines), strip=False, **options)
    assert from_line == ' '.join(
        ['untṵːlḛ ka̰ːpʼḛːl', 'o̰ːʃpʼḛːl', 'kantṵːlo̰ːn t̠͡ʃint̠͡ʃo '])

    # input as a multi-line string
    from_lines = phonemize(os.linesep.join(lines), strip=True, **options)
    assert from_lines == os.linesep.join(
        ['untṵːlḛ ka̰ːpʼḛːl', 'o̰ːʃpʼḛːl', 'kantṵːlo̰ːn t̠͡ʃint̠͡ʃo'])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'backend, empty_lines, punctuation, prepend_text, text, expected', [
        # without prepend_text: the output is a list of strings
        ('espeak', False, False, False,
         ['hello world!', '', 'goodbye'],
         ['həloʊ wɜːld ', 'ɡʊdbaɪ ']),
        ('espeak', False, True, False,
         ['hello world!', '', 'goodbye'],
         ['həloʊ wɜːld! ', 'ɡʊdbaɪ ']),
        ('espeak', True, False, False,
         ['hello world!', '', 'goodbye'],
         ['həloʊ wɜːld ', '', 'ɡʊdbaɪ ']),
        ('espeak', True, True, False,
         ['hello world!', '', 'goodbye'],
         ['həloʊ wɜːld! ', '', 'ɡʊdbaɪ ']),
        ('segments', False, False, False,
         ['achi acho?', '', 'achi acho'],
         ['ʌtʃɪ ʌtʃʊ ', 'ʌtʃɪ ʌtʃʊ ']),
        ('segments', False, True, False,
         ['achi acho?', '', 'achi acho'],
         ['ʌtʃɪ ʌtʃʊ? ', 'ʌtʃɪ ʌtʃʊ ']),
        ('segments', True, False, False,
         ['achi acho?', '', 'achi acho'],
         ['ʌtʃɪ ʌtʃʊ ', '', 'ʌtʃɪ ʌtʃʊ ']),
        ('segments', True, True, False,
         ['achi acho?', '', 'achi acho'],
         ['ʌtʃɪ ʌtʃʊ? ', '', 'ʌtʃɪ ʌtʃʊ ']),
        ('festival', False, False, False,
         ['hello world!', '', 'goodbye'],
         ['hhaxlow werld ', 'guhdbay ']),
        ('festival', False, True, False,
         ['hello world!', '', 'goodbye'],
         ['hhaxlow werld! ', 'guhdbay ']),
        ('festival', True, False, False,
         ['hello world!', '', 'goodbye'],
         ['hhaxlow werld ', '', 'guhdbay ']),
        ('festival', True, True, False,
         ['hello world!', '', 'goodbye'],
         ['hhaxlow werld! ', '', 'guhdbay ']),
        # with prepend_text: the output is a list of (text, phonemes) pairs
        ('espeak', False, False, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'həloʊ wɜːld '), ('goodbye', 'ɡʊdbaɪ ')]),
        ('espeak', False, True, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'həloʊ wɜːld! '), ('goodbye', 'ɡʊdbaɪ ')]),
        ('espeak', True, False, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'həloʊ wɜːld '), ('', ''), ('goodbye', 'ɡʊdbaɪ ')]),
        ('espeak', True, True, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'həloʊ wɜːld! '), ('', ''), ('goodbye', 'ɡʊdbaɪ ')]),
        ('segments', False, False, True,
         ['achi acho?', '', 'achi acho'],
         [('achi acho?', 'ʌtʃɪ ʌtʃʊ '), ('achi acho', 'ʌtʃɪ ʌtʃʊ ')]),
        ('segments', False, True, True,
         ['achi acho?', '', 'achi acho'],
         [('achi acho?', 'ʌtʃɪ ʌtʃʊ? '), ('achi acho', 'ʌtʃɪ ʌtʃʊ ')]),
        ('segments', True, False, True,
         ['achi acho?', '', 'achi acho'],
         [('achi acho?', 'ʌtʃɪ ʌtʃʊ '), ('', ''), ('achi acho', 'ʌtʃɪ ʌtʃʊ ')]),
        ('segments', True, True, True,
         ['achi acho?', '', 'achi acho'],
         [('achi acho?', 'ʌtʃɪ ʌtʃʊ? '), ('', ''), ('achi acho', 'ʌtʃɪ ʌtʃʊ ')]),
        ('festival', False, False, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'hhaxlow werld '), ('goodbye', 'guhdbay ')]),
        ('festival', False, True, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'hhaxlow werld! '), ('goodbye', 'guhdbay ')]),
        ('festival', True, False, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'hhaxlow werld '), ('', ''), ('goodbye', 'guhdbay ')]),
        ('festival', True, True, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'hhaxlow werld! '), ('', ''), ('goodbye', 'guhdbay ')]),
    ])
def test_preserve_empty_lines(backend, empty_lines, punctuation, prepend_text, text, expected):
    """Empty input lines are kept in the output only when requested."""
    # the segments backend is tested on cree, the other ones on English
    language = 'en-us' if backend != 'segments' else 'cree'

    phonemized = phonemize(
        text, language=language, backend=backend, prepend_text=prepend_text,
        preserve_punctuation=punctuation, preserve_empty_lines=empty_lines)
    assert expected == phonemized
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'backend, empty_lines, punctuation, text, expected', [
        # a single empty line gives back an empty line when empty lines are
        # preserved, and nothing at all otherwise
        (backend, empty_lines, punctuation, [''], [''] if empty_lines else [])
        for backend in ('espeak', 'segments', 'festival')
        for empty_lines in (False, True)
        for punctuation in (False, True)])
def test_empty_input(backend, empty_lines, punctuation, text, expected):
    """Phonemize an input made of a single empty line."""
    # the segments backend is tested on cree, the other ones on English
    language = 'en-us' if backend != 'segments' else 'cree'

    phonemized = phonemize(
        text, language=language, backend=backend,
        preserve_punctuation=punctuation, preserve_empty_lines=empty_lines)
    assert expected == phonemized
|
||||
@@ -0,0 +1,274 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Test of the punctuation processing"""
|
||||
|
||||
# pylint: disable=missing-docstring
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import re
|
||||
|
||||
from phonemizer.backend import EspeakBackend, FestivalBackend, SegmentsBackend
|
||||
from phonemizer.punctuation import Punctuation
|
||||
from phonemizer.phonemize import phonemize
|
||||
from phonemizer.separator import Separator, default_separator
|
||||
|
||||
# Version flags computed once at import time, available to select expected
# outputs that depend on the installed backend version.

# True when the installed espeak is at least version 1.50
ESPEAK_150 = EspeakBackend.version() >= (1, 50)

# True when the installed espeak is at least version 1.49.3
ESPEAK_149 = EspeakBackend.version() >= (1, 49, 3)

# True when the installed festival is at least version 2.5
FESTIVAL_25 = FestivalBackend.version() >= (2, 5)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'inp, out', [
        ('a, b,c.', 'a b c'),
        ('abc de', 'abc de'),
        ('!d.d. dd?? d!', 'd d dd d'),
    ])
def test_remove(inp, out):
    """Remove the default punctuation marks from a string."""
    punct = Punctuation()
    cleaned = punct.remove(inp)
    assert cleaned == out
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'inp', [
        ['.a.b.c.'],
        ['a, a?', 'b, b'],
        ['a, a?', 'b, b', '!'],
        ['a, a?', '!?', 'b, b'],
        ['!?', 'a, a?', 'b, b'],
        ['a, a, a'],
        ['a, a?', 'aaa bb', '.bb, b', 'c', '!d.d. dd?? d!'],
        ['Truly replied, "Yes".'],
        ['hi; ho,"'],
        ["!?"],
        ["!'"],
        ["It is ! (I think so)"],
        ["This {is} right"],
        ["[He] is right"],
    ])
def test_preserve(inp):
    """A preserve() followed by a restore() gives back the input text."""
    punct = Punctuation()
    stripped_text, marks = punct.preserve(inp)
    restored = punct.restore(
        stripped_text, marks, sep=default_separator, strip=True)
    assert inp == restored
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'text, expected_restore, expected_output', [
        (['hi; hi,"'], ['hi; hi," '], ['haɪ; haɪ, ']),
        # NOTE(review): both branches of the conditional below read the same
        # here, confirm the version dependent literal is the intended one
        (['hi; "hi,'], ['hi; "hi, '], ['haɪ; haɪ, '] if ESPEAK_149 else ['haɪ; haɪ, ']),
        (['"hi; hi,'], ['"hi; hi, '], ['haɪ; haɪ, '] if ESPEAK_149 else [' haɪ; haɪ, ']),
    ])
def test_preserve_2(text, expected_restore, expected_output):
    """Use marks without the double quote, on Punctuation then on espeak."""
    marks = ".!;:,?"

    # the round trip alone, without any phonemization
    punct = Punctuation(marks=marks)
    preserved_text, preserved_marks = punct.preserve(text)
    restored = punct.restore(
        preserved_text, preserved_marks, sep=default_separator, strip=False)
    assert expected_restore == restored

    # the same marks given to the espeak backend
    phonemized = phonemize(
        text, backend="espeak",
        preserve_punctuation=True, punctuation_marks=marks)
    assert phonemized == expected_output
|
||||
|
||||
|
||||
def test_custom():
    """Change the punctuation marks of an existing Punctuation instance."""
    punct = Punctuation()

    # a new instance uses the default marks
    assert set(punct.marks) == set(punct.default_marks())
    assert punct.remove('a,b.c') == 'a b c'

    # marks given as a list are rejected
    with pytest.raises(ValueError):
        punct.marks = ['?', '.']

    # marks given as a string are accepted: the comma is no longer removed
    punct.marks = '?.'
    assert len(punct.marks) == 2
    assert punct.remove('a,b.c') == 'a,b c'
|
||||
|
||||
|
||||
def test_espeak():
    """Cross preserve_punctuation and strip on the espeak backend."""
    text = 'hello, world!'
    # (preserve_punctuation, strip, expected output)
    cases = (
        (False, True, 'həloʊ wɜːld'),
        (True, True, 'həloʊ, wɜːld!'),
        (False, False, 'həloʊ wɜːld '),
        (True, False, 'həloʊ, wɜːld! '),
    )

    for preserve, strip, expected in cases:
        backend = EspeakBackend('en-us', preserve_punctuation=preserve)
        output = backend.phonemize([text], strip=strip)[0]
        assert output == expected
|
||||
|
||||
|
||||
def test_festival():
    """Cross preserve_punctuation and strip on the festival backend."""
    text = 'hello, world!'
    # (preserve_punctuation, strip, expected output)
    cases = (
        (False, True, 'hhaxlow werld'),
        (True, True, 'hhaxlow, werld!'),
        (False, False, 'hhaxlow werld '),
        (True, False, 'hhaxlow, werld! '),
    )

    for preserve, strip, expected in cases:
        backend = FestivalBackend('en-us', preserve_punctuation=preserve)
        output = backend.phonemize([text], strip=strip)[0]
        assert output == expected
|
||||
|
||||
|
||||
def test_segments():
    """Cross preserve_punctuation and strip on the segments backend."""
    text = 'achi, acho!'
    # (preserve_punctuation, strip, expected output)
    cases = (
        (False, True, 'ʌtʃɪ ʌtʃʊ'),
        (True, True, 'ʌtʃɪ, ʌtʃʊ!'),
        (False, False, 'ʌtʃɪ ʌtʃʊ '),
        (True, False, 'ʌtʃɪ, ʌtʃʊ! '),
    )

    for preserve, strip, expected in cases:
        backend = SegmentsBackend('cree', preserve_punctuation=preserve)
        output = backend.phonemize([text], strip=strip)[0]
        assert output == expected
|
||||
|
||||
|
||||
# see https://github.com/bootphon/phonemizer/issues/54
|
||||
@pytest.mark.parametrize(
    'text, expected', [
        ("!'", "! "),
        ("'!", "! "),
        ("!'!", "!! "),
        ("'!'", "! "),
    ])
def test_issue_54(text, expected):
    """Phonemize texts made only of exclamation marks and apostrophes."""
    phonemized = phonemize(
        [text], language='en-us', backend='espeak',
        preserve_punctuation=True)
    assert expected == phonemized[0]
|
||||
|
||||
|
||||
# see https://github.com/bootphon/phonemizer/issues/55
|
||||
@pytest.mark.parametrize(
    'backend, marks, text, expected', [
        ('espeak', 'default', ['"Hey! "', '"hey,"'], ['"heɪ! " ', '"heɪ," ']),
        ('espeak', '.!;:,?', ['"Hey! " ', '"hey," '],
         ['heɪ! ', 'heɪ, '] if ESPEAK_150 else [' heɪ! ', ' heɪ, ']),
        ('espeak', 'default', ['! ?', 'hey!'], ['! ? ', 'heɪ! ']),
        ('espeak', '!', ['! ?', 'hey!'], ['! ', 'heɪ! ']),
        ('segments', 'default', ['! ?', 'hey!'], ['! ? ', 'heːj! ']),
        ('segments', '!', ['! ?', 'hey!'], ValueError),
        ('festival', 'default', ['! ?', 'hey!'], ['! ? ', 'hhey! ']),
        ('festival', '!', ['! ?', 'hey!'], ['! ', 'hhey! '])])
def test_issue55(backend, marks, text, expected):
    """Phonemize texts with punctuation preserved, for each backend.

    `marks` is either the string 'default', replaced by the default
    punctuation marks, or the marks to use. `expected` is either the expected
    output of phonemize() or the exception class it must raise.

    """
    if marks == 'default':
        marks = Punctuation.default_marks()
    language = 'cree' if backend == 'segments' else 'en-us'

    def run():
        return phonemize(
            text, language=language, backend=backend,
            preserve_punctuation=True, punctuation_marks=marks)

    # an exception class is expected: test it explicitly instead of relying
    # on the TypeError raised by pytest.raises() on a non-exception argument
    if isinstance(expected, type) and issubclass(expected, BaseException):
        with pytest.raises(expected):
            run()
        return

    try:
        output = run()
    except RuntimeError:
        # only the festival failure below is tolerated, a RuntimeError from
        # another backend is a real failure and must not be hidden
        if backend != 'festival':
            raise
        # TODO on some installations festival fails to phonemize "?".
        # It ends with a segmentation fault. This seems to only appear
        # with festival-2.5 (but is working on travis and docker image)
        return

    assert expected == output
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'punctuation_marks, text, expected', [
        # marks given as a plain string
        (';:,.!?¡—…"«»“”',
         'hello, ,world? ‡ 3,000, or 2.50. ¿hello?',
         'həloʊ, ,wɜːld? θɹiː,ziəɹoʊziəɹoʊ ziəɹoʊ, ɔːɹ tuː.fɪfti. həloʊ? '),
        # marks given as a compiled regex
        (re.compile(r"[^a-zA-ZÀ-ÖØ-öø-ÿ0-9'$@&+%\-=/\\]"),
         'hello, ,world? ‡ 3,000, or 2.50. ¿hello?',
         'həloʊ, ,wɜːld? ‡ θɹiː,ziəɹoʊziəɹoʊ ziəɹoʊ, ɔːɹ tuː.fɪfti. ¿həloʊ? '),
        # compiled regex where ',' and '.' are marks only when they are not
        # followed by a digit
        (re.compile(r"[^a-zA-ZÀ-ÖØ-öø-ÿ0-9',.$@&+%\-=/\\]|[,.](?!\d)"),
         'hello, ,world? ‡ 3,000, or 2.50. ¿hello?',
         'həloʊ, ,wɜːld? ‡ θɹiː θaʊzənd, ɔːɹ tuː pɔɪnt faɪv ziəɹoʊ. ¿həloʊ? '),
    ])
def test_punctuation_marks_regex(punctuation_marks, text, expected):
    """Give the punctuation marks as a string or as a compiled regex."""
    phonemized = phonemize(
        text, preserve_punctuation=True, punctuation_marks=punctuation_marks)
    assert expected == phonemized
|
||||
|
||||
|
||||
def test_marks_getter_with_regex():
    """Reading `marks` must raise when Punctuation was built from a regex."""
    marks_re = re.compile(r"[^a-zA-Z0-9]")
    punct = Punctuation(marks_re)
    with pytest.raises(ValueError):
        # a plain read of the attribute is enough to call the getter, no
        # comparison is needed
        _ = punct.marks
|
||||
|
||||
|
||||
def test_long_document():
    """Phonemize a whole book without error, punctuation preserved."""
    # testing issue raised by #108
    document = Path(__file__).parent / "data" / "pg67147.txt"
    with open(document) as txt_file:
        lines = txt_file.read().split("\n")

    # nothing is checked on the output, the call must simply succeed
    phonemize(lines, backend="espeak", preserve_punctuation=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'text', [
        # a single line without punctuation
        [
            'worked david ford i started in deloitte and i was immediately',
        ],
        # a single line with punctuation
        [
            'worked david ford i started in deloitte, and i was immediately',
        ],
        # several lines without punctuation
        [
            'worked david ford i started in deloitte and i was immediately',
            'an offer of price waterhouse cooper and here i take may',
            'we are now as maximum plan for a customer time and',
            "they're going to meet all the xvin so great it"
        ],
        # several lines with punctuation
        [
            'worked david ford i started in deloitte, and i was immediately',
            'an offer of price waterhouse cooper and here i take may',
            'we are now as maximum plan for a customer time and',
            "they're going to meet all the xvin so great it."
        ],
    ])
def test_multiline_punctuation(text):
    """The output has as many lines as the input when punctuation is kept."""
    output = phonemize(text, preserve_punctuation=True)
    assert len(text) == len(output)
|
||||
@@ -0,0 +1,113 @@
|
||||
# Copyright 2015-2021 Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Test of the segments backend"""
|
||||
|
||||
# pylint: disable=missing-docstring
|
||||
|
||||
import os
|
||||
import pytest
|
||||
|
||||
from phonemizer.separator import Separator, default_separator
|
||||
from phonemizer.backend import SegmentsBackend
|
||||
from phonemizer.utils import get_package_resource
|
||||
|
||||
|
||||
def test_multiline():
    """Phonemize inputs with newlines inside a line or split across lines."""
    backend = SegmentsBackend('cree')
    assert backend.language == 'cree'

    # (input lines, expected output)
    cases = (
        (['a'], ['ʌ ']),
        (['aa'], ['ʌʌ ']),
        (['a\n'], ['ʌ ']),
        (['a\na'], ['ʌ ʌ ']),
        (['a\na\n'], ['ʌ ʌ ']),
        (['a', 'a'], ['ʌ ', 'ʌ ']),
        (['a\n', 'a\n'], ['ʌ ', 'ʌ ']),
    )
    for lines, expected in cases:
        assert backend.phonemize(lines) == expected
|
||||
|
||||
|
||||
def test_bad_morpheme():
    """The input 'A' makes the cree backend raise a ValueError."""
    cree = SegmentsBackend('cree')
    with pytest.raises(ValueError):
        cree.phonemize(['A'])
|
||||
|
||||
|
||||
def test_separator():
    """Phonemize with the default separator, with and without stripping."""
    cree = SegmentsBackend('cree')
    lines = ['achi acho']

    unstripped = cree.phonemize(lines, separator=default_separator)
    assert unstripped == ['ʌtʃɪ ʌtʃʊ ']

    stripped = cree.phonemize(lines, separator=default_separator, strip=True)
    assert stripped == ['ʌtʃɪ ʌtʃʊ']
|
||||
|
||||
|
||||
def test_separator_2():
    """Phonemize with '_' between words and a space between phones."""
    cree = SegmentsBackend('cree')
    lines = ['achi acho']
    separator = Separator(word='_', phone=' ')

    unstripped = cree.phonemize(lines, separator=separator)
    assert unstripped == ['ʌ tʃ ɪ _ʌ tʃ ʊ _']

    stripped = cree.phonemize(lines, separator=separator, strip=True)
    assert stripped == ['ʌ tʃ ɪ_ʌ tʃ ʊ']
|
||||
|
||||
|
||||
def test_separator_3():
    """Phonemize with a space between words and '_' between phones."""
    cree = SegmentsBackend('cree')
    lines = ['achi acho']
    separator = Separator(word=' ', syllable=None, phone='_')

    unstripped = cree.phonemize(lines, separator=separator)
    assert unstripped == ['ʌ_tʃ_ɪ_ ʌ_tʃ_ʊ_ ']

    stripped = cree.phonemize(lines, separator=separator, strip=True)
    assert stripped == ['ʌ_tʃ_ɪ ʌ_tʃ_ʊ']
|
||||
|
||||
|
||||
def test_separator_4():
    """Phonemize with a space between phones and no word separator."""
    cree = SegmentsBackend('cree')
    lines = ['achi acho']

    # TODO bug when sep.phone == ' ' with no sep.word
    separator = Separator(phone=' ', word='')

    unstripped = cree.phonemize(lines, separator=separator)
    assert unstripped == ['ʌ tʃ ɪ ʌ tʃ ʊ ']

    stripped = cree.phonemize(lines, separator=separator, strip=True)
    assert stripped == ['ʌ tʃ ɪʌ tʃ ʊ']
|
||||
|
||||
|
||||
def test_separator_5():
    """Same as with word='_' and phone=' ', keywords in the other order."""
    cree = SegmentsBackend('cree')
    lines = ['achi acho']
    separator = Separator(phone=' ', word='_')

    unstripped = cree.phonemize(lines, separator=separator)
    assert unstripped == ['ʌ tʃ ɪ _ʌ tʃ ʊ _']

    stripped = cree.phonemize(lines, separator=separator, strip=True)
    assert stripped == ['ʌ tʃ ɪ_ʌ tʃ ʊ']
|
||||
|
||||
|
||||
def test_language(tmpdir):
    """Check supported languages, given by name or as a g2p file."""
    is_supported = SegmentsBackend.is_supported_language

    # check languages by name
    assert is_supported('cree')
    assert not is_supported('unexisting')

    # check languages by g2p file
    directory = get_package_resource('segments')
    assert is_supported(os.path.join(directory, 'cree.g2p'))
    assert not is_supported(os.path.join(directory, 'cree'))
    assert not is_supported(os.path.join(directory, 'unexisting.g2p'))

    # bad syntax in g2p file
    bad_g2p = tmpdir.join('foo.g2p')
    bad_g2p.write('a a\nb b b\nc')
    assert not is_supported(bad_g2p)
|
||||
@@ -0,0 +1,82 @@
|
||||
# Copyright 2015-2021 Thomas Schatz, Xuan Nga Cao, Mathieu Bernard
|
||||
#
|
||||
# This file is part of phonemizer: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License as
|
||||
# published by the Free Software Foundation, either version 3 of the
|
||||
# License, or (at your option) any later version.
|
||||
#
|
||||
# Phonemizer is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""Test of the Separator class"""
|
||||
|
||||
# pylint: disable=missing-docstring
|
||||
|
||||
import pytest
|
||||
|
||||
from phonemizer.separator import Separator, default_separator
|
||||
|
||||
|
||||
def test_prop():
    """The three fields of a separator cannot be assigned."""
    # read only attributes
    for field in ('phone', 'syllable', 'word'):
        with pytest.raises(AttributeError):
            setattr(default_separator, field, 'a')
|
||||
|
||||
|
||||
@pytest.mark.parametrize('val', [None, '', False])
def test_empty(val):
    """None, '' and False all give empty string separators."""
    separator = Separator(val, val, val)
    fields = (separator.phone, separator.syllable, separator.word)
    assert fields == ('', '', '')
|
||||
|
||||
|
||||
def test_same():
    """The same string for the word and phone separators is rejected."""
    duplicated = ' '
    with pytest.raises(ValueError):
        Separator(word=duplicated, phone=duplicated)
|
||||
|
||||
|
||||
def test_str():
    """Check the string representation of a separator."""
    custom = Separator(word='w', syllable='s', phone='p')
    expected = {
        '(phone: "p", syllable: "s", word: "w")': custom,
        '(phone: "", syllable: "", word: " ")': default_separator,
    }
    for representation, separator in expected.items():
        assert str(separator) == representation
|
||||
|
||||
|
||||
def test_equal():
    """Separators compare on their phone, syllable and word fields."""
    assert Separator() == Separator()

    explicit_default = Separator(phone='', syllable='', word=' ')
    assert default_separator == explicit_default

    # NOTE(review): as it reads here the word separator below is the same
    # single space as in the equality just above, confirm the literal against
    # the upstream file (whitespace may have been lost)
    other = Separator(word=' ')
    assert other != default_separator
|
||||
|
||||
|
||||
def test_field_separator():
    """Check membership tests and ``input_output_separator`` on a Separator."""
    sep = Separator(word='w', syllable='s', phone='p')
    assert 'w' in sep
    assert 'p' in sep
    assert 'wp' not in sep
    assert ' ' not in sep

    # falsy requests give False, True gives '|', a custom string is returned
    assert sep.input_output_separator(False) is False
    assert sep.input_output_separator(None) is False
    assert sep.input_output_separator('') is False
    assert sep.input_output_separator(True) == '|'
    assert sep.input_output_separator('io') == 'io'

    # Inspect the raised exception itself through ``err.value``: ``str(err)``
    # renders pytest's ExceptionInfo wrapper, not the error message.
    with pytest.raises(RuntimeError) as err:
        sep.input_output_separator([1, 2])
    assert 'invalid input/output separator' in str(err.value)
    with pytest.raises(RuntimeError) as err:
        sep.input_output_separator('w')
    assert 'cannot prepend input with "w"' in str(err.value)

    # with '|', '||' and '|||' already used as separators, True gives '||||'
    sep = Separator(phone='|', syllable='||', word='|||')
    assert sep.input_output_separator(True) == '||||'
|
||||
@@ -0,0 +1,52 @@
|
||||
"""Test of the phonemizer.utils module"""
|
||||
|
||||
# pylint: disable=missing-docstring
|
||||
import os
|
||||
|
||||
from phonemizer.utils import chunks, cumsum, str2list, list2str
|
||||
|
||||
|
||||
def test_cumsum():
    """Check cumsum on an empty, a single-item and a three-item list."""
    cases = (
        ([], []),
        ([0], [0]),
        ([1, 2, 3], [1, 3, 6]),
    )
    for values, expected in cases:
        assert cumsum(values) == expected
|
||||
|
||||
|
||||
def test_list2str():
    """Check list2str on empty inputs, a single item and several items."""
    multiline = os.linesep.join('abc')
    cases = [
        ('', ''),
        ([], ''),
        ([''], ''),
        (['abc'], 'abc'),
        (['a', 'b', 'c'], multiline),
    ]
    for given, expected in cases:
        assert list2str(given) == expected
|
||||
|
||||
|
||||
def test_str2list():
    """Check str2list on single-line and multi-line strings."""
    eol = os.linesep
    cases = [
        ('', ['']),
        ('a', ['a']),
        ('ab', ['ab']),
        ('a b', ['a b']),
        (f'a{eol}b', ['a', 'b']),
        (f'a{eol}{eol}b{eol}', ['a', '', 'b']),
    ]
    for text, expected in cases:
        assert str2list(text) == expected
|
||||
|
||||
|
||||
def test_chunks():
    """Check chunks() on lists of one to four items.

    Each case compares the returned pair with the expected one for a given
    input size and second argument.
    """
    # a single item gives the same result whatever the second argument is
    for arg in range(1, 5):
        assert chunks(['a'], arg) == ([['a']], [0])

    # (input size, second argument, expected result)
    cases = [
        (2, 1, ([['a', 'a']], [0])),
        (2, 2, ([['a'], ['a']], [0, 1])),
        (2, 10, ([['a'], ['a']], [0, 1])),
        (3, 1, ([['a', 'a', 'a']], [0])),
        (3, 2, ([['a'], ['a', 'a']], [0, 1])),
        (3, 3, ([['a'], ['a'], ['a']], [0, 1, 2])),
        (3, 10, ([['a'], ['a'], ['a']], [0, 1, 2])),
        (4, 1, ([['a', 'a', 'a', 'a']], [0])),
        (4, 2, ([['a', 'a'], ['a', 'a']], [0, 2])),
        (4, 3, ([['a'], ['a'], ['a', 'a']], [0, 1, 2])),
        (4, 10, ([['a'], ['a'], ['a'], ['a']], [0, 1, 2, 3])),
    ]
    for size, arg, expected in cases:
        # build a fresh input list for every call
        assert chunks(['a'] * size, arg) == expected
|
||||
Reference in New Issue
Block a user