Files
blender-portable-repo/extensions/.local/lib/python3.11/site-packages/test/test_phonemize.py
T
2026-03-17 14:58:51 -06:00

292 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Copyright 2015-2021 Thomas Schatz, Xuan Nga Cao, Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Test of the phonemizer.phonemize function"""
# pylint: disable=missing-docstring
import os
import pytest
from phonemizer.phonemize import phonemize
from phonemizer.separator import Separator
from phonemizer.backend import EspeakBackend, EspeakMbrolaBackend
def test_bad_backend():
with pytest.raises(RuntimeError):
phonemize('', backend='fetiv')
with pytest.raises(RuntimeError):
phonemize('', backend='foo')
with pytest.raises(RuntimeError):
phonemize('', tie=True, backend='festival')
with pytest.raises(RuntimeError):
phonemize('', tie=True, backend='mbrola')
with pytest.raises(RuntimeError):
phonemize('', tie=True, backend='segments')
with pytest.raises(RuntimeError):
phonemize(
'', tie=True, backend='espeak',
separator=Separator(' ', None, '-'))
def test_bad_language():
with pytest.raises(RuntimeError):
phonemize('', language='fr-fr', backend='festival')
with pytest.raises(RuntimeError):
phonemize('', language='ffr', backend='espeak')
with pytest.raises(RuntimeError):
phonemize('', language='/path/to/nonexisting/file', backend='segments')
with pytest.raises(RuntimeError):
phonemize('', language='creep', backend='segments')
def test_text_type():
text1 = ['one two', 'three', 'four five']
text2 = os.linesep.join(text1)
phn1 = phonemize(text1, language='en-us', backend='espeak', strip=True)
phn2 = phonemize(text2, language='en-us', backend='espeak', strip=True)
out3 = phonemize(text2, language='en-us', backend='espeak', strip=True,
prepend_text=True)
text3 = [o[0] for o in out3]
phn3 = [o[1] for o in out3]
assert isinstance(phn1, list)
assert isinstance(phn2, str)
assert os.linesep.join(phn1) == phn2
assert os.linesep.join(phn3) == phn2
assert text3 == text1
@pytest.mark.skipif(
not EspeakBackend.is_espeak_ng(),
reason='language switch only exists for espeak-ng')
def test_lang_switch():
text = ['bonjour apple', 'bonjour toi']
out = phonemize(
text,
language='fr-fr',
backend='espeak',
prepend_text=True,
language_switch='remove-utterance')
assert out == [('bonjour apple', ''), ('bonjour toi', 'bɔ̃ʒuʁ twa ')]
@pytest.mark.parametrize('njobs', [2, 4])
def test_espeak(njobs):
text = ['one two', 'three', 'four five']
out = phonemize(
text, language='en-us', backend='espeak',
strip=True, njobs=njobs)
assert out == ['wʌn tuː', 'θɹiː', 'foːɹ faɪv']
out = phonemize(
' '.join(text), language='en-us', backend='espeak',
strip=False, njobs=njobs)
assert out == ' '.join(['wʌn tuː', 'θɹiː', 'foːɹ faɪv '])
out = phonemize(
os.linesep.join(text), language='en-us', backend='espeak',
strip=False, njobs=njobs)
assert out == os.linesep.join(['wʌn tuː ', 'θɹiː ', 'foːɹ faɪv '])
@pytest.mark.skipif(
not EspeakMbrolaBackend.is_available() or
not EspeakMbrolaBackend.is_supported_language('mb-fr1'),
reason='mbrola or mb-fr1 voice not installed')
@pytest.mark.parametrize('njobs', [2, 4])
def test_espeak_mbrola(caplog, njobs):
text = ['un deux', 'trois', 'quatre cinq']
out = phonemize(
text,
language='mb-fr1',
backend='espeak-mbrola',
njobs=njobs,
preserve_punctuation=True)
assert out == ['9~d2', 'tRwa', 'katRse~k']
messages = [msg[2] for msg in caplog.record_tuples]
assert 'espeak-mbrola backend cannot preserve punctuation' in messages
assert 'espeak-mbrola backend cannot preserve word separation' in messages
@pytest.mark.parametrize('njobs', [2, 4])
def test_festival(njobs):
text = ['one two', 'three', 'four five']
out = phonemize(
text, language='en-us', backend='festival',
strip=False, njobs=njobs)
assert out == ['wahn tuw ', 'thriy ', 'faor fayv ']
out = phonemize(
' '.join(text), language='en-us', backend='festival',
strip=True, njobs=njobs)
assert out == ' '.join(['wahn tuw', 'thriy', 'faor fayv'])
out = phonemize(
os.linesep.join(text), language='en-us', backend='festival',
strip=True, njobs=njobs)
assert out == os.linesep.join(['wahn tuw', 'thriy', 'faor fayv'])
def test_festival_bad():
# cannot use options valid for espeak only
text = ['one two', 'three', 'four five']
with pytest.raises(RuntimeError):
phonemize(
text, language='en-us', backend='festival', with_stress=True)
with pytest.raises(RuntimeError):
phonemize(
text, language='en-us', backend='festival',
language_switch='remove-flags')
@pytest.mark.parametrize('njobs', [2, 4])
def test_segments(njobs):
# one two three four five in Maya Yucatec
text = ['untuʼuleʼ kaʼapʼeʼel', 'oʼoxpʼeʼel', 'kantuʼuloʼon chincho']
out = phonemize(
text, language='yucatec', backend='segments',
strip=False, njobs=njobs)
assert out == [
'untṵːlḛ ka̰ːpʼḛːl ', 'o̰ːʃpʼḛːl ', 'kantṵːlo̰ːn t̠͡ʃint̠͡ʃo ']
out = phonemize(
' '.join(text), language='yucatec', backend='segments',
strip=False, njobs=njobs)
assert out == ' '.join(
['untṵːlḛ ka̰ːpʼḛːl', 'o̰ːʃpʼḛːl', 'kantṵːlo̰ːn t̠͡ʃint̠͡ʃo '])
out = phonemize(
os.linesep.join(text), language='yucatec', backend='segments',
strip=True, njobs=njobs)
assert out == os.linesep.join(
['untṵːlḛ ka̰ːpʼḛːl', 'o̰ːʃpʼḛːl', 'kantṵːlo̰ːn t̠͡ʃint̠͡ʃo'])
@pytest.mark.parametrize(
'backend, empty_lines, punctuation, prepend_text, text, expected', [
('espeak', False, False, False,
['hello world!', '', 'goodbye'],
['həloʊ wɜːld ', 'ɡʊdbaɪ ']),
('espeak', False, True, False,
['hello world!', '', 'goodbye'],
['həloʊ wɜːld! ', 'ɡʊdbaɪ ']),
('espeak', True, False, False,
['hello world!', '', 'goodbye'],
['həloʊ wɜːld ', '', 'ɡʊdbaɪ ']),
('espeak', True, True, False,
['hello world!', '', 'goodbye'],
['həloʊ wɜːld! ', '', 'ɡʊdbaɪ ']),
('segments', False, False, False,
['achi acho?', '', 'achi acho'],
[u'ʌtʃɪ ʌtʃʊ ', u'ʌtʃɪ ʌtʃʊ ']),
('segments', False, True, False,
['achi acho?', '', 'achi acho'],
[u'ʌtʃɪ ʌtʃʊ? ', u'ʌtʃɪ ʌtʃʊ ']),
('segments', True, False, False,
['achi acho?', '', 'achi acho'],
[u'ʌtʃɪ ʌtʃʊ ', '', u'ʌtʃɪ ʌtʃʊ ']),
('segments', True, True, False,
['achi acho?', '', 'achi acho'],
[u'ʌtʃɪ ʌtʃʊ? ', '', u'ʌtʃɪ ʌtʃʊ ']),
('festival', False, False, False,
['hello world!', '', 'goodbye'],
['hhaxlow werld ', 'guhdbay ']),
('festival', False, True, False,
['hello world!', '', 'goodbye'],
['hhaxlow werld! ', 'guhdbay ']),
('festival', True, False, False,
['hello world!', '', 'goodbye'],
['hhaxlow werld ', '', 'guhdbay ']),
('festival', True, True, False,
['hello world!', '', 'goodbye'],
['hhaxlow werld! ', '', 'guhdbay ']),
('espeak', False, False, True,
['hello world!', '', 'goodbye'],
[('hello world!', 'həloʊ wɜːld '), ('goodbye', 'ɡʊdbaɪ ')]),
('espeak', False, True, True,
['hello world!', '', 'goodbye'],
[('hello world!', 'həloʊ wɜːld! '), ('goodbye', 'ɡʊdbaɪ ')]),
('espeak', True, False, True,
['hello world!', '', 'goodbye'],
[('hello world!', 'həloʊ wɜːld '), ('', ''), ('goodbye', 'ɡʊdbaɪ ')]),
('espeak', True, True, True,
['hello world!', '', 'goodbye'],
[('hello world!', 'həloʊ wɜːld! '), ('', ''), ('goodbye', 'ɡʊdbaɪ ')]),
('segments', False, False, True,
['achi acho?', '', 'achi acho'],
[('achi acho?', 'ʌtʃɪ ʌtʃʊ '), ('achi acho', u'ʌtʃɪ ʌtʃʊ ')]),
('segments', False, True, True,
['achi acho?', '', 'achi acho'],
[('achi acho?', 'ʌtʃɪ ʌtʃʊ? '), ('achi acho', u'ʌtʃɪ ʌtʃʊ ')]),
('segments', True, False, True,
['achi acho?', '', 'achi acho'],
[('achi acho?', u'ʌtʃɪ ʌtʃʊ '), ('', ''), ('achi acho', u'ʌtʃɪ ʌtʃʊ ')]),
('segments', True, True, True,
['achi acho?', '', 'achi acho'],
[('achi acho?', u'ʌtʃɪ ʌtʃʊ? '), ('', ''), ('achi acho', u'ʌtʃɪ ʌtʃʊ ')]),
('festival', False, False, True,
['hello world!', '', 'goodbye'],
[('hello world!', 'hhaxlow werld '), ('goodbye', 'guhdbay ')]),
('festival', False, True, True,
['hello world!', '', 'goodbye'],
[('hello world!', 'hhaxlow werld! '), ('goodbye', 'guhdbay ')]),
('festival', True, False, True,
['hello world!', '', 'goodbye'],
[('hello world!', 'hhaxlow werld '), ('', ''), ('goodbye', 'guhdbay ')]),
('festival', True, True, True,
['hello world!', '', 'goodbye'],
[('hello world!', 'hhaxlow werld! '), ('', ''), ('goodbye', 'guhdbay ')])])
def test_preserve_empty_lines(backend, empty_lines, punctuation, prepend_text, text, expected):
language = 'cree' if backend == 'segments' else 'en-us'
assert expected == phonemize(
text, language=language, backend=backend, prepend_text=prepend_text,
preserve_punctuation=punctuation, preserve_empty_lines=empty_lines)
@pytest.mark.parametrize(
'backend, empty_lines, punctuation, text, expected', [
('espeak', False, False, [''], []),
('espeak', False, True, [''], []),
('espeak', True, False, [''], ['']),
('espeak', True, True, [''], ['']),
('segments', False, False, [''], []),
('segments', False, True, [''], []),
('segments', True, False, [''], ['']),
('segments', True, True, [''], ['']),
('festival', False, False, [''], []),
('festival', False, True, [''], []),
('festival', True, False, [''], ['']),
('festival', True, True, [''], [''])])
def test_empty_input(backend, empty_lines, punctuation, text, expected):
language = 'cree' if backend == 'segments' else 'en-us'
assert expected == phonemize(
text, language=language, backend=backend,
preserve_punctuation=punctuation, preserve_empty_lines=empty_lines)