# Copyright 2015-2021 Thomas Schatz, Xuan Nga Cao, Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see https://www.gnu.org/licenses/.
"""Test of the phonemizer.phonemize function"""

# pylint: disable=missing-docstring

import os

import pytest

from phonemizer.phonemize import phonemize
from phonemizer.separator import Separator
from phonemizer.backend import EspeakBackend, EspeakMbrolaBackend


def _language_for(backend):
    """Return the language code the tests below use for `backend`."""
    return 'cree' if backend == 'segments' else 'en-us'


def test_bad_backend():
    """Expect RuntimeError for unknown backends and rejected `tie` usages."""
    # backend names phonemize is expected to refuse
    for backend in ('fetiv', 'foo'):
        with pytest.raises(RuntimeError):
            phonemize('', backend=backend)

    # tie=True is expected to be refused with each of these backends
    for backend in ('festival', 'mbrola', 'segments'):
        with pytest.raises(RuntimeError):
            phonemize('', tie=True, backend=backend)

    # tie=True with espeak and this separator is expected to be refused;
    # the Separator is built inside the context so that an error raised
    # while constructing it is caught as well
    with pytest.raises(RuntimeError):
        phonemize(
            '', tie=True, backend='espeak',
            separator=Separator(' ', None, '-'))


def test_bad_language():
    """Expect RuntimeError for each (language, backend) pair listed here."""
    rejected = [
        ('fr-fr', 'festival'),
        ('ffr', 'espeak'),
        ('/path/to/nonexisting/file', 'segments'),
        ('creep', 'segments'),
    ]
    for language, backend in rejected:
        with pytest.raises(RuntimeError):
            phonemize('', language=language, backend=backend)


def test_text_type():
    """Check the output type against the input type (list or string).

    A list input is expected to give a list, a string input a string,
    and `prepend_text=True` is expected to give (text, phonemes) pairs.

    """
    as_list = ['one two', 'three', 'four five']
    as_str = os.linesep.join(as_list)

    phn_list = phonemize(
        as_list, language='en-us', backend='espeak', strip=True)
    phn_str = phonemize(
        as_str, language='en-us', backend='espeak', strip=True)
    pairs = phonemize(
        as_str, language='en-us', backend='espeak',
        strip=True, prepend_text=True)

    # split the pairs back into their text and phonemes components
    pair_text = [pair[0] for pair in pairs]
    pair_phn = [pair[1] for pair in pairs]

    assert isinstance(phn_list, list)
    assert isinstance(phn_str, str)
    assert os.linesep.join(phn_list) == phn_str
    assert os.linesep.join(pair_phn) == phn_str
    assert pair_text == as_list


@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='language switch only exists for espeak-ng')
def test_lang_switch():
    """Check the output of `language_switch='remove-utterance'`.

    The utterance 'bonjour apple' is expected to come back with empty
    phonemes, the other one is expected to be phonemized.

    """
    text = ['bonjour apple', 'bonjour toi']
    out = phonemize(
        text,
        language='fr-fr',
        backend='espeak',
        prepend_text=True,
        language_switch='remove-utterance')

    assert out == [('bonjour apple', ''), ('bonjour toi', 'bɔ̃ʒuʁ twa ')]


@pytest.mark.parametrize('njobs', [2, 4])
def test_espeak(njobs):
    """Check espeak output for list, space-joined and line-joined input."""
    text = ['one two', 'three', 'four five']

    # list input, stripped output
    out = phonemize(
        text, language='en-us', backend='espeak',
        strip=True, njobs=njobs)
    assert out == ['wʌn tuː', 'θɹiː', 'foːɹ faɪv']

    # single line input, unstripped output
    out = phonemize(
        ' '.join(text), language='en-us', backend='espeak',
        strip=False, njobs=njobs)
    assert out == ' '.join(['wʌn tuː', 'θɹiː', 'foːɹ faɪv '])

    # multiline string input, unstripped output
    out = phonemize(
        os.linesep.join(text), language='en-us', backend='espeak',
        strip=False, njobs=njobs)
    assert out == os.linesep.join(['wʌn tuː ', 'θɹiː ', 'foːɹ faɪv '])


@pytest.mark.skipif(
    not EspeakMbrolaBackend.is_available() or
    not EspeakMbrolaBackend.is_supported_language('mb-fr1'),
    reason='mbrola or mb-fr1 voice not installed')
@pytest.mark.parametrize('njobs', [2, 4])
def test_espeak_mbrola(caplog, njobs):
    """Check espeak-mbrola output and the messages it is expected to log."""
    text = ['un deux', 'trois', 'quatre cinq']

    out = phonemize(
        text, language='mb-fr1', backend='espeak-mbrola',
        njobs=njobs, preserve_punctuation=True)
    assert out == ['9~d2', 'tRwa', 'katRse~k']

    # the third field of each captured record tuple is the message
    messages = [record[2] for record in caplog.record_tuples]
    assert 'espeak-mbrola backend cannot preserve punctuation' in messages
    assert 'espeak-mbrola backend cannot preserve word separation' in messages


@pytest.mark.parametrize('njobs', [2, 4])
def test_festival(njobs):
    """Check festival output for list, space-joined and line-joined input."""
    text = ['one two', 'three', 'four five']

    # list input, unstripped output
    out = phonemize(
        text, language='en-us', backend='festival',
        strip=False, njobs=njobs)
    assert out == ['wahn tuw ', 'thriy ', 'faor fayv ']

    # single line input, stripped output
    out = phonemize(
        ' '.join(text), language='en-us', backend='festival',
        strip=True, njobs=njobs)
    assert out == ' '.join(['wahn tuw', 'thriy', 'faor fayv'])

    # multiline string input, stripped output
    out = phonemize(
        os.linesep.join(text), language='en-us', backend='festival',
        strip=True, njobs=njobs)
    assert out == os.linesep.join(['wahn tuw', 'thriy', 'faor fayv'])


def test_festival_bad():
    """Expect RuntimeError when festival is given espeak-only options."""
    # cannot use options valid for espeak only
    text = ['one two', 'three', 'four five']

    with pytest.raises(RuntimeError):
        phonemize(
            text, language='en-us', backend='festival', with_stress=True)

    with pytest.raises(RuntimeError):
        phonemize(
            text, language='en-us', backend='festival',
            language_switch='remove-flags')


@pytest.mark.parametrize('njobs', [2, 4])
def test_segments(njobs):
    """Check segments output for list, space-joined and line-joined input."""
    # one two three four five in Maya Yucatec
    text = ['untuʼuleʼ kaʼapʼeʼel', 'oʼoxpʼeʼel', 'kantuʼuloʼon chincho']

    # list input, unstripped output
    out = phonemize(
        text, language='yucatec', backend='segments',
        strip=False, njobs=njobs)
    assert out == [
        'untṵːlḛ ka̰ːpʼḛːl ', 'o̰ːʃpʼḛːl ', 'kantṵːlo̰ːn t̠͡ʃint̠͡ʃo ']

    # single line input, unstripped output
    out = phonemize(
        ' '.join(text), language='yucatec', backend='segments',
        strip=False, njobs=njobs)
    assert out == ' '.join(
        ['untṵːlḛ ka̰ːpʼḛːl', 'o̰ːʃpʼḛːl', 'kantṵːlo̰ːn t̠͡ʃint̠͡ʃo '])

    # multiline string input, stripped output
    out = phonemize(
        os.linesep.join(text), language='yucatec', backend='segments',
        strip=True, njobs=njobs)
    assert out == os.linesep.join(
        ['untṵːlḛ ka̰ːpʼḛːl', 'o̰ːʃpʼḛːl', 'kantṵːlo̰ːn t̠͡ʃint̠͡ʃo'])


@pytest.mark.parametrize(
    'backend, empty_lines, punctuation, prepend_text, text, expected', [
        # without prepend_text: expected is a list of phonemized lines
        ('espeak', False, False, False,
         ['hello world!', '', 'goodbye'],
         ['həloʊ wɜːld ', 'ɡʊdbaɪ ']),
        ('espeak', False, True, False,
         ['hello world!', '', 'goodbye'],
         ['həloʊ wɜːld! ', 'ɡʊdbaɪ ']),
        ('espeak', True, False, False,
         ['hello world!', '', 'goodbye'],
         ['həloʊ wɜːld ', '', 'ɡʊdbaɪ ']),
        ('espeak', True, True, False,
         ['hello world!', '', 'goodbye'],
         ['həloʊ wɜːld! ', '', 'ɡʊdbaɪ ']),
        ('segments', False, False, False,
         ['achi acho?', '', 'achi acho'],
         ['ʌtʃɪ ʌtʃʊ ', 'ʌtʃɪ ʌtʃʊ ']),
        ('segments', False, True, False,
         ['achi acho?', '', 'achi acho'],
         ['ʌtʃɪ ʌtʃʊ? ', 'ʌtʃɪ ʌtʃʊ ']),
        ('segments', True, False, False,
         ['achi acho?', '', 'achi acho'],
         ['ʌtʃɪ ʌtʃʊ ', '', 'ʌtʃɪ ʌtʃʊ ']),
        ('segments', True, True, False,
         ['achi acho?', '', 'achi acho'],
         ['ʌtʃɪ ʌtʃʊ? ', '', 'ʌtʃɪ ʌtʃʊ ']),
        ('festival', False, False, False,
         ['hello world!', '', 'goodbye'],
         ['hhaxlow werld ', 'guhdbay ']),
        ('festival', False, True, False,
         ['hello world!', '', 'goodbye'],
         ['hhaxlow werld! ', 'guhdbay ']),
        ('festival', True, False, False,
         ['hello world!', '', 'goodbye'],
         ['hhaxlow werld ', '', 'guhdbay ']),
        ('festival', True, True, False,
         ['hello world!', '', 'goodbye'],
         ['hhaxlow werld! ', '', 'guhdbay ']),
        # with prepend_text: expected is a list of (text, phonemes) pairs
        ('espeak', False, False, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'həloʊ wɜːld '), ('goodbye', 'ɡʊdbaɪ ')]),
        ('espeak', False, True, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'həloʊ wɜːld! '), ('goodbye', 'ɡʊdbaɪ ')]),
        ('espeak', True, False, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'həloʊ wɜːld '), ('', ''),
          ('goodbye', 'ɡʊdbaɪ ')]),
        ('espeak', True, True, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'həloʊ wɜːld! '), ('', ''),
          ('goodbye', 'ɡʊdbaɪ ')]),
        ('segments', False, False, True,
         ['achi acho?', '', 'achi acho'],
         [('achi acho?', 'ʌtʃɪ ʌtʃʊ '), ('achi acho', 'ʌtʃɪ ʌtʃʊ ')]),
        ('segments', False, True, True,
         ['achi acho?', '', 'achi acho'],
         [('achi acho?', 'ʌtʃɪ ʌtʃʊ? '), ('achi acho', 'ʌtʃɪ ʌtʃʊ ')]),
        ('segments', True, False, True,
         ['achi acho?', '', 'achi acho'],
         [('achi acho?', 'ʌtʃɪ ʌtʃʊ '), ('', ''),
          ('achi acho', 'ʌtʃɪ ʌtʃʊ ')]),
        ('segments', True, True, True,
         ['achi acho?', '', 'achi acho'],
         [('achi acho?', 'ʌtʃɪ ʌtʃʊ? '), ('', ''),
          ('achi acho', 'ʌtʃɪ ʌtʃʊ ')]),
        ('festival', False, False, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'hhaxlow werld '), ('goodbye', 'guhdbay ')]),
        ('festival', False, True, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'hhaxlow werld! '), ('goodbye', 'guhdbay ')]),
        ('festival', True, False, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'hhaxlow werld '), ('', ''),
          ('goodbye', 'guhdbay ')]),
        ('festival', True, True, True,
         ['hello world!', '', 'goodbye'],
         [('hello world!', 'hhaxlow werld! '), ('', ''),
          ('goodbye', 'guhdbay ')])])
def test_preserve_empty_lines(
        backend, empty_lines, punctuation, prepend_text, text, expected):
    """Check `preserve_empty_lines` combined with the other options.

    Each case gives a three-line text whose middle line is empty; the
    expected output keeps an entry for that line only when `empty_lines`
    is True.

    """
    assert expected == phonemize(
        text,
        language=_language_for(backend),
        backend=backend,
        prepend_text=prepend_text,
        preserve_punctuation=punctuation,
        preserve_empty_lines=empty_lines)


@pytest.mark.parametrize(
    'backend, empty_lines, punctuation, text, expected', [
        # a single empty line is expected to be kept only when
        # empty_lines is True, whatever the backend and punctuation
        (backend, empty_lines, punctuation, [''], [''] if empty_lines else [])
        for backend in ('espeak', 'segments', 'festival')
        for empty_lines in (False, True)
        for punctuation in (False, True)])
def test_empty_input(backend, empty_lines, punctuation, text, expected):
    """Check the output for an input made of a single empty line."""
    assert expected == phonemize(
        text,
        language=_language_for(backend),
        backend=backend,
        preserve_punctuation=punctuation,
        preserve_empty_lines=empty_lines)