# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Test of the segments backend"""

# pylint: disable=missing-docstring

import os
import pytest

from phonemizer.separator import Separator, default_separator
from phonemizer.backend import SegmentsBackend
from phonemizer.utils import get_package_resource


def test_multiline():
    """Check how newlines inside and between input items are phonemized.

    Each input item yields exactly one output item: a newline embedded
    in an item ends up as a space in the output, and a trailing newline
    does not add anything to it.
    """
    backend = SegmentsBackend('cree')
    assert backend.language == 'cree'

    # single item, with or without newlines
    assert backend.phonemize(['a']) == ['ʌ ']
    assert backend.phonemize(['aa']) == ['ʌʌ ']
    assert backend.phonemize(['a\n']) == ['ʌ ']
    assert backend.phonemize(['a\na']) == ['ʌ ʌ ']
    assert backend.phonemize(['a\na\n']) == ['ʌ ʌ ']

    # several items are phonemized independently
    assert backend.phonemize(['a', 'a']) == ['ʌ ', 'ʌ ']
    assert backend.phonemize(['a\n', 'a\n']) == ['ʌ ', 'ʌ ']


def test_bad_morpheme():
    """Check that phonemizing 'A' with the cree backend raises ValueError."""
    backend = SegmentsBackend('cree')

    # NOTE(review): presumably the uppercase 'A' is absent from the cree
    # grapheme-to-phoneme profile -- confirm against cree.g2p
    with pytest.raises(ValueError):
        backend.phonemize(['A'])


def test_separator():
    """Check the output obtained with the default separator.

    With strip=True the trailing separator is removed from the output.
    """
    backend = SegmentsBackend('cree')
    text = ['achi acho']

    sep = default_separator
    assert backend.phonemize(text, separator=sep) == ['ʌtʃɪ ʌtʃʊ ']
    assert backend.phonemize(text, separator=sep, strip=True) == ['ʌtʃɪ ʌtʃʊ']


def test_separator_2():
    """Check the output with '_' between words and ' ' between phones."""
    backend = SegmentsBackend('cree')
    text = ['achi acho']

    sep = Separator(word='_', phone=' ')
    assert backend.phonemize(text, separator=sep) == ['ʌ tʃ ɪ _ʌ tʃ ʊ _']
    assert backend.phonemize(text, separator=sep, strip=True) \
        == ['ʌ tʃ ɪ_ʌ tʃ ʊ']


def test_separator_3():
    """Check the output with ' ' between words and '_' between phones.

    The syllable separator is explicitly set to None.
    """
    backend = SegmentsBackend('cree')
    text = ['achi acho']

    sep = Separator(word=' ', syllable=None, phone='_')
    assert backend.phonemize(text, separator=sep) == ['ʌ_tʃ_ɪ_ ʌ_tʃ_ʊ_ ']
    assert backend.phonemize(text, separator=sep, strip=True) \
        == ['ʌ_tʃ_ɪ ʌ_tʃ_ʊ']


def test_separator_4():
    """Check the output with an empty word separator and ' ' between phones.

    The expected values below pin the current behavior, which the TODO
    in the body flags as a bug.
    """
    backend = SegmentsBackend('cree')
    text = ['achi acho']

    # TODO bug when sep.phone == ' ' with no sep.word
    sep = Separator(phone=' ', word='')
    assert backend.phonemize(text, separator=sep) == ['ʌ tʃ ɪ ʌ tʃ ʊ ']
    assert backend.phonemize(text, separator=sep, strip=True) \
        == ['ʌ tʃ ɪʌ tʃ ʊ']


def test_separator_5():
    """Check the output with ' ' between phones and '_' between words.

    The separator is built with the same values as in test_separator_2,
    the keyword arguments being given in the opposite order.
    """
    backend = SegmentsBackend('cree')
    text = ['achi acho']

    sep = Separator(phone=' ', word='_')
    assert backend.phonemize(text, separator=sep) == ['ʌ tʃ ɪ _ʌ tʃ ʊ _']
    assert backend.phonemize(text, separator=sep, strip=True) \
        == ['ʌ tʃ ɪ_ʌ tʃ ʊ']


def test_language(tmpdir):
    """Check SegmentsBackend.is_supported_language on names and g2p files.

    Parameters
    ----------
    tmpdir : pytest fixture providing a temporary directory, used here to
        write a malformed g2p file.
    """
    # check languages by name
    assert SegmentsBackend.is_supported_language('cree')
    assert not SegmentsBackend.is_supported_language('unexisting')

    # check languages by g2p file
    directory = get_package_resource('segments')
    assert SegmentsBackend.is_supported_language(
        os.path.join(directory, 'cree.g2p'))
    assert not SegmentsBackend.is_supported_language(
        os.path.join(directory, 'cree'))
    assert not SegmentsBackend.is_supported_language(
        os.path.join(directory, 'unexisting.g2p'))

    # bad syntax in g2p file (lines with 2, 3 and 1 columns)
    g2p = tmpdir.join('foo.g2p')
    g2p.write('\n'.join(['a a', 'b b b', 'c']))
    assert not SegmentsBackend.is_supported_language(g2p)