Files
blender-portable-repo/extensions/.local/lib/python3.11/site-packages/phonemizer/separator.py
T
2026-03-17 14:58:51 -06:00

119 lines
4.1 KiB
Python

# Copyright 2015-2021 Mathieu Bernard
#
# This file is part of phonemizer: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# Phonemizer is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with phonemizer. If not, see <http://www.gnu.org/licenses/>.
"""Provides the Separator tuple and its default value"""
from typing import Optional, Union
class Separator:
"""Defines phone, syllable and word boundary tokens"""
def __init__(self, word: str = ' ',
syllable: Optional[str] = None,
phone: Optional[str] = None):
# check we have different separators, None excluded
sep1 = list(sep for sep in (phone, syllable, word) if sep)
sep2 = set(sep for sep in (phone, syllable, word) if sep)
if len(sep1) != len(sep2):
raise ValueError(
'illegal separator with word="{}", syllable="{}" and '
'phone="{}", must be all differents if not empty'
.format(phone, syllable, word))
self._phone = str(phone) if phone else ''
self._syllable = str(syllable) if syllable else ''
self._word = str(word) if word else ''
def __eq__(self, other: 'Separator'):
return (
self.phone == other.phone
and self.syllable == other.syllable
and self.word == other.word)
def __str__(self):
return (
f'(phone: "{self.phone}", '
f'syllable: "{self.syllable}", '
f'word: "{self.word}")')
@property
def phone(self):
"""Phones separator"""
return self._phone
@property
def syllable(self):
"""Syllables separator"""
return self._syllable
@property
def word(self):
"""Words separator"""
return self._word
def __contains__(self, value: str):
"""Returns True if the separator has `value` as token separation"""
return value in {self.phone, self.syllable, self.word}
def input_output_separator(self, field_separator: Union[str, bool]) \
-> Union[str, bool]:
"""Returns a suitable input/output separator based on token separator
The input/output separator split orthographic and phonetic texts when
using the --prepend-text option from command-line.
Parameters
----------
field_separator: bool or str
If str, ensures it's value is not
already defined as a token separator. If True choose one of "|",
"||", "|||", "||||" (the first one that is not defined as a token
separator)
Returns
-------
The input/output separator, or False if ``field_separator`` is False
Raises
------
RuntimeError
if ``field_separator`` is a str but is already registered as token separator
"""
if not field_separator:
return False
if isinstance(field_separator, str):
if field_separator in self:
raise RuntimeError(
f'cannot prepend input with "{field_separator}" because '
f'it is already a token separator: {self}')
return field_separator
if field_separator is True:
field_separator = '|'
while field_separator in self:
field_separator += '|'
return field_separator
# not a bool nor a str
raise RuntimeError(
'invalid input/output separator, must be bool or str but is'
f'{field_separator}')
default_separator = Separator(phone='', syllable='', word=' ')
"""The default separation characters for phonemes, syllables and words"""