vits-simple-api

Sleeping

App Files Community

Artrajz committed on Jul 6, 2023

Commit

bd5f307

1 Parent(s): ff648e9

text

Browse files

Files changed (14) hide show

text/__init__.py +23 -23
text/__pycache__/__init__.cpython-310.pyc +0 -0
text/__pycache__/cantonese.cpython-310.pyc +0 -0
text/__pycache__/cleaners.cpython-310.pyc +0 -0
text/__pycache__/english.cpython-310.pyc +0 -0
text/__pycache__/japanese.cpython-310.pyc +0 -0
text/__pycache__/korean.cpython-310.pyc +0 -0
text/__pycache__/mandarin.cpython-310.pyc +0 -0
text/__pycache__/ngu_dialect.cpython-310.pyc +0 -0
text/__pycache__/shanghainese.cpython-310.pyc +0 -0
text/cantonese.py +1 -19
text/cleaners.py +14 -0
text/mandarin.py +18 -3
text/shanghainese.py +1 -19

text/__init__.py CHANGED Viewed

@@ -2,31 +2,31 @@
 from text import cleaners
-def text_to_sequence(text, symbols, cleaner_names):
-  '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
-    Args:
-      text: string to convert to a sequence
-      cleaner_names: names of the cleaner functions to run the text through
-    Returns:
-      List of integers corresponding to the symbols in the text
-  '''
-  _symbol_to_id = {s: i for i, s in enumerate(symbols)}
-  sequence = []
-  clean_text = _clean_text(text, cleaner_names)
-  for symbol in clean_text:
-    if symbol not in _symbol_to_id.keys():
-      continue
-    symbol_id = _symbol_to_id[symbol]
-    sequence += [symbol_id]
-  return sequence
 def _clean_text(text, cleaner_names):
-  for name in cleaner_names:
-    cleaner = getattr(cleaners, name)
-    if not cleaner:
-      raise Exception('Unknown cleaner: %s' % name)
-    text = cleaner(text)
-  return text

 from text import cleaners
+def text_to_sequence(text, symbols, cleaner_names, bert_embedding=False):
+    '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
+      Args:
+        text: string to convert to a sequence
+        cleaner_names: names of the cleaner functions to run the text through
+      Returns:
+        List of integers corresponding to the symbols in the text
+    '''
+    _symbol_to_id = {s: i for i, s in enumerate(symbols)}
+    if bert_embedding:
+        cleaned_text, char_embeds = _clean_text(text, cleaner_names)
+        sequence = [_symbol_to_id[symbol] for symbol in cleaned_text.split()]
+        return sequence, char_embeds
+    else:
+        cleaned_text = _clean_text(text, cleaner_names)
+        sequence = [_symbol_to_id[symbol] for symbol in cleaned_text if symbol in _symbol_to_id.keys()]
+        return sequence
 def _clean_text(text, cleaner_names):
+    for name in cleaner_names:
+        cleaner = getattr(cleaners, name)
+        if not cleaner:
+            raise Exception('Unknown cleaner: %s' % name)
+        text = cleaner(text)
+    return text

text/__pycache__/__init__.cpython-310.pyc DELETED Viewed

Binary file (1.21 kB)

text/__pycache__/cantonese.cpython-310.pyc DELETED Viewed

Binary file (2.34 kB)

text/__pycache__/cleaners.cpython-310.pyc DELETED Viewed

Binary file (11 kB)

text/__pycache__/english.cpython-310.pyc DELETED Viewed

Binary file (4.69 kB)

text/__pycache__/japanese.cpython-310.pyc DELETED Viewed

Binary file (4.13 kB)

text/__pycache__/korean.cpython-310.pyc DELETED Viewed

Binary file (5.58 kB)

text/__pycache__/mandarin.cpython-310.pyc DELETED Viewed

Binary file (6.53 kB)

text/__pycache__/ngu_dialect.cpython-310.pyc DELETED Viewed

Binary file (1.17 kB)

text/__pycache__/shanghainese.cpython-310.pyc DELETED Viewed

Binary file (2.51 kB)

text/cantonese.py CHANGED Viewed

@@ -35,25 +35,6 @@ _latin_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
     ('Z', 'iː˨sɛːt̚˥')
 ]]
-_symbols_to_chinese = [(re.compile(f'{x[0]}'), x[1]) for x in [
-    ('([0-9]+(?:\.?[0-9]+)?)%', r'百分之\1'),
-    ('([0-9]+)/([0-9]+)', r'\2分之\1'),
-    ('\+', r'加'),
-    ('([0-9]+)-([0-9]+)', r'\1减\2'),
-    ('×', r'乘以'),
-    ('([0-9]+)x([0-9]+)', r'\1乘以\2'),
-    ('([0-9]+)\*([0-9]+)', r'\1乘以\2'),
-    ('÷', r'除以'),
-    ('=', r'等于'),
-    ('≠', r'不等于'),
-]]
-def symbols_to_chinese(text):
-    for regex, replacement in _symbols_to_chinese:
-        text = re.sub(regex, replacement, text)
-    return text
 def number_to_cantonese(text):
     return re.sub(r'\d+(?:\.?\d+)?', lambda x: cn2an.an2cn(x.group()), text)
@@ -66,6 +47,7 @@ def latin_to_ipa(text):
 def cantonese_to_ipa(text):
     text = symbols_to_chinese(text)
     text = number_to_cantonese(text.upper())
     text = converter.convert(text).replace('-', '').replace('$', ' ')

     ('Z', 'iː˨sɛːt̚˥')
 ]]
 def number_to_cantonese(text):
     return re.sub(r'\d+(?:\.?\d+)?', lambda x: cn2an.an2cn(x.group()), text)
 def cantonese_to_ipa(text):
+    from text.mandarin import symbols_to_chinese
     text = symbols_to_chinese(text)
     text = number_to_cantonese(text.upper())
     text = converter.convert(text).replace('-', '').replace('$', ' ')

text/cleaners.py CHANGED Viewed

@@ -247,3 +247,17 @@ def chinese_dialect_cleaners(text):
     text = re.sub(r'\s+$', '', text)
     text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
     return text

     text = re.sub(r'\s+$', '', text)
     text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
     return text
+def bert_chinese_cleaners(text):
+    from text import mandarin
+    matches = re.findall(r"\[ZH\](.*?)\[ZH\]", text)
+    text = "".join(matches)
+    if text[-1] not in [".", "。", ",", "，"]: text += "."
+    text = mandarin.symbols_to_chinese(text)
+    text = mandarin.number_transform_to_chinese(text)
+    if not hasattr(bert_chinese_cleaners, "tts_front"):
+        bert_chinese_cleaners.tts_front = mandarin.VITS_PinYin_model()
+    tts_front = bert_chinese_cleaners.tts_front
+    cleaned_text, char_embeds = tts_front.chinese_to_phonemes(text)
+    return cleaned_text, char_embeds

text/mandarin.py CHANGED Viewed

@@ -262,6 +262,11 @@ def number_to_chinese(text):
     return text
 def chinese_to_bopomofo(text):
     text = text.replace('、', '，').replace('；', '，').replace('：', '，')
     words = jieba.lcut(text, cut_all=False)
@@ -305,7 +310,7 @@ def bopomofo_to_ipa2(text):
 def chinese_to_romaji(text):
     text = symbols_to_chinese(text)
-    text = number_to_chinese(text)
     text = chinese_to_bopomofo(text)
     text = latin_to_bopomofo(text)
     text = bopomofo_to_romaji(text)
@@ -326,7 +331,7 @@ def chinese_to_lazy_ipa(text):
 def chinese_to_ipa(text):
     text = symbols_to_chinese(text)
-    text = number_to_chinese(text)
     text = chinese_to_bopomofo(text)
     text = latin_to_bopomofo(text)
     text = bopomofo_to_ipa(text)
@@ -340,7 +345,7 @@ def chinese_to_ipa(text):
 def chinese_to_ipa2(text):
     text = symbols_to_chinese(text)
-    text = number_to_chinese(text)
     text = chinese_to_bopomofo(text)
     text = latin_to_bopomofo(text)
     text = bopomofo_to_ipa2(text)
@@ -349,3 +354,13 @@ def chinese_to_ipa2(text):
     text = re.sub(r'([ʂɹ]ʰ?)([˩˨˧˦˥ ]+|$)', r'\1ʅ\2', text)
     text = re.sub(r'(sʰ?)([˩˨˧˦˥ ]+|$)', r'\1ɿ\2', text)
     return text

     return text
+def number_transform_to_chinese(text):
+    text = cn2an.transform(text, "an2cn")
+    return text
 def chinese_to_bopomofo(text):
     text = text.replace('、', '，').replace('；', '，').replace('：', '，')
     words = jieba.lcut(text, cut_all=False)
 def chinese_to_romaji(text):
     text = symbols_to_chinese(text)
+    text = number_transform_to_chinese(text)
     text = chinese_to_bopomofo(text)
     text = latin_to_bopomofo(text)
     text = bopomofo_to_romaji(text)
 def chinese_to_ipa(text):
     text = symbols_to_chinese(text)
+    text = number_transform_to_chinese(text)
     text = chinese_to_bopomofo(text)
     text = latin_to_bopomofo(text)
     text = bopomofo_to_ipa(text)
 def chinese_to_ipa2(text):
     text = symbols_to_chinese(text)
+    text = number_transform_to_chinese(text)
     text = chinese_to_bopomofo(text)
     text = latin_to_bopomofo(text)
     text = bopomofo_to_ipa2(text)
     text = re.sub(r'([ʂɹ]ʰ?)([˩˨˧˦˥ ]+|$)', r'\1ʅ\2', text)
     text = re.sub(r'(sʰ?)([˩˨˧˦˥ ]+|$)', r'\1ɿ\2', text)
     return text
+def VITS_PinYin_model():
+    import torch
+    import config
+    from vits_pinyin import VITS_PinYin
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    # pinyin
+    tts_front = VITS_PinYin(f"{config.ABS_PATH}/bert", device)
+    return tts_front

text/shanghainese.py CHANGED Viewed

@@ -35,25 +35,6 @@ _latin_to_ipa = [(re.compile('%s' % x[0]), x[1]) for x in [
     ('Z', 'zᴇ')
 ]]
-_symbols_to_chinese = [(re.compile(f'{x[0]}'), x[1]) for x in [
-    ('([0-9]+(?:\.?[0-9]+)?)%', r'百分之\1'),
-    ('([0-9]+)/([0-9]+)', r'\2分之\1'),
-    ('\+', r'加'),
-    ('([0-9]+)-([0-9]+)', r'\1减\2'),
-    ('×', r'乘以'),
-    ('([0-9]+)x([0-9]+)', r'\1乘以\2'),
-    ('([0-9]+)\*([0-9]+)', r'\1乘以\2'),
-    ('÷', r'除以'),
-    ('=', r'等于'),
-    ('≠', r'不等于'),
-]]
-def symbols_to_chinese(text):
-    for regex, replacement in _symbols_to_chinese:
-        text = re.sub(regex, replacement, text)
-    return text
 def _number_to_shanghainese(num):
     num = cn2an.an2cn(num).replace('一十', '十').replace('二十', '廿').replace('二', '两')
@@ -71,6 +52,7 @@ def latin_to_ipa(text):
 def shanghainese_to_ipa(text):
     text = symbols_to_chinese(text)
     text = number_to_shanghainese(text.upper())
     text = converter.convert(text).replace('-', '').replace('$', ' ')

     ('Z', 'zᴇ')
 ]]
 def _number_to_shanghainese(num):
     num = cn2an.an2cn(num).replace('一十', '十').replace('二十', '廿').replace('二', '两')
 def shanghainese_to_ipa(text):
+    from text.mandarin import symbols_to_chinese
     text = symbols_to_chinese(text)
     text = number_to_shanghainese(text.upper())
     text = converter.convert(text).replace('-', '').replace('$', ' ')