|
|
import re |
|
|
|
|
|
from textstat import textstat |
|
|
|
|
|
|
|
|
def contains_chinese(text):
    """Return True if *text* has a character in U+4E00-U+9FFF or an ASCII digit.

    Despite the name, ASCII digits ``0-9`` are part of the character class,
    so purely alphanumeric text such as ``"abc123"`` also yields True.
    NOTE(review): presumably digits are included on purpose so that numeric
    text takes the per-character counting path -- confirm with callers.

    Args:
        text: The string to inspect.

    Returns:
        True when at least one matching character is found, otherwise False.
    """
    return re.search(r'[\u4e00-\u9fff0-9]', text) is not None
|
|
|
|
|
|
|
|
def get_text_syllable_num(text):
    """Count the syllables in *text*, treating Chinese characters and digits per character.

    When ``contains_chinese(text)`` is false, the whole string is handed to
    ``textstat.syllable_count``. Otherwise the text is split into runs of
    U+4E00-U+9FFF characters, runs of ASCII letters and runs of ASCII digits
    (everything else is dropped); each character in a Chinese or digit run
    counts as one syllable, and each letter run is counted by
    ``textstat.syllable_count``.

    Args:
        text: The string to measure.

    Returns:
        The syllable total: the sum over runs on the tokenised path (0 when
        no run is found), or whatever ``textstat.syllable_count`` returns
        for the whole text on the other path.
    """
    # Plain text (no Chinese characters, no digits): let textstat do it all.
    if not contains_chinese(text):
        return textstat.syllable_count(text)

    han_re = re.compile(r'[\u4e00-\u9fff]')
    digit_re = re.compile(r'[0-9]')
    pieces = re.findall(r'[\u4e00-\u9fff]+|[a-zA-Z]+|[0-9]+', text)

    # Chinese/digit runs contribute their length; letter runs go to textstat.
    return sum(
        len(piece)
        if han_re.search(piece) or digit_re.search(piece)
        else textstat.syllable_count(piece)
        for piece in pieces
    )
|
|
|
|
|
|
|
|
def get_text_tts_dur(text):
    """Estimate a pair of durations for *text* from its syllable count.

    The count from ``get_text_syllable_num`` is multiplied by 0.8517 when
    ``contains_chinese(text)`` is true (by 1.0 otherwise) and then divided
    by two fixed rates, 5.50 and 3.
    NOTE(review): the rates look like syllables per second, which would make
    the results seconds -- confirm with callers.

    Args:
        text: The string whose duration is estimated.

    Returns:
        A 2-tuple ``(max_dur, min_dur)``. Mind the naming: the first element
        is the scaled count divided by the larger rate (5.50) and the second
        is divided by the smaller rate (3), so for a positive count the first
        element is the numerically smaller one.
    """
    fast_rate = 5.50
    slow_rate = 3

    if contains_chinese(text):
        scale = 0.8517
    else:
        scale = 1.0

    scaled_count = get_text_syllable_num(text) * scale

    # Order matters to callers: result at the fast rate first, slow rate second.
    return scaled_count / fast_rate, scaled_count / slow_rate