Spaces:
Running
Running
Commit
·
9cb346b
1
Parent(s):
0768c8a
(wip)debug
Browse files
tts.py
CHANGED
|
@@ -3,6 +3,8 @@ from dotenv import load_dotenv
|
|
| 3 |
import random
|
| 4 |
from gradio_client import Client, handle_file,file
|
| 5 |
from huggingface_hub.constants import HF_TOKEN_PATH
|
|
|
|
|
|
|
| 6 |
|
| 7 |
load_dotenv()
|
| 8 |
|
|
@@ -151,6 +153,23 @@ def predict_gpt_sovits_v2(text, user_token=None,reference_audio_path=None):
|
|
| 151 |
return result
|
| 152 |
|
| 153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
def predict_tts(text, model, user_token=None, reference_audio_path=None):
|
| 155 |
print(f"Predicting TTS for {model}, user_token: {user_token}, reference_audio_path: {reference_audio_path}")
|
| 156 |
# Exceptions: special models that shouldn't be passed to the router
|
|
@@ -166,7 +185,10 @@ def predict_tts(text, model, user_token=None, reference_audio_path=None):
|
|
| 166 |
result = predict_gpt_sovits_v2(text, user_token, reference_audio_path)
|
| 167 |
else:
|
| 168 |
raise ValueError(f"Model {model} not found")
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
if __name__ == "__main__":
|
| 172 |
-
pass
|
|
|
|
| 3 |
import random
|
| 4 |
from gradio_client import Client, handle_file,file
|
| 5 |
from huggingface_hub.constants import HF_TOKEN_PATH
|
| 6 |
+
from pydub import AudioSegment
|
| 7 |
+
import os.path
|
| 8 |
|
| 9 |
load_dotenv()
|
| 10 |
|
|
|
|
| 153 |
return result
|
| 154 |
|
| 155 |
|
| 156 |
+
def normalize_audio_volume(audio_path):
    """Maximize the volume of an audio file and return the normalized copy's path.

    The file is decoded with ``AudioSegment.from_file``, passed through
    ``AudioSegment.normalize()``, and written next to the input as
    ``<name>_normalized<ext>``. The input file itself is not modified.

    Args:
        audio_path: Path of the audio file to normalize.

    Returns:
        Path of the newly written, normalized audio file. When the input has
        no extension the copy is written as WAV and ``.wav`` is appended.
    """
    # Split off the extension; it decides both the output name and the
    # container format handed to the exporter.
    file_name, ext = os.path.splitext(audio_path)

    # Format names are lowercase, so ".WAV" must become "wav".
    export_format = ext.lstrip(".").lower()

    if not export_format:
        # An extensionless path would otherwise pass format="" to the
        # exporter; fall back to WAV and make the output name say so.
        export_format = "wav"
        ext = ".wav"

    # Some extensions are not valid container names for the encoder.
    # NOTE(review): mirrors the aliases pydub applies when *reading* files
    # ("m4a" -> "mp4", "wave" -> "wav") - confirm against the pinned version.
    format_aliases = {"m4a": "mp4", "wave": "wav"}
    export_format = format_aliases.get(export_format, export_format)

    normalized_path = f"{file_name}_normalized{ext}"

    # Read the audio file.
    sound = AudioSegment.from_file(audio_path)

    # Maximize the volume (normalization).
    normalized_sound = sound.normalize()

    # Save the processed audio.
    normalized_sound.export(normalized_path, format=export_format)

    return normalized_path
|
| 172 |
+
|
| 173 |
def predict_tts(text, model, user_token=None, reference_audio_path=None):
|
| 174 |
print(f"Predicting TTS for {model}, user_token: {user_token}, reference_audio_path: {reference_audio_path}")
|
| 175 |
# Exceptions: special models that shouldn't be passed to the router
|
|
|
|
| 185 |
result = predict_gpt_sovits_v2(text, user_token, reference_audio_path)
|
| 186 |
else:
|
| 187 |
raise ValueError(f"Model {model} not found")
|
| 188 |
+
|
| 189 |
+
# Maximize (normalize) the volume of the generated audio
|
| 190 |
+
normalized_result = normalize_audio_volume(result)
|
| 191 |
+
return normalized_result
|
| 192 |
|
| 193 |
if __name__ == "__main__":
|
| 194 |
+
pass
|