Update app.py
Browse files
app.py
CHANGED
|
@@ -283,8 +283,9 @@ if __name__ == "__main__":
|
|
| 283 |
label="Optional Prefix Audio (continue from this audio)",
|
| 284 |
type="filepath",
|
| 285 |
)
|
| 286 |
-
|
| 287 |
-
|
|
|
|
| 288 |
with gr.Column(scale=3):
|
| 289 |
cfg_scale_slider = gr.Slider(1.0, 5.0, 2.0, 0.1, label="CFG Scale")
|
| 290 |
min_p_slider = gr.Slider(0.0, 1.0, 0.15, 0.01, label="Min P")
|
|
@@ -304,8 +305,8 @@ if __name__ == "__main__":
|
|
| 304 |
value=False,
|
| 305 |
info="Note; this is not a pre-processing step, it is a conditioning value that the model understands. Check this box if your input audio is noisy."
|
| 306 |
)
|
| 307 |
-
|
| 308 |
-
|
| 309 |
|
| 310 |
def on_enhanced_change(use_enhance: bool) -> Dict[str, Any]:
|
| 311 |
update_dict = {"enabled": not use_enhance}
|
|
@@ -322,6 +323,7 @@ if __name__ == "__main__":
|
|
| 322 |
-1200, 1200, -44.99, 0.01, label="Speaker Pitch Shift (Cents)",
|
| 323 |
info="A pitch shift to apply to speaker audio before extracting embeddings. A slight down-shift of ~45 cents tends to produce a more accurate voice cloning."
|
| 324 |
)
|
|
|
|
| 325 |
speaker_audio = gr.Audio(
|
| 326 |
label="Optional Speaker Audio (for cloning)",
|
| 327 |
type="filepath",
|
|
@@ -445,10 +447,10 @@ if __name__ == "__main__":
|
|
| 445 |
emotion_uncond,
|
| 446 |
speaker_uncond,
|
| 447 |
speaker_pitch_shift,
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
enhanced_checkbox,
|
| 453 |
],
|
| 454 |
outputs=[output_audio, seed_number],
|
|
|
|
| 283 |
label="Optional Prefix Audio (continue from this audio)",
|
| 284 |
type="filepath",
|
| 285 |
)
|
| 286 |
+
prefix_equalize_checkbox = gr.Checkbox(label="Equalize Prefix Audio", value=True)
|
| 287 |
+
prefix_enhance_checkbox = gr.Checkbox(label="Enhance Prefix Audio with DeepFilterNet", value=True)
|
| 288 |
+
|
| 289 |
with gr.Column(scale=3):
|
| 290 |
cfg_scale_slider = gr.Slider(1.0, 5.0, 2.0, 0.1, label="CFG Scale")
|
| 291 |
min_p_slider = gr.Slider(0.0, 1.0, 0.15, 0.01, label="Min P")
|
|
|
|
| 305 |
value=False,
|
| 306 |
info="Note; this is not a pre-processing step, it is a conditioning value that the model understands. Check this box if your input audio is noisy."
|
| 307 |
)
|
| 308 |
+
speaker_equalize_checkbox = gr.Checkbox(label="Equalize Speaker Audio", value=True)
|
| 309 |
+
speaker_enhance_checkbox = gr.Checkbox(label="Enhance Speaker Audio with DeepFilterNet", value=True)
|
| 310 |
|
| 311 |
def on_enhanced_change(use_enhance: bool) -> Dict[str, Any]:
|
| 312 |
update_dict = {"enabled": not use_enhance}
|
|
|
|
| 323 |
-1200, 1200, -44.99, 0.01, label="Speaker Pitch Shift (Cents)",
|
| 324 |
info="A pitch shift to apply to speaker audio before extracting embeddings. A slight down-shift of ~45 cents tends to produce a more accurate voice cloning."
|
| 325 |
)
|
| 326 |
+
|
| 327 |
speaker_audio = gr.Audio(
|
| 328 |
label="Optional Speaker Audio (for cloning)",
|
| 329 |
type="filepath",
|
|
|
|
| 447 |
emotion_uncond,
|
| 448 |
speaker_uncond,
|
| 449 |
speaker_pitch_shift,
|
| 450 |
+
speaker_equalize_checkbox,
|
| 451 |
+
speaker_enhance_checkbox,
|
| 452 |
+
prefix_equalize_checkbox,
|
| 453 |
+
prefix_enhance_checkbox,
|
| 454 |
enhanced_checkbox,
|
| 455 |
],
|
| 456 |
outputs=[output_audio, seed_number],
|