feat: worked vixtts
Browse files
app.py
CHANGED
|
@@ -6,7 +6,7 @@ import torch
|
|
| 6 |
import torchaudio
|
| 7 |
|
| 8 |
# download for mecab
|
| 9 |
-
|
| 10 |
|
| 11 |
# By using XTTS you agree to CPML license https://coqui.ai/cpml
|
| 12 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
|
@@ -394,159 +394,6 @@ Supported languages: Arabic: ar, Brazilian Portuguese: pt , Mandarin Chinese: zh
|
|
| 394 |
article = """
|
| 395 |
|
| 396 |
"""
|
| 397 |
-
examples = [
|
| 398 |
-
[
|
| 399 |
-
"Once when I was six years old I saw a magnificent picture",
|
| 400 |
-
"en",
|
| 401 |
-
"examples/female.wav",
|
| 402 |
-
None,
|
| 403 |
-
False,
|
| 404 |
-
False,
|
| 405 |
-
False,
|
| 406 |
-
True,
|
| 407 |
-
],
|
| 408 |
-
[
|
| 409 |
-
"Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
|
| 410 |
-
"fr",
|
| 411 |
-
"examples/male.wav",
|
| 412 |
-
None,
|
| 413 |
-
False,
|
| 414 |
-
False,
|
| 415 |
-
False,
|
| 416 |
-
True,
|
| 417 |
-
],
|
| 418 |
-
[
|
| 419 |
-
"Als ich sechs war, sah ich einmal ein wunderbares Bild",
|
| 420 |
-
"de",
|
| 421 |
-
"examples/female.wav",
|
| 422 |
-
None,
|
| 423 |
-
False,
|
| 424 |
-
False,
|
| 425 |
-
False,
|
| 426 |
-
True,
|
| 427 |
-
],
|
| 428 |
-
[
|
| 429 |
-
"Cuando tenía seis años, vi una vez una imagen magnífica",
|
| 430 |
-
"es",
|
| 431 |
-
"examples/male.wav",
|
| 432 |
-
None,
|
| 433 |
-
False,
|
| 434 |
-
False,
|
| 435 |
-
False,
|
| 436 |
-
True,
|
| 437 |
-
],
|
| 438 |
-
[
|
| 439 |
-
"Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
|
| 440 |
-
"pt",
|
| 441 |
-
"examples/female.wav",
|
| 442 |
-
None,
|
| 443 |
-
False,
|
| 444 |
-
False,
|
| 445 |
-
False,
|
| 446 |
-
True,
|
| 447 |
-
],
|
| 448 |
-
[
|
| 449 |
-
"Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
|
| 450 |
-
"pl",
|
| 451 |
-
"examples/male.wav",
|
| 452 |
-
None,
|
| 453 |
-
False,
|
| 454 |
-
False,
|
| 455 |
-
False,
|
| 456 |
-
True,
|
| 457 |
-
],
|
| 458 |
-
[
|
| 459 |
-
"Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
|
| 460 |
-
"it",
|
| 461 |
-
"examples/female.wav",
|
| 462 |
-
None,
|
| 463 |
-
False,
|
| 464 |
-
False,
|
| 465 |
-
False,
|
| 466 |
-
True,
|
| 467 |
-
],
|
| 468 |
-
[
|
| 469 |
-
"Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
|
| 470 |
-
"tr",
|
| 471 |
-
"examples/female.wav",
|
| 472 |
-
None,
|
| 473 |
-
False,
|
| 474 |
-
False,
|
| 475 |
-
False,
|
| 476 |
-
True,
|
| 477 |
-
],
|
| 478 |
-
[
|
| 479 |
-
"Когда мне было шесть лет, я увидел однажды удивительную картинку",
|
| 480 |
-
"ru",
|
| 481 |
-
"examples/female.wav",
|
| 482 |
-
None,
|
| 483 |
-
False,
|
| 484 |
-
False,
|
| 485 |
-
False,
|
| 486 |
-
True,
|
| 487 |
-
],
|
| 488 |
-
[
|
| 489 |
-
"Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
|
| 490 |
-
"nl",
|
| 491 |
-
"examples/male.wav",
|
| 492 |
-
None,
|
| 493 |
-
False,
|
| 494 |
-
False,
|
| 495 |
-
False,
|
| 496 |
-
True,
|
| 497 |
-
],
|
| 498 |
-
[
|
| 499 |
-
"Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
|
| 500 |
-
"cs",
|
| 501 |
-
"examples/female.wav",
|
| 502 |
-
None,
|
| 503 |
-
False,
|
| 504 |
-
False,
|
| 505 |
-
False,
|
| 506 |
-
True,
|
| 507 |
-
],
|
| 508 |
-
[
|
| 509 |
-
"当我还只有六岁的时候, 看到了一副精彩的插画",
|
| 510 |
-
"zh-cn",
|
| 511 |
-
"examples/female.wav",
|
| 512 |
-
None,
|
| 513 |
-
False,
|
| 514 |
-
False,
|
| 515 |
-
False,
|
| 516 |
-
True,
|
| 517 |
-
],
|
| 518 |
-
[
|
| 519 |
-
"かつて 六歳のとき、素晴らしい絵を見ました",
|
| 520 |
-
"ja",
|
| 521 |
-
"examples/female.wav",
|
| 522 |
-
None,
|
| 523 |
-
False,
|
| 524 |
-
True,
|
| 525 |
-
False,
|
| 526 |
-
True,
|
| 527 |
-
],
|
| 528 |
-
[
|
| 529 |
-
"한번은 내가 여섯 살이었을 때 멋진 그림을 보았습니다.",
|
| 530 |
-
"ko",
|
| 531 |
-
"examples/female.wav",
|
| 532 |
-
None,
|
| 533 |
-
False,
|
| 534 |
-
True,
|
| 535 |
-
False,
|
| 536 |
-
True,
|
| 537 |
-
],
|
| 538 |
-
[
|
| 539 |
-
"Egyszer hat éves koromban láttam egy csodálatos képet",
|
| 540 |
-
"hu",
|
| 541 |
-
"examples/male.wav",
|
| 542 |
-
None,
|
| 543 |
-
False,
|
| 544 |
-
True,
|
| 545 |
-
False,
|
| 546 |
-
True,
|
| 547 |
-
],
|
| 548 |
-
]
|
| 549 |
-
|
| 550 |
|
| 551 |
with gr.Blocks(analytics_enabled=False) as demo:
|
| 552 |
with gr.Row():
|
|
@@ -601,7 +448,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
| 601 |
label="Reference Audio",
|
| 602 |
info="Click on the ✎ button to upload your own target speaker audio",
|
| 603 |
type="filepath",
|
| 604 |
-
value="
|
| 605 |
)
|
| 606 |
mic_gr = gr.Audio(
|
| 607 |
source="microphone",
|
|
@@ -638,25 +485,6 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
| 638 |
out_text_gr = gr.Text(label="Metrics")
|
| 639 |
ref_audio_gr = gr.Audio(label="Reference Audio Used")
|
| 640 |
|
| 641 |
-
with gr.Row():
|
| 642 |
-
gr.Examples(
|
| 643 |
-
examples,
|
| 644 |
-
label="Examples",
|
| 645 |
-
inputs=[
|
| 646 |
-
input_text_gr,
|
| 647 |
-
language_gr,
|
| 648 |
-
ref_gr,
|
| 649 |
-
mic_gr,
|
| 650 |
-
use_mic_gr,
|
| 651 |
-
clean_ref_gr,
|
| 652 |
-
auto_det_lang_gr,
|
| 653 |
-
tos_gr,
|
| 654 |
-
],
|
| 655 |
-
outputs=[video_gr, audio_gr, out_text_gr, ref_audio_gr],
|
| 656 |
-
fn=predict,
|
| 657 |
-
cache_examples=False,
|
| 658 |
-
)
|
| 659 |
-
|
| 660 |
tts_button.click(
|
| 661 |
predict,
|
| 662 |
[
|
|
|
|
| 6 |
import torchaudio
|
| 7 |
|
| 8 |
# download for mecab
|
| 9 |
+
os.system("python -m unidic download")
|
| 10 |
|
| 11 |
# By using XTTS you agree to CPML license https://coqui.ai/cpml
|
| 12 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
|
|
|
| 394 |
article = """
|
| 395 |
|
| 396 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
with gr.Blocks(analytics_enabled=False) as demo:
|
| 399 |
with gr.Row():
|
|
|
|
| 448 |
label="Reference Audio",
|
| 449 |
info="Click on the ✎ button to upload your own target speaker audio",
|
| 450 |
type="filepath",
|
| 451 |
+
value="model/samples/nu-luu-loat.wav",
|
| 452 |
)
|
| 453 |
mic_gr = gr.Audio(
|
| 454 |
source="microphone",
|
|
|
|
| 485 |
out_text_gr = gr.Text(label="Metrics")
|
| 486 |
ref_audio_gr = gr.Audio(label="Reference Audio Used")
|
| 487 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
tts_button.click(
|
| 489 |
predict,
|
| 490 |
[
|