Update duplex.py
duplex.py CHANGED
@@ -19,6 +19,7 @@ from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2P
 DEBUG = os.environ.get("DEBUG", "false")[0] in "ty1"
 MAX_LENGTH = int(os.environ.get("MAX_LENGTH", 1024))
 DEFAULT_LANG = os.environ.get("DEFAULT_LANG", "English")
+HF_AUTH_TOKEN = os.environ.get("HF_AUTH_TOKEN", None)
 
 HEADER = """
 # Poor Man's Duplex
@@ -34,8 +35,8 @@ FOOTER = """
 """.strip()
 
 asr_model_name_es = "jonatasgrosman/wav2vec2-large-xlsr-53-spanish"
-model_instance_es = AutoModelForCTC.from_pretrained(asr_model_name_es)
-processor_es = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model_name_es)
+model_instance_es = AutoModelForCTC.from_pretrained(asr_model_name_es, use_auth_token=HF_AUTH_TOKEN)
+processor_es = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model_name_es, use_auth_token=HF_AUTH_TOKEN)
 asr_es = pipeline(
     "automatic-speech-recognition",
     model=model_instance_es,
@@ -44,7 +45,7 @@ asr_es = pipeline(
     decoder=processor_es.decoder
 )
 tts_model_name = "facebook/tts_transformer-es-css10"
-speak_es = gr.Interface.load(f"huggingface/{tts_model_name}")
+speak_es = gr.Interface.load(f"huggingface/{tts_model_name}", api_key=HF_AUTH_TOKEN)
 transcribe_es = lambda input_file: asr_es(input_file, chunk_length_s=5, stride_length_s=1)["text"]
 def generate_es(text, **kwargs):
     # max_length=100, top_k=100, top_p=50, temperature=0.95, do_sample=True, do_clean=True
@@ -68,13 +69,13 @@ asr_en = pipeline(
     decoder=processor_en.decoder
 )
 tts_model_name = "facebook/fastspeech2-en-ljspeech"
-speak_en = gr.Interface.load(f"huggingface/{tts_model_name}")
+speak_en = gr.Interface.load(f"huggingface/{tts_model_name}", api_key=HF_AUTH_TOKEN)
 transcribe_en = lambda input_file: asr_en(input_file, chunk_length_s=5, stride_length_s=1)["text"]
-generate_iface = gr.Interface.load("huggingface/EleutherAI/gpt-j-6B")
+generate_iface = gr.Interface.load("huggingface/EleutherAI/gpt-j-6B", api_key=HF_AUTH_TOKEN)
 
 empty_audio = 'empty.flac'
 sf.write(empty_audio, [], 16000)
-deuncase = gr.Interface.load("huggingface/pere/DeUnCaser")
+deuncase = gr.Interface.load("huggingface/pere/DeUnCaser", api_key=HF_AUTH_TOKEN)
 
 def generate_en(text, **kwargs):
     response = generate_iface(text)
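The commit reads HF_AUTH_TOKEN once from the environment and threads it through every from_pretrained(..., use_auth_token=...) and gr.Interface.load(..., api_key=...) call, so the hosted models can be loaded with a credential instead of anonymously. A minimal sketch of how the token might be supplied before duplex.py runs; the setdefault call and the "hf_xxx" placeholder are assumptions for illustration, not part of the commit (on Spaces the value would typically come from a repository secret):

import os

# Assumed setup step (not in the commit): expose the token before duplex.py is
# imported, e.g. via a Space secret or an exported shell variable.
os.environ.setdefault("HF_AUTH_TOKEN", "hf_xxx")  # hypothetical placeholder token

# Same lookup the commit adds to duplex.py; it falls back to None when unset.
HF_AUTH_TOKEN = os.environ.get("HF_AUTH_TOKEN", None)
print("auth token configured:", HF_AUTH_TOKEN is not None)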