Spaces:
Runtime error
Runtime error
Update duplex.py
Browse files
duplex.py
CHANGED
|
@@ -3,8 +3,11 @@ import json
|
|
| 3 |
import random
|
| 4 |
import string
|
| 5 |
|
|
|
|
| 6 |
import gradio as gr
|
| 7 |
import requests
|
|
|
|
|
|
|
| 8 |
from transformers import pipeline, set_seed
|
| 9 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 10 |
import logging
|
|
@@ -14,11 +17,14 @@ import gradio as gr
|
|
| 14 |
from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM
|
| 15 |
|
| 16 |
DEBUG = os.environ.get("DEBUG", "false")[0] in "ty1"
|
| 17 |
-
HF_AUTH_TOKEN = os.environ.get("HF_AUTH_TOKEN", None)
|
| 18 |
MAX_LENGTH = int(os.environ.get("MAX_LENGTH", 1024))
|
|
|
|
| 19 |
|
| 20 |
HEADER = """
|
| 21 |
# Poor Man's Duplex
|
|
|
|
|
|
|
|
|
|
| 22 |
""".strip()
|
| 23 |
|
| 24 |
FOOTER = """
|
|
@@ -45,7 +51,8 @@ def generate_es(text, **kwargs):
|
|
| 45 |
api_uri = "https://hf.space/embed/bertin-project/bertin-gpt-j-6B/+/api/predict/"
|
| 46 |
response = requests.post(api_uri, data=json.dumps({"data": [text, 100, 100, 50, 0.95, True, True]}))
|
| 47 |
if response.ok:
|
| 48 |
-
|
|
|
|
| 49 |
return response.json()["data"][0]
|
| 50 |
else:
|
| 51 |
return ""
|
|
@@ -65,9 +72,14 @@ speak_en = gr.Interface.load(f"huggingface/{tts_model_name}")
|
|
| 65 |
transcribe_en = lambda input_file: asr_en(input_file, chunk_length_s=5, stride_length_s=1)["text"]
|
| 66 |
generate_iface = gr.Interface.load("huggingface/EleutherAI/gpt-j-6B")
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
def generate_en(text, **kwargs):
|
| 69 |
response = generate_iface(text)
|
| 70 |
-
|
|
|
|
| 71 |
return response or ""
|
| 72 |
|
| 73 |
|
|
@@ -97,11 +109,28 @@ def select_lang_vars(lang):
|
|
| 97 |
return AGENT, USER, CONTEXT
|
| 98 |
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
def chat_with_gpt(lang, agent, user, context, audio_in, history):
|
|
|
|
|
|
|
| 102 |
generate, transcribe, speak = select_lang(lang)
|
| 103 |
AGENT, USER, _ = select_lang_vars(lang)
|
| 104 |
-
user_message = transcribe(audio_in)
|
| 105 |
# agent = AGENT
|
| 106 |
# user = USER
|
| 107 |
generation_kwargs = {
|
|
@@ -156,17 +185,17 @@ def chat_with_gpt(lang, agent, user, context, audio_in, history):
|
|
| 156 |
if not response.strip():
|
| 157 |
response = "Lo siento, no puedo hablar ahora" if lang.lower() == "Spanish" else "Sorry, can't talk right now"
|
| 158 |
history.append((user_message, response))
|
| 159 |
-
return history, history, speak(response)
|
| 160 |
|
| 161 |
|
| 162 |
with gr.Blocks() as demo:
|
| 163 |
gr.Markdown(HEADER)
|
| 164 |
-
lang = gr.Radio(label="Language", choices=["English", "Spanish"],
|
| 165 |
-
AGENT, USER, CONTEXT = select_lang_vars(
|
| 166 |
context = gr.Textbox(label="Context", lines=5, value=CONTEXT)
|
| 167 |
with gr.Row():
|
| 168 |
audio_in = gr.Audio(label="User", source="microphone", type="filepath")
|
| 169 |
-
audio_out = gr.Audio(label="Agent", interactive=False)
|
| 170 |
# chat_btn = gr.Button("Submit")
|
| 171 |
with gr.Row():
|
| 172 |
user = gr.Textbox(label="User", value=USER)
|
|
@@ -175,7 +204,8 @@ with gr.Blocks() as demo:
|
|
| 175 |
history = gr.Variable(value=[])
|
| 176 |
chatbot = gr.Variable() # gr.Chatbot(color_map=("green", "gray"), visible=False)
|
| 177 |
# chat_btn.click(chat_with_gpt, inputs=[lang, agent, user, context, audio_in, history], outputs=[chatbot, history, audio_out])
|
| 178 |
-
|
|
|
|
| 179 |
gr.Markdown(FOOTER)
|
| 180 |
|
| 181 |
demo.launch()
|
|
|
|
| 3 |
import random
|
| 4 |
import string
|
| 5 |
|
| 6 |
+
import numpy as np
|
| 7 |
import gradio as gr
|
| 8 |
import requests
|
| 9 |
+
import soundfile as sf
|
| 10 |
+
|
| 11 |
from transformers import pipeline, set_seed
|
| 12 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 13 |
import logging
|
|
|
|
| 17 |
from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM
|
| 18 |
|
| 19 |
# Runtime configuration, read once from the environment at import time.

# DEBUG is enabled when the variable starts with "t", "y" or "1" in any letter
# case (e.g. "true", "Yes", "1"). Slicing with [:1] instead of indexing [0]
# keeps an empty value (DEBUG="") from raising IndexError, and comparing
# against a tuple keeps that empty string from matching as a substring.
DEBUG = os.environ.get("DEBUG", "false").strip().lower()[:1] in ("t", "y", "1")

# Integer limit taken from MAX_LENGTH; defaults to 1024 when unset.
MAX_LENGTH = int(os.environ.get("MAX_LENGTH", 1024))

# Language pre-selected in the UI; defaults to "English" when unset.
DEFAULT_LANG = os.environ.get("DEFAULT_LANG", "English")
|
| 22 |
|
| 23 |
HEADER = """
|
| 24 |
# Poor Man's Duplex
|
| 25 |
+
|
| 26 |
+
Talk to a language model like you talk on a Walkie-Talkie! Well, with larger latencies.
|
| 27 |
+
The models are [EleutherAI's GPT-J-6B](https://huggingface.co/EleutherAI/gpt-j-6B) for English, and [BERTIN GPT-J-6B](https://huggingface.co/bertin-project/bertin-gpt-j-6B) for Spanish.
|
| 28 |
""".strip()
|
| 29 |
|
| 30 |
FOOTER = """
|
|
|
|
| 51 |
api_uri = "https://hf.space/embed/bertin-project/bertin-gpt-j-6B/+/api/predict/"
|
| 52 |
response = requests.post(api_uri, data=json.dumps({"data": [text, 100, 100, 50, 0.95, True, True]}))
|
| 53 |
if response.ok:
|
| 54 |
+
if DEBUG:
|
| 55 |
+
print(response.json())
|
| 56 |
return response.json()["data"][0]
|
| 57 |
else:
|
| 58 |
return ""
|
|
|
|
| 72 |
transcribe_en = lambda input_file: asr_en(input_file, chunk_length_s=5, stride_length_s=1)["text"]
|
| 73 |
generate_iface = gr.Interface.load("huggingface/EleutherAI/gpt-j-6B")
|
| 74 |
|
| 75 |
+
empty_audio = 'empty.flac'
|
| 76 |
+
sf.write(empty_audio, [], 16000)
|
| 77 |
+
deuncase = gr.Interface.load("huggingface/pere/DeUnCaser")
|
| 78 |
+
|
| 79 |
def generate_en(text, **kwargs):
    """Generate an English continuation of ``text``.

    Passes ``text`` to the module-level ``generate_iface`` callable and, when
    ``DEBUG`` is set, prints the raw result.

    Args:
        text: Prompt handed to ``generate_iface``.
        **kwargs: Accepted but not used by this function.

    Returns:
        Whatever ``generate_iface`` returned, or ``""`` if that value is falsy.
    """
    generated = generate_iface(text)
    if DEBUG:
        print(generated)
    if not generated:
        # Normalise None / empty results to an empty string for callers.
        return ""
    return generated
|
| 84 |
|
| 85 |
|
|
|
|
| 109 |
return AGENT, USER, CONTEXT
|
| 110 |
|
| 111 |
|
| 112 |
+
def format_chat(history):
    """Render the conversation history as a collapsible HTML log.

    Args:
        history: Iterable of ``(user, bot)`` pairs, one per exchange.

    Returns:
        An HTML string: a ``<details>`` element summarised as
        "Conversation log" that wraps one styled ``<div>`` per message
        (``data-testid="user"`` followed by ``data-testid="bot"``).
    """
    # Function-scope import so the block does not depend on the file header.
    from html import escape

    interventions = []
    for user, bot in history:
        # Message text is interpolated into markup, so escape it: any "<" or
        # "&" in a message must be displayed as text, not parsed as HTML.
        user_html = escape(str(user), quote=False)
        bot_html = escape(str(bot), quote=False)
        interventions.append(f"""
            <div data-testid="user" style="background-color:#16a34a" class="px-3 py-2 rounded-[22px] rounded-bl-none place-self-start text-white ml-7 text-sm">{user_html}</div>
            <div data-testid="bot" style="background-color:gray" class="px-3 py-2 rounded-[22px] rounded-br-none text-white ml-7 text-sm">{bot_html}</div>
        """)
    bubbles = "".join(interventions)
    # The element opened with <details> must be closed with </details>; the
    # <summary> is already closed on the first line.
    return f"""<details><summary>Conversation log</summary>
    <div class="overflow-y-auto h-[40vh]">
        <div class="flex flex-col items-end space-y-4 p-3">
            {bubbles}
        </div>
    </div>
    </details>"""
|
| 126 |
+
|
| 127 |
|
| 128 |
def chat_with_gpt(lang, agent, user, context, audio_in, history):
|
| 129 |
+
if not audio_in:
|
| 130 |
+
return history, history, empty_audio, format_chat(history)
|
| 131 |
generate, transcribe, speak = select_lang(lang)
|
| 132 |
AGENT, USER, _ = select_lang_vars(lang)
|
| 133 |
+
user_message = deuncase(transcribe(audio_in))
|
| 134 |
# agent = AGENT
|
| 135 |
# user = USER
|
| 136 |
generation_kwargs = {
|
|
|
|
| 185 |
if not response.strip():
|
| 186 |
response = "Lo siento, no puedo hablar ahora" if lang.lower() == "Spanish" else "Sorry, can't talk right now"
|
| 187 |
history.append((user_message, response))
|
| 188 |
+
return history, history, speak(response), format_chat(history)
|
| 189 |
|
| 190 |
|
| 191 |
with gr.Blocks() as demo:
|
| 192 |
gr.Markdown(HEADER)
|
| 193 |
+
lang = gr.Radio(label="Language", choices=["English", "Spanish"], value=DEFAULT_LANG, type="value")
|
| 194 |
+
AGENT, USER, CONTEXT = select_lang_vars(DEFAULT_LANG)
|
| 195 |
context = gr.Textbox(label="Context", lines=5, value=CONTEXT)
|
| 196 |
with gr.Row():
|
| 197 |
audio_in = gr.Audio(label="User", source="microphone", type="filepath")
|
| 198 |
+
audio_out = gr.Audio(label="Agent", interactive=False, value=empty_audio)
|
| 199 |
# chat_btn = gr.Button("Submit")
|
| 200 |
with gr.Row():
|
| 201 |
user = gr.Textbox(label="User", value=USER)
|
|
|
|
| 204 |
history = gr.Variable(value=[])
|
| 205 |
chatbot = gr.Variable() # gr.Chatbot(color_map=("green", "gray"), visible=False)
|
| 206 |
# chat_btn.click(chat_with_gpt, inputs=[lang, agent, user, context, audio_in, history], outputs=[chatbot, history, audio_out])
|
| 207 |
+
log = gr.HTML()
|
| 208 |
+
audio_in.change(chat_with_gpt, inputs=[lang, agent, user, context, audio_in, history], outputs=[chatbot, history, audio_out, log])
|
| 209 |
gr.Markdown(FOOTER)
|
| 210 |
|
| 211 |
demo.launch()
|