Update app.py
app.py CHANGED

@@ -6,17 +6,24 @@ from gtts import gTTS
 from transformers import pipeline
 from huggingface_hub import InferenceClient
 
+# Model names
 ASR_MODEL_NAME = "openai/whisper-small"
 LLM_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
 
+# Initial system prompt
 system_prompt = """"<s>[INST] You are Friday, a helpful and conversational AI assistant, and you respond with one to two sentences. [/INST] Hello there! I'm Friday, how can I help you?</s>"""
 
-
+# Global variables for history
+instruct_history = system_prompt
+formatted_history = ""
 
+# Create inference client for text generation
 client = InferenceClient(LLM_MODEL_NAME)
 
+# Set device for ASR pipeline
 device = 0 if torch.cuda.is_available() else "cpu"
 
+# ASR pipeline
 pipe = pipeline(
     task="automatic-speech-recognition",
     model=ASR_MODEL_NAME,

@@ -44,7 +51,9 @@ def generate(instruct_history, temperature=0.1, max_new_tokens=128, top_p=0.95,
     return output
 
 @spaces.GPU(duration=60)
-def transcribe(audio, instruct_history=instruct_history):
+def transcribe(audio):
+    global instruct_history, formatted_history
+
     sr, y = audio
     y = y.astype(np.float32)
     y /= np.max(np.abs(y))

@@ -53,27 +62,25 @@ def transcribe(audio, instruct_history=instruct_history):
     transcribed_user_audio = pipe({"sampling_rate": sr, "raw": y})["text"]
 
     # Append user input to history
-    formatted_history
-    instruct_history += f"
+    formatted_history += f"😃 Human: {transcribed_user_audio}\n\n"
+    instruct_history += f"<s>[INST] {transcribed_user_audio} [/INST] "
 
     # Generate LLM response
     llm_response = generate(instruct_history)
 
     # Append AI response to history
-    instruct_history += f"
-    formatted_history += f"
+    instruct_history += f" {llm_response}</s>"
+    formatted_history += f"🤖 Friday: {llm_response}\n\n"
 
     # Convert AI response to audio
     audio_response = gTTS(llm_response)
     audio_response.save("response.mp3")
 
-    #
-
-
-    return "response.mp3", full_history
+    # Return the full conversation history
+    return "response.mp3", formatted_history
 
 with gr.Blocks() as demo:
-    gr.HTML("<center><h1>Friday: AI Virtual Assistant
+    gr.HTML("<center><h1>Friday: AI Virtual Assistant 🤖</h1><center>")
 
     with gr.Row():
         audio_input = gr.Audio(label="Human", sources="microphone")