Spaces:

gospacedev
/

friday

Sleeping

App Files Files Community

friday / app.py

gospacedev

create formatted chat history

1b34aa5 over 1 year ago

raw

history blame

2.49 kB

	import torch
	import spaces
	import numpy as np
	import gradio as gr
	from gtts import gTTS
	from transformers import pipeline
	from huggingface_hub import InferenceClient


	# Hugging Face model identifiers: speech recognition and the instruction-tuned LLM.
	ASR_MODEL_NAME = "openai/whisper-small"
	LLM_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"


	# Seed exchange in the [INST] ... [/INST] prompt format. The literal must
	# start directly with "<s>": an extra quote after the opening triple quote
	# would become part of the prompt text sent to the model.
	system_prompt = """<s>[INST] You are Friday, a helpful and conversational AI assistant and You respond with one to two sentences. [/INST] Hello there! I'm friday how can I help you?</s>"""

	# Raw prompt sent to the LLM; generate() and transcribe() append turns to it.
	chat_history = system_prompt

	# Human-readable "Human: ... / Friday: ..." transcript, printed for debugging.
	formatted_history = ""

	# Remote text-generation client bound to the LLM.
	client = InferenceClient(LLM_MODEL_NAME)

	# First CUDA device when one is available, otherwise CPU.
	device = 0 if torch.cuda.is_available() else "cpu"

	# Speech-to-text pipeline used by transcribe().
	pipe = pipeline(
	    task="automatic-speech-recognition",
	    model=ASR_MODEL_NAME,
	    device=device,
	)


	def generate(user_prompt, temperature=0.1, max_new_tokens=128, top_p=0.95, repetition_penalty=1.0):
	    """Append the user's turn to the shared prompt and return the LLM reply.

	    Args:
	        user_prompt: Text of the user's message.
	        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
	        max_new_tokens: Upper bound on the number of generated tokens.
	        top_p: Nucleus-sampling probability mass.
	        repetition_penalty: Penalty passed through to the generation call.

	    Returns:
	        The value returned by ``client.text_generation`` (called with
	        ``stream=False, details=False, return_full_text=False``).

	    Side effects:
	        Extends the module-level ``chat_history`` with the new ``[INST]``
	        turn and prints the reply. The reply itself is not appended here;
	        ``transcribe`` does that after calling this function.
	    """
	    # The augmented assignment below rebinds the module-level string; without
	    # this declaration Python treats `chat_history` as an unassigned local and
	    # raises UnboundLocalError.
	    global chat_history

	    # Clamp the temperature to a small positive floor.
	    temperature = max(float(temperature), 1e-2)
	    top_p = float(top_p)

	    generate_kwargs = dict(
	        temperature=temperature,
	        max_new_tokens=max_new_tokens,
	        top_p=top_p,
	        repetition_penalty=repetition_penalty,
	        do_sample=True,
	        seed=42,
	    )

	    chat_history += f""" <s>[INST] {user_prompt} [/INST] """

	    output = client.text_generation(
	        chat_history, **generate_kwargs, stream=False, details=False, return_full_text=False)

	    print(output)
	    return output


	@spaces.GPU(duration=60)
	def transcribe(audio):
	    """Turn a recorded utterance into a spoken reply from the assistant.

	    The audio is transcribed with ``pipe``, the text is sent to ``generate``,
	    and the reply is synthesised with gTTS and saved to ``response.mp3``.

	    Args:
	        audio: ``(sample_rate, samples)`` tuple as unpacked below, or ``None``
	            when nothing was recorded. ``samples`` is used as a NumPy array;
	            presumably shaped ``(n,)`` or ``(n, channels)`` -- confirm against
	            the Gradio Audio component in use.

	    Returns:
	        The path ``"response.mp3"``, or ``None`` when there is no audio to
	        process.

	    Side effects:
	        Appends to the module-level ``chat_history`` and
	        ``formatted_history``, writes ``response.mp3`` and prints the
	        transcript.
	    """
	    # Both strings are rebound with `+=`; without the declaration they would
	    # be treated as unassigned locals and raise UnboundLocalError.
	    global chat_history, formatted_history

	    # Nothing recorded: leave the output untouched instead of failing on unpack.
	    if audio is None:
	        return None

	    sr, y = audio
	    if y.size == 0:
	        return None

	    # The ASR pipeline takes single-channel audio; average channels if needed.
	    if y.ndim > 1:
	        y = y.mean(axis=1)

	    y = y.astype(np.float32)

	    # Peak-normalise, skipping all-zero (silent) input to avoid dividing by
	    # zero and filling the signal with NaNs.
	    peak = np.max(np.abs(y))
	    if peak > 0:
	        y /= peak

	    inputs = pipe({"sampling_rate": sr, "raw": y})["text"]

	    formatted_history += f"""Human: {inputs}\n"""

	    llm_response = generate(inputs)

	    # generate() added the user turn to the prompt; close it with the reply.
	    chat_history += f""" {llm_response}</s>"""

	    formatted_history += f"""Friday: {llm_response}\n"""

	    audio_response = gTTS(llm_response)
	    audio_response.save("response.mp3")

	    print(formatted_history)

	    return "response.mp3"


	# UI: a microphone input, an autoplaying audio output and a trigger button.
	with gr.Blocks() as demo:
	    # Page header; the <h1> and <center> elements are explicitly closed.
	    gr.HTML("<center><h1>Friday: AI Virtual Assistant</h1></center>")

	    with gr.Row():
	        audio_input = gr.Audio(label="Human", sources="microphone")
	        output_audio = gr.Audio(label="Friday", type="filepath",
	                                interactive=False,
	                                autoplay=True,
	                                elem_classes="audio")

	    # NOTE(review): the original indentation was lost; the button is assumed
	    # to sit below the row rather than inside it -- confirm the layout.
	    transcribe_btn = gr.Button("Transcribe")
	    # Clicking runs transcribe() on the recording and plays the returned file.
	    transcribe_btn.click(fn=transcribe, inputs=audio_input,
	                         outputs=output_audio)


	demo.queue()
	demo.launch()