# from unsloth import FastLanguageModel
# import torch
from llama_index.core.storage.storage_context import StorageContext
from llama_index.core.indices.loading import load_index_from_storage
from openai import OpenAI
import gradio as gr

# model, tokenizer = FastLanguageModel.from_pretrained('./unified_model')
# client = FastLanguageModel.for_inference(model)

# Directory holding the persisted llama_index storage for the AILA corpus.
INDEX_DIR = 'aila_indices'

# Initialize the OpenAI client (the API key is read from the OPENAI_API_KEY environment variable).
client = OpenAI()

# Load the persisted index and build a retrieval query engine over it.
storage_context = StorageContext.from_defaults(persist_dir=INDEX_DIR)
index = load_index_from_storage(storage_context)
query_engine = index.as_query_engine(similarity_top_k=8)


# def generate_response(instruction, chat_history):
#     """Generates a response using your fine-tuned model."""
#     # FastLanguageModel.for_inference(model)  # Enable native 2x faster inference within the function
#     prompt = f"""### Instruction:
# Answer the following question.
# ### Question:
# {instruction}
# Provide a unique, concise, and non-repetitive answer.
# ### Answer:"""
#     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
#     with torch.no_grad():
#         outputs = model.generate(
#             **inputs,
#             early_stopping=True,
#             min_length=50,
#             length_penalty=2,
#             do_sample=True,
#             max_new_tokens=300,
#             top_p=0.95,
#             top_k=50,
#             temperature=0.7,
#             repetition_penalty=1.2,
#             num_return_sequences=1,
#         )
#     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     response = response.split("### Answer:")[-1]
#     return response


def generate_response_rag_index(instruction, chat_history):
    """Answer a question via the RAG query engine (chat_history is currently unused)."""
    response = query_engine.query(instruction)
    return str(response)


def update_chat_history(chat_history, user_message, bot_message):
    """Update chat history to maintain relevance and avoid excessive growth."""
    chat_history['user'].append(user_message)
    chat_history['bot'].append(bot_message)
    # Keep only the last 5 interactions.
    if len(chat_history['user']) > 5:
        chat_history['user'] = chat_history['user'][-5:]
        chat_history['bot'] = chat_history['bot'][-5:]
    return chat_history


def chatbot(input_text, chat_history):
    """Gradio callback: answer the user's message and append the exchange to the chat history."""
    messages = {
        "user": [],
        "bot": [],
    }
    for user_msg, bot_msg in chat_history:
        messages["user"].append(user_msg)
        messages["bot"].append(bot_msg)

    # bot_response = generate_response(input_text, messages)
    bot_response = generate_response_rag_index(input_text, messages)
    chat_history.append(("User: " + input_text, bot_response))
    messages = update_chat_history(messages, input_text, bot_response)
    # Clear the textbox and return the updated chat history.
    return "", chat_history


with gr.Blocks() as demo:
    gr.Markdown('## AILA INTERFACE DEMO')
    with gr.Row():
        gr.Image(value="up_2017_logo_en.png", interactive=False, label="Upatras Logo", width=150, height=100)
        gr.Image(value="aila_new.png", interactive=False, label="AILA project Logo", width=150, height=100)
        gr.Image(value="banner-horizontal-default-en.png", interactive=False, label="AUTH Logo", width=150, height=100)
    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Type your message here...",
            label="Your Message",
            lines=1,
        )
    submit_button = gr.Button('Submit')
    chat_history = gr.Chatbot()
    submit_button.click(
        chatbot,
        inputs=[user_input, chat_history],
        outputs=[user_input, chat_history],
    )

demo.launch()
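
# ---------------------------------------------------------------------------
# Reference sketch (kept commented out, not part of the app): one way the
# persisted index in 'aila_indices' could have been built with llama_index.
# The source-document folder name 'aila_docs' is an assumption used only for
# illustration; it is not defined anywhere in this script.
#
# from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
#
# documents = SimpleDirectoryReader('aila_docs').load_data()   # load the raw AILA documents
# index = VectorStoreIndex.from_documents(documents)           # embed and index them (OpenAI embeddings by default)
# index.storage_context.persist(persist_dir='aila_indices')    # write the files this app loads at startup
# ---------------------------------------------------------------------------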