import os

import gradio as gr
from openai import OpenAI

# Read configuration from environment variables.
model_url = os.getenv('MODEL_URL', 'http://localhost:8000/v1')
model_name = os.getenv('MODEL_NAME', 'default-model-name')  # set MODEL_NAME in the environment
temperature = float(os.getenv('TEMPERATURE', '0.8'))
stop_token_ids = os.getenv('STOP_TOKEN_IDS', '')  # comma-separated stop token IDs
host = os.getenv('HOST', None)
try:
    port = int(os.getenv('PORT', '8001'))
except ValueError:
    port = 8001
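
# Example configuration (illustrative values; STOP_TOKEN_IDS=2 assumes a
# Mistral-style EOS token and is not required):
#   export MODEL_URL=http://localhost:8000/v1
#   export MODEL_NAME=TheBloke/Mistral-7B-Instruct-v0.2-AWQ
#   export TEMPERATURE=0.8
#   export STOP_TOKEN_IDS=2
#   export PORT=8001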

# Set OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"  # vLLM does not check the key, but the client requires one
openai_api_base = model_url

# Create an OpenAI client to interact with the API server.
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
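
# The endpoint is assumed to be vLLM's OpenAI-compatible server, started with e.g.:
#   python -m vllm.entrypoints.openai.api_server \
#       --model TheBloke/Mistral-7B-Instruct-v0.2-AWQ --port 8000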


def predict(message, history):
    # Convert Gradio's (user, assistant) history pairs to the OpenAI message format.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})
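    # The resulting list alternates user/assistant turns and ends with the new
    # user message, e.g. [{"role": "user", ...}, {"role": "assistant", ...}, ...].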
    # Create a streaming chat completion request and send it to the API server.
    # 'repetition_penalty' and 'stop_token_ids' are vLLM-specific sampling
    # parameters; extra_body passes them through the OpenAI client unchanged.
    stream = client.chat.completions.create(
        model=model_name,                 # model name served by vLLM
        messages=history_openai_format,   # chat history plus the new message
        temperature=temperature,          # temperature for text generation
        stream=True,                      # stream the response
        extra_body={
            'repetition_penalty': 1,
            'stop_token_ids': [
                int(tok.strip()) for tok in stop_token_ids.split(',') if tok.strip()
            ] if stop_token_ids else [],
        },
    )
    # Read the response stream and yield the accumulated text for live display.
    partial_message = ""
    for chunk in stream:
        partial_message += (chunk.choices[0].delta.content or "")
        yield partial_message


with gr.Blocks(title="MethodAI 0.15", theme=gr.themes.Soft()) as demo:
    with gr.Row():
        with gr.Column(scale=1):
            # Not yet wired to a handler: uploaded PDFs are currently ignored.
            upload_btn = gr.UploadButton("Click to upload PDFs", file_types=[".pdf"])
        with gr.Column(scale=4):
            # Chat interface backed by the streaming predict() generator.
            gr.ChatInterface(predict)

# Queuing is required for streamed responses.
demo.queue()
demo.launch(server_name=host, server_port=port, share=True)
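
# Example invocation (the filename "app.py" is an assumption; adjust to your setup):
#   MODEL_URL=http://localhost:8000/v1 MODEL_NAME=TheBloke/Mistral-7B-Instruct-v0.2-AWQ python app.py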