# -*- coding: utf-8 -*-
'''
A mediator: a Gradio server that forwards chat requests to OpenAI-compatible APIs.
'''
import json

import gradio as gr
from openai import AzureOpenAI, OpenAI
def http_bot(messages, argsbox):
    '''Parse the JSON-encoded messages and args, call the selected backend, and return the reply text.'''
    args = json.loads(argsbox)
    messages = json.loads(messages)
    print(messages)
    print(argsbox)

    if args["model"] == "gpt-35-turbo":
        # Route gpt-35-turbo requests through the Azure OpenAI gateway.
        baseurl = "https://hkust.azure-api.net"
        client = AzureOpenAI(
            api_version="2023-07-01-preview",
            azure_endpoint=baseurl,
            api_key=args["api_key"],
        )
    else:
        # Any other model goes to an OpenAI-compatible endpoint at the supplied base_url.
        client = OpenAI(api_key=args["api_key"], base_url=args["base_url"])

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=args["model"],
        temperature=float(args["temperature"]),
        max_tokens=int(args["max_tokens"]),
    )
    print(chat_completion)
    return chat_completion.choices[0].message.content
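
# Example payloads (an illustrative sketch: the field names come from http_bot above,
# but the concrete key, model, and limit values here are assumptions for demonstration):
#
#   messages JSON (the "Input" box):
#     [{"role": "system", "content": "You are a helpful assistant."},
#      {"role": "user", "content": "Say hello."}]
#
#   args JSON (the "Args" box):
#     {"api_key": "sk-...", "base_url": "https://api.openai.com/v1",
#      "model": "gpt-4o-mini", "temperature": 0.7, "max_tokens": 256}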
with gr.Blocks() as demo:
    gr.Markdown("# OpenAI API mediator demo\n")
    inputbox = gr.Textbox(label="Input",
                          placeholder="JSON-encoded list of chat messages")
    argsbox = gr.Textbox(label="Args",
                         placeholder="a JSON dict of {api_key, base_url, model, temperature, max_tokens, ...}")
    outputbox = gr.Textbox(label="Output",
                           placeholder="Generated result from the model")
    submit = gr.Button("Submit")
    submit.click(http_bot, [inputbox, argsbox], [outputbox], api_name="submit")

demo.launch(share=True)
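
# Example client call (a minimal sketch, not part of this app: run it from a separate
# script, assuming the gradio_client package is installed and the URL below is replaced
# with the local or share URL printed by launch(); key, model, and limit values are
# placeholders, not real credentials):
#
#   from gradio_client import Client
#   import json
#
#   client = Client("http://127.0.0.1:7860")
#   reply = client.predict(
#       json.dumps([{"role": "user", "content": "Hello!"}]),        # Input box: messages JSON
#       json.dumps({"api_key": "sk-...",
#                   "base_url": "https://api.openai.com/v1",
#                   "model": "gpt-4o-mini",
#                   "temperature": 0.7,
#                   "max_tokens": 128}),                            # Args box: args JSON
#       api_name="/submit",
#   )
#   print(reply)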