Spaces:
Runtime error
Runtime error
| import spaces | |
| import os | |
| import json | |
| from vllm import LLM, SamplingParams | |
| from transformers import AutoTokenizer | |
def _build_messages(message, history, system_prompt):
    """Assemble the chat-template message list for one request.

    The list starts with the system prompt, replays every earlier
    (user, assistant) pair from ``history`` in order, and ends with the new
    user ``message``.
    """
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        # NOTE(review): pair-style chat history may carry None for a side of
        # a turn that has no text yet -- confirm against the Gradio version in
        # use. None content is skipped so it never reaches the chat template.
        if human is not None:
            messages.append({"role": "user", "content": human})
        if assistant is not None:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    return messages


def predict(message, history, system_prompt, temperature, max_tokens):
    """Generate a chat reply and yield it as progressively longer prefixes.

    Relies on the module-level ``tokenizer`` and ``llm`` objects that the
    ``__main__`` section creates before the UI is launched.

    Args:
        message: The new user message.
        history: Iterable of (user, assistant) pairs from earlier turns.
        system_prompt: Text placed in the leading system message.
        temperature: Sampling temperature handed to ``SamplingParams``.
        max_tokens: Upper bound on generated tokens; coerced to ``int``
            because UI sliders may deliver a float.

    Yields:
        The generated text truncated to 1, 2, ... characters, ending with the
        full text. Nothing is yielded when the completion is empty.
    """
    messages = _build_messages(message, history, system_prompt)
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    stop_tokens = ["<|im_end|>", "<|endoftext|>", "<|im_start|>"]
    sampling_params = SamplingParams(
        temperature=temperature,
        top_p=1,
        max_tokens=int(max_tokens),
        stop=stop_tokens,
    )
    completions = llm.generate(prompt, sampling_params)
    for output in completions:
        print('==========================question=============================')
        print(output.prompt)
        generated_text = output.outputs[0].text
        print('===========================answer=============================')
        print(generated_text)
        # The completion is already fully generated at this point; it is
        # replayed one character at a time so the chat UI shows it growing.
        for end in range(1, len(generated_text) + 1):
            yield generated_text[:end]
if __name__ == "__main__":
    # The UI below is built entirely from `gr.*`, but gradio was never
    # imported, so the script failed with NameError before launching.
    import gradio as gr

    # `tokenizer` and `llm` are module globals read by predict().
    path = "stabilityai/stablelm-2-12b-chat"
    tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
    llm = LLM(model=path, tensor_parallel_size=1, trust_remote_code=True)

    # NOTE(review): retry_btn / undo_btn / clear_btn and
    # additional_inputs_accordion_name look version-specific -- confirm they
    # are accepted by the Gradio release this Space pins.
    gr.ChatInterface(
        predict,
        title="LLM playground",
        description="This is a LLM playground for StableLM",
        theme="soft",
        chatbot=gr.Chatbot(height=1400, label="Chat History"),
        textbox=gr.Textbox(placeholder="input", container=False, scale=7),
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
        # Order must match predict()'s extra parameters:
        # system_prompt, temperature, max_tokens.
        additional_inputs=[
            gr.Textbox("You are a helpful assistant.", label="System Prompt"),
            gr.Slider(0, 1, 0.7, label="Temperature"),
            # step=1 keeps the slider on whole token counts.
            gr.Slider(100, 2048, 1024, step=1, label="Max Tokens"),
        ],
        additional_inputs_accordion_name="Parameters",
        examples=[
            ["implement snake game using pygame"],
            ["Can you explain briefly to me what is the Python programming language?"],
            ["write a program to find the factorial of a number"],
        ],
    ).queue().launch()