stablelm-2-chat

Runtime error

App Files Community

pvduy committed on Apr 4, 2024

Commit

5081c38

1 Parent(s): d6662ca

init stablelm 2 chat

Browse files

Files changed (1) hide show

app.py +11 -10

app.py CHANGED Viewed

@@ -23,11 +23,12 @@ def parse_args():
 @spaces.GPU()
 def predict(message, history, system_prompt, temperature, max_tokens):
     global model, tokenizer, device
-    instruction = "<|im_start|>system\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n<|im_end|>\n"
     for human, assistant in history:
-        instruction += '<|im_start|>user\n' + human + '\n<|im_end|>\n<|im_start|>assistant\n' + assistant
-    instruction += '\n<|im_start|>user\n' + message + '\n<|im_end|>\n<|im_start|>assistant\n'
-    problem = [instruction]
     stop_tokens = ["<|endoftext|>", "<|im_end|>"]
     streamer = TextIteratorStreamer(tokenizer, timeout=100.0, skip_prompt=True, skip_special_tokens=True)
     enc = tokenizer(problem, return_tensors="pt", padding=True, truncation=True)
@@ -61,14 +62,14 @@ def predict(message, history, system_prompt, temperature, max_tokens):
 if __name__ == "__main__":
     args = parse_args()
-    tokenizer = AutoTokenizer.from_pretrained("stabilityai/stable-code-instruct-3b")
-    model = AutoModelForCausalLM.from_pretrained("stabilityai/stable-code-instruct-3b", torch_dtype=torch.bfloat16)
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     model = model.to(device)
     gr.ChatInterface(
         predict,
-        title="Stable Code Instruct Chat - Demo",
-        description="Chat Model Stable Code 3B",
         theme="soft",
         chatbot=gr.Chatbot(label="Chat History",),
         textbox=gr.Textbox(placeholder="input", container=False, scale=7),
@@ -76,8 +77,8 @@ if __name__ == "__main__":
         undo_btn="Delete Previous",
         clear_btn="Clear",
         additional_inputs=[
-            gr.Textbox("A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.", label="System Prompt"),
-            gr.Slider(0, 1, 0.9, label="Temperature"),
             gr.Slider(100, 2048, 1024, label="Max Tokens"),
         ],
         additional_inputs_accordion_name="Parameters",

 @spaces.GPU()
 def predict(message, history, system_prompt, temperature, max_tokens):
     global model, tokenizer, device
+    messages = [{'role': 'system', 'content': system_prompt}]
     for human, assistant in history:
+        messages.append({'role': 'user', 'content': human})
+        messages.append({'role': 'assistant', 'content': assistant})
+    messages.append({'role': 'user', 'content': message})
+    problem = [tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)]
     stop_tokens = ["<|endoftext|>", "<|im_end|>"]
     streamer = TextIteratorStreamer(tokenizer, timeout=100.0, skip_prompt=True, skip_special_tokens=True)
     enc = tokenizer(problem, return_tensors="pt", padding=True, truncation=True)
 if __name__ == "__main__":
     args = parse_args()
+    tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-2-chat", trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-2-chat", trust_remote_code=True, torch_dtype=torch.bfloat16)
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     model = model.to(device)
     gr.ChatInterface(
         predict,
+        title="StableLM 2 Chat - Demo",
+        description="StableLM 2 Chat - StabilityAI",
         theme="soft",
         chatbot=gr.Chatbot(label="Chat History",),
         textbox=gr.Textbox(placeholder="input", container=False, scale=7),
         undo_btn="Delete Previous",
         clear_btn="Clear",
         additional_inputs=[
+            gr.Textbox("You are a helpful assistant.", label="System Prompt"),
+            gr.Slider(0, 1, 0.5, label="Temperature"),
             gr.Slider(100, 2048, 1024, label="Max Tokens"),
         ],
         additional_inputs_accordion_name="Parameters",