Spaces:

PhysicsWallahAI
/

Aryabhata-Demo

Runtime error

App Files Files Community

pw-ai-research committed on Jul 21

Commit

db1f4a2

verified ·

1 Parent(s): fdedf62

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -8

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# import spaces
 import gradio as gr
 import transformers
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
@@ -6,12 +5,11 @@ from transformers import StopStringCriteria, StoppingCriteriaList
 from datasets import load_dataset, concatenate_datasets
 import torch
-from vllm import LLM, SamplingParams
-llm = LLM(model="PhysicsWallahAI/Aryabhata-1.0")
-sampling_params = SamplingParams(temperature=0.0, max_tokens=4*1024, stop=["<|im_end|>", "<|end|>", "<im_start|>", "⁠```python\n", "⁠<|im_start|>", "]}}]}}]"])
 def process_questions(example):
     example["question_text"] = example["question"]
@@ -27,14 +25,53 @@ dataset = concatenate_datasets([
 examples = dataset.map(process_questions, remove_columns=dataset.column_names)["question_text"]
 def generate_answer_stream(question):
     messages = [
         {'role': 'system', 'content': 'Think step-by-step; put only the final answer inside \\boxed{}.'},
         {'role': 'user', 'content': question}
     ]
-    results = llm.chat(messages, sampling_params)
-    return results[0].outputs[0].text.strip()
 demo = gr.Interface(
     fn=generate_answer_stream,

 import gradio as gr
 import transformers
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 from datasets import load_dataset, concatenate_datasets
 import torch
+import threading
+model_id = "PhysicsWallahAI/Aryabhata-1.0"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
 def process_questions(example):
     example["question_text"] = example["question"]
 examples = dataset.map(process_questions, remove_columns=dataset.column_names)["question_text"]
+# add options
+stop_strings = ["<|im_end|>", "<|end|>", "<im_start|>", "```python\n", "<|im_start|>", "]}}]}}]"]
def strip_bad_tokens(s, stop_strings):
    """Remove one trailing stop string from ``s``, if present.

    The entries of ``stop_strings`` are tried in order and only the first one
    that ``s`` ends with is removed. Stop strings that occur anywhere other
    than the very end of ``s`` are left untouched.

    Args:
        s: Text to clean.
        stop_strings: Iterable of candidate suffixes.

    Returns:
        ``s`` without the first matching suffix, or ``s`` unchanged when no
        entry matches.
    """
    for suffix in stop_strings:
        # Every string ends with "", and s[:-0] is s[:0] == "", so an empty
        # entry would otherwise wipe out the whole text. Skip it instead.
        if suffix and s.endswith(suffix):
            return s[:-len(suffix)]
    return s
 def generate_answer_stream(question):
     """Stream the model's answer to ``question`` as it is generated.

     Builds a system + user chat prompt, runs ``model.generate`` on a
     background thread, and yields the accumulated answer text each time the
     streamer produces a new chunk.

     Args:
         question: The user's question text.

     Yields:
         The answer generated so far, after ``strip_bad_tokens`` has removed a
         trailing stop string (if any).
     """
     messages = [
         {'role': 'system', 'content': 'Think step-by-step; put only the final answer inside \\boxed{}.'},
         {'role': 'user', 'content': question}
     ]
     text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True
     )
     # The model is loaded with device_map="auto", so the prompt tensors have to
     # be moved to the model's device before generate() is called.
     inputs = tokenizer([text], return_tensors="pt").to(model.device)
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     stopping = StoppingCriteriaList([StopStringCriteria(tokenizer, stop_strings)])
     # generate() runs on a worker thread so this generator can consume the
     # streamer and yield partial output while generation is still in progress.
     thread = threading.Thread(
         target=model.generate,
         kwargs=dict(
             **inputs,
             streamer=streamer,
             max_new_tokens=4096,
             stopping_criteria=stopping,
         )
     )
     thread.start()
     output = ""
     for token in streamer:
         output += token
         output = strip_bad_tokens(output, stop_strings)
         yield output
     # The streamer is exhausted, so generation has finished; reap the worker.
     thread.join()
 demo = gr.Interface(
     fn=generate_answer_stream,