Spaces:
Running
Running
Add conversation-history management; pass trust_remote_code when loading the model
Browse files
app.py
CHANGED
|
@@ -4,13 +4,28 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
| 4 |
|
| 5 |
def load_model():
    """Fetch the BitNet b1.58 2B-4T checkpoint and its tokenizer.

    Returns:
        (model, tokenizer): the causal-LM loaded in bfloat16 and its
        matching tokenizer, both pulled from the Hugging Face Hub.
    """
    checkpoint = "microsoft/bitnet-b1.58-2B-4T"
    tok = AutoTokenizer.from_pretrained(checkpoint)
    # bfloat16 halves the memory footprint versus float32 while keeping
    # the exponent range needed for stable inference.
    lm = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16)
    return lm, tok
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def generate_response(user_input, system_prompt, max_new_tokens, temperature, top_p, top_k, history):
|
| 15 |
model, tokenizer = load_model()
|
| 16 |
|
|
@@ -38,6 +53,10 @@ def generate_response(user_input, system_prompt, max_new_tokens, temperature, to
|
|
| 38 |
# Update history
|
| 39 |
history.append({"role": "user", "content": user_input})
|
| 40 |
history.append({"role": "assistant", "content": response})
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
return history, history
|
| 42 |
|
| 43 |
# Gradio interface
|
|
|
|
| 4 |
|
| 5 |
def load_model():
    """Load the BitNet b1.58 2B-4T model and tokenizer (cached).

    The checkpoint ships custom modeling code, so ``trust_remote_code=True``
    is required for both the tokenizer and the model.

    Loading a 2B-parameter model is expensive, and callers invoke this on
    every generation request, so the result is memoized on the function
    object: the Hub download/instantiation happens once per process.

    Returns:
        (model, tokenizer): the causal-LM in bfloat16 and its tokenizer.
    """
    # Zero-dependency memoization: stash the pair on the function itself.
    cached = getattr(load_model, "_cache", None)
    if cached is not None:
        return cached

    model_id = "microsoft/bitnet-b1.58-2B-4T"
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    load_model._cache = (model, tokenizer)
    return model, tokenizer
|
| 14 |
|
| 15 |
+
def manage_history(history):
    """Trim chat history to at most 3 turns and 300 total characters.

    Args:
        history: list of ``{"role": ..., "content": ...}`` message dicts,
            oldest first.

    Returns:
        The trimmed history; the oldest messages are dropped first.
    """
    # Keep at most 3 turns; each turn is a user + assistant pair.
    max_messages = 6  # 3 turns * 2 messages per turn
    if len(history) > max_messages:
        history = history[-max_messages:]

    # Cap the combined content length at 300 characters, evicting the
    # oldest message first. Maintain a running total instead of re-summing
    # every iteration so trimming is O(n) rather than O(n^2).
    total_chars = sum(len(msg["content"]) for msg in history)
    while total_chars > 300 and history:
        total_chars -= len(history.pop(0)["content"])

    return history
|
| 28 |
+
|
| 29 |
def generate_response(user_input, system_prompt, max_new_tokens, temperature, top_p, top_k, history):
|
| 30 |
model, tokenizer = load_model()
|
| 31 |
|
|
|
|
| 53 |
# Update history
|
| 54 |
history.append({"role": "user", "content": user_input})
|
| 55 |
history.append({"role": "assistant", "content": response})
|
| 56 |
+
|
| 57 |
+
# Manage history limits
|
| 58 |
+
history = manage_history(history)
|
| 59 |
+
|
| 60 |
return history, history
|
| 61 |
|
| 62 |
# Gradio interface
|