Spaces:
Running
Running
Add conversation-history management; pass trust_remote_code when loading the model
Browse files
app.py
CHANGED
|
@@ -4,13 +4,28 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
| 4 |
|
| 5 |
def load_model():
    """Fetch the BitNet b1.58 2B-4T checkpoint and its tokenizer.

    Returns:
        (model, tokenizer): the causal-LM loaded in bfloat16 and its
        matching tokenizer, both pulled from the Hugging Face Hub.
    """
    checkpoint = "microsoft/bitnet-b1.58-2B-4T"
    tok = AutoTokenizer.from_pretrained(checkpoint)
    # bfloat16 halves the memory footprint versus float32 while keeping
    # the exponent range needed for stable inference.
    lm = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16)
    return lm, tok
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def generate_response(user_input, system_prompt, max_new_tokens, temperature, top_p, top_k, history):
|
| 15 |
model, tokenizer = load_model()
|
| 16 |
|
|
@@ -38,6 +53,10 @@ def generate_response(user_input, system_prompt, max_new_tokens, temperature, to
|
|
| 38 |
# Update history
|
| 39 |
history.append({"role": "user", "content": user_input})
|
| 40 |
history.append({"role": "assistant", "content": response})
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
return history, history
|
| 42 |
|
| 43 |
# Gradio interface
|
|
|
|
| 4 |
|
| 5 |
def load_model():
    """Load the BitNet b1.58 2B-4T model and tokenizer (cached).

    The checkpoint ships custom modeling code, so ``trust_remote_code=True``
    is required for both the tokenizer and the model.

    Loading a 2B-parameter model is expensive, and callers invoke this on
    every generation request, so the result is memoized on the function
    object: the Hub download/instantiation happens once per process.

    Returns:
        (model, tokenizer): the causal-LM in bfloat16 and its tokenizer.
    """
    # Zero-dependency memoization: stash the pair on the function itself.
    cached = getattr(load_model, "_cache", None)
    if cached is not None:
        return cached

    model_id = "microsoft/bitnet-b1.58-2B-4T"
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
    )
    load_model._cache = (model, tokenizer)
    return model, tokenizer
|
| 14 |
|
| 15 |
+
def manage_history(history):
    """Trim chat history to at most 3 turns and 300 total characters.

    Args:
        history: list of ``{"role": ..., "content": ...}`` message dicts,
            oldest first.

    Returns:
        The trimmed history; the oldest messages are dropped first.
    """
    # Keep at most 3 turns; each turn is a user + assistant pair.
    max_messages = 6  # 3 turns * 2 messages per turn
    if len(history) > max_messages:
        history = history[-max_messages:]

    # Cap the combined content length at 300 characters, evicting the
    # oldest message first. Maintain a running total instead of re-summing
    # every iteration so trimming is O(n) rather than O(n^2).
    total_chars = sum(len(msg["content"]) for msg in history)
    while total_chars > 300 and history:
        total_chars -= len(history.pop(0)["content"])

    return history
|
| 28 |
+
|
| 29 |
def generate_response(user_input, system_prompt, max_new_tokens, temperature, top_p, top_k, history):
|
| 30 |
model, tokenizer = load_model()
|
| 31 |
|
|
|
|
| 53 |
# Update history
|
| 54 |
history.append({"role": "user", "content": user_input})
|
| 55 |
history.append({"role": "assistant", "content": response})
|
| 56 |
+
|
| 57 |
+
# Manage history limits
|
| 58 |
+
history = manage_history(history)
|
| 59 |
+
|
| 60 |
return history, history
|
| 61 |
|
| 62 |
# Gradio interface
|