cafe3310 committed on
Commit
aa73b52
·
1 Parent(s): 9602bb7

feat: Refactor comp.py and add gitignore

Browse files

Refactor comp.py to:
- Add trust_remote_code=True to AutoTokenizer.
- Change system prompt to Chinese.
- Add comments for better readability.

Add .gitignore to ignore temporary directories.

Files changed (2) hide show
  1. .gitignore +0 -0
  2. comp.py +12 -7
.gitignore ADDED
File without changes
comp.py CHANGED
@@ -1,10 +1,8 @@
1
- import torch
2
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
3
  from threading import Thread
4
  import spaces
5
 
6
- # --- 模型加载 ---
7
- # 使用 "auto" 模式加载模型和分词器,Hugging Face Accelerate 会自动处理设备和精度
8
  MODEL_NAME = "inclusionAI/Ring-mini-2.0"
9
 
10
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
@@ -17,20 +15,25 @@ model = AutoModelForCausalLM.from_pretrained(
17
 
18
  @spaces.GPU(duration=120)
19
  def generate_response(message, history):
20
- # Convert history to messages format
 
 
 
21
  messages = [
22
- {"role": "system", "content": "You are Ring, an assistant created by inclusionAI"}
23
  ]
24
 
25
  # Add conversation history
 
26
  for human, assistant in history:
27
  messages.append({"role": "user", "content": human})
28
  messages.append({"role": "assistant", "content": assistant})
29
 
30
- # Add current message
31
  messages.append({"role": "user", "content": message})
32
 
33
  # Apply chat template
 
34
  text = tokenizer.apply_chat_template(
35
  messages,
36
  tokenize=False,
@@ -51,13 +54,15 @@ def generate_response(message, history):
51
  streamer=streamer,
52
  )
53
 
 
54
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
55
  thread.start()
56
 
57
- # Stream the response
58
  response = ""
59
  for new_text in streamer:
60
  response += new_text
61
  yield response
62
 
 
63
  thread.join()
 
 
1
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
2
  from threading import Thread
3
  import spaces
4
 
5
+ # Model and tokenizer initialization
 
6
  MODEL_NAME = "inclusionAI/Ring-mini-2.0"
7
 
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
 
15
 
16
  @spaces.GPU(duration=120)
17
  def generate_response(message, history):
18
+ # (msg, history) -> str: stream response (yielding partial responses)
19
+
20
+ # To construct the 'chat', we start with system prompt
21
+ # then append user and assistant messages from history
22
  messages = [
23
+ {"role": "system", "content": "你是 Ring,蚂蚁集团开发的智能助手,致力于为用户提供有用的信息和帮助,用中文回答用户的问题。"}
24
  ]
25
 
26
  # Add conversation history
27
+ # history is a list of (human, assistant) tuples
28
  for human, assistant in history:
29
  messages.append({"role": "user", "content": human})
30
  messages.append({"role": "assistant", "content": assistant})
31
 
32
+ # Add current message from user
33
  messages.append({"role": "user", "content": message})
34
 
35
  # Apply chat template
36
+ # Doc: https://github.com/huggingface/transformers/blob/main/src/transformers/tokenization_utils_base.py#L1510
37
  text = tokenizer.apply_chat_template(
38
  messages,
39
  tokenize=False,
 
54
  streamer=streamer,
55
  )
56
 
57
+ # Start generation in a separate thread to enable streaming
58
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
59
  thread.start()
60
 
61
+ # ... and yield the generated tokens as they are produced
62
  response = ""
63
  for new_text in streamer:
64
  response += new_text
65
  yield response
66
 
67
+ # wait for the generation thread to finish
68
  thread.join()