Spaces: Running on Zero
zRzRzRzRzRzRzR committed
Commit a001585 · 1 parent: 4fa9584

app.py CHANGED
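The change moves model and processor initialization out of the GLM4VModel class and into module-level globals, with a load_model() function that runs once at import time; stream_generate is updated to read those globals instead of instance attributes.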
```diff
@@ -15,20 +15,23 @@ import time
 MODEL_PATH = "THUDM/GLM-4.1V-9B-Thinking"
 stop_generation = False
 
+processor = None
+model = None
+
+def load_model():
+    """Load the model and processor."""
+    global processor, model
+    processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
+    model = Glm4vForConditionalGeneration.from_pretrained(
+        MODEL_PATH,
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
+        attn_implementation="sdpa",
+    )
 
 class GLM4VModel:
     def __init__(self):
-        self.processor = None
-        self.model = None
-
-    def load(self):
-        self.processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
-        self.model = Glm4vForConditionalGeneration.from_pretrained(
-            MODEL_PATH,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            attn_implementation="sdpa",
-        )
+        pass
 
     def _strip_html(self, t):
         return re.sub(r"<[^>]+>", "", t).strip()
```
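This is the usual pattern for ZeroGPU Spaces: weights are instantiated in the global scope while the Space starts up, and a GPU is attached only for the duration of a `@spaces.GPU`-decorated call. A minimal sketch of that pattern, assuming the same model id; the `generate` entry point here is illustrative, not the app's actual function:

```python
# Minimal sketch, not the app's code: module-level load + @spaces.GPU usage.
import spaces
import torch
from transformers import AutoProcessor, Glm4vForConditionalGeneration

MODEL_PATH = "THUDM/GLM-4.1V-9B-Thinking"

# Runs once at import, before any GPU is attached.
processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
model = Glm4vForConditionalGeneration.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

@spaces.GPU(duration=240)
def generate(prompt: str) -> str:
    # A GPU is available only for the duration of this call.
    inputs = processor(text=prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=64)
    return processor.decode(output[0], skip_special_tokens=True)
```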
```diff
@@ -125,19 +128,19 @@ class GLM4VModel:
 
     @spaces.GPU(duration=240)
     def stream_generate(self, raw_hist, sys_prompt):
-        global stop_generation
+        global stop_generation, processor, model
        stop_generation = False
         msgs = self._build_messages(raw_hist, sys_prompt)
-        inputs = self.processor.apply_chat_template(
+        inputs = processor.apply_chat_template(
             msgs,
             tokenize=True,
             add_generation_prompt=True,
             return_dict=True,
             return_tensors="pt",
             padding=True,
-        ).to(self.model.device)
+        ).to(model.device)
 
-        streamer = TextIteratorStreamer(self.processor.tokenizer, skip_prompt=True, skip_special_tokens=False)
+        streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=False)
         gen_args = dict(
             inputs,
             max_new_tokens=8192,
```
```diff
@@ -149,7 +152,7 @@ class GLM4VModel:
             streamer=streamer,
         )
 
-        generation_thread = threading.Thread(target=self.model.generate, kwargs=gen_args)
+        generation_thread = threading.Thread(target=model.generate, kwargs=gen_args)
         generation_thread.start()
 
         buf = ""
```
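The generation code follows the standard transformers streaming recipe: `model.generate` blocks, so it runs on a worker thread while the caller drains the `TextIteratorStreamer`. A self-contained sketch under that assumption; `stream`, its parameters, and the module-level `stop_generation` stand-in are illustrative names, not the app's exact code:

```python
# Minimal sketch of threaded streaming with TextIteratorStreamer.
# model, processor, and inputs are assumed prepared as in the diff above.
import threading
from transformers import TextIteratorStreamer

stop_generation = False  # stands in for the app's module-level flag

def stream(model, processor, inputs):
    streamer = TextIteratorStreamer(
        processor.tokenizer, skip_prompt=True, skip_special_tokens=False
    )
    gen_args = dict(inputs, max_new_tokens=8192, streamer=streamer)
    # generate() is fed the streamer and runs in the background; this
    # thread iterates the streamer as decoded text chunks arrive.
    threading.Thread(target=model.generate, kwargs=gen_args).start()

    buf = ""
    for piece in streamer:
        if stop_generation:
            break  # stop yielding; the worker finishes in the background
        buf += piece
        yield buf  # the UI re-renders the growing string
```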
```diff
@@ -190,8 +193,9 @@ def create_display_history(raw_hist):
     return display_hist
 
 
+# Load the model and processor
+load_model()
 glm4v = GLM4VModel()
-glm4v.load()
 
 
 def check_files(files):
```
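Calling load_model() at import time, rather than lazily through the class, is presumably the point of the commit: module-level code is what executes while a ZeroGPU Space boots, so the weights are resident before the first `@spaces.GPU` call, and every decorated call reuses the same globals instead of reloading a 9B-parameter checkpoint.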
```diff
@@ -310,4 +314,4 @@ with demo:
     clear.click(reset, outputs=[chatbox, raw_history, up, textbox])
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
```