Spaces: Running on Zero
zRzRzRzRzRzRzR committed
Commit 548989b · 1 Parent(s): e076e33
app.py CHANGED
@@ -11,11 +11,11 @@ import subprocess
 import tempfile
 import os
 import time
+import html
 
 MODEL_PATH = "THUDM/GLM-4.1V-9B-Thinking"
 stop_generation = False
 
-# Global model and processor
 processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
 model = Glm4vForConditionalGeneration.from_pretrained(
     MODEL_PATH,
@@ -25,12 +25,12 @@ model = Glm4vForConditionalGeneration.from_pretrained(
 )
 
 
-def _strip_html(text: str) -> str:
-    return re.sub(r"<[^>]+>", "", text).strip()
+def _strip_html(text):
+    return re.sub(r"<[^>]+>", "", text).strip()
 
 
-def _wrap_text(text: str) -> list:
-    return [{"type": "text", "text": text}]
+def _wrap_text(text):
+    return [{"type": "text", "text": text}]
 
 
 def _pdf_to_imgs(pdf_path):
@@ -68,31 +68,30 @@ def _files_to_content(media):
             out.append({"type": "image", "url": p})
         elif ext == ".pdf":
             for p in _pdf_to_imgs(f.name):
-                out.append({"type": "image", "url": p})
+                out.append({"type": "image", "image_url": p})
     return out
 
 
-def _stream_fragment(buf: str) -> str:
+def _stream_fragment(buf, skip_think=False):
     think_html = ""
-    if "<think>" in buf:
+    if "<think>" in buf and not skip_think:
         if "</think>" in buf:
             seg = re.search(r"<think>(.*?)</think>", buf, re.DOTALL)
             if seg:
+                think_content = seg.group(1).strip().replace("\\n", "\n").replace("\n", "<br>")
                 think_html = (
-
-
-
-                    + "</div></details>"
+                    "<details open><summary style='cursor:pointer;font-weight:bold;color:#007acc;'>💭 Thinking</summary>"
+                    "<div style='color:#555555;line-height:1.6;padding:15px;border-left:4px solid #007acc;margin:10px 0;background-color:#f0f7ff;border-radius:4px;'>"
+                    + think_content + "</div></details>"
                 )
         else:
             part = buf.split("<think>", 1)[1]
+            think_content = part.replace("\\n", "\n").replace("\n", "<br>")
             think_html = (
-
-
-
-                + "</div></details>"
+                "<details open><summary style='cursor:pointer;font-weight:bold;color:#007acc;'>💭 Thinking</summary>"
+                "<div style='color:#555555;line-height:1.6;padding:15px;border-left:4px solid #007acc;margin:10px 0;background-color:#f0f7ff;border-radius:4px;'>"
+                + think_content + "</div></details>"
             )
-
     answer_html = ""
     if "<answer>" in buf:
         if "</answer>" in buf:
@@ -101,7 +100,10 @@ def _stream_fragment(buf: str) -> str:
             answer_html = seg.group(1).strip()
         else:
             answer_html = buf.split("<answer>", 1)[1]
-
+    if answer_html:
+        answer_html_raw = answer_html.replace("\\n", "\n")
+        escaped = html.escape(answer_html_raw)
+        answer_html = f"<pre class='code-block'><code class='language-html'>{escaped}</code></pre>"
     if not think_html and not answer_html:
         return _strip_html(buf)
     return think_html + answer_html
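For reference, the new `<answer>` branch can be exercised on its own. The sketch below is not part of the commit; it re-implements the escaping step using the commit's regex and the stdlib `html` module it now imports, assuming the buffer already holds a closed answer segment:

import html
import re

def render_answer(buf):
    # Extract the <answer> body, then escape it so any raw HTML the model
    # emits is displayed as text inside a <pre><code> block, as in the commit.
    seg = re.search(r"<answer>(.*?)</answer>", buf, re.DOTALL)
    answer = seg.group(1).strip() if seg else ""
    escaped = html.escape(answer.replace("\\n", "\n"))
    return f"<pre class='code-block'><code class='language-html'>{escaped}</code></pre>"

print(render_answer("<answer><b>bold</b> & more</answer>"))
# -> <pre class='code-block'><code class='language-html'>&lt;b&gt;bold&lt;/b&gt; &amp; more</code></pre>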
@@ -109,10 +111,8 @@ def _stream_fragment(buf: str) -> str:
 
 def _build_messages(raw_hist, sys_prompt):
     msgs = []
-
     if sys_prompt.strip():
         msgs.append({"role": "system", "content": [{"type": "text", "text": sys_prompt.strip()}]})
-
     for h in raw_hist:
         if h["role"] == "user":
             msgs.append({"role": "user", "content": h["content"]})
@@ -120,8 +120,7 @@ def _build_messages(raw_hist, sys_prompt):
             raw = h["content"]
             raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL)
             raw = re.sub(r"<details.*?</details>", "", raw, flags=re.DOTALL)
-            clean = _strip_html(raw).strip()
-            msgs.append({"role": "assistant", "content": _wrap_text(clean)})
+            msgs.append({"role": "assistant", "content": _wrap_text(_strip_html(raw).strip())})
     return msgs
 
 
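Before history is replayed to the model, assistant turns are scrubbed of both the raw `<think>` tags and the rendered `<details>` wrapper. A standalone sketch of that sanitization (the helper name is mine; the regexes are the commit's):

import re

def clean_assistant_turn(raw):
    # Drop raw thinking tags, drop the rendered <details> block, then strip
    # any remaining markup, mirroring the merged one-liner in _build_messages.
    raw = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL)
    raw = re.sub(r"<details.*?</details>", "", raw, flags=re.DOTALL)
    return re.sub(r"<[^>]+>", "", raw).strip()

print(clean_assistant_turn("<think>chain of thought</think><answer>Paris</answer>"))  # -> Paris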
@@ -138,30 +137,25 @@ def stream_generate(raw_hist, sys_prompt):
         return_tensors="pt",
         padding=True,
     ).to(model.device)
-
     streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=False)
-
+    gen_kwargs = dict(
         inputs,
         max_new_tokens=8192,
         repetition_penalty=1.1,
         do_sample=True,
         top_k=2,
-        temperature=
-        top_p=1e-5,
+        temperature=0.01,
         streamer=streamer,
     )
-
-
-    generation_thread.start()
-
+    thread = threading.Thread(target=model.generate, kwargs=gen_kwargs)
+    thread.start()
     buf = ""
     for tok in streamer:
         if stop_generation:
             break
         buf += tok
         yield _stream_fragment(buf)
-
-    generation_thread.join()
+    thread.join()
 
 
 def format_display_content(content):
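The rewritten generation path is the standard transformers streaming pattern: `model.generate` blocks, so it runs on a worker thread while the caller drains a `TextIteratorStreamer`. A self-contained sketch of that pattern with a tiny placeholder model (the checkpoint name is illustrative, not from the commit):

import threading
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tok = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")  # placeholder checkpoint
mdl = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")

inputs = tok("Hello", return_tensors="pt")
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
gen_kwargs = dict(**inputs, max_new_tokens=16, streamer=streamer)

# generate() runs on the worker thread; the main thread iterates the
# streamer, receiving decoded text pieces as they are produced.
thread = threading.Thread(target=mdl.generate, kwargs=gen_kwargs)
thread.start()
buf = ""
for piece in streamer:
    buf += piece
thread.join()
print(buf)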
@@ -173,7 +167,6 @@ def format_display_content(content):
             text_parts.append(item["text"])
         else:
             file_count += 1
-
     display_text = " ".join(text_parts)
     if file_count > 0:
         return f"[{file_count} file(s) uploaded]\n{display_text}"
@@ -216,39 +209,32 @@ def check_files(files):
 def chat(files, msg, raw_hist, sys_prompt):
     global stop_generation
     stop_generation = False
-
     ok, err = check_files(files)
     if not ok:
         raw_hist.append({"role": "assistant", "content": err})
         display_hist = create_display_history(raw_hist)
         yield display_hist, copy.deepcopy(raw_hist), None, ""
         return
-
     payload = _files_to_content(files) if files else None
     if msg.strip():
         if payload is None:
             payload = _wrap_text(msg.strip())
         else:
             payload.append({"type": "text", "text": msg.strip()})
-
     user_rec = {"role": "user", "content": payload if payload else msg.strip()}
     if raw_hist is None:
         raw_hist = []
     raw_hist.append(user_rec)
-
     place = {"role": "assistant", "content": ""}
     raw_hist.append(place)
-
     display_hist = create_display_history(raw_hist)
     yield display_hist, copy.deepcopy(raw_hist), None, ""
-
     for chunk in stream_generate(raw_hist[:-1], sys_prompt):
         if stop_generation:
             break
         place["content"] = chunk
         display_hist = create_display_history(raw_hist)
         yield display_hist, copy.deepcopy(raw_hist), None, ""
-
     display_hist = create_display_history(raw_hist)
     yield display_hist, copy.deepcopy(raw_hist), None, ""
 
@@ -260,52 +246,27 @@ def reset():
     return [], [], None, ""
 
 
-
-details summary{cursor:pointer;font-weight:bold}
-details[open] summary{margin-bottom:10px}"""
-
-demo = gr.Blocks(title="GLM-4.1V Chat", theme=gr.themes.Soft(), css=css)
+demo = gr.Blocks(title="GLM-4.1V-9B-Thinking", theme=gr.themes.Soft())
 with demo:
-    gr.Markdown(
-
-        GLM-4.1V-9B-Thinking Gradio Space🤗
-        </div>
-        <div style="text-align: center;">
-        <a href="https://huggingface.co/THUDM/GLM-4.1V-9B-Thinking">🤗 Model Hub</a> |
-        <a href="https://github.com/THUDM/GLM-4.1V-Thinking">🌐 Github</a>
-        </div>
-        """)
-
+    gr.Markdown(
+        "<div style='text-align:center;font-size:32px;font-weight:bold;margin-bottom:20px;'>GLM-4.1V-9B Thinking</div><div style='text-align:center;'><a href='https://huggingface.co/THUDM/GLM-4.1V-9B-Thinking'>Model Hub</a> | <a href='https://github.com/THUDM/GLM-4.1V-Thinking'>Github</a></div>")
     raw_history = gr.State([])
-
     with gr.Row():
         with gr.Column(scale=7):
-            chatbox = gr.Chatbot(
-
-
-                height=600,
-                elem_classes="chatbot-container",
-            )
-            textbox = gr.Textbox(label="💭 Message", lines=3)
+            chatbox = gr.Chatbot(label="Chat", type="messages", height=600, elem_classes="chatbot-container",
+                                 sanitize_html=False, line_breaks=True)
+            textbox = gr.Textbox(label="Message", lines=3)
             with gr.Row():
                 send = gr.Button("Send", variant="primary")
                 clear = gr.Button("Clear")
         with gr.Column(scale=3):
-            up = gr.File(
-                label="📁 Upload",
-                file_count="multiple",
-                file_types=["file"],
-                type="filepath",
-            )
+            up = gr.File(label="Upload Files", file_count="multiple", file_types=["file"], type="filepath")
             gr.Markdown("Supports images / videos / PPT / PDF")
             gr.Markdown(
-                "The maximum supported input is 10 images or 1 video/PPT/PDF. During the conversation, video and images cannot be present at the same time."
-            )
-            sys = gr.Textbox(label="⚙️ System Prompt", lines=6)
-
+                "The maximum supported input is 10 images or 1 video/PPT/PDF. During the conversation, video and images cannot be present at the same time.")
+            sys = gr.Textbox(label="System Prompt", lines=6)
     send.click(chat, inputs=[up, textbox, raw_history, sys], outputs=[chatbox, raw_history, up, textbox])
     textbox.submit(chat, inputs=[up, textbox, raw_history, sys], outputs=[chatbox, raw_history, up, textbox])
     clear.click(reset, outputs=[chatbox, raw_history, up, textbox])
-
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(server_port=8000,server_name="0.0.0.0")
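The UI block now relies on gr.Chatbot(type="messages", sanitize_html=False, line_breaks=True), which takes role/content dicts and renders the raw `<details>`/`<pre>` fragments produced above as HTML. A minimal sketch of that contract, assuming Gradio 4.x semantics (the seed message is mine, not from the commit):

import gradio as gr

with gr.Blocks() as demo:
    # type="messages" expects {"role": ..., "content": ...} dicts;
    # sanitize_html=False lets the HTML fragments render instead of being escaped.
    chat = gr.Chatbot(type="messages", sanitize_html=False, line_breaks=True)
    demo.load(
        lambda: [{"role": "assistant",
                  "content": "<details open><summary>💭 Thinking</summary>hi</details>"}],
        outputs=chat,
    )

if __name__ == "__main__":
    demo.launch()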