Spaces:
Runtime error
Runtime error
| """ | |
| This script creates a Gradio demo with a Transformers backend for the glm-4v-9b model, allowing users to interact with the model through a Gradio web UI. | |
| Usage: | |
| - Run the script to start the Gradio server. | |
| - Interact with the model via the web UI. | |
| Requirements: | |
| - Gradio package | |
| - Type `pip install gradio` to install Gradio. | |
| """ | |
| import os | |
| import torch | |
| import gradio as gr | |
| from threading import Thread | |
| from transformers import ( | |
| AutoTokenizer, | |
| StoppingCriteria, | |
| StoppingCriteriaList, | |
| TextIteratorStreamer, AutoModel, BitsAndBytesConfig | |
| ) | |
| from PIL import Image | |
| import requests | |
| from io import BytesIO | |
# Model checkpoint: overridable via the MODEL_PATH environment variable.
MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/glm-4v-9b')

# Tokenizer for the GLM-4V chat template; special tokens are encoded literally.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_PATH, trust_remote_code=True, encode_special_tokens=True
)

# Vision-language model in bfloat16, placed automatically across available
# devices, switched to inference mode.
model = AutoModel.from_pretrained(
    MODEL_PATH, trust_remote_code=True, device_map="auto", torch_dtype=torch.bfloat16
).eval()
class StopOnTokens(StoppingCriteria):
    """Stop generation as soon as the most recent token is one of the
    model's configured end-of-sequence token ids."""

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # NOTE(review): assumes model.config.eos_token_id is iterable (a list
        # of ids, as for glm-4v-9b) — confirm for other checkpoints.
        last_token = input_ids[0][-1]
        return any(last_token == stop_id for stop_id in model.config.eos_token_id)
def get_image(image_path=None, image_url=None):
    """Load an image as RGB from a local path or, failing that, a URL.

    Args:
        image_path: Local filesystem path; takes priority over image_url.
        image_url: HTTP(S) URL fetched with requests.

    Returns:
        A PIL.Image in RGB mode, or None when neither source is given.

    Raises:
        requests.HTTPError: if the URL download returns an error status.
    """
    if image_path:
        return Image.open(image_path).convert("RGB")
    if image_url:
        # Bounded timeout so an unresponsive host cannot hang the Gradio
        # worker indefinitely, and an explicit status check so PIL is never
        # handed an HTML error page instead of image bytes.
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()
        return Image.open(BytesIO(response.content)).convert("RGB")
    return None
def chatbot(image_path=None, image_url=None, assistant_prompt=""):
    """Run one streamed generation pass over an image and a prompt.

    Args:
        image_path: Optional local image path (takes priority).
        image_url: Optional image URL used when no path is given.
        assistant_prompt: Seed text placed in the assistant turn; the user
            turn carries only the image.

    Returns:
        Tuple of (PIL image for preview, generated response text).
    """
    image = get_image(image_path, image_url)
    # The prompt is injected as an assistant turn while the image rides on an
    # otherwise-empty user turn — this matches how this demo drives the
    # GLM-4V chat template.
    messages = [
        {"role": "assistant", "content": assistant_prompt},
        {"role": "user", "content": "", "image": image}
    ]
    model_inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True
    ).to(next(model.parameters()).device)
    # Streamer yields decoded text pieces as generate() produces them;
    # timeout guards against a stalled producer thread.
    streamer = TextIteratorStreamer(
        tokenizer=tokenizer,
        timeout=60,
        skip_prompt=True,
        skip_special_tokens=True
    )
    generate_kwargs = {
        **model_inputs,
        "streamer": streamer,
        "max_new_tokens": 1024,
        "do_sample": True,
        "top_p": 0.8,
        "temperature": 0.6,
        "stopping_criteria": StoppingCriteriaList([StopOnTokens()]),
        "repetition_penalty": 1.2,
        # GLM-4 end-of-turn token ids.
        "eos_token_id": [151329, 151336, 151338],
    }
    # Generation runs on a worker thread; this thread drains the streamer.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()
    response = ""
    for new_token in streamer:
        if new_token:
            response += new_token
    # The streamer is exhausted only once generate() has finished, but join
    # explicitly so no worker thread is ever left dangling.
    t.join()
    return image, response.strip()
# Build the web UI: inputs (file upload, URL, prompt) on the left,
# model response and image preview on the right.
with gr.Blocks() as demo:
    demo.title = "GLM-4V-9B Image Recognition Demo"
    demo.description = """
    This demo uses the GLM-4V-9B model to get image information.
    """
    with gr.Row():
        with gr.Column():
            # An uploaded file wins over the URL (see get_image's priority).
            image_path_input = gr.File(label="Upload Image (High-Priority)", type="filepath")
            image_url_input = gr.Textbox(label="Image URL (Low-Priority)")
            assistant_prompt_input = gr.Textbox(label="Assistant Prompt (You Can Change It)", value="这是什么?")
            submit_button = gr.Button("Submit")
        with gr.Column():
            chatbot_output = gr.Textbox(label="GLM-4V-9B Model Response")
            image_output = gr.Image(label="Image Preview")
    submit_button.click(chatbot,
                        inputs=[image_path_input, image_url_input, assistant_prompt_input],
                        outputs=[image_output, chatbot_output])

# Local-only server; share=False keeps the demo off the public Gradio tunnel.
demo.launch(server_name="127.0.0.1", server_port=8911, inbrowser=True, share=False)