Spaces:
Runtime error
Runtime error
wenhuchen
committed on
Commit
·
bfc56dc
1
Parent(s):
dfaecf8
update dem
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import spaces
|
|
| 3 |
import os
|
| 4 |
import time
|
| 5 |
from PIL import Image
|
|
|
|
| 6 |
from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava, MLlavaForConditionalGeneration
|
| 7 |
from typing import List
|
| 8 |
processor = MLlavaProcessor.from_pretrained("TIGER-Lab/Mantis-llava-7b-v1.1")
|
|
@@ -54,45 +55,7 @@ def get_chat_images(history):
|
|
| 54 |
if isinstance(message[0], tuple):
|
| 55 |
images.extend(message[0])
|
| 56 |
return images
|
| 57 |
-
|
| 58 |
-
def bot(history):
    """Stream the model's reply for the newest, still-unanswered user turn.

    Walks ``history`` from the end, collecting the user's text and image
    entries until it reaches an entry that already has a response. The
    collected text is then reconciled with the number of collected images
    (``<image>`` placeholders are prepended or trailing ones removed) before
    the conversation is sent to ``generate``.

    Args:
        history: Sequence of entries indexed as ``entry[0]`` (user content,
            a ``str`` or a ``tuple`` of images) and ``entry[1]`` (response).
            Presumably the Gradio chatbot value; confirm against the caller.

    Yields:
        ``history`` itself, after each streamed chunk has been written to
        ``history[-1][1]``.

    Raises:
        gr.Error: If no text was entered for the pending turn.
    """
    print(history)

    pending_text = ""
    pending_images = []
    for entry in reversed(history):
        # An entry with a response marks the end of the pending user turn.
        if entry[1]:
            break
        user_part = entry[0]
        if isinstance(user_part, str):
            pending_text = f"{user_part} {pending_text}"
        elif isinstance(user_part, tuple):
            pending_images.extend(user_part)
    pending_text = pending_text.strip()
    pending_images.reverse()

    if not pending_text:
        raise gr.Error("Please enter a message")

    image_total = len(pending_images)

    missing = image_total - pending_text.count("<image>")
    if missing > 0:
        gr.Warning("The number of images uploaded is more than the number of <image> placeholders in the text. Will automatically prepend <image> to the text.")
        pending_text = "<image> " * missing + pending_text
        history[-1][0] = pending_text

    surplus = pending_text.count("<image>") - image_total
    if surplus > 0:
        gr.Warning("The number of images uploaded is less than the number of <image> placeholders in the text. Will automatically remove extra <image> placeholders from the text.")
        # Reverse, drop the first `surplus` matches, reverse back: this
        # removes the LAST `surplus` placeholders of the original text.
        flipped = pending_text[::-1]
        flipped = flipped.replace("<image>"[::-1], "", surplus)
        pending_text = flipped[::-1]
        history[-1][0] = pending_text

    chat_history = get_chat_history(history)
    chat_images = get_chat_images(history)
    generation_kwargs = dict(
        max_new_tokens=4096,
        temperature=0.7,
        top_p=1.0,
        do_sample=True,
    )
    print(None, chat_images, chat_history, generation_kwargs)

    for partial_reply in generate(None, chat_images, chat_history, **generation_kwargs):
        history[-1][1] = partial_reply
        time.sleep(0.05)
        yield history
|
| 96 |
|
| 97 |
def build_demo():
|
| 98 |
with gr.Blocks() as demo:
|
|
@@ -118,14 +81,73 @@ def build_demo():
|
|
| 118 |
chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload images. Please use <image> to indicate the position of uploaded images", show_label=True)
|
| 119 |
|
| 120 |
chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
bot_msg = chat_msg.success(bot, chatbot, chatbot, api_name="bot_response")
|
| 122 |
|
| 123 |
chatbot.like(print_like_dislike, None, None)
|
| 124 |
-
|
| 125 |
with gr.Row():
|
| 126 |
send_button = gr.Button("Send")
|
| 127 |
clear_button = gr.ClearButton([chatbot, chat_input])
|
| 128 |
-
|
| 129 |
send_button.click(
|
| 130 |
add_message, [chatbot, chat_input], [chatbot, chat_input]
|
| 131 |
).then(
|
|
@@ -134,6 +156,10 @@ def build_demo():
|
|
| 134 |
|
| 135 |
gr.Examples(
|
| 136 |
examples=[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
{
|
| 138 |
"text": "<image> <image> <image> Which image shows a different mood of character from the others?",
|
| 139 |
"files": ["./examples/image12.jpg", "./examples/image13.jpg", "./examples/image14.jpg"]
|
|
@@ -142,10 +168,6 @@ def build_demo():
|
|
| 142 |
"text": "<image> <image> What's the difference between these two images? Please describe as much as you can.",
|
| 143 |
"files": ["./examples/image1.jpg", "./examples/image2.jpg"]
|
| 144 |
},
|
| 145 |
-
{
|
| 146 |
-
"text": "<image> <image> How many dices are there in image 1 and image 2 respectively?",
|
| 147 |
-
"files": ["./examples/image10.jpg", "./examples/image15.jpg"]
|
| 148 |
-
},
|
| 149 |
{
|
| 150 |
"text": "<image> <image> Which image shows an older dog?",
|
| 151 |
"files": ["./examples/image8.jpg", "./examples/image9.jpg"]
|
|
|
|
| 3 |
import os
|
| 4 |
import time
|
| 5 |
from PIL import Image
|
| 6 |
+
import functools
|
| 7 |
from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava, MLlavaForConditionalGeneration
|
| 8 |
from typing import List
|
| 9 |
processor = MLlavaProcessor.from_pretrained("TIGER-Lab/Mantis-llava-7b-v1.1")
|
|
|
|
| 55 |
if isinstance(message[0], tuple):
|
| 56 |
images.extend(message[0])
|
| 57 |
return images
|
| 58 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
def build_demo():
|
| 61 |
with gr.Blocks() as demo:
|
|
|
|
| 81 |
chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload images. Please use <image> to indicate the position of uploaded images", show_label=True)
|
| 82 |
|
| 83 |
chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
|
| 84 |
+
|
| 85 |
+
with gr.Accordion(label='Advanced options', open=False):
|
| 86 |
+
temperature = gr.Slider(
|
| 87 |
+
label='Temperature',
|
| 88 |
+
minimum=0.1,
|
| 89 |
+
maximum=2.0,
|
| 90 |
+
step=0.1,
|
| 91 |
+
value=0.2,
|
| 92 |
+
interactive=True
|
| 93 |
+
)
|
| 94 |
+
top_p = gr.Slider(
|
| 95 |
+
label='Top-p',
|
| 96 |
+
minimum=0.05,
|
| 97 |
+
maximum=1.0,
|
| 98 |
+
step=0.05,
|
| 99 |
+
value=1.0,
|
| 100 |
+
interactive=True
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
def bot(history, temperature=0.2, top_p=1.0):
    """Stream the model's reply for the newest, still-unanswered user turn.

    Walks ``history`` from the end, collecting the user's text and image
    entries until it reaches an entry that already has a response. The
    collected text is then reconciled with the number of collected images
    (``<image>`` placeholders are prepended or trailing ones removed) before
    the conversation is sent to ``generate``.

    Args:
        history: Sequence of entries indexed as ``entry[0]`` (user content,
            a ``str`` or a ``tuple`` of images) and ``entry[1]`` (response).
            Presumably the Gradio chatbot value; confirm against the caller.
        temperature: Sampling temperature forwarded to ``generate``. The
            default equals the initial value of the 'Temperature' slider.
        top_p: Nucleus-sampling threshold forwarded to ``generate``. The
            default equals the initial value of the 'Top-p' slider.

    Yields:
        ``history`` itself, after each streamed chunk has been written to
        ``history[-1][1]``.

    Raises:
        gr.Error: If no text was entered for the pending turn.
    """
    print(history)
    cur_messages = {"text": "", "images": []}
    for message in history[::-1]:
        # An entry with a response marks the end of the pending user turn.
        if message[1]:
            break
        if isinstance(message[0], str):
            cur_messages["text"] = message[0] + " " + cur_messages["text"]
        elif isinstance(message[0], tuple):
            cur_messages["images"].extend(message[0])
    cur_messages["text"] = cur_messages["text"].strip()
    cur_messages["images"] = cur_messages["images"][::-1]
    if not cur_messages["text"]:
        raise gr.Error("Please enter a message")
    if cur_messages['text'].count("<image>") < len(cur_messages['images']):
        gr.Warning("The number of images uploaded is more than the number of <image> placeholders in the text. Will automatically prepend <image> to the text.")
        cur_messages['text'] = "<image> " * (len(cur_messages['images']) - cur_messages['text'].count("<image>")) + cur_messages['text']
        history[-1][0] = cur_messages["text"]
    if cur_messages['text'].count("<image>") > len(cur_messages['images']):
        gr.Warning("The number of images uploaded is less than the number of <image> placeholders in the text. Will automatically remove extra <image> placeholders from the text.")
        # Reverse, drop the first N matches, reverse back: this removes the
        # LAST N placeholders of the original text.
        cur_messages['text'] = cur_messages['text'][::-1].replace("<image>"[::-1], "", cur_messages['text'].count("<image>") - len(cur_messages['images']))[::-1]
        history[-1][0] = cur_messages["text"]

    chat_history = get_chat_history(history)
    chat_images = get_chat_images(history)
    # `temperature` and `top_p` are the numeric arguments of this call. They
    # must not be read from the enclosing scope: there the same names refer
    # to the slider components themselves, not to the values the user chose.
    generation_kwargs = {
        "max_new_tokens": 4096,
        "temperature": temperature,
        "top_p": top_p,
        "do_sample": True,
    }
    print(None, chat_images, chat_history, generation_kwargs)
    response = generate(None, chat_images, chat_history, **generation_kwargs)

    for _output in response:
        history[-1][1] = _output
        time.sleep(0.05)
        yield history


# The sliders are listed as inputs so their current values are passed to
# `bot` on every submit. NOTE(review): any other listener that triggers `bot`
# with only `chatbot` as input will run with the parameter defaults.
bot_msg = chat_msg.success(bot, [chatbot, temperature, top_p], chatbot, api_name="bot_response")
|
| 144 |
|
| 145 |
chatbot.like(print_like_dislike, None, None)
|
| 146 |
+
|
| 147 |
with gr.Row():
|
| 148 |
send_button = gr.Button("Send")
|
| 149 |
clear_button = gr.ClearButton([chatbot, chat_input])
|
| 150 |
+
|
| 151 |
send_button.click(
|
| 152 |
add_message, [chatbot, chat_input], [chatbot, chat_input]
|
| 153 |
).then(
|
|
|
|
| 156 |
|
| 157 |
gr.Examples(
|
| 158 |
examples=[
|
| 159 |
+
{
|
| 160 |
+
"text": "<image> <image> How many dices are there in image 1 and image 2 respectively?",
|
| 161 |
+
"files": ["./examples/image10.jpg", "./examples/image15.jpg"]
|
| 162 |
+
},
|
| 163 |
{
|
| 164 |
"text": "<image> <image> <image> Which image shows a different mood of character from the others?",
|
| 165 |
"files": ["./examples/image12.jpg", "./examples/image13.jpg", "./examples/image14.jpg"]
|
|
|
|
| 168 |
"text": "<image> <image> What's the difference between these two images? Please describe as much as you can.",
|
| 169 |
"files": ["./examples/image1.jpg", "./examples/image2.jpg"]
|
| 170 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
{
|
| 172 |
"text": "<image> <image> Which image shows an older dog?",
|
| 173 |
"files": ["./examples/image8.jpg", "./examples/image9.jpg"]
|