VideoScore

Runtime error

App Files Files Community

wenhuchen committed on Apr 15, 2024

Commit

bfc56dc

1 Parent(s): dfaecf8

update dem

Browse files

Files changed (1) hide show

app.py +67 -45

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import spaces
 import os
 import time
 from PIL import Image
 from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava, MLlavaForConditionalGeneration
 from typing import List
 processor = MLlavaProcessor.from_pretrained("TIGER-Lab/Mantis-llava-7b-v1.1")
@@ -54,45 +55,7 @@ def get_chat_images(history):
         if isinstance(message[0], tuple):
             images.extend(message[0])
     return images
-def bot(history):
-    print(history)
-    cur_messages = {"text": "", "images": []}
-    for message in history[::-1]:
-        if message[1]:
-            break
-        if isinstance(message[0], str):
-            cur_messages["text"] = message[0] + " " + cur_messages["text"]
-        elif isinstance(message[0], tuple):
-            cur_messages["images"].extend(message[0])
-    cur_messages["text"] = cur_messages["text"].strip()
-    cur_messages["images"] = cur_messages["images"][::-1]
-    if not cur_messages["text"]:
-        raise gr.Error("Please enter a message")
-    if cur_messages['text'].count("<image>") < len(cur_messages['images']):
-        gr.Warning("The number of images uploaded is more than the number of <image> placeholders in the text. Will automatically prepend <image> to the text.")
-        cur_messages['text'] = "<image> "* (len(cur_messages['images']) - cur_messages['text'].count("<image>")) + cur_messages['text']
-        history[-1][0] = cur_messages["text"]
-    if cur_messages['text'].count("<image>") > len(cur_messages['images']):
-        gr.Warning("The number of images uploaded is less than the number of <image> placeholders in the text. Will automatically remove extra <image> placeholders from the text.")
-        cur_messages['text'] = cur_messages['text'][::-1].replace("<image>"[::-1], "", cur_messages['text'].count("<image>") - len(cur_messages['images']))[::-1]
-        history[-1][0] = cur_messages["text"]
-    chat_history = get_chat_history(history)
-    chat_images = get_chat_images(history)
-    generation_kwargs = {
-        "max_new_tokens": 4096,
-        "temperature": 0.7,
-        "top_p": 1.0,
-        "do_sample": True,
-    }
-    print(None, chat_images, chat_history, generation_kwargs)
-    response = generate(None, chat_images, chat_history, **generation_kwargs)
-    for _output in response:
-        history[-1][1] = _output
-        time.sleep(0.05)
-        yield history
 def build_demo():
     with gr.Blocks() as demo:
@@ -118,14 +81,73 @@ def build_demo():
         chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload images. Please use <image> to indicate the position of uploaded images", show_label=True)
         chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
         bot_msg = chat_msg.success(bot, chatbot, chatbot, api_name="bot_response")
         chatbot.like(print_like_dislike, None, None)
         with gr.Row():
             send_button = gr.Button("Send")
             clear_button = gr.ClearButton([chatbot, chat_input])
         send_button.click(
             add_message, [chatbot, chat_input], [chatbot, chat_input]
         ).then(
@@ -134,6 +156,10 @@ def build_demo():
         gr.Examples(
             examples=[
                 {
                     "text": "<image> <image> <image> Which image shows a different mood of character from the others?",
                     "files": ["./examples/image12.jpg", "./examples/image13.jpg", "./examples/image14.jpg"]
@@ -142,10 +168,6 @@ def build_demo():
                     "text": "<image> <image> What's the difference between these two images? Please describe as much as you can.",
                     "files": ["./examples/image1.jpg", "./examples/image2.jpg"]
                 },
-                {
-                    "text": "<image> <image> How many dices are there in image 1 and image 2 respectively?",
-                    "files": ["./examples/image10.jpg", "./examples/image15.jpg"]
-                },
                 {
                     "text": "<image> <image> Which image shows an older dog?",
                     "files": ["./examples/image8.jpg", "./examples/image9.jpg"]

 import os
 import time
 from PIL import Image
+import functools
 from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava, MLlavaForConditionalGeneration
 from typing import List
 processor = MLlavaProcessor.from_pretrained("TIGER-Lab/Mantis-llava-7b-v1.1")
         if isinstance(message[0], tuple):
             images.extend(message[0])
     return images
 def build_demo():
     with gr.Blocks() as demo:
         chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload images. Please use <image> to indicate the position of uploaded images", show_label=True)
         chat_msg = chat_input.submit(add_message, [chatbot, chat_input], [chatbot, chat_input])
+        with gr.Accordion(label='Advanced options', open=False):
+            temperature = gr.Slider(
+                label='Temperature',
+                minimum=0.1,
+                maximum=2.0,
+                step=0.1,
+                value=0.2,
+                interactive=True
+            )
+            top_p = gr.Slider(
+                label='Top-p',
+                minimum=0.05,
+                maximum=1.0,
+                step=0.05,
+                value=1.0,
+                interactive=True
+            )
+        def bot(history):
+            print(history)
+            cur_messages = {"text": "", "images": []}
+            for message in history[::-1]:
+                if message[1]:
+                    break
+                if isinstance(message[0], str):
+                    cur_messages["text"] = message[0] + " " + cur_messages["text"]
+                elif isinstance(message[0], tuple):
+                    cur_messages["images"].extend(message[0])
+            cur_messages["text"] = cur_messages["text"].strip()
+            cur_messages["images"] = cur_messages["images"][::-1]
+            if not cur_messages["text"]:
+                raise gr.Error("Please enter a message")
+            if cur_messages['text'].count("<image>") < len(cur_messages['images']):
+                gr.Warning("The number of images uploaded is more than the number of <image> placeholders in the text. Will automatically prepend <image> to the text.")
+                cur_messages['text'] = "<image> "* (len(cur_messages['images']) - cur_messages['text'].count("<image>")) + cur_messages['text']
+                history[-1][0] = cur_messages["text"]
+            if cur_messages['text'].count("<image>") > len(cur_messages['images']):
+                gr.Warning("The number of images uploaded is less than the number of <image> placeholders in the text. Will automatically remove extra <image> placeholders from the text.")
+                cur_messages['text'] = cur_messages['text'][::-1].replace("<image>"[::-1], "", cur_messages['text'].count("<image>") - len(cur_messages['images']))[::-1]
+                history[-1][0] = cur_messages["text"]
+            chat_history = get_chat_history(history)
+            chat_images = get_chat_images(history)
+            generation_kwargs = {
+                "max_new_tokens": 4096,
+                "temperature": temperature,
+                "top_p": top_p,
+                "do_sample": True,
+            }
+            print(None, chat_images, chat_history, generation_kwargs)
+            response = generate(None, chat_images, chat_history, **generation_kwargs)
+            for _output in response:
+                history[-1][1] = _output
+                time.sleep(0.05)
+                yield history
         bot_msg = chat_msg.success(bot, chatbot, chatbot, api_name="bot_response")
         chatbot.like(print_like_dislike, None, None)
         with gr.Row():
             send_button = gr.Button("Send")
             clear_button = gr.ClearButton([chatbot, chat_input])
         send_button.click(
             add_message, [chatbot, chat_input], [chatbot, chat_input]
         ).then(
         gr.Examples(
             examples=[
+                {
+                    "text": "<image> <image> How many dices are there in image 1 and image 2 respectively?",
+                    "files": ["./examples/image10.jpg", "./examples/image15.jpg"]
+                },
                 {
                     "text": "<image> <image> <image> Which image shows a different mood of character from the others?",
                     "files": ["./examples/image12.jpg", "./examples/image13.jpg", "./examples/image14.jpg"]
                     "text": "<image> <image> What's the difference between these two images? Please describe as much as you can.",
                     "files": ["./examples/image1.jpg", "./examples/image2.jpg"]
                 },
                 {
                     "text": "<image> <image> Which image shows an older dog?",
                     "files": ["./examples/image8.jpg", "./examples/image9.jpg"]