Spaces:

lixin4ever
/

VideoLLaMA2

Running on Zero

App Files Files Community

ClownRat committed on Jun 14, 2024

Commit

c17f8ec

1 Parent(s): 93426a3

Update demo style.

Browse files

Files changed (1) hide show

app.py +6 -8

app.py CHANGED Viewed

@@ -1,12 +1,9 @@
 import spaces
 import os
-import shutil
 import torch
-import tempfile
 import gradio as gr
-from PIL import Image
 import sys
 sys.path.append('./')
@@ -30,9 +27,9 @@ title_markdown = ("""
 <div align="center">
     <div style="display:flex; gap: 0.25rem;" align="center">
-        <a href='VideoLLaMA 2: Advancing Spatial-Temporal Modeling and Audio Understanding in Video-LLMs'><img src='https://img.shields.io/badge/Github-Code-blue'></a>
         <a href="https://arxiv.org/pdf/2406.07476.pdf"><img src="https://img.shields.io/badge/Arxiv-2406.07476-red"></a>
-        <a href='https://github.com/DAMO-NLP-SG/VideoLLaMA2/stargazers'><img src='https://img.shields.io/github/stars/DAMO-NLP-SG/VideoLLaMA2.svg?style=social'></a>
     </div>
 </div>
 """)
@@ -158,8 +155,9 @@ def generate(image, video, first_run, state, state_, textbox_in, dtype=torch.flo
     if not os.path.exists(image) and os.path.exists(video):
         text_en_in = text_en_in.replace(DEFAULT_MMODAL_TOKEN['VIDEO'], '').strip()
         text_en_in = DEFAULT_MMODAL_TOKEN['VIDEO'] + '\n' + text_en_in
-    # if os.path.exists(image) and os.path.exists(video):
-    #   pass
     text_en_out, state_ = handler.generate(tensor, modals, text_en_in, first_run=first_run, state=state_)
     state_.messages[-1] = (state_.roles[1], text_en_out)
@@ -211,7 +209,7 @@ handler = Chat(model_path, conv_mode=conv_mode, load_8bit=False, load_4bit=True)
 textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
-with gr.Blocks(title='VideoLLaMA 2 🔥🚀🔥', theme=gr.themes.Default(), css=block_css) as demo:
     gr.Markdown(title_markdown)
     state = gr.State()
     state_ = gr.State()

 import spaces
 import os
 import torch
 import gradio as gr
 import sys
 sys.path.append('./')
 <div align="center">
     <div style="display:flex; gap: 0.25rem;" align="center">
+        <a href="https://github.com/DAMO-NLP-SG/VideoLLaMA2"><img src='https://img.shields.io/badge/Github-VideoLLaMA2-9C276A'></a>
         <a href="https://arxiv.org/pdf/2406.07476.pdf"><img src="https://img.shields.io/badge/Arxiv-2406.07476-red"></a>
+        <a href="https://github.com/DAMO-NLP-SG/VideoLLaMA2/stargazers"><img src="https://img.shields.io/github/stars/DAMO-NLP-SG/VideoLLaMA2.svg?style=social"></a>
     </div>
 </div>
 """)
     if not os.path.exists(image) and os.path.exists(video):
         text_en_in = text_en_in.replace(DEFAULT_MMODAL_TOKEN['VIDEO'], '').strip()
         text_en_in = DEFAULT_MMODAL_TOKEN['VIDEO'] + '\n' + text_en_in
+    if os.path.exists(image) and os.path.exists(video):
+        text_en_in = text_en_in.replace(DEFAULT_MMODAL_TOKEN['VIDEO'], '').strip()
+        text_en_in = DEFAULT_MMODAL_TOKEN['VIDEO'] + '\n' + text_en_in
     text_en_out, state_ = handler.generate(tensor, modals, text_en_in, first_run=first_run, state=state_)
     state_.messages[-1] = (state_.roles[1], text_en_out)
 textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
+with gr.Blocks(title='VideoLLaMA 2 🔥🚀🔥', theme=gr.themes.Soft(primary_hue=gr.themes.colors.violet), css=block_css) as demo:
     gr.Markdown(title_markdown)
     state = gr.State()
     state_ = gr.State()