Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -51,6 +51,18 @@ def identify_and_save_blob(blob_path):
|
|
| 51 |
except Exception as e:
|
| 52 |
raise ValueError(f"An error occurred while processing the file: {e}")
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
@spaces.GPU
|
| 55 |
def model_inference(input_dict, history):
|
| 56 |
text = input_dict["text"]
|
|
@@ -95,9 +107,8 @@ def model_inference(input_dict, history):
|
|
| 95 |
# Apply chat template and process inputs
|
| 96 |
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 97 |
|
| 98 |
-
#
|
| 99 |
-
image_inputs
|
| 100 |
-
video_inputs = [path for path, media_type in zip(media_paths, media_types) if media_type == "video"]
|
| 101 |
|
| 102 |
# Ensure video_inputs is not empty
|
| 103 |
if not video_inputs:
|
|
|
|
| 51 |
except Exception as e:
|
| 52 |
raise ValueError(f"An error occurred while processing the file: {e}")
|
| 53 |
|
| 54 |
+
def process_vision_info(messages):
    """Collect image and video inputs from chat-style messages.

    Each message is expected to carry a ``"content"`` entry holding a list
    of part dicts. Parts with ``"type" == "image"`` have their ``"image"``
    value passed through ``load_image``; parts with ``"type" == "video"``
    contribute their ``"video"`` value unchanged. All other parts (for
    example text) are ignored.

    Args:
        messages: Iterable of message dicts.

    Returns:
        A ``(image_inputs, video_inputs)`` tuple of two lists, in the order
        the parts were encountered. Either list may be empty.
    """
    image_inputs = []
    video_inputs = []
    for message in messages:
        parts = message.get("content")
        # Content may be a plain string (text-only turn) or missing; only a
        # list of part dicts can carry media, so anything else is skipped
        # instead of being indexed character by character.
        if not isinstance(parts, (list, tuple)):
            continue
        for part in parts:
            if not isinstance(part, dict):
                continue
            # .get avoids a KeyError on parts that carry no "type" key.
            part_type = part.get("type")
            if part_type == "image":
                # NOTE(review): load_image is defined outside this block —
                # presumably the image-loading helper imported at file top;
                # confirm it accepts the values stored under "image".
                image_inputs.append(load_image(part["image"]))
            elif part_type == "video":
                video_inputs.append(part["video"])
    return image_inputs, video_inputs
|
| 65 |
+
|
| 66 |
@spaces.GPU
|
| 67 |
def model_inference(input_dict, history):
|
| 68 |
text = input_dict["text"]
|
|
|
|
| 107 |
# Apply chat template and process inputs
|
| 108 |
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
| 109 |
|
| 110 |
+
# Process vision inputs (images and videos)
|
| 111 |
+
image_inputs, video_inputs = process_vision_info(messages)
|
|
|
|
| 112 |
|
| 113 |
# Ensure video_inputs is not empty
|
| 114 |
if not video_inputs:
|