Mohammed Abdeldayem
committed
Update app.py
app.py CHANGED
@@ -3,7 +3,6 @@ from transformers import AutoTokenizer, VisionEncoderDecoderModel, AutoImageProc
 from PIL import Image
 from torchvision.transforms.functional import crop
 import gradio as gr
-import json
 import base64
 import io
 from huggingface_hub import hf_hub_download
@@ -101,20 +100,15 @@ def process_image(image):
             caption = tokenizer.decode(caption_ids[0], skip_special_tokens=True)
             captions.append(caption)

-        # Prepare the result for visualization
-        detection_results = []
+        # Prepare the result for visualization as a formatted string
+        detection_results = ""
         for i, (label, box, score, caption) in enumerate(zip(labels, boxes, scores, captions)):
-            detection_results.append({
-                "label": label,
-                "caption": caption,
-                "bounding_box": [float(coord) for coord in box],  # Convert to float
-                "confidence_score": float(score)  # Convert to float
-            })
+            detection_results += f"Object {i + 1}: {label} - Caption: {caption}\n"

         # Render image with bounding boxes
         result_image = results.render()[0]

-        # Return the image with detections and the caption
+        # Return the image with detections, formatted captions, and the whole image caption
         return result_image, detection_results, original_caption

     except Exception as e:
@@ -129,7 +123,7 @@ interface = gr.Interface(
     inputs=gr.Image(type="pil"),  # Input: Image upload
     outputs=[
         gr.Image(type="pil", label="Detected Objects"),  # Output 1: Image with bounding boxes
-        gr.JSON(label="Object Captions & Bounding Boxes"),  # Output 2: Detection results as JSON
+        gr.Textbox(label="Object Captions & Bounding Boxes", lines=10),  # Output 2: Formatted captions
         gr.Textbox(label="Whole Image Caption")  # Output 3: Caption for the whole image
     ],
     live=True
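For context, a minimal runnable sketch of the string formatting this commit switches to; the detection values below are hypothetical placeholders, since the real app derives labels, boxes, scores, and captions from its detection and captioning models:

# Hypothetical stand-in values; the real app computes these from its models.
labels = ["dog", "ball"]
boxes = [[10.0, 20.0, 110.0, 140.0], [150.0, 60.0, 210.0, 120.0]]
scores = [0.91, 0.78]
captions = ["a dog running", "a red ball"]

# New behavior: accumulate one human-readable line per detected object.
detection_results = ""
for i, (label, box, score, caption) in enumerate(zip(labels, boxes, scores, captions)):
    detection_results += f"Object {i + 1}: {label} - Caption: {caption}\n"

print(detection_results)
# Object 1: dog - Caption: a dog running
# Object 2: ball - Caption: a red ball

Note that the loop still unpacks box and score even though the new format string only uses label and caption, so the bounding-box detail promised by the Textbox label never actually appears in the output string.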
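And a sketch of how the three outputs wire into the Gradio interface after this change; process_image here is a stub returning canned values (an assumption for illustration), while the inputs and outputs mirror the diff:

import gradio as gr

def process_image(image):
    # Stub standing in for the app's real detection + captioning pipeline.
    return image, "Object 1: dog - Caption: a dog running\n", "a dog playing with a ball"

interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),  # Input: image upload
    outputs=[
        gr.Image(type="pil", label="Detected Objects"),  # Output 1: annotated image
        gr.Textbox(label="Object Captions & Bounding Boxes", lines=10),  # Output 2: per-object lines
        gr.Textbox(label="Whole Image Caption"),  # Output 3: whole-image caption
    ],
    live=True,  # re-run on every input change instead of waiting for a submit
)

if __name__ == "__main__":
    interface.launch()

With live=True, Gradio re-runs the function whenever the input image changes, which is why the second output needs to be a plain multi-line string a Textbox can redraw cheaply rather than a structured JSON payload.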