maria355 committed on
Commit
b5afce3
·
verified ·
1 Parent(s): c646716

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +209 -0
app.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
import io
import time

import streamlit as st
import torch
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration
7
+
8
# Set page config: browser-tab title/icon, wide layout, sidebar open on load.
st.set_page_config(
    page_title="🚀 BLIP-2 Caption Generator",
    page_icon="🚀",
    layout="wide",
    initial_sidebar_state="expanded",
)
15
+
16
# Custom CSS for better styling. The three classes defined here are used by
# the raw-HTML snippets rendered in main(): the gradient header banner
# (.main-header), the dashed upload placeholder (.upload-section), and the
# caption result box (.caption-box).
st.markdown("""
<style>
    .main-header {
        text-align: center;
        padding: 2rem 0;
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 10px;
        margin-bottom: 2rem;
    }
    .upload-section {
        border: 2px dashed #ccc;
        border-radius: 10px;
        padding: 2rem;
        text-align: center;
        margin: 1rem 0;
    }
    .caption-box {
        background-color: #f0f2f6;
        border-left: 4px solid #667eea;
        padding: 1rem;
        border-radius: 5px;
        margin: 1rem 0;
    }
</style>
""", unsafe_allow_html=True)
43
+
44
@st.cache_resource
def _load_model_cached():
    """Load the BLIP-2 processor and model; the result is cached by Streamlit.

    This helper deliberately lets exceptions propagate: only successful
    return values are stored by ``st.cache_resource``, so a failed load is
    retried on the next rerun instead of being cached as a permanent failure.

    Returns:
        A ``(processor, model, device)`` tuple, where ``device`` is the
        string ``"cuda"`` or ``"cpu"``.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Use the smaller BLIP-2 model for better performance on Hugging Face Spaces
    model_name = "Salesforce/blip2-opt-2.7b"

    processor = Blip2Processor.from_pretrained(model_name)
    model = Blip2ForConditionalGeneration.from_pretrained(
        model_name,
        # Half precision on GPU; full precision on CPU.
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto" if device == "cuda" else None,
    )

    # With device_map="auto" the weights are already placed; only the CPU
    # path needs an explicit move.
    if device == "cpu":
        model = model.to(device)

    return processor, model, device


def load_model():
    """Load and cache the BLIP-2 model and processor.

    Returns:
        ``(processor, model, device)`` on success. On any loading error the
        error is shown with ``st.error`` and ``(None, None, None)`` is
        returned; that failure tuple is NOT cached, so a later rerun can
        retry the load.
    """
    try:
        return _load_model_cached()
    except Exception as e:  # UI boundary: report the failure, do not crash the app
        st.error(f"Error loading model: {str(e)}")
        return None, None, None


# Keep the cached-function handle available on the public name so that
# `load_model.clear()` still resets the cache.
load_model.clear = _load_model_cached.clear
67
+
68
def generate_caption(image, processor, model, device, prompt=""):
    """Generate a caption for the uploaded image.

    Args:
        image: The image to caption, passed straight to ``processor``.
        processor: Callable preprocessor that also provides ``decode``.
        model: Model exposing ``generate`` and a ``dtype`` attribute.
        device: Device the inputs are moved to (e.g. ``"cuda"`` or ``"cpu"``).
        prompt: Optional text prompt; when empty, the image is captioned
            without any text conditioning.

    Returns:
        The decoded caption with surrounding whitespace removed, or ``None``
        if anything fails (the error is shown with ``st.error``).
    """
    try:
        # Prepare inputs; only pass `text` when a prompt was actually given.
        if prompt:
            inputs = processor(image, text=prompt, return_tensors="pt")
        else:
            inputs = processor(image, return_tensors="pt")

        # Move to the target device AND cast to the model's dtype. Without
        # the cast, a float16 model on GPU receives float32 pixel values and
        # generation fails with a dtype mismatch.
        inputs = inputs.to(device, model.dtype)

        # Generate caption (inference only, so no gradients are tracked).
        with torch.no_grad():
            generated_ids = model.generate(
                **inputs,
                max_length=50,
                num_beams=5,
                temperature=0.7,
                do_sample=True,
                early_stopping=True,
            )

        # Decode the generated caption; strip stray leading/trailing
        # whitespace so a whitespace-only result is treated as empty.
        caption = processor.decode(generated_ids[0], skip_special_tokens=True)
        return caption.strip()

    except Exception as e:  # UI boundary: report the failure, do not crash the app
        st.error(f"Error generating caption: {str(e)}")
        return None
95
+
96
def main():
    """Render the app: header, settings sidebar, upload column, caption column.

    The left column accepts an image upload and shows it with basic metadata.
    The right column loads the model once a readable image is available and
    generates a caption when the button is pressed, optionally conditioned on
    the custom prompt typed in the sidebar.
    """
    # Header
    st.markdown("""
    <div class="main-header">
        <h1>🚀 BLIP-2 Caption Generator</h1>
        <p>Upload an image and get AI-generated captions instantly!</p>
    </div>
    """, unsafe_allow_html=True)

    # Sidebar
    with st.sidebar:
        st.header("🔧 Settings")
        st.markdown("### Model Information")
        st.info("Using **BLIP-2** (Salesforce/blip2-opt-2.7b)")

        # Custom prompt option
        custom_prompt = st.text_input(
            "Custom Prompt (Optional):",
            placeholder="e.g., 'Question: What is in this image? Answer:'"
        )

        st.markdown("### About")
        st.markdown("""
        This app uses the **BLIP-2** model to generate natural language descriptions of images.

        **Features:**
        - 🖼️ Upload any image format
        - 🤖 AI-powered captioning
        - ⚡ Fast inference
        - 🎯 Optional custom prompts
        """)

    # Main content
    col1, col2 = st.columns([1, 1])

    # Stays None until an upload has been opened and decoded successfully.
    image = None

    with col1:
        st.markdown("### 📤 Upload Image")

        # File uploader
        uploaded_file = st.file_uploader(
            "Choose an image file",
            type=["jpg", "jpeg", "png", "bmp", "tiff"],
            help="Upload an image to generate a caption"
        )

        if uploaded_file is not None:
            try:
                image = Image.open(uploaded_file)
                # Force a full decode now so corrupt or truncated files are
                # reported here rather than crashing later during rendering.
                image.load()
            except (OSError, ValueError) as exc:
                image = None
                st.error(f"Could not read the uploaded image: {exc}")
            else:
                # Display uploaded image
                st.image(image, caption="Uploaded Image", use_column_width=True)

                # Image info
                st.markdown(f"""
                **Image Info:**
                - Size: {image.size[0]} x {image.size[1]} pixels
                - Format: {image.format}
                - Mode: {image.mode}
                """)

    with col2:
        st.markdown("### 🔮 Generated Caption")

        if uploaded_file is None:
            st.markdown("""
            <div class="upload-section">
                <h3>👆 Upload an image to get started</h3>
                <p>Supported formats: JPG, PNG, BMP, TIFF</p>
            </div>
            """, unsafe_allow_html=True)
        elif image is not None:
            # Load model
            with st.spinner("Loading BLIP-2 model..."):
                processor, model, device = load_model()

            if processor is not None and model is not None:
                # Generate caption button
                if st.button("🎯 Generate Caption", type="primary"):
                    with st.spinner("Generating caption..."):
                        start_time = time.time()

                        # Generate caption
                        caption = generate_caption(
                            image, processor, model, device, custom_prompt
                        )

                        end_time = time.time()

                    if caption:
                        # Display caption. The text is model output (and may
                        # echo the user's prompt), so escape it before
                        # embedding it in raw HTML.
                        st.markdown(f"""
                        <div class="caption-box">
                            <h4>📝 Caption:</h4>
                            <p style="font-size: 16px; font-weight: 500;">{html.escape(caption)}</p>
                        </div>
                        """, unsafe_allow_html=True)

                        # Performance info
                        st.success(f"Caption generated in {end_time - start_time:.2f} seconds")

                        # Copy to clipboard button (st.code renders plain
                        # text, so the raw caption is safe here).
                        st.code(caption, language=None)
            else:
                st.error("Failed to load the model. Please try refreshing the page.")

    # Footer
    st.markdown("---")
    st.markdown("""
    <div style="text-align: center; color: #666;">
        <p>Built using <strong>Streamlit</strong> and <strong>Hugging Face Transformers</strong></p>
        <p>Powered by <strong>BLIP-2</strong> - Bootstrapping Language-Image Pre-training</p>
    </div>
    """, unsafe_allow_html=True)
207
+
208
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()