Upload app.py
Browse files
app.py
CHANGED
|
@@ -1,31 +1,41 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
MULTIMODAL CAPABILITIES:
|
| 7 |
- Video Analysis: Visual content, scenes, objects, actions, composition
|
| 8 |
- Audio Analysis: Speech, music, sound effects, ambient audio, transcription
|
| 9 |
- Combined Analysis: Synchronized audiovisual understanding and insights
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
- Calculated position embeddings (e.g., 1080 positions)
|
| 17 |
-
- Attention mask dimensions (e.g., 1044 valid positions)
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
3. Enhanced Error Messages: Provides actionable troubleshooting advice
|
| 23 |
-
4. Video Diagnostics: Logs resolution and format information
|
| 24 |
-
5. Audio Extraction: Librosa-based audio processing with error handling
|
| 25 |
|
| 26 |
VIDEO COMPATIBILITY:
|
| 27 |
- Preserves original video resolution and quality
|
| 28 |
-
- Format: MP4, AVI, MOV, WebM supported
|
|
|
|
| 29 |
- Duration: Any length (frames are sampled automatically)
|
| 30 |
- Audio: Automatically extracted and analyzed when available
|
| 31 |
"""
|
|
@@ -195,14 +205,106 @@ def load_model():
|
|
| 195 |
print(f"❌ Error loading model: {e}")
|
| 196 |
raise e
|
| 197 |
|
| 198 |
-
def
|
| 199 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
if video_file is None:
|
| 202 |
return "❌ Please upload a video file"
|
| 203 |
|
| 204 |
-
|
| 205 |
-
|
| 206 |
|
| 207 |
try:
|
| 208 |
# Load model
|
|
@@ -210,7 +312,7 @@ def analyze_video(video_file, prompt, max_frames):
|
|
| 210 |
model, tokenizer = load_model()
|
| 211 |
|
| 212 |
# Process video
|
| 213 |
-
print(f"Processing
|
| 214 |
|
| 215 |
# Add video diagnostics to help identify potential issues
|
| 216 |
try:
|
|
@@ -221,10 +323,11 @@ def analyze_video(video_file, prompt, max_frames):
|
|
| 221 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 222 |
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 223 |
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 224 |
-
|
|
|
|
| 225 |
cap.release()
|
| 226 |
except:
|
| 227 |
-
print("📹
|
| 228 |
|
| 229 |
# Extract video frames
|
| 230 |
frames = encode_video(video_file, max_num_frames=max_frames)
|
|
@@ -239,16 +342,206 @@ def analyze_video(video_file, prompt, max_frames):
|
|
| 239 |
|
| 240 |
# Prepare multimodal content
|
| 241 |
content = frames.copy() # Start with video frames
|
|
|
|
| 242 |
|
| 243 |
-
# Add audio description to prompt if audio was found
|
| 244 |
if audio_data is not None:
|
| 245 |
-
enhanced_prompt = f"{prompt}\n\nPlease also analyze the audio content including any speech, music, sound effects, or ambient sounds in the video."
|
| 246 |
print(f"🎵 Audio analysis enabled - {len(audio_data)/sample_rate:.1f}s of audio")
|
| 247 |
else:
|
| 248 |
-
|
| 249 |
-
print("🔇 Video analysis only - no audio content")
|
| 250 |
|
| 251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
|
| 253 |
# Prepare messages exactly as in sample code
|
| 254 |
msgs = [
|
|
@@ -260,7 +553,7 @@ def analyze_video(video_file, prompt, max_frames):
|
|
| 260 |
params["use_image_id"] = False
|
| 261 |
params["max_slice_nums"] = 1 # Reduced for Spaces memory limits
|
| 262 |
|
| 263 |
-
print("🧠 Analyzing
|
| 264 |
|
| 265 |
# Clear GPU cache before inference
|
| 266 |
if torch.cuda.is_available():
|
|
@@ -307,12 +600,9 @@ def analyze_video(video_file, prompt, max_frames):
|
|
| 307 |
frames_reduced = encode_video(video_file, max_num_frames=16)
|
| 308 |
|
| 309 |
if frames_reduced:
|
| 310 |
-
# Prepare reduced content
|
| 311 |
content_reduced = frames_reduced.copy()
|
| 312 |
-
|
| 313 |
-
content_reduced.append(f"{prompt}\n\nPlease analyze both video and audio content (audio: {len(audio_data)/sample_rate:.1f}s)")
|
| 314 |
-
else:
|
| 315 |
-
content_reduced.append(f"{prompt}\n\nVideo-only analysis (no audio detected)")
|
| 316 |
|
| 317 |
msgs_reduced = [
|
| 318 |
{'role': 'user', 'content': content_reduced},
|
|
@@ -336,21 +626,22 @@ def analyze_video(video_file, prompt, max_frames):
|
|
| 336 |
except Exception as final_error:
|
| 337 |
print(f"❌ All fallback strategies failed: {final_error}")
|
| 338 |
|
| 339 |
-
# Provide helpful error message
|
| 340 |
error_details = f"""
|
| 341 |
-
Shape mismatch error detected. This can happen due to:
|
| 342 |
1. Unusual video resolution/aspect ratio
|
| 343 |
-
2. Video compression artifacts
|
| 344 |
3. Frame dimension inconsistencies
|
| 345 |
|
| 346 |
-
Suggested solutions:
|
| 347 |
-
-
|
| 348 |
-
-
|
| 349 |
-
- Convert
|
|
|
|
| 350 |
|
| 351 |
Technical details: {str(inference_error)}
|
| 352 |
"""
|
| 353 |
-
return f"❌
|
| 354 |
|
| 355 |
# Try to clear cache and retry once for other errors
|
| 356 |
if torch.cuda.is_available():
|
|
@@ -362,27 +653,29 @@ Technical details: {str(inference_error)}
|
|
| 362 |
# Check which attention implementation was actually used
|
| 363 |
attention_type = "Flash Attention 2 (Optimized)" if hasattr(model.config, 'attn_implementation') and model.config.attn_implementation == 'flash_attention_2' else "SDPA (Optimized)"
|
| 364 |
|
| 365 |
-
# Prepare analysis type info
|
| 366 |
if audio_data is not None:
|
| 367 |
-
analysis_type = f"
|
| 368 |
media_info = f"**Frames Analyzed:** {len(frames)} \n**Audio Duration:** {len(audio_data)/sample_rate:.1f} seconds \n**Sample Rate:** {sample_rate} Hz"
|
| 369 |
else:
|
| 370 |
-
analysis_type = "
|
| 371 |
-
media_info = f"**Frames Analyzed:** {len(frames)} \n**Audio:** Not detected
|
| 372 |
|
| 373 |
-
result = f"""##
|
| 374 |
|
| 375 |
**Processing Time:** {processing_time:.2f} seconds
|
| 376 |
{media_info}
|
| 377 |
-
**Model:** MiniCPM-o 2.6
|
| 378 |
**Attention:** {attention_type}
|
| 379 |
**Analysis Type:** {analysis_type}
|
| 380 |
|
| 381 |
-
|
|
|
|
|
|
|
| 382 |
{answer}
|
| 383 |
|
| 384 |
---
|
| 385 |
-
*Powered by MiniCPM-o 2.6
|
| 386 |
"""
|
| 387 |
|
| 388 |
return result
|
|
@@ -416,14 +709,14 @@ def get_example_prompts():
|
|
| 416 |
|
| 417 |
# Create Gradio interface
|
| 418 |
def create_interface():
|
| 419 |
-
"""Create the
|
| 420 |
|
| 421 |
with gr.Blocks(
|
| 422 |
-
title="MiniCPM-o 2.6
|
| 423 |
theme=gr.themes.Soft(),
|
| 424 |
css="""
|
| 425 |
.gradio-container {
|
| 426 |
-
max-width:
|
| 427 |
}
|
| 428 |
.example-prompt {
|
| 429 |
cursor: pointer;
|
|
@@ -436,104 +729,139 @@ def create_interface():
|
|
| 436 |
.example-prompt:hover {
|
| 437 |
background: #e0e0e0;
|
| 438 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 439 |
"""
|
| 440 |
) as demo:
|
| 441 |
|
| 442 |
gr.Markdown("""
|
| 443 |
-
#
|
|
|
|
| 444 |
|
| 445 |
-
|
| 446 |
|
| 447 |
-
|
| 448 |
-
-
|
| 449 |
-
-
|
| 450 |
-
-
|
| 451 |
-
-
|
| 452 |
-
-
|
| 453 |
-
-
|
| 454 |
-
- 🔧 **Robust error handling** with automatic fallback strategies
|
| 455 |
|
| 456 |
-
|
| 457 |
-
**Analysis includes:** Visual content + Audio content + Speech transcription
|
| 458 |
-
**Original quality preserved** - no resizing or compression
|
| 459 |
|
| 460 |
-
|
| 461 |
""")
|
| 462 |
|
| 463 |
with gr.Row():
|
| 464 |
-
with gr.Column(scale=
|
| 465 |
-
# Video input
|
| 466 |
video_input = gr.Video(
|
| 467 |
-
label="
|
| 468 |
-
elem_id="video_input"
|
|
|
|
| 469 |
)
|
| 470 |
|
| 471 |
-
#
|
| 472 |
-
prompt_input = gr.Textbox(
|
| 473 |
-
label="💬 Analysis Prompt",
|
| 474 |
-
placeholder="Describe this video in detail...",
|
| 475 |
-
value="Describe this video in detail",
|
| 476 |
-
lines=3
|
| 477 |
-
)
|
| 478 |
-
|
| 479 |
-
# Max frames slider
|
| 480 |
-
max_frames_slider = gr.Slider(
|
| 481 |
-
minimum=8,
|
| 482 |
-
maximum=64,
|
| 483 |
-
value=32,
|
| 484 |
-
step=8,
|
| 485 |
-
label="🎞️ Max Frames to Analyze",
|
| 486 |
-
info="More frames = more detail but slower processing"
|
| 487 |
-
)
|
| 488 |
-
|
| 489 |
-
# Analyze button
|
| 490 |
analyze_btn = gr.Button(
|
| 491 |
-
"🚀 Analyze
|
| 492 |
variant="primary",
|
| 493 |
-
size="lg"
|
|
|
|
| 494 |
)
|
| 495 |
|
| 496 |
-
#
|
| 497 |
-
gr.
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
|
| 525 |
-
with gr.Column(scale=
|
| 526 |
-
# Results output
|
| 527 |
output_text = gr.Markdown(
|
| 528 |
label="📊 Analysis Results",
|
| 529 |
-
value="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
elem_id="output"
|
| 531 |
)
|
| 532 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 533 |
# Event handlers
|
| 534 |
analyze_btn.click(
|
| 535 |
-
fn=
|
| 536 |
-
inputs=[video_input,
|
| 537 |
outputs=output_text,
|
| 538 |
show_progress=True
|
| 539 |
)
|
|
@@ -541,15 +869,17 @@ def create_interface():
|
|
| 541 |
# Footer
|
| 542 |
gr.Markdown("""
|
| 543 |
---
|
| 544 |
-
### ℹ️ About
|
| 545 |
-
This
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
|
| 547 |
-
|
| 548 |
-
- **Capabilities:** Video analysis + Audio processing + Speech transcription
|
| 549 |
-
- **Audio Processing:** Powered by librosa for high-quality audio extraction
|
| 550 |
-
- **GPU:** Optimized for Hugging Face Spaces with SDPA/Flash Attention
|
| 551 |
|
| 552 |
-
|
| 553 |
""")
|
| 554 |
|
| 555 |
return demo
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
+
Mobile Creative Ad Analyzer - Powered by MiniCPM-o 2.6
|
| 4 |
+
Professional mobile advertising creative analysis with comprehensive 12-point evaluation framework
|
| 5 |
+
|
| 6 |
+
COMPREHENSIVE ANALYSIS FRAMEWORK:
|
| 7 |
+
1. Hook Analysis (0-5 second critical window)
|
| 8 |
+
2. Opening Sequence & First Impression
|
| 9 |
+
3. Audiovisual Synchronization
|
| 10 |
+
4. Text & UI Element Analysis
|
| 11 |
+
5. Mobile-First Design Optimization
|
| 12 |
+
6. Target Audience & Demographic Appeal
|
| 13 |
+
7. Game/Product Appeal & Positioning
|
| 14 |
+
8. Conversion & Retention Elements
|
| 15 |
+
9. Creative Production Quality
|
| 16 |
+
10. Performance Optimization Recommendations
|
| 17 |
+
11. Platform-Specific Analysis (iOS/Android/Social)
|
| 18 |
+
12. Competitive & Market Context
|
| 19 |
|
| 20 |
MULTIMODAL CAPABILITIES:
|
| 21 |
- Video Analysis: Visual content, scenes, objects, actions, composition
|
| 22 |
- Audio Analysis: Speech, music, sound effects, ambient audio, transcription
|
| 23 |
- Combined Analysis: Synchronized audiovisual understanding and insights
|
| 24 |
|
| 25 |
+
MOBILE AD FOCUS:
|
| 26 |
+
- Optimized for mobile game ads, app install campaigns, social media creatives
|
| 27 |
+
- Hook effectiveness assessment for social feeds (Instagram, TikTok, Facebook)
|
| 28 |
+
- Mobile-first design evaluation and thumb-stopping power analysis
|
| 29 |
+
- Conversion optimization and performance improvement recommendations
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
SHAPE MISMATCH ERROR HANDLING:
|
| 32 |
+
Robust handling for the common shape mismatch error with fallback strategies
|
| 33 |
+
and specialized error messages for mobile creative formats.
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
VIDEO COMPATIBILITY:
|
| 36 |
- Preserves original video resolution and quality
|
| 37 |
+
- Format: MP4, AVI, MOV, WebM supported
|
| 38 |
+
- Optimized for mobile creative formats (9:16, 16:9, 1:1)
|
| 39 |
- Duration: Any length (frames are sampled automatically)
|
| 40 |
- Audio: Automatically extracted and analyzed when available
|
| 41 |
"""
|
|
|
|
| 205 |
print(f"❌ Error loading model: {e}")
|
| 206 |
raise e
|
| 207 |
|
| 208 |
+
def get_comprehensive_mobile_ad_prompt():
    """Build the 12-section mobile creative ad analysis prompt.

    The prompt is assembled from a title, an introductory instruction,
    twelve numbered sections (each a bold heading followed by five bullet
    points) and a closing instruction. Sections are separated by a single
    blank line.

    Returns:
        str: The full prompt text sent to the model alongside the frames.
    """
    title = "🎮 COMPREHENSIVE MOBILE CREATIVE AD ANALYSIS"
    intro = (
        "Please provide a detailed analysis of this mobile advertising creative, "
        "examining both visual and audio elements across the following dimensions:"
    )
    closing = (
        "Please provide specific, actionable insights for each section with "
        "concrete examples from the creative. Include both strengths to leverage "
        "and weaknesses to address, with prioritized recommendations for maximum "
        "impact improvement."
    )

    # (heading, bullet points) in presentation order; numbering is derived
    # from the position in this tuple.
    sections = (
        (
            "HOOK ANALYSIS (0-5 Seconds - Critical Window)",
            (
                "Identify the specific hook mechanism used in the opening moments",
                "Analyze attention-grabbing visual elements (motion, colors, characters, UI elements)",
                "Evaluate audio hooks (music intro, sound effects, voiceover opening, silence/contrast)",
                "Assess scroll-stopping power and thumb-stopping effectiveness",
                "Rate the hook strength for mobile social feeds (Instagram, TikTok, Facebook)",
            ),
        ),
        (
            "OPENING SEQUENCE & FIRST IMPRESSION",
            (
                "Analyze the first-play experience and immediate visual impact",
                "Examine character introductions, gameplay preview, or problem presentation",
                "Evaluate motion graphics, transitions, and visual flow",
                "Assess brand logo placement and timing",
                "Review pacing and information hierarchy in opening seconds",
            ),
        ),
        (
            "AUDIOVISUAL SYNCHRONIZATION",
            (
                "Analyze how visuals and audio work together to create impact",
                "Evaluate music style, tempo, and genre appropriateness for target audience",
                "Assess sound effects quality and timing with visual cues",
                "Review voiceover clarity, tone, and message delivery",
                "Examine audio branding and brand voice consistency",
            ),
        ),
        (
            "TEXT & UI ELEMENT ANALYSIS",
            (
                "Evaluate all written text elements (headlines, UI text, CTAs, game titles)",
                "Assess readability and legibility on mobile screens",
                "Analyze urgency triggers, emotional appeals, and persuasion techniques",
                "Review call-to-action button design, placement, and messaging",
                "Examine subtitle usage and accessibility considerations",
            ),
        ),
        (
            "MOBILE-FIRST DESIGN OPTIMIZATION",
            (
                "Assess effectiveness on small screens (5-7 inch displays)",
                "Evaluate thumb accessibility and touch target sizing",
                "Analyze social media feed optimization (square, vertical, horizontal formats)",
                "Review sound-off viewing compatibility and visual storytelling clarity",
                "Assess loading speed implications and file size considerations",
            ),
        ),
        (
            "TARGET AUDIENCE & DEMOGRAPHIC APPEAL",
            (
                "Identify primary and secondary target demographics",
                "Analyze age group appeal through visual style, music, and messaging",
                "Evaluate gender targeting through character design and content themes",
                "Assess cultural relevance and localization effectiveness",
                "Review psychographic targeting (interests, behaviors, values)",
            ),
        ),
        (
            "GAME/PRODUCT APPEAL & POSITIONING",
            (
                "Identify genre and gameplay mechanics showcased",
                "Analyze competitive advantages highlighted in the creative",
                "Evaluate product demonstration and feature communication",
                "Assess value proposition clarity and uniqueness",
                "Review progression systems, rewards, or benefits shown",
            ),
        ),
        (
            "CONVERSION & RETENTION ELEMENTS",
            (
                "Identify download triggers and install prompts",
                "Analyze gameplay teasers and engagement hooks",
                "Evaluate social proof elements (ratings, download numbers, testimonials)",
                "Assess limited-time offers, bonuses, or incentive messaging",
                "Review onboarding hints and tutorial elements",
            ),
        ),
        (
            "CREATIVE PRODUCTION QUALITY",
            (
                "Evaluate overall production value and professional polish",
                "Analyze animation quality, visual effects, and technical execution",
                "Assess audio production quality and mixing",
                "Review brand consistency across all creative elements",
                "Evaluate creative concept originality and memorability",
            ),
        ),
        (
            "PERFORMANCE OPTIMIZATION RECOMMENDATIONS",
            (
                "Provide specific suggestions for improving the first 5-second hook",
                "Recommend audiovisual enhancements for better engagement",
                "Suggest mobile optimization improvements",
                "Propose A/B testing opportunities for key elements",
                "Offer conversion rate optimization strategies",
            ),
        ),
        (
            "PLATFORM-SPECIFIC ANALYSIS",
            (
                "Evaluate effectiveness for iOS vs Android audiences",
                "Analyze App Store vs Google Play creative requirements compliance",
                "Assess social platform compatibility (Instagram Stories, TikTok, Facebook Feed)",
                "Review programmatic advertising network optimization",
                "Consider influencer/creator content adaptation potential",
            ),
        ),
        (
            "COMPETITIVE & MARKET CONTEXT",
            (
                "Compare creative approach to category standards and competitors",
                "Identify unique differentiation points and market positioning",
                "Assess trend alignment and contemporary relevance",
                "Evaluate seasonal or cultural timing appropriateness",
                "Suggest competitive advantages to emphasize",
            ),
        ),
    )

    rendered_sections = []
    for number, (heading, bullets) in enumerate(sections, start=1):
        section_lines = [f"**{number}. {heading}**"]
        section_lines.extend(f"- {bullet}" for bullet in bullets)
        rendered_sections.append("\n".join(section_lines))

    # Title, intro, each section and the closing are separated by one blank line.
    return "\n\n".join([title, intro, *rendered_sections, closing])
|
| 299 |
+
|
| 300 |
+
def analyze_video(video_file, max_frames=32):
|
| 301 |
+
"""Analyze mobile creative ad using comprehensive analysis prompt"""
|
| 302 |
|
| 303 |
if video_file is None:
|
| 304 |
return "❌ Please upload a video file"
|
| 305 |
|
| 306 |
+
# Use the comprehensive mobile ad analysis prompt
|
| 307 |
+
prompt = get_comprehensive_mobile_ad_prompt()
|
| 308 |
|
| 309 |
try:
|
| 310 |
# Load model
|
|
|
|
| 312 |
model, tokenizer = load_model()
|
| 313 |
|
| 314 |
# Process video
|
| 315 |
+
print(f"Processing mobile creative ad: {video_file}")
|
| 316 |
|
| 317 |
# Add video diagnostics to help identify potential issues
|
| 318 |
try:
|
|
|
|
| 323 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 324 |
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 325 |
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 326 |
+
duration = frame_count / fps if fps > 0 else 0
|
| 327 |
+
print(f"📹 Creative specs: {width}x{height}, {fps:.1f}fps, {duration:.1f}s duration")
|
| 328 |
cap.release()
|
| 329 |
except:
|
| 330 |
+
print("📹 Creative info: Could not read video metadata")
|
| 331 |
|
| 332 |
# Extract video frames
|
| 333 |
frames = encode_video(video_file, max_num_frames=max_frames)
|
|
|
|
| 342 |
|
| 343 |
# Prepare multimodal content
|
| 344 |
content = frames.copy() # Start with video frames
|
| 345 |
+
content.append(prompt) # Add the comprehensive analysis prompt
|
| 346 |
|
|
|
|
| 347 |
if audio_data is not None:
|
|
|
|
| 348 |
print(f"🎵 Audio analysis enabled - {len(audio_data)/sample_rate:.1f}s of audio")
|
| 349 |
else:
|
| 350 |
+
print("🔇 Visual analysis only - no audio content detected")
|
|
|
|
| 351 |
|
| 352 |
+
# Prepare messages exactly as in sample code
|
| 353 |
+
msgs = [
|
| 354 |
+
{'role': 'user', 'content': content},
|
| 355 |
+
]
|
| 356 |
+
|
| 357 |
+
# Set decode params for video exactly as in sample code
|
| 358 |
+
params = {}
|
| 359 |
+
params["use_image_id"] = False
|
| 360 |
+
params["max_slice_nums"] = 1 # Reduced for Spaces memory limits
|
| 361 |
+
|
| 362 |
+
print("🧠 Analyzing mobile creative ad with MiniCPM-o 2.6...")
|
| 363 |
+
|
| 364 |
+
# Clear GPU cache before inference
|
| 365 |
+
if torch.cuda.is_available():
|
| 366 |
+
torch.cuda.empty_cache()
|
| 367 |
+
|
| 368 |
+
start_time = time.time()
|
| 369 |
+
|
| 370 |
+
# Generate response using exact method from sample code
|
| 371 |
+
with torch.no_grad(): # Save memory
|
| 372 |
+
try:
|
| 373 |
+
answer = model.chat(
|
| 374 |
+
msgs=msgs,
|
| 375 |
+
tokenizer=tokenizer,
|
| 376 |
+
**params
|
| 377 |
+
)
|
| 378 |
+
except Exception as inference_error:
|
| 379 |
+
print(f"Inference error: {inference_error}")
|
| 380 |
+
|
| 381 |
+
# Check if it's the known shape mismatch error
|
| 382 |
+
if "shape mismatch" in str(inference_error) and "cannot be broadcast" in str(inference_error):
|
| 383 |
+
print("🔧 Detected shape mismatch error - applying fallback strategy...")
|
| 384 |
+
|
| 385 |
+
try:
|
| 386 |
+
# Fallback Strategy 1: Reduce max_slice_nums to 1 for simpler processing
|
| 387 |
+
params["max_slice_nums"] = 1
|
| 388 |
+
print("📝 Trying with reduced max_slice_nums=1...")
|
| 389 |
+
|
| 390 |
+
if torch.cuda.is_available():
|
| 391 |
+
torch.cuda.empty_cache()
|
| 392 |
+
|
| 393 |
+
answer = model.chat(
|
| 394 |
+
msgs=msgs,
|
| 395 |
+
tokenizer=tokenizer,
|
| 396 |
+
**params
|
| 397 |
+
)
|
| 398 |
+
print("✅ Fallback strategy 1 successful!")
|
| 399 |
+
|
| 400 |
+
except Exception as fallback_error:
|
| 401 |
+
print(f"❌ Fallback strategy 1 failed: {fallback_error}")
|
| 402 |
+
|
| 403 |
+
try:
|
| 404 |
+
# Fallback Strategy 2: Re-process video with fewer frames
|
| 405 |
+
print("📝 Trying with fewer frames (16 max)...")
|
| 406 |
+
frames_reduced = encode_video(video_file, max_num_frames=16)
|
| 407 |
+
|
| 408 |
+
if frames_reduced:
|
| 409 |
+
# Prepare reduced content for mobile ad analysis
|
| 410 |
+
content_reduced = frames_reduced.copy()
|
| 411 |
+
content_reduced.append(prompt) # Use the same comprehensive prompt
|
| 412 |
+
|
| 413 |
+
msgs_reduced = [
|
| 414 |
+
{'role': 'user', 'content': content_reduced},
|
| 415 |
+
]
|
| 416 |
+
|
| 417 |
+
params["max_slice_nums"] = 1
|
| 418 |
+
params["use_image_id"] = False
|
| 419 |
+
|
| 420 |
+
if torch.cuda.is_available():
|
| 421 |
+
torch.cuda.empty_cache()
|
| 422 |
+
|
| 423 |
+
answer = model.chat(
|
| 424 |
+
msgs=msgs_reduced,
|
| 425 |
+
tokenizer=tokenizer,
|
| 426 |
+
**params
|
| 427 |
+
)
|
| 428 |
+
print("✅ Fallback strategy 2 successful with reduced frames!")
|
| 429 |
+
else:
|
| 430 |
+
raise Exception("Could not process video with reduced frames")
|
| 431 |
+
|
| 432 |
+
except Exception as final_error:
|
| 433 |
+
print(f"❌ All fallback strategies failed: {final_error}")
|
| 434 |
+
|
| 435 |
+
# Provide helpful error message for mobile ad analysis
|
| 436 |
+
error_details = f"""
|
| 437 |
+
Shape mismatch error detected during mobile creative analysis. This can happen due to:
|
| 438 |
+
1. Unusual video resolution/aspect ratio
|
| 439 |
+
2. Video compression artifacts
|
| 440 |
+
3. Frame dimension inconsistencies
|
| 441 |
+
|
| 442 |
+
Suggested solutions for mobile creatives:
|
| 443 |
+
- Ensure video is in standard mobile format (9:16, 16:9, 1:1)
|
| 444 |
+
- Use common resolutions (1080x1920, 1920x1080, 1080x1080)
|
| 445 |
+
- Convert to MP4 with H.264 encoding
|
| 446 |
+
- Check if video is corrupted or has unusual codec
|
| 447 |
+
|
| 448 |
+
Technical details: {str(inference_error)}
|
| 449 |
+
"""
|
| 450 |
+
return f"❌ Mobile creative analysis failed after multiple attempts:\n{error_details}"
|
| 451 |
+
|
| 452 |
+
# Try to clear cache and retry once for other errors
|
| 453 |
+
if torch.cuda.is_available():
|
| 454 |
+
torch.cuda.empty_cache()
|
| 455 |
+
raise inference_error
|
| 456 |
+
|
| 457 |
+
processing_time = time.time() - start_time
|
| 458 |
+
|
| 459 |
+
# Check which attention implementation was actually used
|
| 460 |
+
attention_type = "Flash Attention 2 (Optimized)" if hasattr(model.config, 'attn_implementation') and model.config.attn_implementation == 'flash_attention_2' else "SDPA (Optimized)"
|
| 461 |
+
|
| 462 |
+
# Prepare analysis type info for mobile ad focus
|
| 463 |
+
if audio_data is not None:
|
| 464 |
+
analysis_type = f"Comprehensive Mobile Ad Analysis (Visual + Audio)"
|
| 465 |
+
media_info = f"**Frames Analyzed:** {len(frames)} \n**Audio Duration:** {len(audio_data)/sample_rate:.1f} seconds \n**Sample Rate:** {sample_rate} Hz"
|
| 466 |
+
else:
|
| 467 |
+
analysis_type = "Mobile Ad Analysis (Visual Only)"
|
| 468 |
+
media_info = f"**Frames Analyzed:** {len(frames)} \n**Audio:** Not detected in creative"
|
| 469 |
+
|
| 470 |
+
result = f"""## 📱 Mobile Creative Ad Analysis Results
|
| 471 |
+
|
| 472 |
+
**Processing Time:** {processing_time:.2f} seconds
|
| 473 |
+
{media_info}
|
| 474 |
+
**Model:** MiniCPM-o 2.6 Multimodal AI
|
| 475 |
+
**Attention:** {attention_type}
|
| 476 |
+
**Analysis Type:** {analysis_type}
|
| 477 |
+
|
| 478 |
+
---
|
| 479 |
+
|
| 480 |
+
### 🎯 Creative Analysis Report:
|
| 481 |
+
{answer}
|
| 482 |
+
|
| 483 |
+
---
|
| 484 |
+
*Powered by MiniCPM-o 2.6 - Professional Mobile Creative Analysis on Hugging Face Spaces*
|
| 485 |
+
"""
|
| 486 |
+
|
| 487 |
+
return result
|
| 488 |
+
|
| 489 |
+
except Exception as e:
|
| 490 |
+
error_msg = f"❌ Error processing video: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
|
| 491 |
+
print(error_msg)
|
| 492 |
+
return error_msg
|
| 493 |
+
|
| 494 |
+
def analyze_video_with_custom_prompt(video_file, custom_prompt, max_frames=32):
|
| 495 |
+
"""Analyze mobile creative ad using a custom analysis prompt"""
|
| 496 |
+
|
| 497 |
+
if video_file is None:
|
| 498 |
+
return "❌ Please upload a video file"
|
| 499 |
+
|
| 500 |
+
# Use the provided custom prompt
|
| 501 |
+
prompt = custom_prompt
|
| 502 |
+
|
| 503 |
+
try:
|
| 504 |
+
# Load model
|
| 505 |
+
print("Loading model...")
|
| 506 |
+
model, tokenizer = load_model()
|
| 507 |
+
|
| 508 |
+
# Process video
|
| 509 |
+
print(f"Processing mobile creative ad: {video_file}")
|
| 510 |
+
|
| 511 |
+
# Add video diagnostics to help identify potential issues
|
| 512 |
+
try:
|
| 513 |
+
import cv2
|
| 514 |
+
cap = cv2.VideoCapture(video_file)
|
| 515 |
+
if cap.isOpened():
|
| 516 |
+
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 517 |
+
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 518 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 519 |
+
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 520 |
+
duration = frame_count / fps if fps > 0 else 0
|
| 521 |
+
print(f"📹 Creative specs: {width}x{height}, {fps:.1f}fps, {duration:.1f}s duration")
|
| 522 |
+
cap.release()
|
| 523 |
+
except:
|
| 524 |
+
print("📹 Creative info: Could not read video metadata")
|
| 525 |
+
|
| 526 |
+
# Extract video frames
|
| 527 |
+
frames = encode_video(video_file, max_num_frames=max_frames)
|
| 528 |
+
|
| 529 |
+
if not frames:
|
| 530 |
+
return "❌ Could not extract frames from video"
|
| 531 |
+
|
| 532 |
+
print(f"📸 Extracted {len(frames)} frames")
|
| 533 |
+
|
| 534 |
+
# Extract audio from video
|
| 535 |
+
audio_data, sample_rate = extract_audio_from_video(video_file)
|
| 536 |
+
|
| 537 |
+
# Prepare multimodal content
|
| 538 |
+
content = frames.copy() # Start with video frames
|
| 539 |
+
content.append(prompt) # Add the comprehensive analysis prompt
|
| 540 |
+
|
| 541 |
+
if audio_data is not None:
|
| 542 |
+
print(f"🎵 Audio analysis enabled - {len(audio_data)/sample_rate:.1f}s of audio")
|
| 543 |
+
else:
|
| 544 |
+
print("🔇 Visual analysis only - no audio content detected")
|
| 545 |
|
| 546 |
# Prepare messages exactly as in sample code
|
| 547 |
msgs = [
|
|
|
|
| 553 |
params["use_image_id"] = False
|
| 554 |
params["max_slice_nums"] = 1 # Reduced for Spaces memory limits
|
| 555 |
|
| 556 |
+
print("🧠 Analyzing mobile creative ad with MiniCPM-o 2.6...")
|
| 557 |
|
| 558 |
# Clear GPU cache before inference
|
| 559 |
if torch.cuda.is_available():
|
|
|
|
| 600 |
frames_reduced = encode_video(video_file, max_num_frames=16)
|
| 601 |
|
| 602 |
if frames_reduced:
|
| 603 |
+
# Prepare reduced content for mobile ad analysis
|
| 604 |
content_reduced = frames_reduced.copy()
|
| 605 |
+
content_reduced.append(prompt) # Use the same comprehensive prompt
|
|
|
|
|
|
|
|
|
|
| 606 |
|
| 607 |
msgs_reduced = [
|
| 608 |
{'role': 'user', 'content': content_reduced},
|
|
|
|
| 626 |
except Exception as final_error:
|
| 627 |
print(f"❌ All fallback strategies failed: {final_error}")
|
| 628 |
|
| 629 |
+
# Provide helpful error message for mobile ad analysis
|
| 630 |
error_details = f"""
|
| 631 |
+
Shape mismatch error detected during mobile creative analysis. This can happen due to:
|
| 632 |
1. Unusual video resolution/aspect ratio
|
| 633 |
+
2. Video compression artifacts
|
| 634 |
3. Frame dimension inconsistencies
|
| 635 |
|
| 636 |
+
Suggested solutions for mobile creatives:
|
| 637 |
+
- Ensure video is in standard mobile format (9:16, 16:9, 1:1)
|
| 638 |
+
- Use common resolutions (1080x1920, 1920x1080, 1080x1080)
|
| 639 |
+
- Convert to MP4 with H.264 encoding
|
| 640 |
+
- Check if video is corrupted or has unusual codec
|
| 641 |
|
| 642 |
Technical details: {str(inference_error)}
|
| 643 |
"""
|
| 644 |
+
return f"❌ Mobile creative analysis failed after multiple attempts:\n{error_details}"
|
| 645 |
|
| 646 |
# Try to clear cache and retry once for other errors
|
| 647 |
if torch.cuda.is_available():
|
|
|
|
| 653 |
# Check which attention implementation was actually used
|
| 654 |
attention_type = "Flash Attention 2 (Optimized)" if hasattr(model.config, 'attn_implementation') and model.config.attn_implementation == 'flash_attention_2' else "SDPA (Optimized)"
|
| 655 |
|
| 656 |
+
# Prepare analysis type info for mobile ad focus
|
| 657 |
if audio_data is not None:
|
| 658 |
+
analysis_type = f"Comprehensive Mobile Ad Analysis (Visual + Audio)"
|
| 659 |
media_info = f"**Frames Analyzed:** {len(frames)} \n**Audio Duration:** {len(audio_data)/sample_rate:.1f} seconds \n**Sample Rate:** {sample_rate} Hz"
|
| 660 |
else:
|
| 661 |
+
analysis_type = "Mobile Ad Analysis (Visual Only)"
|
| 662 |
+
media_info = f"**Frames Analyzed:** {len(frames)} \n**Audio:** Not detected in creative"
|
| 663 |
|
| 664 |
+
result = f"""## 📱 Mobile Creative Ad Analysis Results
|
| 665 |
|
| 666 |
**Processing Time:** {processing_time:.2f} seconds
|
| 667 |
{media_info}
|
| 668 |
+
**Model:** MiniCPM-o 2.6 Multimodal AI
|
| 669 |
**Attention:** {attention_type}
|
| 670 |
**Analysis Type:** {analysis_type}
|
| 671 |
|
| 672 |
+
---
|
| 673 |
+
|
| 674 |
+
### 🎯 Creative Analysis Report:
|
| 675 |
{answer}
|
| 676 |
|
| 677 |
---
|
| 678 |
+
*Powered by MiniCPM-o 2.6 - Professional Mobile Creative Analysis on Hugging Face Spaces*
|
| 679 |
"""
|
| 680 |
|
| 681 |
return result
|
|
|
|
| 709 |
|
| 710 |
# Create Gradio interface
|
| 711 |
def create_interface():
|
| 712 |
+
"""Create the mobile creative ad analysis interface"""
|
| 713 |
|
| 714 |
with gr.Blocks(
|
| 715 |
+
title="Mobile Creative Ad Analyzer - MiniCPM-o 2.6",
|
| 716 |
theme=gr.themes.Soft(),
|
| 717 |
css="""
|
| 718 |
.gradio-container {
|
| 719 |
+
max-width: 1000px !important;
|
| 720 |
}
|
| 721 |
.example-prompt {
|
| 722 |
cursor: pointer;
|
|
|
|
| 729 |
.example-prompt:hover {
|
| 730 |
background: #e0e0e0;
|
| 731 |
}
|
| 732 |
+
.advanced-section {
|
| 733 |
+
background: #f8f9fa;
|
| 734 |
+
padding: 15px;
|
| 735 |
+
border-radius: 8px;
|
| 736 |
+
margin-top: 10px;
|
| 737 |
+
}
|
| 738 |
"""
|
| 739 |
) as demo:
|
| 740 |
|
| 741 |
gr.Markdown("""
|
| 742 |
+
# 📱 Mobile Creative Ad Analyzer
|
| 743 |
+
### Powered by MiniCPM-o 2.6 Multimodal AI
|
| 744 |
|
| 745 |
+
**Professional mobile advertising creative analysis** with comprehensive insights across 12 key dimensions including hook analysis, audiovisual sync, mobile optimization, and conversion elements.
|
| 746 |
|
| 747 |
+
**🎯 Analysis includes:**
|
| 748 |
+
- **Hook Analysis** (0-5 second critical window)
|
| 749 |
+
- **Mobile-First Design** optimization assessment
|
| 750 |
+
- **Audiovisual Synchronization** evaluation
|
| 751 |
+
- **Target Audience** demographic analysis
|
| 752 |
+
- **Conversion Elements** and performance optimization
|
| 753 |
+
- **Platform-Specific** recommendations (iOS/Android/Social)
|
|
|
|
| 754 |
|
| 755 |
+
**📱 Optimized for:** Mobile game ads, app install campaigns, social media creatives, programmatic display ads
|
|
|
|
|
|
|
| 756 |
|
| 757 |
+
**🎵 Supports:** Video + Audio analysis for complete creative assessment
|
| 758 |
""")
|
| 759 |
|
| 760 |
with gr.Row():
|
| 761 |
+
with gr.Column(scale=2):
|
| 762 |
+
# Video input - main focus
|
| 763 |
video_input = gr.Video(
|
| 764 |
+
label="📱 Upload Mobile Creative Ad",
|
| 765 |
+
elem_id="video_input",
|
| 766 |
+
height=400
|
| 767 |
)
|
| 768 |
|
| 769 |
+
# Main analyze button
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 770 |
analyze_btn = gr.Button(
|
| 771 |
+
"🚀 Analyze Mobile Creative",
|
| 772 |
variant="primary",
|
| 773 |
+
size="lg",
|
| 774 |
+
scale=2
|
| 775 |
)
|
| 776 |
|
| 777 |
+
# Advanced Settings Section
|
| 778 |
+
with gr.Accordion("⚙️ Advanced Settings", open=False):
|
| 779 |
+
with gr.Group(elem_classes=["advanced-section"]):
|
| 780 |
+
gr.Markdown("### 🔧 Processing Options")
|
| 781 |
+
|
| 782 |
+
# Max frames slider (moved from main interface)
|
| 783 |
+
max_frames_slider = gr.Slider(
|
| 784 |
+
minimum=8,
|
| 785 |
+
maximum=64,
|
| 786 |
+
value=32,
|
| 787 |
+
step=8,
|
| 788 |
+
label="🎞️ Max Frames to Analyze",
|
| 789 |
+
info="More frames = more detail but slower processing. 32 is optimal for most mobile ads."
|
| 790 |
+
)
|
| 791 |
+
|
| 792 |
+
# Custom prompt option (for advanced users)
|
| 793 |
+
with gr.Accordion("📝 Custom Analysis Prompt (Advanced)", open=False):
|
| 794 |
+
custom_prompt_input = gr.Textbox(
|
| 795 |
+
label="Custom Analysis Prompt",
|
| 796 |
+
placeholder="Enter custom analysis instructions (leave empty to use comprehensive mobile ad analysis)",
|
| 797 |
+
lines=3,
|
| 798 |
+
value=""
|
| 799 |
+
)
|
| 800 |
+
gr.Markdown("*Leave empty to use the comprehensive 12-point mobile creative analysis prompt*")
|
| 801 |
+
|
| 802 |
+
# Example prompts (moved from main interface)
|
| 803 |
+
with gr.Accordion("💡 Alternative Analysis Prompts", open=False):
|
| 804 |
+
gr.Markdown("### Example Analysis Focuses:")
|
| 805 |
+
example_prompts = get_example_prompts()
|
| 806 |
|
| 807 |
+
with gr.Row():
|
| 808 |
+
for i in range(0, min(6, len(example_prompts)), 2):
|
| 809 |
+
with gr.Column():
|
| 810 |
+
if i < len(example_prompts):
|
| 811 |
+
ex_btn1 = gr.Button(
|
| 812 |
+
example_prompts[i][:50] + "..." if len(example_prompts[i]) > 50 else example_prompts[i],
|
| 813 |
+
size="sm",
|
| 814 |
+
elem_classes=["example-prompt"]
|
| 815 |
+
)
|
| 816 |
+
ex_btn1.click(
|
| 817 |
+
lambda x=example_prompts[i]: x,
|
| 818 |
+
outputs=custom_prompt_input
|
| 819 |
+
)
|
| 820 |
+
|
| 821 |
+
if i + 1 < len(example_prompts):
|
| 822 |
+
ex_btn2 = gr.Button(
|
| 823 |
+
example_prompts[i + 1][:50] + "..." if len(example_prompts[i + 1]) > 50 else example_prompts[i + 1],
|
| 824 |
+
size="sm",
|
| 825 |
+
elem_classes=["example-prompt"]
|
| 826 |
+
)
|
| 827 |
+
ex_btn2.click(
|
| 828 |
+
lambda x=example_prompts[i + 1]: x,
|
| 829 |
+
outputs=custom_prompt_input
|
| 830 |
+
)
|
| 831 |
|
| 832 |
+
with gr.Column(scale=3):
|
| 833 |
+
# Results output - larger space for comprehensive analysis
|
| 834 |
output_text = gr.Markdown(
|
| 835 |
label="📊 Analysis Results",
|
| 836 |
+
value="""### 🎯 Mobile Creative Analysis Ready!
|
| 837 |
+
|
| 838 |
+
Upload your mobile advertising creative (video ad) and click **"Analyze Mobile Creative"** to receive a comprehensive professional analysis covering:
|
| 839 |
+
|
| 840 |
+
✅ **Hook effectiveness** (critical first 5 seconds)
|
| 841 |
+
✅ **Mobile optimization** for small screens
|
| 842 |
+
✅ **Audio-visual impact** and synchronization
|
| 843 |
+
✅ **Target audience appeal** assessment
|
| 844 |
+
✅ **Conversion optimization** recommendations
|
| 845 |
+
✅ **Platform-specific** insights (iOS/Android/Social)
|
| 846 |
+
|
| 847 |
+
**Supports:** MP4, AVI, MOV, WebM formats with automatic audio extraction for complete analysis.
|
| 848 |
+
""",
|
| 849 |
elem_id="output"
|
| 850 |
)
|
| 851 |
|
| 852 |
+
# Modified event handler to use custom prompt if provided
|
| 853 |
+
def analyze_with_options(video_file, max_frames, custom_prompt):
    """Dispatch an analysis request from the UI to the right analyzer.

    Args:
        video_file: Value supplied by the video input component.
        max_frames: Value supplied by the "Max Frames to Analyze" slider.
        custom_prompt: Text from the custom prompt textbox; may be empty
            or contain only whitespace.

    Returns:
        Whatever the selected analyzer returns; the click handler routes
        it to the results Markdown component.
    """
    # Normalise once so an empty, missing, or whitespace-only prompt all
    # collapse to the same "no custom prompt" case.
    cleaned_prompt = custom_prompt.strip() if custom_prompt else ""

    if not cleaned_prompt:
        # No usable custom prompt: run the default comprehensive mobile ad analysis.
        return analyze_video(video_file, max_frames)

    # A custom prompt was provided: pass the trimmed text through.
    return analyze_video_with_custom_prompt(video_file, cleaned_prompt, max_frames)
|
| 860 |
+
|
| 861 |
# Event handlers
|
| 862 |
analyze_btn.click(
|
| 863 |
+
fn=analyze_with_options,
|
| 864 |
+
inputs=[video_input, max_frames_slider, custom_prompt_input],
|
| 865 |
outputs=output_text,
|
| 866 |
show_progress=True
|
| 867 |
)
|
|
|
|
| 869 |
# Footer
|
| 870 |
gr.Markdown("""
|
| 871 |
---
|
| 872 |
+
### ℹ️ About Mobile Creative Ad Analyzer
|
| 873 |
+
This professional mobile advertising analysis tool uses **MiniCPM-o 2.6**, a state-of-the-art multimodal AI model for comprehensive creative assessment.
|
| 874 |
+
|
| 875 |
+
- **Model:** [openbmb/MiniCPM-o-2_6](https://huggingface.co/openbmb/MiniCPM-o-2_6) - Advanced multimodal AI
|
| 876 |
+
- **Analysis Framework:** 12-point comprehensive mobile creative evaluation
|
| 877 |
+
- **Capabilities:** Visual analysis + Audio processing + Performance optimization insights
|
| 878 |
+
- **Optimization:** Flash Attention 2 / SDPA for maximum processing efficiency
|
| 879 |
|
| 880 |
+
**🎯 Perfect for:** Mobile game ads, app install campaigns, social media creatives, programmatic display, influencer content, and any mobile-first advertising creative.
|
|
|
|
|
|
|
|
|
|
| 881 |
|
| 882 |
+
**📊 Analysis Dimensions:** Hook effectiveness, mobile optimization, target audience appeal, audiovisual sync, conversion elements, platform compliance, competitive positioning, and actionable optimization recommendations.
|
| 883 |
""")
|
| 884 |
|
| 885 |
return demo
|