chakkale committed on
Commit 0768376 · 1 Parent(s): b4de84b
Files changed (2)
  1. app.py +13 -5
  2. requirements.txt +6 -26
app.py CHANGED
@@ -69,9 +69,14 @@ def load_model():
 
     print(f"Loading on device: {device} with dtype: {torch_dtype}")
 
-    # Use SDPA attention which provides excellent performance without compilation issues
-    attn_implementation = 'sdpa'
-    print("🚀 Using SDPA (Scaled Dot Product Attention) for optimal compatibility and performance")
+    # Try to use Flash Attention 2 if available, fall back to SDPA
+    try:
+        import flash_attn
+        attn_implementation = 'flash_attention_2'
+        print("⚡ Flash Attention 2 detected - using optimized attention kernels")
+    except ImportError:
+        attn_implementation = 'sdpa'
+        print("🚀 Using SDPA (Scaled Dot Product Attention) for optimal compatibility and performance")
 
     # Load model with memory optimization for Spaces
     model = AutoModel.from_pretrained(
@@ -142,12 +147,15 @@ def analyze_video(video_file, prompt, max_frames):
 
         processing_time = time.time() - start_time
 
+        # Check which attention implementation was actually used
+        attention_type = "Flash Attention 2 (Optimized)" if hasattr(model.config, 'attn_implementation') and model.config.attn_implementation == 'flash_attention_2' else "SDPA (Optimized)"
+
         result = f"""## 🎬 Video Analysis Results
 
 **Processing Time:** {processing_time:.2f} seconds
 **Frames Analyzed:** {len(frames)}
 **Model:** MiniCPM-o 2.6
-**Attention:** SDPA (Optimized)
+**Attention:** {attention_type}
 
 ### Analysis:
 {answer}
@@ -213,7 +221,7 @@ def create_interface():
     - 🖼️ Frame-by-frame understanding
     - 📝 Detailed descriptions
     - 🎨 Creative and marketing insights
-    - 🚀 SDPA optimized for reliable performance
+    - Flash Attention 2 optimized for maximum performance
 
     **Supported formats:** MP4, AVI, MOV, WebM
     """)
requirements.txt CHANGED
@@ -1,32 +1,12 @@
-# Core PyTorch dependencies - install first
 torch==2.3.1
-torchaudio==2.3.1
-torchvision==0.18.1
-
-# Core ML libraries
 transformers==4.44.2
-accelerate==1.2.1
-numpy>=1.24.0
-einops>=0.8.0
-einx>=0.3.0
-
-# Gradio and Spaces
 gradio==5.35.0
 spaces==0.37.1
-
-# Image and video processing
-Pillow==10.1.0
+vllm-flash-attn==2.6.2
+accelerate
+opencv-python-headless
+pillow
+numpy
 decord
 moviepy
-
-# Audio processing
-soundfile==0.12.1
-librosa==0.9.0
-
-# Additional ML dependencies
-sentencepiece==0.2.0
-vector-quantize-pytorch==1.18.5
-vocos==0.1.0
-timm==0.9.10
-huggingface_hub>=0.33.2
-protobuf==3.20.3
+ffmpeg-python
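
One caveat worth flagging, as an assumption about wheel layout rather than something the diff states: the vllm-flash-attn wheel is generally imported as `vllm_flash_attn`, not `flash_attn`, so the `import flash_attn` probe in app.py may still raise ImportError and land on the SDPA branch. A quick check to run in the Space:

```python
# Probe which attention backend this environment can actually provide.
# Assumption: vllm-flash-attn installs its module as `vllm_flash_attn`,
# while `import flash_attn` requires the separate flash-attn package.
try:
    import flash_attn  # satisfied by the flash-attn wheel, if present
    print("flash_attn importable:", flash_attn.__version__)
except ImportError:
    try:
        import vllm_flash_attn  # noqa: F401  # what vllm-flash-attn likely provides
        print("vllm_flash_attn importable - but app.py's probe won't see it")
    except ImportError:
        print("no flash-attention wheel importable; SDPA fallback will be used")
```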