chakkale committed on
Commit 0768376 · 1 Parent(s): b4de84b
Files changed (2)
  1. app.py +13 -5
  2. requirements.txt +6 -26
app.py CHANGED
@@ -69,9 +69,14 @@ def load_model():
 
     print(f"Loading on device: {device} with dtype: {torch_dtype}")
 
-    # Use SDPA attention which provides excellent performance without compilation issues
-    attn_implementation = 'sdpa'
-    print("🚀 Using SDPA (Scaled Dot Product Attention) for optimal compatibility and performance")
+    # Try to use Flash Attention 2 if available, fall back to SDPA
+    try:
+        import flash_attn
+        attn_implementation = 'flash_attention_2'
+        print("⚡ Flash Attention 2 detected - using optimized attention kernels")
+    except ImportError:
+        attn_implementation = 'sdpa'
+        print("🚀 Using SDPA (Scaled Dot Product Attention) for optimal compatibility and performance")
 
     # Load model with memory optimization for Spaces
     model = AutoModel.from_pretrained(
@@ -142,12 +147,15 @@ def analyze_video(video_file, prompt, max_frames):
 
         processing_time = time.time() - start_time
 
+        # Check which attention implementation was actually used
+        attention_type = "Flash Attention 2 (Optimized)" if hasattr(model.config, 'attn_implementation') and model.config.attn_implementation == 'flash_attention_2' else "SDPA (Optimized)"
+
         result = f"""## 🎬 Video Analysis Results
 
 **Processing Time:** {processing_time:.2f} seconds
 **Frames Analyzed:** {len(frames)}
 **Model:** MiniCPM-o 2.6
-**Attention:** SDPA (Optimized)
+**Attention:** {attention_type}
 
 ### Analysis:
 {answer}
@@ -213,7 +221,7 @@ def create_interface():
     - 🖼️ Frame-by-frame understanding
     - 📝 Detailed descriptions
     - 🎨 Creative and marketing insights
-    - 🚀 SDPA optimized for reliable performance
+    - Flash Attention 2 optimized for maximum performance
 
     **Supported formats:** MP4, AVI, MOV, WebM
     """)
requirements.txt CHANGED
@@ -1,32 +1,12 @@
-# Core PyTorch dependencies - install first
 torch==2.3.1
-torchaudio==2.3.1
-torchvision==0.18.1
-
-# Core ML libraries
 transformers==4.44.2
-accelerate==1.2.1
-numpy>=1.24.0
-einops>=0.8.0
-einx>=0.3.0
-
-# Gradio and Spaces
 gradio==5.35.0
 spaces==0.37.1
-
-# Image and video processing
-Pillow==10.1.0
+vllm-flash-attn==2.6.2
+accelerate
+opencv-python-headless
+pillow
+numpy
 decord
 moviepy
-
-# Audio processing
-soundfile==0.12.1
-librosa==0.9.0
-
-# Additional ML dependencies
-sentencepiece==0.2.0
-vector-quantize-pytorch==1.18.5
-vocos==0.1.0
-timm==0.9.10
-huggingface_hub>=0.33.2
-protobuf==3.20.3
+ffmpeg-python
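
One caveat worth flagging, as an assumption about wheel layout rather than something the diff states: the vllm-flash-attn wheel is generally imported as `vllm_flash_attn`, not `flash_attn`, so the `import flash_attn` probe in app.py may still raise ImportError and land on the SDPA branch. A quick check to run in the Space:

```python
# Probe which attention backend this environment can actually provide.
# Assumption: vllm-flash-attn installs its module as `vllm_flash_attn`,
# while `import flash_attn` requires the separate flash-attn package.
try:
    import flash_attn  # satisfied by the flash-attn wheel, if present
    print("flash_attn importable:", flash_attn.__version__)
except ImportError:
    try:
        import vllm_flash_attn  # noqa: F401  # what vllm-flash-attn likely provides
        print("vllm_flash_attn importable - but app.py's probe won't see it")
    except ImportError:
        print("no flash-attention wheel importable; SDPA fallback will be used")
```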