commit
Browse files
- app.py +13 -5
- requirements.txt +6 -26
app.py
CHANGED
|
@@ -69,9 +69,14 @@ def load_model():
|
|
| 69 |
|
| 70 |
print(f"Loading on device: {device} with dtype: {torch_dtype}")
|
| 71 |
|
| 72 |
-
#
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
# Load model with memory optimization for Spaces
|
| 77 |
model = AutoModel.from_pretrained(
|
|
@@ -142,12 +147,15 @@ def analyze_video(video_file, prompt, max_frames):
|
|
| 142 |
|
| 143 |
processing_time = time.time() - start_time
|
| 144 |
|
|
|
|
|
|
|
|
|
|
| 145 |
result = f"""## 🎬 Video Analysis Results
|
| 146 |
|
| 147 |
**Processing Time:** {processing_time:.2f} seconds
|
| 148 |
**Frames Analyzed:** {len(frames)}
|
| 149 |
**Model:** MiniCPM-o 2.6
|
| 150 |
-
**Attention:**
|
| 151 |
|
| 152 |
### Analysis:
|
| 153 |
{answer}
|
|
@@ -213,7 +221,7 @@ def create_interface():
|
|
| 213 |
- 🖼️ Frame-by-frame understanding
|
| 214 |
- 📝 Detailed descriptions
|
| 215 |
- 🎨 Creative and marketing insights
|
| 216 |
-
-
|
| 217 |
|
| 218 |
**Supported formats:** MP4, AVI, MOV, WebM
|
| 219 |
""")
|
|
|
|
| 69 |
|
| 70 |
print(f"Loading on device: {device} with dtype: {torch_dtype}")
|
| 71 |
|
| 72 |
+
# Try to use Flash Attention 2 if available, fallback to SDPA
|
| 73 |
+
try:
|
| 74 |
+
import flash_attn
|
| 75 |
+
attn_implementation = 'flash_attention_2'
|
| 76 |
+
print("⚡ Flash Attention 2 detected - using optimized attention kernels")
|
| 77 |
+
except ImportError:
|
| 78 |
+
attn_implementation = 'sdpa'
|
| 79 |
+
print("🚀 Using SDPA (Scaled Dot Product Attention) for optimal compatibility and performance")
|
| 80 |
|
| 81 |
# Load model with memory optimization for Spaces
|
| 82 |
model = AutoModel.from_pretrained(
|
|
|
|
| 147 |
|
| 148 |
processing_time = time.time() - start_time
|
| 149 |
|
| 150 |
+
# Check which attention implementation was actually used
|
| 151 |
+
attention_type = "Flash Attention 2 (Optimized)" if hasattr(model.config, 'attn_implementation') and model.config.attn_implementation == 'flash_attention_2' else "SDPA (Optimized)"
|
| 152 |
+
|
| 153 |
result = f"""## 🎬 Video Analysis Results
|
| 154 |
|
| 155 |
**Processing Time:** {processing_time:.2f} seconds
|
| 156 |
**Frames Analyzed:** {len(frames)}
|
| 157 |
**Model:** MiniCPM-o 2.6
|
| 158 |
+
**Attention:** {attention_type}
|
| 159 |
|
| 160 |
### Analysis:
|
| 161 |
{answer}
|
|
|
|
| 221 |
- 🖼️ Frame-by-frame understanding
|
| 222 |
- 📝 Detailed descriptions
|
| 223 |
- 🎨 Creative and marketing insights
|
| 224 |
+
- ⚡ Flash Attention 2 optimized for maximum performance
|
| 225 |
|
| 226 |
**Supported formats:** MP4, AVI, MOV, WebM
|
| 227 |
""")
|
requirements.txt
CHANGED
|
@@ -1,32 +1,12 @@
|
|
| 1 |
-
# Core PyTorch dependencies - install first
|
| 2 |
torch==2.3.1
|
| 3 |
-
torchaudio==2.3.1
|
| 4 |
-
torchvision==0.18.1
|
| 5 |
-
|
| 6 |
-
# Core ML libraries
|
| 7 |
transformers==4.44.2
|
| 8 |
-
accelerate==1.2.1
|
| 9 |
-
numpy>=1.24.0
|
| 10 |
-
einops>=0.8.0
|
| 11 |
-
einx>=0.3.0
|
| 12 |
-
|
| 13 |
-
# Gradio and Spaces
|
| 14 |
gradio==5.35.0
|
| 15 |
spaces==0.37.1
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
decord
|
| 20 |
moviepy
|
| 21 |
-
|
| 22 |
-
# Audio processing
|
| 23 |
-
soundfile==0.12.1
|
| 24 |
-
librosa==0.9.0
|
| 25 |
-
|
| 26 |
-
# Additional ML dependencies
|
| 27 |
-
sentencepiece==0.2.0
|
| 28 |
-
vector-quantize-pytorch==1.18.5
|
| 29 |
-
vocos==0.1.0
|
| 30 |
-
timm==0.9.10
|
| 31 |
-
huggingface_hub>=0.33.2
|
| 32 |
-
protobuf==3.20.3
|
|
|
|
|
|
|
| 1 |
torch==2.3.1
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
transformers==4.44.2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
gradio==5.35.0
|
| 4 |
spaces==0.37.1
|
| 5 |
+
vllm-flash-attn==2.6.2
|
| 6 |
+
accelerate
|
| 7 |
+
opencv-python-headless
|
| 8 |
+
pillow
|
| 9 |
+
numpy
|
| 10 |
decord
|
| 11 |
moviepy
|
| 12 |
+
ffmpeg-python
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|