Spaces:

TencentARC
/

ToonComposer

Running on Zero

App Files Community

l-li committed on Aug 17

Commit

843418d

1 Parent(s): 556a5c6

update(*): debug update.

Browse files

Files changed (3) hide show

app.py +1 -0
util/env_resolver.py +16 -0
util/optical_flow.py +0 -140

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import util.env_resolver
 import spaces
 import torch
 import numpy as np

util/env_resolver.py ADDED Viewed

@@ -0,0 +1,16 @@

+import subprocess
+import sys
+def install_flash_attn():
+    try:
+        subprocess.check_call([sys.executable, "-m", "pip", "install", "/home/user/app/wheels/flash_attn-2.8.2+cu129torch2.8-cp310-cp310-linux_x86_64.whl", "--no-build-isolation"])
+        print(f"Successfully installed flash_attn")
+    except subprocess.CalledProcessError as e:
+        print(f"Error installing flash_attn: {e}. This demo won't work properly.")
+try:
+    import flash_attn
+    print(f"`flash_attn` has been installed.")
+except ImportError:
+    print(f"`flash_attn` is NOT installed. Trying to install from local wheel...")
+    install_flash_attn()

util/optical_flow.py DELETED Viewed

@@ -1,140 +0,0 @@
-import cv2
-import numpy as np
-import torch
-import torch.nn.functional as F
-from torchvision.models.optical_flow import Raft_Large_Weights, raft_large
-from typing import List, Tuple, Dict
-import argparse
-from pathlib import Path
-from sklearn.cluster import KMeans
-from tqdm import tqdm
-import os
-os.environ['OPENBLAS_NUM_THREADS'] = '64'
-class OpticalFlowAnalyzer:
-    def __init__(self, device: str = 'cuda' if torch.cuda.is_available() else 'cpu'):
-        self.device = device
-        self.model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device)
-        self.model.eval()
-    def preprocess_frame(self, frame: np.ndarray) -> torch.Tensor:
-        """Preprocess a frame for RAFT model."""
-        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        frame = torch.from_numpy(frame).permute(2, 0, 1).float()
-        frame = frame.unsqueeze(0) / 255.0
-        return frame.to(self.device)
-    def compute_optical_flow(self, frame1: np.ndarray, frame2: np.ndarray) -> np.ndarray:
-        """Compute optical flow between two consecutive frames."""
-        with torch.no_grad():
-            frame1_tensor = self.preprocess_frame(frame1)
-            frame2_tensor = self.preprocess_frame(frame2)
-            flow = self.model(frame1_tensor, frame2_tensor)[-1]
-            flow = flow[0].permute(1, 2, 0).cpu().numpy()
-        return flow
-    def analyze_motion_regions(self, flow: np.ndarray, num_clusters: int = 3) -> Tuple[np.ndarray, Dict]:
-        """Cluster motion regions based on optical flow magnitude and direction."""
-        h, w = flow.shape[:2]
-        magnitude = np.sqrt(flow[..., 0]**2 + flow[..., 1]**2)
-        direction = np.arctan2(flow[..., 1], flow[..., 0])
-        # Create feature matrix for clustering
-        features = np.zeros((h * w, 3))
-        features[:, 0] = magnitude.ravel()
-        features[:, 1] = np.cos(direction).ravel()
-        features[:, 2] = np.sin(direction).ravel()
-        # Normalize features
-        features = (features - features.mean(axis=0)) / features.std(axis=0)
-        # Perform clustering
-        kmeans = KMeans(n_clusters=num_clusters, random_state=42,)
-        labels = kmeans.fit_predict(features)
-        labels = labels.reshape(h, w)
-        # Analyze clusters
-        cluster_stats = {}
-        for i in range(num_clusters):
-            cluster_mask = (labels == i)
-            cluster_magnitude = magnitude[cluster_mask]
-            cluster_stats[i] = {
-                'mean_magnitude': np.mean(cluster_magnitude),
-                'std_magnitude': np.std(cluster_magnitude),
-                'pixel_count': np.sum(cluster_mask),
-                'is_static': np.mean(cluster_magnitude) < 0.1  # Threshold for static regions
-            }
-        return labels, cluster_stats
-    def process_video(self, video_path: str, output_path: str = None) -> List[Tuple[np.ndarray, Dict]]:
-        """Process a video and return motion analysis results for each frame pair."""
-        cap = cv2.VideoCapture(video_path)
-        if not cap.isOpened():
-            raise ValueError(f"Could not open video: {video_path}")
-        results = []
-        ret, prev_frame = cap.read()
-        if not ret:
-            raise ValueError("Could not read first frame")
-        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        pbar = tqdm(total=total_frames-1, desc="Processing video")
-        while True:
-            ret, curr_frame = cap.read()
-            if not ret:
-                break
-            flow = self.compute_optical_flow(prev_frame, curr_frame)
-            labels, stats = self.analyze_motion_regions(flow)
-            if output_path:
-                # Visualize results
-                vis_frame = curr_frame.copy()
-                for i, stat in stats.items():
-                    if not stat['is_static']:
-                        mask = (labels == i).astype(np.uint8) * 255
-                        print("mask:",mask.shape)
-                        print("vis_frame:",vis_frame.shape)
-                        mask = np.expand_dims(mask, axis=-1).repeat(3, axis=-1)
-                        print("mask:",mask.shape)
-                        vis_frame[mask > 0] = cv2.addWeighted(vis_frame[mask > 0], 0.7, 255, 0.3, 0)
-                cv2.imwrite(f"{output_path}/frame_{len(results):04d}.jpg", vis_frame)
-            results.append((labels, stats))
-            prev_frame = curr_frame
-            pbar.update(1)
-        cap.release()
-        pbar.close()
-        return results
-def main():
-    parser = argparse.ArgumentParser(description='Analyze motion regions in a video using RAFT optical flow')
-    parser.add_argument('--video', type=str, required=True, help='Path to input video')
-    parser.add_argument('--output', type=str, help='Path to output directory for visualization')
-    parser.add_argument('--clusters', type=int, default=3, help='Number of motion clusters')
-    args = parser.parse_args()
-    analyzer = OpticalFlowAnalyzer()
-    results = analyzer.process_video(args.video, args.output)
-    # Print summary statistics
-    print("\nMotion Analysis Summary:")
-    for i, (_, stats) in enumerate(results):
-        print(f"\nFrame {i+1}:")
-        for cluster_id, stat in stats.items():
-            motion_type = "Static" if stat['is_static'] else "Moving"
-            print(f"  Cluster {cluster_id} ({motion_type}):")
-            print(f"    Mean magnitude: {stat['mean_magnitude']:.4f}")
-            print(f"    Pixel count: {stat['pixel_count']}")
-if __name__ == "__main__":
-    main()