Spaces:

TencentARC
/

ToonComposer

Running on Zero

App Files Files Community

ToonComposer / util /optical_flow.py

l-li

update requirements.

00274d1 3 months ago

raw

history blame

5.73 kB

	import cv2
	import numpy as np
	import torch
	import torch.nn.functional as F
	from torchvision.models.optical_flow import Raft_Large_Weights, raft_large
	from typing import List, Tuple, Dict
	import argparse
	from pathlib import Path
	from sklearn.cluster import KMeans
	from tqdm import tqdm
	import os

	# Set the OPENBLAS_NUM_THREADS environment variable to '64' for this process
	# (unconditionally overrides any value supplied by the caller's environment).
	# NOTE(review): numpy and scikit-learn are already imported above this line;
	# OpenBLAS presumably reads this variable when the library is loaded, so the
	# assignment may have no effect here -- confirm, and consider moving it ahead
	# of those imports.
	os.environ['OPENBLAS_NUM_THREADS'] = '64'

	class OpticalFlowAnalyzer:
	    """Estimate dense optical flow with RAFT and cluster it into motion regions.

	    The analyzer wraps torchvision's pretrained RAFT-Large model. For a pair
	    of BGR frames it produces a per-pixel flow field, then groups pixels with
	    K-means on (magnitude, cos(direction), sin(direction)) and reports simple
	    statistics for every cluster.
	    """

	    # Mean flow magnitude below which a cluster is reported as static.
	    STATIC_MAGNITUDE_THRESHOLD = 0.1
	    # torchvision's RAFT rejects inputs whose height/width is not a multiple of 8.
	    RAFT_SIZE_MULTIPLE = 8

	    def __init__(self, device: str = 'cuda' if torch.cuda.is_available() else 'cpu'):
	        """Load the pretrained RAFT-Large model onto ``device`` in eval mode.

	        Args:
	            device: Torch device string. The default is computed once, when
	                the class body is executed.
	        """
	        self.device = device
	        self.model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device)
	        self.model.eval()

	    def preprocess_frame(self, frame: np.ndarray) -> torch.Tensor:
	        """Convert a BGR image array into a RAFT input tensor.

	        Args:
	            frame: H x W x 3 image in BGR channel order with values in 0..255
	                (the layout ``cv2.VideoCapture.read`` is used with below).

	        Returns:
	            Float tensor of shape (1, 3, H, W) in RGB order, scaled to
	            [-1, 1], placed on ``self.device``.
	        """
	        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	        tensor = torch.from_numpy(rgb).permute(2, 0, 1).float().unsqueeze(0)
	        # The pretrained RAFT weights expect inputs in [-1, 1]; feeding [0, 1]
	        # silently degrades the predicted flow.
	        tensor = tensor / 255.0 * 2.0 - 1.0
	        return tensor.to(self.device)

	    def compute_optical_flow(self, frame1: np.ndarray, frame2: np.ndarray) -> np.ndarray:
	        """Compute optical flow between two consecutive frames.

	        Frames whose height or width is not a multiple of 8 are padded on the
	        bottom/right by edge replication before inference, and the flow is
	        cropped back to the original size afterwards.

	        Args:
	            frame1: First BGR frame, H x W x 3.
	            frame2: Second BGR frame with the same shape as ``frame1``.

	        Returns:
	            Array of shape (H, W, 2) holding the final RAFT flow estimate
	            (channel 0 = horizontal, channel 1 = vertical displacement).
	        """
	        with torch.no_grad():
	            first = self.preprocess_frame(frame1)
	            second = self.preprocess_frame(frame2)

	            height, width = first.shape[-2:]
	            pad_bottom = -height % self.RAFT_SIZE_MULTIPLE
	            pad_right = -width % self.RAFT_SIZE_MULTIPLE
	            if pad_bottom or pad_right:
	                # F.pad order for the last two dims: (left, right, top, bottom).
	                padding = (0, pad_right, 0, pad_bottom)
	                first = F.pad(first, padding, mode='replicate')
	                second = F.pad(second, padding, mode='replicate')

	            # The model returns one prediction per refinement step; keep the last.
	            flow = self.model(first, second)[-1]
	            flow = flow[0, :, :height, :width].permute(1, 2, 0).cpu().numpy()

	        return flow

	    def analyze_motion_regions(self, flow: np.ndarray, num_clusters: int = 3) -> Tuple[np.ndarray, Dict]:
	        """Cluster motion regions based on optical flow magnitude and direction.

	        Args:
	            flow: Flow field of shape (H, W, 2).
	            num_clusters: Number of K-means clusters to request.

	        Returns:
	            ``(labels, cluster_stats)`` where ``labels`` is an (H, W) integer
	            array of cluster ids and ``cluster_stats`` maps every id in
	            ``range(num_clusters)`` to a dict with keys ``mean_magnitude``,
	            ``std_magnitude``, ``pixel_count`` and ``is_static``.

	        Raises:
	            ValueError: If ``flow`` is empty or not H x W x 2, or if
	                ``num_clusters`` is smaller than 1.
	        """
	        if flow.ndim != 3 or flow.shape[2] != 2 or flow.size == 0:
	            raise ValueError(f"Expected a non-empty H x W x 2 flow field, got shape {flow.shape}")
	        if num_clusters < 1:
	            raise ValueError(f"num_clusters must be at least 1, got {num_clusters}")

	        h, w = flow.shape[:2]
	        magnitude = np.hypot(flow[..., 0], flow[..., 1])
	        direction = np.arctan2(flow[..., 1], flow[..., 0])

	        # One row per pixel: magnitude plus the direction encoded on the unit
	        # circle so that angles near -pi and +pi stay close together.
	        features = np.stack(
	            [magnitude.ravel(), np.cos(direction).ravel(), np.sin(direction).ravel()],
	            axis=1,
	        ).astype(np.float64)

	        spread = features.max(axis=0) - features.min(axis=0)
	        if not spread.any():
	            # Every pixel moves identically: there is nothing to separate, and
	            # standardizing would divide by zero. Report one region (id 0).
	            labels = np.zeros((h, w), dtype=np.int32)
	        else:
	            std = features.std(axis=0)
	            # Constant columns carry no information; avoid 0/0 -> NaN.
	            std[spread == 0] = 1.0
	            features = (features - features.mean(axis=0)) / std

	            kmeans = KMeans(n_clusters=num_clusters, random_state=42)
	            labels = kmeans.fit_predict(features).reshape(h, w)

	        return labels, self._summarize_clusters(labels, magnitude, num_clusters)

	    def _summarize_clusters(self, labels: np.ndarray, magnitude: np.ndarray, num_clusters: int) -> Dict:
	        """Build the per-cluster statistics dict for ``analyze_motion_regions``."""
	        cluster_stats = {}
	        for cluster_id in range(num_clusters):
	            member_magnitudes = magnitude[labels == cluster_id]
	            if member_magnitudes.size:
	                mean_magnitude = float(member_magnitudes.mean())
	                std_magnitude = float(member_magnitudes.std())
	            else:
	                # An empty cluster has no motion; avoid NaN from mean([]).
	                mean_magnitude = std_magnitude = 0.0
	            cluster_stats[cluster_id] = {
	                'mean_magnitude': mean_magnitude,
	                'std_magnitude': std_magnitude,
	                'pixel_count': int(member_magnitudes.size),
	                'is_static': mean_magnitude < self.STATIC_MAGNITUDE_THRESHOLD,
	            }
	        return cluster_stats

	    @staticmethod
	    def _highlight_moving_regions(frame: np.ndarray, labels: np.ndarray, stats: Dict) -> np.ndarray:
	        """Return a copy of ``frame`` with non-static clusters blended 30% toward white."""
	        moving_ids = [cluster_id for cluster_id, stat in stats.items() if not stat['is_static']]
	        vis_frame = frame.copy()
	        region = np.isin(labels, moving_ids)
	        if region.any():
	            # Blend in NumPy: an (H, W) boolean mask selects an (N, 3) pixel
	            # array that can be written straight back through the same mask.
	            blended = 0.7 * vis_frame[region].astype(np.float32) + 0.3 * 255.0
	            vis_frame[region] = np.clip(np.rint(blended), 0, 255).astype(vis_frame.dtype)
	        return vis_frame

	    def process_video(self, video_path: str, output_path: str = None,
	                      num_clusters: int = 3) -> List[Tuple[np.ndarray, Dict]]:
	        """Process a video and return motion analysis results for each frame pair.

	        Args:
	            video_path: Path of the video to read.
	            output_path: Optional directory for visualization JPEGs (one per
	                frame pair). It is created if it does not exist.
	            num_clusters: Number of motion clusters per frame pair.

	        Returns:
	            One ``(labels, cluster_stats)`` tuple per consecutive frame pair.

	        Raises:
	            ValueError: If the video cannot be opened or has no readable frame.
	            OSError: If a visualization frame cannot be written.
	        """
	        cap = cv2.VideoCapture(video_path)
	        pbar = None
	        results = []
	        try:
	            if not cap.isOpened():
	                raise ValueError(f"Could not open video: {video_path}")

	            ret, prev_frame = cap.read()
	            if not ret:
	                raise ValueError("Could not read first frame")

	            if output_path:
	                os.makedirs(output_path, exist_ok=True)

	            # The reported frame count may be missing or non-positive; fall
	            # back to an open-ended progress bar in that case.
	            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
	            pbar = tqdm(total=total_frames - 1 if total_frames > 1 else None,
	                        desc="Processing video")

	            while True:
	                ret, curr_frame = cap.read()
	                if not ret:
	                    break

	                flow = self.compute_optical_flow(prev_frame, curr_frame)
	                labels, stats = self.analyze_motion_regions(flow, num_clusters=num_clusters)

	                if output_path:
	                    vis_frame = self._highlight_moving_regions(curr_frame, labels, stats)
	                    target = os.path.join(output_path, f"frame_{len(results):04d}.jpg")
	                    # cv2.imwrite signals failure through its return value only.
	                    if not cv2.imwrite(target, vis_frame):
	                        raise OSError(f"Could not write visualization frame: {target}")

	                results.append((labels, stats))
	                prev_frame = curr_frame
	                pbar.update(1)
	        finally:
	            # Release the capture and progress bar on error paths as well.
	            if pbar is not None:
	                pbar.close()
	            cap.release()

	        return results


	def main():
	    """Command-line entry point: analyze a video and print per-frame cluster statistics."""
	    parser = argparse.ArgumentParser(description='Analyze motion regions in a video using RAFT optical flow')
	    parser.add_argument('--video', type=str, required=True, help='Path to input video')
	    parser.add_argument('--output', type=str, help='Path to output directory for visualization')
	    parser.add_argument('--clusters', type=int, default=3, help='Number of motion clusters')
	    args = parser.parse_args()

	    analyzer = OpticalFlowAnalyzer()
	    # Forward --clusters so the option actually controls the clustering.
	    results = analyzer.process_video(args.video, args.output, num_clusters=args.clusters)

	    # Print summary statistics
	    print("\nMotion Analysis Summary:")
	    for frame_number, (_, stats) in enumerate(results, start=1):
	        print(f"\nFrame {frame_number}:")
	        for cluster_id, stat in stats.items():
	            motion_type = "Static" if stat['is_static'] else "Moving"
	            print(f" Cluster {cluster_id} ({motion_type}):")
	            print(f" Mean magnitude: {stat['mean_magnitude']:.4f}")
	            print(f" Pixel count: {stat['pixel_count']}")

	# Run the command-line interface only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()