Spaces:

fffiloni
/

X-Portrait

Running on Zero

App Files Files Community

X-Portrait / app.py

fffiloni

Update app.py

9dabcb0 verified 5 months ago

raw

history blame contribute delete

9.79 kB

	import gradio as gr
	import subprocess
	import os
	import cv2
	from huggingface_hub import hf_hub_download
	import glob
	from moviepy.editor import VideoFileClip
	from datetime import datetime
	import time
	import spaces
	is_shared_ui = True if "fffiloni/X-Portrait" in os.environ['SPACE_ID'] else False

	# Ensure 'checkpoint' directory exists
	os.makedirs("checkpoint", exist_ok=True)

	hf_hub_download(
	repo_id="fffiloni/X-Portrait",
	filename="model_state-415001.th",
	local_dir="checkpoint"
	)

	def trim_video(video_path, output_dir="trimmed_videos", max_duration=2):
	# Create output directory if it does not exist
	os.makedirs(output_dir, exist_ok=True)

	# Generate a timestamp for the output filename
	timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	output_path = os.path.join(output_dir, f"trimmed_video_{timestamp}.mp4")

	# Load the video
	with VideoFileClip(video_path) as video:
	# Check the duration of the video
	if video.duration > max_duration:
	# Trim the video to the first max_duration seconds
	trimmed_video = video.subclip(0, max_duration)
	# Write the trimmed video to a file
	trimmed_video.write_videofile(output_path, codec="libx264")
	return output_path
	else:
	# If the video is within the duration, return the original path
	return video_path

	def load_driving_video(video_path):
	if is_shared_ui :
	video_path = trim_video(video_path)
	print("Path to the (trimmed) driving video:", video_path)
	frames_data = extract_frames_with_labels(video_path)
	return video_path, frames_data, gr.update(open="True")
	else:
	frames_data = extract_frames_with_labels(video_path)
	return video_path, frames_data, gr.update(open="True")

	def extract_frames_with_labels(video_path, base_output_dir="frames"):

	# Generate a timestamped folder name
	timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	output_dir = os.path.join(base_output_dir, f"frames_{timestamp}")

	# Ensure output directory exists
	os.makedirs(output_dir, exist_ok=True)

	# Open the video file
	video_capture = cv2.VideoCapture(video_path)
	if not video_capture.isOpened():
	raise ValueError(f"Cannot open video file: {video_path}")

	frame_data = []
	frame_index = 0

	# Loop through the video frames
	while True:
	ret, frame = video_capture.read()
	if not ret:
	break # Exit the loop if there are no frames left to read

	# Zero-padded frame index for filename and label
	frame_label = f"{frame_index:04}"
	frame_filename = os.path.join(output_dir, f"frame_{frame_label}.jpg")

	# Save the frame as a .jpg file
	cv2.imwrite(frame_filename, frame)

	# Append the tuple (filename, label) to the list
	frame_data.append((frame_filename, frame_label))

	# Increment frame index
	frame_index += 1

	# Release the video capture object
	video_capture.release()

	return frame_data

	# Define a function to run your script with selected inputs
	@spaces.GPU(duration=180)
	def run_xportrait(source_image, driving_video, seed, uc_scale, best_frame, out_frames, num_mix, ddim_steps, progress=gr.Progress(track_tqdm=True)):

	start_time = time.perf_counter()

	# Create a unique output directory name based on current date and time
	timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	output_dir = f"output_{timestamp}"
	os.makedirs(output_dir, exist_ok=True)

	model_config = "config/cldm_v15_appearance_pose_local_mm.yaml"
	resume_dir = "checkpoint/model_state-415001.th"

	# Construct the command
	command = [
	"python3", "core/test_xportrait.py",
	"--model_config", model_config,
	"--output_dir", output_dir,
	"--resume_dir", resume_dir,
	"--seed", str(seed),
	"--uc_scale", str(uc_scale),
	"--source_image", source_image,
	"--driving_video", driving_video,
	"--best_frame", str(best_frame),
	"--out_frames", str(out_frames),
	"--num_mix", str(num_mix),
	"--ddim_steps", str(ddim_steps)
	]

	# Run the command
	try:
	subprocess.run(command, check=True)

	# Find the generated video file in the output directory
	video_files = glob.glob(os.path.join(output_dir, "*.mp4"))
	print(video_files)
	if video_files:
	final_vid = convert_video_to_h264_aac(video_files[0])

	return f"Output video saved at: {final_vid}", final_vid
	else:
	return "No video file was found in the output directory.", None
	except subprocess.CalledProcessError as e:
	return f"An error occurred: {e}", None
	finally:
	end_time = time.perf_counter()
	elapsed = end_time - start_time
	print(f"[LOG] Execution time: {elapsed:.4f} seconds")

	def convert_video_to_h264_aac(video_path):
	# Get the directory and original filename
	original_dir = os.path.dirname(video_path)
	original_name, _ = os.path.splitext(os.path.basename(video_path))

	# Define the output path in the same directory
	output_path = os.path.join(original_dir, f"{original_name}_converted.mp4")

	# Load the video
	with VideoFileClip(video_path) as video:
	# Write the video with H.264 and AAC codecs
	video.write_videofile(
	output_path,
	codec="libx264", # H.264 video codec
	audio_codec="aac", # AAC audio codec
	temp_audiofile="temp-audio.m4a", # Temporary audio file (moviepy requirement)
	remove_temp=True # Remove temporary files after writing
	)

	return output_path

	# Set up Gradio interface
	css="""
	div#frames-gallery{
	overflow: scroll!important;
	}
	"""

	example_frame_data = extract_frames_with_labels("./assets/driving_video.mp4")
	with gr.Blocks(css=css) as demo:

	with gr.Column(elem_id="col-container"):
	gr.Markdown("# X-Portrait: Expressive Portrait Animation with Hierarchical Motion Attention")
	gr.Markdown("On this shared UI, driving video input will be trimmed to 2 seconds max. Duplicate this space for more controls.")
	gr.HTML("""
	<div style="display:flex;column-gap:4px;">
	<a href='https://github.com/bytedance/X-Portrait'>
	<img src='https://img.shields.io/badge/GitHub-Repo-blue'>
	</a>
	<a href='https://byteaigc.github.io/x-portrait/'>
	<img src='https://img.shields.io/badge/Project-Page-green'>
	</a>
	</div>
	""")
	with gr.Row():
	with gr.Column():
	with gr.Row():
	source_image = gr.Image(label="Source Image", type="filepath")
	driving_video = gr.Video(label="Driving Video")
	with gr.Group():
	with gr.Row():
	best_frame = gr.Number(value=18, label="Best Frame", info="specify the frame index in the driving video where the head pose best matches the source image (note: precision of best_frame index might affect the final quality)")
	out_frames = gr.Number(value=-1, label="Out Frames", info="number of generation frames")
	with gr.Accordion("Driving video Frames", open=False) as frames_gallery_panel:
	driving_frames = gr.Gallery(show_label=True, columns=6, height=380, elem_id="frames-gallery")
	with gr.Row():
	seed = gr.Number(value=999, label="Seed")
	uc_scale = gr.Number(value=5, label="UC Scale")
	with gr.Row():
	num_mix = gr.Number(value=4, label="Number of Mix")
	ddim_steps = gr.Number(value=30, label="DDIM Steps")
	submit_btn = gr.Button("Submit")

	with gr.Column():
	video_output = gr.Video(label="Output Video")
	status = gr.Textbox(label="status")
	gr.Examples(
	examples=[
	["./assets/source_image.png", "./assets/driving_video.mp4", "./assets/inference_result.mp4"]
	],
	inputs=[source_image, driving_video, video_output]
	)

	gr.HTML("""
	<div style="display:flex;column-gap:4px;">
	<a href="https://huggingface.co/spaces/fffiloni/X-Portrait?duplicate=true">
	<img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-xl.svg" alt="Duplicate this Space">
	</a>
	<a href="https://huggingface.co/fffiloni">
	<img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/follow-me-on-HF-xl-dark.svg" alt="Follow me on HF">
	</a>
	</div>
	""")


	driving_video.upload(
	fn = load_driving_video,
	inputs = [driving_video],
	outputs = [driving_video, driving_frames, frames_gallery_panel],
	queue = False
	)

	driving_video.change(
	fn = load_driving_video,
	inputs = [driving_video],
	outputs = [driving_video, driving_frames, frames_gallery_panel],
	queue = False
	)

	submit_btn.click(
	fn = run_xportrait,
	inputs = [source_image, driving_video, seed, uc_scale, best_frame, out_frames, num_mix, ddim_steps],
	outputs = [status, video_output]
	)

	# Launch the Gradio app
	demo.launch(ssr_mode = False)