import os
import time
import torch
import shutil
import argparse
import numpy as np
from tqdm import tqdm
from PIL import Image
from datasets import load_dataset
from accelerate import Accelerator
from diffusers.utils import load_image
from diffusers import (
    AutoencoderKL,
    StableDiffusionXLControlNetPipeline,
    ControlNetModel,
    UNet2DConditionModel,
)
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file


# Define the function to parse command-line arguments
def parse_args(input_args=None):
    parser = argparse.ArgumentParser(description="Simple example of a ControlNet evaluation script.")
    parser.add_argument(
        "--pretrained_model_name_or_path",
        type=str,
        default=None,
        required=True,
        help="Path to pretrained model or model identifier from huggingface.co/models.",
    )
    parser.add_argument(
        "--pretrained_vae_model_name_or_path",
        type=str,
        default=None,
        help="Path to an improved VAE to stabilize training. For more details check out: https://github.com/huggingface/diffusers/pull/4038.",
    )
    parser.add_argument(
        "--controlnet_model_name_or_path",
        type=str,
        default=None,
        required=True,
        help="Path to pretrained ControlNet model.",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default=None,
        required=True,
        help="Path to output results.",
    )
    parser.add_argument(
        "--dataset",
        type=str,
        default="nickpai/coco2017-colorization",
        help="Dataset used for evaluation.",
    )
    parser.add_argument(
        "--dataset_revision",
        type=str,
        default="caption-free",
        choices=["main", "caption-free", "custom-caption"],
        help="Dataset revision option (main/caption-free/custom-caption).",
    )
    parser.add_argument(
        "--mixed_precision",
        type=str,
        default=None,
        choices=["no", "fp16", "bf16"],
        help=(
            "Whether to use mixed precision. Choose between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >="
            " 1.10 and an Nvidia Ampere GPU. Defaults to the value of the accelerate config of the current system or the"
            " flag passed with the `accelerate.launch` command. Use this argument to override the accelerate config."
        ),
    )
    parser.add_argument(
        "--variant",
        type=str,
        default=None,
        help="Variant of the model files of the pretrained model identifier from huggingface.co/models, e.g. fp16",
    )
    parser.add_argument(
        "--revision",
        type=str,
        default=None,
        required=False,
        help="Revision of pretrained model identifier from huggingface.co/models.",
    )
    parser.add_argument(
        "--num_inference_steps",
        type=int,
        default=8,
        help="Number of denoising steps; should match the 1-step, 2-step, 4-step, or 8-step distilled model.",
    )
    parser.add_argument(
        "--repo",
        type=str,
        default="ByteDance/SDXL-Lightning",
        required=True,
        help="Repository from huggingface.co hosting the distilled UNet checkpoints.",
    )
    parser.add_argument(
        "--ckpt",
        type=str,
        default="sdxl_lightning_4step_unet.safetensors",
        required=True,
        help="Checkpoint file to load from the repository.",
    )
    parser.add_argument(
        "--negative_prompt",
        action="store_true",
        help="Whether to use a fixed negative prompt steering generation away from monochrome results.",
    )
    if input_args is not None:
        args = parser.parse_args(input_args)
    else:
        args = parser.parse_args()
    return args
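
# Example invocation (the script name, paths, and checkpoint choice below are
# illustrative; substitute your own ControlNet checkpoint and output folder):
#
#   python eval_controlnet_sdxl_light.py \
#       --pretrained_model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \
#       --controlnet_model_name_or_path ./checkpoints/controlnet-colorization \
#       --repo ByteDance/SDXL-Lightning \
#       --ckpt sdxl_lightning_8step_unet.safetensors \
#       --num_inference_steps 8 \
#       --output_dir ./eval_output
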
def apply_color(image, color_map):
    # Convert both inputs to the LAB color space
    image_lab = image.convert('LAB')
    color_map_lab = color_map.convert('LAB')
    # Split the LAB channels
    l, a, b = image_lab.split()
    _, a_map, b_map = color_map_lab.split()
    # Keep the luminance of `image`, take the chroma from `color_map`
    merged_lab = Image.merge('LAB', (l, a_map, b_map))
    # Convert the merged LAB image back to RGB
    result_rgb = merged_lab.convert('RGB')
    return result_rgb
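
# `apply_color` is a simple LAB-space color transfer: the lightness (L)
# channel comes from the first image, so structural detail is preserved,
# while only the chroma (a/b) channels are taken from the generated color
# map. Minimal standalone sketch (file names are illustrative):
#
#   gray = Image.open("photo.jpg").convert("RGB")
#   colorized = apply_color(gray, Image.open("model_output.png"))
#   colorized.save("result.png")
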
def main(args):
    generator = torch.manual_seed(0)

    # Path to the eval_results folder
    eval_results_folder = os.path.join(args.output_dir, "results")

    # Remove the eval_results folder if it already exists
    if os.path.exists(eval_results_folder):
        shutil.rmtree(eval_results_folder)

    # Create directory for eval_results
    os.makedirs(eval_results_folder)

    # Create subfolders for comparison strips and colorized images
    compare_folder = os.path.join(eval_results_folder, "compare")
    colorized_folder = os.path.join(eval_results_folder, "colorized")
    os.makedirs(compare_folder)
    os.makedirs(colorized_folder)

    # Load the validation split of the colorization dataset
    val_dataset = load_dataset(args.dataset, split="validation", revision=args.dataset_revision)

    accelerator = Accelerator(
        mixed_precision=args.mixed_precision,
    )

    weight_dtype = torch.float32
    if accelerator.mixed_precision == "fp16":
        weight_dtype = torch.float16
    elif accelerator.mixed_precision == "bf16":
        weight_dtype = torch.bfloat16

    vae_path = (
        args.pretrained_model_name_or_path
        if args.pretrained_vae_model_name_or_path is None
        else args.pretrained_vae_model_name_or_path
    )
    vae = AutoencoderKL.from_pretrained(
        vae_path,
        subfolder="vae" if args.pretrained_vae_model_name_or_path is None else None,
        revision=args.revision,
        variant=args.variant,
    )
    # Instantiate the UNet from the base model's config, then load the
    # SDXL-Lightning distilled weights into it
    unet = UNet2DConditionModel.from_config(
        args.pretrained_model_name_or_path,
        subfolder="unet",
        revision=args.revision,
        variant=args.variant,
    )
    unet.load_state_dict(load_file(hf_hub_download(args.repo, args.ckpt)))
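    # Note: the SDXL-Lightning repository names its distilled UNets by step
    # count (the default sdxl_lightning_4step_unet.safetensors pairs with
    # --num_inference_steps 4); --num_inference_steps should match the step
    # count baked into --ckpt, otherwise output quality typically degrades.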

    # Move the VAE and UNet to device and cast to weight_dtype
    # (the base model's VAE is kept in float32 to avoid NaN issues)
    if args.pretrained_vae_model_name_or_path is not None:
        vae.to(accelerator.device, dtype=weight_dtype)
    else:
        vae.to(accelerator.device, dtype=torch.float32)
    unet.to(accelerator.device, dtype=weight_dtype)

    controlnet = ControlNetModel.from_pretrained(args.controlnet_model_name_or_path, torch_dtype=weight_dtype)

    pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
        args.pretrained_model_name_or_path,
        vae=vae,
        unet=unet,
        controlnet=controlnet,
    )
    pipe.to(accelerator.device, dtype=weight_dtype)

    # Prepare everything with our `accelerator`
    pipe, val_dataset = accelerator.prepare(pipe, val_dataset)
    pipe.safety_checker = None
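    # Note: StableDiffusionXLControlNetPipeline has no built-in safety-checker
    # component, so the assignment above appears to be a no-op; similarly,
    # `accelerator.prepare` has no special handling for pipelines or datasets
    # and should pass both objects through unchanged (no training occurs here).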

    # Counter for processed images
    processed_images = 0

    # Record start time
    start_time = time.time()

    # Iterate through the validation dataset
    for example in tqdm(val_dataset, desc="Processing Images"):
        image_path = example["file_name"]

        prompt = []
        for caption in example["captions"]:
            if isinstance(caption, str):
                prompt.append(caption)
            elif isinstance(caption, (list, np.ndarray)):
                # take the first caption if there are multiple
                prompt.append(caption[0])
            else:
                raise ValueError(
                    "Caption column `captions` should contain either strings or lists of strings."
                )

        negative_prompt = None
        if args.negative_prompt:
            negative_prompt = [
                "low quality, bad quality, low contrast, black and white, bw, monochrome, grainy, blurry, historical, restored, desaturate"
            ]

        # Load the ground truth and build the grayscale conditioning image
        ground_truth_image = load_image(image_path).resize((512, 512))
        control_image = load_image(image_path).convert("L").convert("RGB").resize((512, 512))
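        # The conditioning image is the luminance of the ground truth,
        # replicated to three channels because the ControlNet expects an RGB
        # input; the model's job is to predict plausible chroma for it.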

        # Generate the colorized image
        image = pipe(prompt=prompt,
                     negative_prompt=negative_prompt,
                     num_inference_steps=args.num_inference_steps,
                     generator=generator,
                     image=control_image).images[0]

        # Apply color mapping: keep the ground truth luminance,
        # take the generated chroma
        image = apply_color(ground_truth_image, image)

        # Concatenate input, result, and ground truth into one comparison row
        row_image = np.hstack((np.array(control_image), np.array(image), np.array(ground_truth_image)))
        row_image = Image.fromarray(row_image)

        # Save the row image in the compare folder
        compare_output_path = os.path.join(compare_folder, os.path.basename(image_path))
        row_image.save(compare_output_path)

        # Save the colorized image in the colorized folder
        colorized_output_path = os.path.join(colorized_folder, os.path.basename(image_path))
        image.save(colorized_output_path)

        # Increment processed images counter
        processed_images += 1

    # Record end time
    end_time = time.time()

    # Calculate total time taken and throughput
    total_time = end_time - start_time
    fps = processed_images / total_time

    print("All images processed.")
    print(f"Total time taken: {total_time:.2f} seconds")
    print(f"FPS: {fps:.2f}")


# Entry point of the script
if __name__ == "__main__":
    args = parse_args()
    main(args)
