Spaces:

clinteroni
/

outpainting-with-differential-diffusion-demo

Running on Zero

App Files Files Community

clinteroni committed on Apr 12, 2024

Commit

dbf5021

1 Parent(s): 0087f2b

Initial attempt

Browse files

Files changed (3) hide show

README.md +8 -1
app.py +285 -0
requirements.txt +9 -0

README.md CHANGED Viewed

@@ -7,6 +7,13 @@ sdk: gradio
 sdk_version: 4.26.0
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 sdk_version: 4.26.0
 app_file: app.py
 pinned: false
+models:
+  - stabilityai/stable-diffusion-xl-base-1.0
+  - h94/IP-Adapter
+preload_from_hub:
+  - stabilityai/stable-diffusion-xl-base-1.0
+  - h94/IP-Adapter
 ---
+This demo uses code lifted almost verbatim from
+[Outpainting II - Differential Diffusion](https://huggingface.co/blog/OzzyGT/outpainting-differential-diffusion).

app.py ADDED Viewed

	@@ -0,0 +1,285 @@

+import random
+import cv2
+import numpy as np
+import torch
+import gradio as gr
+from diffusers import DPMSolverMultistepScheduler, StableDiffusionXLPipeline
# Run on the GPU in half precision when CUDA is present, otherwise fall back
# to full precision on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device_dtype = torch.float16 if device == 'cuda' else torch.float32

# Keyword arguments later unpacked into
# StableDiffusionXLPipeline.from_pretrained().
xlp_kwargs = {
    'custom_pipeline': 'pipeline_stable_diffusion_xl_differential_img2img'
}
if device == 'cuda':
    # Only request the fp16 weight variant when it will actually be used.
    xlp_kwargs['variant'] = 'fp16'
xlp_kwargs['torch_dtype'] = device_dtype
def merge_images(original, new_image, offset, direction):
    """Grow ``original`` by ``offset`` pixels and paste ``new_image`` on the new edge.

    A black 3-channel ``uint8`` canvas is allocated that is ``offset`` pixels
    wider (for "left"/"right") or taller (for "top"/"bottom") than
    ``original``. ``original`` is written first, against the edge opposite to
    ``direction``; ``new_image`` is then written flush against the
    ``direction`` edge, overwriting ``original`` wherever the two overlap.

    Args:
        original: H x W x 3 array holding the picture assembled so far.
        new_image: array to paste on the growing edge. Its size along the
            non-growing axis must match ``original``.
        offset: number of pixels by which the canvas grows.
        direction: one of "left", "right", "top" or "bottom".

    Returns:
        A new ``uint8`` array containing the merged picture.

    Raises:
        ValueError: if ``direction`` is not one of the four supported values.
    """
    if direction not in ("left", "right", "top", "bottom"):
        # Previously an unknown direction fell through every branch and
        # failed with an UnboundLocalError on ``merged_image``.
        raise ValueError(f"Unsupported direction: {direction!r}")

    height, width = original.shape[:2]
    if direction in ("left", "right"):
        merged_image = np.zeros((height, width + offset, 3), dtype=np.uint8)
    else:
        merged_image = np.zeros((height + offset, width, 3), dtype=np.uint8)

    # Order matters: new_image is written last so it wins in the overlap.
    if direction == "left":
        merged_image[:, offset:] = original
        merged_image[:, :new_image.shape[1]] = new_image
    elif direction == "right":
        merged_image[:, :width] = original
        merged_image[:, width + offset - new_image.shape[1]:] = new_image
    elif direction == "top":
        merged_image[offset:, :] = original
        merged_image[:new_image.shape[0], :] = new_image
    else:  # "bottom"
        merged_image[:height, :] = original
        merged_image[height + offset - new_image.shape[0]:, :] = new_image
    return merged_image
def slice_image(image):
    """Cut six equally sized square tiles out of ``image``.

    The tile side is ``min(width // 2, height // 3)``. Tiles are taken from a
    grid of 3 rows by 2 columns anchored at the top-left corner, so any pixels
    right of the second column or below the third row are not covered.

    Because twice the tile side never exceeds the width and three times the
    tile side never exceeds the height, no tile can overrun the image and no
    clamping is needed.

    Args:
        image: array whose first two axes are (height, width).

    Returns:
        A list of six array views in row-major order (row 0 col 0,
        row 0 col 1, row 1 col 0, ...).
    """
    height, width = image.shape[:2]
    tile = min(width // 2, height // 3)
    return [
        image[row * tile:(row + 1) * tile, col * tile:(col + 1) * tile]
        for row in range(3)
        for col in range(2)
    ]
def process_image(
    image,
    fill_color=(0, 0, 0),
    mask_offset=50,
    blur_radius=500,
    expand_pixels=256,
    direction="left",
    inpaint_mask_color=50,
    max_size=1024,
):
    """Build the pre-filled canvas and blend map for one outpainting step.

    The input is placed on a canvas that is ``expand_pixels`` larger on the
    ``direction`` side (capped at ``max_size`` per axis; when the cap is hit
    the image is cropped on the side opposite to the expansion). The new
    strip is filled with OpenCV's Telea inpainting, and a Gaussian-blurred
    map is produced that is ``inpaint_mask_color`` over the strip (plus
    ``mask_offset`` pixels of the existing picture) and 255 elsewhere.

    Args:
        image: H x W x 3 ``uint8`` array (assumed BGR, since the result is
            converted with ``COLOR_BGR2RGB``).
        fill_color: colour used for the canvas before inpainting.
        mask_offset: how many pixels of the existing picture the low-valued
            band of the map extends into.
        blur_radius: Gaussian kernel size for the map; bumped to the next odd
            number when even, as OpenCV requires an odd kernel.
        expand_pixels: size of the strip added on the ``direction`` side.
        direction: one of "left", "right", "top" or "bottom".
        inpaint_mask_color: gray level (0-255) written into the map over the
            strip before blurring.
        max_size: upper bound for either canvas dimension.

    Returns:
        A tuple ``(inpaint, mask)`` of tensors on ``device``: ``inpaint`` is
        float with shape (1, 3, H, W) scaled to [-1, 1]; ``mask`` is float
        with shape (1, H, W) scaled to [0, 1].

    Raises:
        ValueError: if ``direction`` is not one of the four supported values.
    """
    if direction not in ("left", "right", "top", "bottom"):
        # An unknown direction used to fall through silently and return an
        # unexpanded canvas with nothing marked for inpainting.
        raise ValueError(f"Unsupported direction: {direction!r}")
    vertical = direction in ("top", "bottom")

    height, width = image.shape[:2]
    new_height = height + (expand_pixels if vertical else 0)
    new_width = width + (0 if vertical else expand_pixels)

    # Cap the canvas at max_size. When expanding, crop from the side opposite
    # to the expansion so the edge being extended is preserved. On the axis
    # that is not expanding, crop from the start; previously that case left
    # the image larger than the canvas and the paste below failed.
    if new_height > max_size:
        if direction == "bottom":
            image = image[new_height - max_size:, :]
        else:
            image = image[:max_size, :]
        new_height = max_size
    if new_width > max_size:
        if direction == "right":
            image = image[:, new_width - max_size:]
        else:
            image = image[:, :max_size]
        new_width = max_size
    height, width = image.shape[:2]

    new_image = np.full((new_height, new_width, 3), fill_color, dtype=np.uint8)
    # Single-channel maps: ``mask`` is the blend map handed to the pipeline,
    # ``inpaint_mask`` marks the pixels Telea has to fill.
    mask = np.full((new_height, new_width), 255, dtype=np.uint8)
    inpaint_mask = np.zeros((new_height, new_width), dtype=np.uint8)

    if direction == "left":
        new_image[:, expand_pixels:] = image[:, : max_size - expand_pixels]
        mask[:, : expand_pixels + mask_offset] = inpaint_mask_color
        inpaint_mask[:, :expand_pixels] = 255
    elif direction == "right":
        new_image[:, :width] = image
        mask[:, width - mask_offset:] = inpaint_mask_color
        inpaint_mask[:, width:] = 255
    elif direction == "top":
        new_image[expand_pixels:, :] = image[: max_size - expand_pixels, :]
        mask[: expand_pixels + mask_offset, :] = inpaint_mask_color
        inpaint_mask[:expand_pixels, :] = 255
    else:  # "bottom"
        new_image[:height, :] = image
        mask[height - mask_offset:, :] = inpaint_mask_color
        inpaint_mask[height:, :] = 255

    # Soften the transition between kept and regenerated regions.
    if blur_radius % 2 == 0:
        blur_radius += 1
    mask = cv2.GaussianBlur(mask, (blur_radius, blur_radius), 0)

    # Pre-fill the new strip with Telea inpainting.
    _, mask_np = cv2.threshold(
        inpaint_mask, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    inpaint = cv2.inpaint(new_image, mask_np, 3, cv2.INPAINT_TELEA)

    # Image -> (1, 3, H, W) float tensor in [-1, 1].
    inpaint = cv2.cvtColor(inpaint, cv2.COLOR_BGR2RGB)
    inpaint = torch.from_numpy(inpaint).permute(2, 0, 1).float()
    inpaint = inpaint / 127.5 - 1
    inpaint = inpaint.unsqueeze(0).to(device)

    # Map -> (1, H, W) float tensor in [0, 1].
    mask = torch.from_numpy(mask)
    mask = mask.unsqueeze(0).float() / 255.0
    mask = mask.to(device)
    return inpaint, mask
def image_resize(image, new_size=1024):
    """Resize ``image`` so its longer side equals ``new_size``.

    The aspect ratio is preserved; the shorter side is truncated to an
    integer. A square input becomes ``new_size`` x ``new_size``. Resampling
    uses Lanczos interpolation.

    Args:
        image: array whose first two axes are (height, width).
        new_size: target length of the longer side, in pixels.

    Returns:
        The resized image as returned by ``cv2.resize``.
    """
    src_height, src_width = image.shape[:2]
    ratio = src_width / src_height

    # cv2.resize expects the target size as (width, height).
    if src_width > src_height:
        target = (new_size, int(new_size / ratio))
    elif src_width < src_height:
        target = (int(new_size * ratio), new_size)
    else:
        target = (new_size, new_size)

    return cv2.resize(image, target, interpolation=cv2.INTER_LANCZOS4)
# Load the SDXL base weights through the community differential img2img
# pipeline selected in xlp_kwargs, then move everything to the chosen device.
pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", **xlp_kwargs)
pipeline = pipeline.to(device)

# Swap in a DPM-Solver multistep scheduler with Karras sigmas, reusing the
# configuration of the scheduler that shipped with the checkpoint.
scheduler_config = pipeline.scheduler.config
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
    scheduler_config, use_karras_sigmas=True)

# Attach the IP-Adapter so reference image tiles can guide the generation;
# the low scale keeps its influence small.
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name=["ip-adapter-plus_sdxl_vit-h.safetensors"],
    image_encoder_folder="models/image_encoder",
)
pipeline.set_ip_adapter_scale(0.1)
def generate_image(prompt, negative_prompt, image, mask, ip_adapter_image, seed: int = None):
    """Run one differential-diffusion pass and return the result as ``uint8``.

    Args:
        prompt: positive text prompt.
        negative_prompt: negative text prompt.
        image: image tensor, passed as both ``original_image`` and ``image``.
        mask: blend map tensor, passed to the pipeline as ``map``.
        ip_adapter_image: reference images for the IP-Adapter; wrapped in an
            outer list before being handed to the pipeline.
        seed: seed for the CPU random generator. A random 32-bit value is
            drawn when it is ``None``.

    Returns:
        The first generated image as an H x W x 3 ``uint8`` array with its
        first and third channels swapped relative to the pipeline output.
    """
    if seed is None:
        seed = random.randint(0, 2**32 - 1)
    rng = torch.Generator(device="cpu")
    rng.manual_seed(seed)

    call_args = dict(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=1024,
        height=1024,
        guidance_scale=4.0,
        num_inference_steps=25,
        original_image=image,
        image=image,
        strength=1.0,
        map=mask,
        generator=rng,
        ip_adapter_image=[ip_adapter_image],
        output_type="np",
    )
    result = pipeline(**call_args)

    # The multiplication by 255 implies the "np" output is float in [0, 1].
    frame = (result.images[0] * 255).astype(np.uint8)
    # The conversion is a symmetric channel swap; presumably the pipeline
    # output is RGB, so this yields the BGR order used by the OpenCV helpers.
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
def _rgb_reference_tiles(bgr_image):
    """Return the six tiles of ``bgr_image`` converted from BGR to RGB."""
    return [cv2.cvtColor(tile, cv2.COLOR_BGR2RGB)
            for tile in slice_image(bgr_image)]


def outpaint(pil_image, direction='right', times_to_expand=4):
    """Extend ``pil_image`` outward from one edge by repeated outpainting.

    The image is first resized so its longer side is 1024 pixels and padded
    to a 1024-pixel square along the axis of ``direction``. It is then grown
    ``times_to_expand`` more times by 256 pixels each, with every newly
    generated frame merged onto the running result.

    Args:
        pil_image: start image (anything ``np.array`` turns into an RGB
            H x W x 3 array, e.g. a PIL image).
        direction: edge to paint outward from: "left", "right", "top" or
            "bottom".
        times_to_expand: number of 256-pixel expansion steps after the
            initial squaring step.

    Returns:
        The final picture as an RGB ``uint8`` array.

    Raises:
        gr.Error: if no image was supplied.
    """
    if pil_image is None:
        raise gr.Error("Please provide a start image.")

    prompt = ""
    negative_prompt = ""
    # Per the original author: a lighter value uses more of the Telea
    # inpainting.
    inpaint_mask_color = 50
    # Recommended not to exceed half of the picture so the model keeps
    # enough context.
    expand_pixels = 256
    target_size = 1024

    original = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
    image = image_resize(original)

    # Pad up to a square along the axis being extended: rows (shape[0]) for
    # top/bottom, columns (shape[1]) for left/right. The width used to be
    # measured unconditionally, which is wrong for vertical directions.
    axis = 0 if direction in ("top", "bottom") else 1
    expand_pixels_to_square = target_size - image.shape[axis]
    image, mask = process_image(
        image,
        expand_pixels=expand_pixels_to_square,
        direction=direction,
        inpaint_mask_color=inpaint_mask_color,
    )

    # The working images are BGR, while the IP-Adapter's image processor
    # presumably expects RGB, so reference tiles are converted first.
    generated = generate_image(
        prompt, negative_prompt, image, mask, _rgb_reference_tiles(original))
    final_image = generated

    # int() tolerates a float coming from the UI slider.
    for _ in range(int(times_to_expand)):
        image, mask = process_image(
            final_image,
            direction=direction,
            expand_pixels=expand_pixels,
            inpaint_mask_color=inpaint_mask_color,
        )
        # Guide each step with tiles of the previously generated frame.
        generated = generate_image(
            prompt, negative_prompt, image, mask,
            _rgb_reference_tiles(generated))
        # Grow by the same amount that was just outpainted (previously a
        # separate hard-coded 256).
        final_image = merge_images(
            final_image, generated, expand_pixels, direction)

    return cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
# Web UI: one image input plus the two outpainting controls, wired straight
# to outpaint(); the result is shown as a single image.
gradio_app = gr.Interface(
    fn=outpaint,
    inputs=[
        gr.Image(
            label="Select start image",
            sources=['upload', 'webcam'],
            type='pil',
        ),
        gr.Radio(
            choices=["left", "right", "top", 'bottom'],
            label="Direction",
            info="Outward from which edge to paint?",
            value='right',
        ),
        gr.Slider(
            minimum=2,
            maximum=4,
            step=1,
            value=4,
            label="Times to expand",
            info="Choose between 2 and 4",
        ),
    ],
    outputs=[gr.Image(label="Processed Image")],
    title="Outpainting with differential diffusion demo",
    description='''
        # Outpainting with differential diffusion demo
        This uses code lifted almost verbatim from
        [Outpainting II - Differential Diffusion](https://huggingface.co/blog/OzzyGT/outpainting-differential-diffusion).
        If this Space is running on a CPU, it will take hours to get results.  You may [duplicate this space](https://huggingface.co/spaces/clinteroni/outpainting-demo?duplicate=true) and pay for an upgraded runtime instead.
        ''',
)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    gradio_app.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+accelerate
+git+https://github.com/huggingface/diffusers.git
+gradio
+numpy
+opencv-python
+pillow
+torch
+torchvision
+transformers