Commit 5bccb70
1 Parent(s): 98eda10
up test

app.py CHANGED

@@ -22,7 +22,7 @@ from diffusers import StableDiffusionUpscalePipeline
 from diffusers import LDMSuperResolutionPipeline
 import cv2
 import onnxruntime
-import xformers
+# import xformers
 # from xformers.ops import MemoryEfficientAttentionFlashAttentionOp
 
 def removeFurniture(input_img1,

@@ -77,21 +77,28 @@ def segmentation(img):
     return json.dumps(results)
 
 
-def upscale(image, prompt):
-    print("upscale",image,prompt)
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    print("device",device)
 
+
+
+def upscale1(image, prompt):
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    print("upscale1", device, image, prompt)
+
     # image.thumbnail((512, 512))
     # print("resize",image)
 
-
+    torch.backends.cuda.matmul.allow_tf32 = True
+
+    pipe = StableDiffusionUpscalePipeline.from_pretrained("stabilityai/stable-diffusion-x4-upscaler",
+                                                          torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                                                          use_safetensors=True)
     # pipe = StableDiffusionLatentUpscalePipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16)
     pipe = pipe.to(device)
     pipe.enable_attention_slicing()
-    pipe.enable_xformers_memory_efficient_attention(
+    pipe.enable_xformers_memory_efficient_attention()
+    # pipe.enable_xformers_memory_efficient_attention(attention_op=xformers.ops.MemoryEfficientAttentionFlashAttentionOp)
     # Workaround for not accepting attention shape using VAE for Flash Attention
-    pipe.vae.enable_xformers_memory_efficient_attention(
+    pipe.vae.enable_xformers_memory_efficient_attention()
 
     ret = pipe(prompt=prompt,
                image=image,
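
For context on the new `upscale1()` path: a minimal, standalone sketch of the same `stabilityai/stable-diffusion-x4-upscaler` pipeline with attention slicing. This is not part of the commit; the xformers calls are omitted so it also runs where xformers is not installed, the input file `room.png` is hypothetical, and the step count is illustrative.

```python
# Standalone sketch of the x4 upscaler used by upscale1() (assumptions noted above).
import torch
from PIL import Image
from diffusers import StableDiffusionUpscalePipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = StableDiffusionUpscalePipeline.from_pretrained(
    "stabilityai/stable-diffusion-x4-upscaler",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    use_safetensors=True,
).to(device)
pipe.enable_attention_slicing()  # trade a little speed for lower peak memory

low_res = Image.open("room.png").convert("RGB").resize((128, 128))  # hypothetical test input
upscaled = pipe(prompt="empty room", image=low_res, num_inference_steps=20).images[0]
upscaled.save("room_x4.png")
```
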

@@ -111,6 +118,9 @@ def upscale2(image, prompt):
     pipe = LDMSuperResolutionPipeline.from_pretrained("CompVis/ldm-super-resolution-4x-openimages", torch_dtype=torch.float16)
     pipe = pipe.to(device)
     pipe.enable_attention_slicing()
+    pipe.enable_xformers_memory_efficient_attention(attention_op=xformers.ops.MemoryEfficientAttentionFlashAttentionOp)
+    # Workaround for not accepting attention shape using VAE for Flash Attention
+    pipe.vae.enable_xformers_memory_efficient_attention(attention_op=None)
 
     upscaled_image = pipe(image, num_inference_steps=10, eta=1).images[0]
     return upscaled_image
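
Note that the commit comments out `import xformers` at the top of the file while the new line in `upscale2()` still references `xformers.ops.MemoryEfficientAttentionFlashAttentionOp`, so this path appears to work only if xformers is importable at call time. A hedged sketch of a guarded variant, assuming `is_xformers_available` from `diffusers.utils` (present in contemporary diffusers releases); this is not the committed code:

```python
# Guarded xformers enabling for the LDM super-resolution pipeline (a sketch, not app.py).
import torch
from diffusers import LDMSuperResolutionPipeline
from diffusers.utils import is_xformers_available  # assumption: available in the installed diffusers

device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = LDMSuperResolutionPipeline.from_pretrained(
    "CompVis/ldm-super-resolution-4x-openimages",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)
pipe.enable_attention_slicing()
if is_xformers_available():
    # Only turn on memory-efficient attention when the package is actually importable.
    pipe.enable_xformers_memory_efficient_attention()
```
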

@@ -174,7 +184,151 @@ def upscale3(image):
     return image_output
 
 
+
+def split_image(im, rows, cols, should_square, should_quiet=False):
+    im_width, im_height = im.size
+    row_width = int(im_width / cols)
+    row_height = int(im_height / rows)
+    name = "image"
+    ext = ".png"
+    name = os.path.basename(name)
+    images = []
+    if should_square:
+        min_dimension = min(im_width, im_height)
+        max_dimension = max(im_width, im_height)
+        if not should_quiet:
+            print("Resizing image to a square...")
+            print("Determining background color...")
+        bg_color = split.determine_bg_color(im)
+        if not should_quiet:
+            print("Background color is... " + str(bg_color))
+        im_r = Image.new("RGBA" if ext == "png" else "RGB",
+                         (max_dimension, max_dimension), bg_color)
+        offset = int((max_dimension - min_dimension) / 2)
+        if im_width > im_height:
+            im_r.paste(im, (0, offset))
+        else:
+            im_r.paste(im, (offset, 0))
+        im = im_r
+        row_width = int(max_dimension / cols)
+        row_height = int(max_dimension / rows)
+    n = 0
+    for i in range(0, rows):
+        for j in range(0, cols):
+            box = (j * row_width, i * row_height, j * row_width +
+                   row_width, i * row_height + row_height)
+            outp = im.crop(box)
+            outp_path = name + "_" + str(n) + ext
+            if not should_quiet:
+                print("Exporting image tile: " + outp_path)
+            images.append(outp)
+            n += 1
+    return [img for img in images]
+
+def upscale_image(img, rows, cols, seed, prompt, negative_prompt, xformers, cpu_offload, attention_slicing, enable_custom_sliders=False, guidance=7, iterations=50):
+    model_id = "stabilityai/stable-diffusion-x4-upscaler"
+    try:
+        pipeline = StableDiffusionUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+    except:
+        pipeline = StableDiffusionUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16, local_files_only=True)
+    pipeline = pipeline.to("cuda")
+    if xformers:
+        pipeline.enable_xformers_memory_efficient_attention()
+    else:
+        pipeline.disable_xformers_memory_efficient_attention()
+    if cpu_offload:
+        try:
+            pipeline.enable_sequential_cpu_offload()
+        except:
+            pass
+    if attention_slicing:
+        pipeline.enable_attention_slicing()
+    else:
+        pipeline.disable_attention_slicing()
+    img = Image.fromarray(img)
+    # load model and scheduler
+    if seed==-1:
+        generator = torch.manual_seed(random.randint(0, 9999999))
+    else:
+        generator = torch.manual_seed(seed)
+
+    original_width, original_height = img.size
+    max_dimension = max(original_width, original_height)
+    tiles = split_image(img, rows, cols, True, False)
+    ups_tiles = []
+    i = 0
+    for x in tiles:
+        i=i+1
+        if enable_custom_sliders:
+            ups_tile = pipeline(prompt=prompt,negative_prompt=negative_prompt,guidance_scale=guidance, num_inference_steps=iterations, image=x.convert("RGB"),generator=generator).images[0]
+        else:
+            ups_tile = pipeline(prompt=prompt,negative_prompt=negative_prompt, image=x.convert("RGB"),generator=generator).images[0]
+        ups_tiles.append(ups_tile)
+
+    # Determine the size of the merged upscaled image
+    total_width = 0
+    total_height = 0
+    side = 0
+    for ups_tile in ups_tiles:
+        side = ups_tile.width
+        break
+    for x in tiles:
+        tsize = x.width
+        break
+
+    ups_times = abs(side/tsize)
+    new_size = (max_dimension * ups_times, max_dimension * ups_times)
+    total_width = cols*side
+    total_height = rows*side
+
+    # Create a blank image with the calculated size
+    merged_image = Image.new("RGB", (total_width, total_height))
+
+    # Paste each upscaled tile into the blank image
+    current_width = 0
+    current_height = 0
+    maximum_width = cols*side
+    for ups_tile in ups_tiles:
+        merged_image.paste(ups_tile, (current_width, current_height))
+        current_width += ups_tile.width
+        if current_width>=maximum_width:
+            current_width = 0
+            current_height = current_height+side
+
+    # Using the center of the image as pivot, crop the image to the original dimension times four
+    crop_left = (new_size[0] - original_width * ups_times) // 2
+    crop_upper = (new_size[1] - original_height * ups_times) // 2
+    crop_right = crop_left + original_width * ups_times
+    crop_lower = crop_upper + original_height * ups_times
+    final_img = merged_image.crop((crop_left, crop_upper, crop_right, crop_lower))
+
+    # The resulting image should be identical to the original image in proportions / aspect ratio, with no loss of elements.
+    # Save the merged image
+    return final_img
+
+
+def upscale(mode, image, prompt):
+    print("upscale", mode, image, prompt)
+    # return upscale1(image, prompt)
+    return upscale_image(image,rows=3,cols=3,seed=-1,prompt=prompt,negative_prompt="jpeg artifacts, lowres, bad quality, watermark",xformers=True,cpu_offload=True,attention_slicing=True,iterations=10)
+
+modes = {
+    '1': '1',
+    'img2img': 'Image to Image',
+    'inpaint': 'Inpainting',
+    'upscale4x': 'Upscale 4x',
+}
+
+
+
 with gr.Blocks() as app:
+    gr.HTML(
+        f"""
+        Running on <b>{"GPU 🔥" if torch.cuda.is_available() else "CPU 🥶"}</b>
+        </div>
+        """
+    )
+
     with gr.Row():
 
         with gr.Column():
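
The new `split_image()`/`upscale_image()` pair tiles the input into a rows×cols grid (padding to a square first), upscales each tile, pastes the tiles back in row-major order, and finally crops around the center to restore the original aspect ratio at the new scale. A small CPU-only sketch of that same geometry, with a plain `resize()` standing in for the diffusion upscaler; the tile layout, scale factor, and test image below are assumptions, not the committed code:

```python
# Sketch of the split -> upscale-per-tile -> merge -> center-crop geometry (no GPU needed).
from PIL import Image

def split_and_merge(im: Image.Image, rows: int, cols: int, scale: int = 4) -> Image.Image:
    side = max(im.size)
    # Pad to a square so every tile has the same size, centering the original image.
    square = Image.new("RGB", (side, side))
    square.paste(im, ((side - im.width) // 2, (side - im.height) // 2))
    tile_w, tile_h = side // cols, side // rows
    merged = Image.new("RGB", (cols * tile_w * scale, rows * tile_h * scale))
    for i in range(rows):
        for j in range(cols):
            box = (j * tile_w, i * tile_h, (j + 1) * tile_w, (i + 1) * tile_h)
            up = square.crop(box).resize((tile_w * scale, tile_h * scale))  # stand-in for the SD upscaler
            merged.paste(up, (j * tile_w * scale, i * tile_h * scale))
    # Crop back to the original aspect ratio, scaled up, using the center as pivot.
    left = (merged.width - im.width * scale) // 2
    upper = (merged.height - im.height * scale) // 2
    return merged.crop((left, upper, left + im.width * scale, upper + im.height * scale))

print(split_and_merge(Image.new("RGB", (300, 200), "gray"), rows=3, cols=3).size)  # (1200, 800)
```
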

@@ -202,15 +356,71 @@ with gr.Blocks() as app:
             gr.Button("Segmentation").click(segmentation, inputs=gr.Image(type="pil"), outputs=gr.JSON())
 
         with gr.Column():
-            gr.Button("Upscale").click(
+            gr.Button("Upscale").click(
+                upscale,
+                inputs=[
+                    gr.Radio(label="Mode", choices=list(modes.values())[:4], value=modes['txt2img']),
+                    gr.Image(type="pil"),
+                    gr.Textbox(label="prompt",value="empty room")
+                ],
+                outputs=gr.Image())
 
-        with gr.Column():
-            gr.Button("Upscale2").click(upscale2, inputs=[gr.Image(type="pil"),gr.Textbox(label="prompt",value="empty room")], outputs=gr.Image())
 
-        with gr.Column():
-            gr.Button("Upscale3").click(upscale3, inputs=[gr.Image(type="pil")], outputs=gr.Image())
 
-
-
+    # with gr.Row():
+
+    # with gr.Column(scale=55):
+    # with gr.Group():
+    # with gr.Row():
+    # prompt = gr.Textbox(label="Prompt", show_label=False, max_lines=2,placeholder=f"Enter prompt")
+    # generate = gr.Button(value="Generate")
+
+    # gallery = gr.Gallery(label="Generated images", show_label=False)
+    # state_info = gr.Textbox(label="State", show_label=False, max_lines=2)
+    # error_output = gr.Markdown(visible=False)
+
+    # with gr.Column(scale=45):
+    # inf_mode = gr.Radio(label="Inference Mode", choices=list(modes.values())[:4], value=modes['txt2img']) # TODO remove [:3] limit
+
+    # with gr.Group(visible=False) as i2i_options:
+    # image = gr.Image(label="Image", height=128, type="pil")
+    # inpaint_info = gr.Markdown("Inpainting resizes and pads images to 512x512", visible=False)
+    # upscale_info = gr.Markdown("""Best for small images (128x128 or smaller).<br>
+    # Bigger images will be sliced into 128x128 tiles which will be upscaled individually.<br>
+    # This is done to avoid running out of GPU memory.""", visible=False)
+    # strength = gr.Slider(label="Transformation strength", minimum=0, maximum=1, step=0.01, value=0.5)
+
+    # with gr.Group():
+    # neg_prompt = gr.Textbox(label="Negative prompt", placeholder="What to exclude from the image")
+
+    # n_images = gr.Slider(label="Number of images", value=1, minimum=1, maximum=4, step=1)
+    # with gr.Row():
+    # guidance = gr.Slider(label="Guidance scale", value=7.5, maximum=15)
+    # steps = gr.Slider(label="Steps", value=current_steps, minimum=2, maximum=100, step=1)
+
+    # with gr.Row():
+    # width = gr.Slider(label="Width", value=768, minimum=64, maximum=1024, step=8)
+    # height = gr.Slider(label="Height", value=768, minimum=64, maximum=1024, step=8)
+
+    # seed = gr.Slider(0, 2147483647, label='Seed (0 = random)', value=0, step=1)
+    # with gr.Accordion("Memory optimization"):
+    # attn_slicing = gr.Checkbox(label="Attention slicing (a bit slower, but uses less memory)", value=attn_slicing_enabled)
+    # # mem_eff_attn = gr.Checkbox(label="Memory efficient attention (xformers)", value=mem_eff_attn_enabled)
+
+    # inf_mode.change(on_mode_change, inputs=[inf_mode], outputs=[i2i_options, inpaint_info, upscale_info, strength], queue=False)
+    # steps.change(on_steps_change, inputs=[steps], outputs=[], queue=False)
+    # attn_slicing.change(lambda x: switch_attention_slicing(x), inputs=[attn_slicing], queue=False)
+    # # mem_eff_attn.change(lambda x: switch_mem_eff_attn(x), inputs=[mem_eff_attn], queue=False)
+
+    # inputs = [inf_mode, prompt, n_images, guidance, steps, width, height, seed, image, strength, neg_prompt]
+    # outputs = [gallery, error_output]
+    # prompt.submit(inference, inputs=inputs, outputs=outputs)
+    # generate.click(inference, inputs=inputs, outputs=outputs)
+
+    # app.load(update_state_info, inputs=state_info, outputs=state_info, every=0.5, show_progress=False)
+
+
+app.queue()
+app.launch(debug=True,share=True, height=768)
 
 # UP 1
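
One detail in the new wiring: the Radio default is `modes['txt2img']`, but the `modes` dict added above only defines `'1'`, `'img2img'`, `'inpaint'` and `'upscale4x'`, so building the UI would appear to raise a `KeyError` unless another `modes` mapping exists elsewhere in the file. Below is a minimal sketch of the same Button wiring using one of the defined keys; the stub callback and labels are illustrative, not the committed code:

```python
# Hedged sketch of the Blocks wiring: mode selector, image and prompt feed one upscale callback.
import gradio as gr

modes = {'1': '1', 'img2img': 'Image to Image', 'inpaint': 'Inpainting', 'upscale4x': 'Upscale 4x'}

def upscale(mode, image, prompt):
    # Placeholder: the real app dispatches to upscale_image() with a 3x3 tile grid.
    return image

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            mode = gr.Radio(label="Mode", choices=list(modes.values()), value=modes['upscale4x'])
            img = gr.Image(type="pil")
            prompt = gr.Textbox(label="prompt", value="empty room")
            out = gr.Image()
            gr.Button("Upscale").click(upscale, inputs=[mode, img, prompt], outputs=out)

demo.queue()
demo.launch()
```
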