Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -84,72 +84,27 @@ def inpaint(
|
|
| 84 |
import torch
|
| 85 |
import torch.nn.functional as F
|
| 86 |
import numpy as np
|
|
|
|
| 87 |
|
| 88 |
image = image.convert("RGB")
|
| 89 |
mask = mask.convert("L")
|
| 90 |
width, height = calculate_optimal_dimensions(image)
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
callback_on_step_end = None
|
| 95 |
-
callback_on_step_end_tensor_inputs = None
|
| 96 |
-
|
| 97 |
-
# Si se activa la opción, preparar latentes originales para preservar zonas sin máscara
|
| 98 |
-
if preserve_unmasked:
|
| 99 |
-
np_img = np.array(image).astype(np.float32) / 255.0
|
| 100 |
-
img_t = torch.from_numpy(np_img).permute(2, 0, 1).unsqueeze(0).to(pipe.device)
|
| 101 |
-
img_t = F.interpolate(img_t, size=(height, width), mode='bilinear', align_corners=False)
|
| 102 |
-
img_t = (img_t * 2 - 1).to(dtype=pipe.vae.dtype)
|
| 103 |
-
|
| 104 |
-
np_mask = np.array(mask).astype(np.float32) / 255.0
|
| 105 |
-
mask_t = torch.from_numpy(np_mask).unsqueeze(0).unsqueeze(0).to(pipe.device)
|
| 106 |
-
mask_t = F.interpolate(mask_t, size=(height, width), mode='nearest')
|
| 107 |
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
latents_orig = latents_orig * scaling
|
| 112 |
-
|
| 113 |
-
# Ajustar máscara al tamaño de los latentes
|
| 114 |
-
latent_height = latents_orig.shape[2]
|
| 115 |
-
latent_width = latents_orig.shape[3]
|
| 116 |
-
mask_t = F.interpolate(mask_t, size=(latent_height, latent_width), mode="nearest")
|
| 117 |
-
|
| 118 |
-
def callback_on_step_end(pipe_self, i, t, callback_kwargs):
|
| 119 |
-
latents = callback_kwargs.get("latents", None)
|
| 120 |
-
if latents is not None:
|
| 121 |
-
# Verificar que tengamos 4 dimensiones [batch, channels, height, width]
|
| 122 |
-
if latents.dim() != 4:
|
| 123 |
-
print(f"⚠️ Warning: latents has {latents.dim()} dimensions, expected 4")
|
| 124 |
-
return callback_kwargs
|
| 125 |
-
|
| 126 |
-
# Ajustar dinámicamente los tamaños al del tensor actual
|
| 127 |
-
current_height = latents.shape[2]
|
| 128 |
-
current_width = latents.shape[3]
|
| 129 |
-
|
| 130 |
-
if mask_t.shape[-2:] != (current_height, current_width):
|
| 131 |
-
resized_mask = F.interpolate(mask_t, size=(current_height, current_width), mode="nearest")
|
| 132 |
-
else:
|
| 133 |
-
resized_mask = mask_t
|
| 134 |
-
|
| 135 |
-
if latents_orig.shape[-2:] != (current_height, current_width):
|
| 136 |
-
resized_latents_orig = F.interpolate(latents_orig, size=(current_height, current_width), mode="nearest")
|
| 137 |
-
else:
|
| 138 |
-
resized_latents_orig = latents_orig
|
| 139 |
-
|
| 140 |
-
# Mezclar solo en las áreas no enmascaradas
|
| 141 |
-
latents = latents * resized_mask + resized_latents_orig * (1 - resized_mask)
|
| 142 |
-
callback_kwargs["latents"] = latents
|
| 143 |
-
|
| 144 |
-
return callback_kwargs
|
| 145 |
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
callback_on_step_end_tensor_inputs = None
|
| 151 |
|
| 152 |
-
|
|
|
|
|
|
|
| 153 |
result = pipe(
|
| 154 |
image=image,
|
| 155 |
mask_image=mask,
|
|
@@ -160,10 +115,23 @@ def inpaint(
|
|
| 160 |
guidance_scale=guidance_scale,
|
| 161 |
strength=strength,
|
| 162 |
generator=torch.Generator(device="cuda").manual_seed(seed),
|
| 163 |
-
callback_on_step_end=callback_on_step_end,
|
| 164 |
-
callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
|
| 165 |
).images[0]
|
| 166 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
return result.convert("RGBA"), prompt, seed
|
| 168 |
|
| 169 |
def inpaint_api(
|
|
|
|
| 84 |
import torch
|
| 85 |
import torch.nn.functional as F
|
| 86 |
import numpy as np
|
| 87 |
+
from PIL import Image
|
| 88 |
|
| 89 |
image = image.convert("RGB")
|
| 90 |
mask = mask.convert("L")
|
| 91 |
width, height = calculate_optimal_dimensions(image)
|
| 92 |
|
| 93 |
+
# Guardar imagen original redimensionada para el blending final
|
| 94 |
+
original_resized = image.resize((width, height), Image.Resampling.LANCZOS)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
+
# Crear máscara suavizada para mejor blending
|
| 97 |
+
mask_resized = mask.resize((width, height), Image.Resampling.LANCZOS)
|
| 98 |
+
mask_array = np.array(mask_resized).astype(np.float32) / 255.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
+
# Aplicar un pequeño blur a la máscara para transiciones más suaves
|
| 101 |
+
from scipy.ndimage import gaussian_filter
|
| 102 |
+
mask_blurred = gaussian_filter(mask_array, sigma=2.0)
|
| 103 |
+
mask_blurred = np.clip(mask_blurred, 0, 1)
|
|
|
|
| 104 |
|
| 105 |
+
pipe.to("cuda")
|
| 106 |
+
|
| 107 |
+
# Ejecutar pipeline SIN callback (dejamos que el modelo haga su trabajo)
|
| 108 |
result = pipe(
|
| 109 |
image=image,
|
| 110 |
mask_image=mask,
|
|
|
|
| 115 |
guidance_scale=guidance_scale,
|
| 116 |
strength=strength,
|
| 117 |
generator=torch.Generator(device="cuda").manual_seed(seed),
|
|
|
|
|
|
|
| 118 |
).images[0]
|
| 119 |
|
| 120 |
+
# Si preserve_unmasked está activado, hacer blending en espacio de píxeles
|
| 121 |
+
if preserve_unmasked:
|
| 122 |
+
# Convertir a arrays numpy
|
| 123 |
+
result_array = np.array(result).astype(np.float32)
|
| 124 |
+
original_array = np.array(original_resized).astype(np.float32)
|
| 125 |
+
|
| 126 |
+
# Expandir máscara a 3 canales (RGB)
|
| 127 |
+
mask_3channel = np.stack([mask_blurred] * 3, axis=-1)
|
| 128 |
+
|
| 129 |
+
# Blending: donde mask=1 (blanco) usamos result, donde mask=0 (negro) usamos original
|
| 130 |
+
blended = result_array * mask_3channel + original_array * (1 - mask_3channel)
|
| 131 |
+
|
| 132 |
+
# Convertir de vuelta a imagen
|
| 133 |
+
result = Image.fromarray(blended.astype(np.uint8), mode='RGB')
|
| 134 |
+
|
| 135 |
return result.convert("RGBA"), prompt, seed
|
| 136 |
|
| 137 |
def inpaint_api(
|