X-HighVoltage-X committed on
Commit
d02105f
·
verified ·
1 Parent(s): c22d93c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -60
app.py CHANGED
@@ -84,72 +84,27 @@ def inpaint(
84
  import torch
85
  import torch.nn.functional as F
86
  import numpy as np
 
87
 
88
  image = image.convert("RGB")
89
  mask = mask.convert("L")
90
  width, height = calculate_optimal_dimensions(image)
91
 
92
- pipe.to("cuda")
93
-
94
- callback_on_step_end = None
95
- callback_on_step_end_tensor_inputs = None
96
-
97
- # Si se activa la opción, preparar latentes originales para preservar zonas sin máscara
98
- if preserve_unmasked:
99
- np_img = np.array(image).astype(np.float32) / 255.0
100
- img_t = torch.from_numpy(np_img).permute(2, 0, 1).unsqueeze(0).to(pipe.device)
101
- img_t = F.interpolate(img_t, size=(height, width), mode='bilinear', align_corners=False)
102
- img_t = (img_t * 2 - 1).to(dtype=pipe.vae.dtype)
103
-
104
- np_mask = np.array(mask).astype(np.float32) / 255.0
105
- mask_t = torch.from_numpy(np_mask).unsqueeze(0).unsqueeze(0).to(pipe.device)
106
- mask_t = F.interpolate(mask_t, size=(height, width), mode='nearest')
107
 
108
- with torch.no_grad():
109
- latents_orig = pipe.vae.encode(img_t).latent_dist.sample()
110
- scaling = getattr(pipe.vae.config, "scaling_factor", getattr(pipe, "vae_scale_factor", 0.13025))
111
- latents_orig = latents_orig * scaling
112
-
113
- # Ajustar máscara al tamaño de los latentes
114
- latent_height = latents_orig.shape[2]
115
- latent_width = latents_orig.shape[3]
116
- mask_t = F.interpolate(mask_t, size=(latent_height, latent_width), mode="nearest")
117
-
118
- def callback_on_step_end(pipe_self, i, t, callback_kwargs):
119
- latents = callback_kwargs.get("latents", None)
120
- if latents is not None:
121
- # Verificar que tengamos 4 dimensiones [batch, channels, height, width]
122
- if latents.dim() != 4:
123
- print(f"⚠️ Warning: latents has {latents.dim()} dimensions, expected 4")
124
- return callback_kwargs
125
-
126
- # Ajustar dinámicamente los tamaños al del tensor actual
127
- current_height = latents.shape[2]
128
- current_width = latents.shape[3]
129
-
130
- if mask_t.shape[-2:] != (current_height, current_width):
131
- resized_mask = F.interpolate(mask_t, size=(current_height, current_width), mode="nearest")
132
- else:
133
- resized_mask = mask_t
134
-
135
- if latents_orig.shape[-2:] != (current_height, current_width):
136
- resized_latents_orig = F.interpolate(latents_orig, size=(current_height, current_width), mode="nearest")
137
- else:
138
- resized_latents_orig = latents_orig
139
-
140
- # Mezclar solo en las áreas no enmascaradas
141
- latents = latents * resized_mask + resized_latents_orig * (1 - resized_mask)
142
- callback_kwargs["latents"] = latents
143
-
144
- return callback_kwargs
145
 
146
- callback_on_step_end_tensor_inputs = ["latents"]
147
-
148
- else:
149
- callback_on_step_end = None
150
- callback_on_step_end_tensor_inputs = None
151
 
152
- # Ejecutar pipeline
 
 
153
  result = pipe(
154
  image=image,
155
  mask_image=mask,
@@ -160,10 +115,23 @@ def inpaint(
160
  guidance_scale=guidance_scale,
161
  strength=strength,
162
  generator=torch.Generator(device="cuda").manual_seed(seed),
163
- callback_on_step_end=callback_on_step_end,
164
- callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
165
  ).images[0]
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  return result.convert("RGBA"), prompt, seed
168
 
169
  def inpaint_api(
 
84
  import torch
85
  import torch.nn.functional as F
86
  import numpy as np
87
+ from PIL import Image
88
 
89
  image = image.convert("RGB")
90
  mask = mask.convert("L")
91
  width, height = calculate_optimal_dimensions(image)
92
 
93
+ # Guardar imagen original redimensionada para el blending final
94
+ original_resized = image.resize((width, height), Image.Resampling.LANCZOS)
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
+ # Crear máscara suavizada para mejor blending
97
+ mask_resized = mask.resize((width, height), Image.Resampling.LANCZOS)
98
+ mask_array = np.array(mask_resized).astype(np.float32) / 255.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
+ # Aplicar un pequeño blur a la máscara para transiciones más suaves
101
+ from scipy.ndimage import gaussian_filter
102
+ mask_blurred = gaussian_filter(mask_array, sigma=2.0)
103
+ mask_blurred = np.clip(mask_blurred, 0, 1)
 
104
 
105
+ pipe.to("cuda")
106
+
107
+ # Ejecutar pipeline SIN callback (dejamos que el modelo haga su trabajo)
108
  result = pipe(
109
  image=image,
110
  mask_image=mask,
 
115
  guidance_scale=guidance_scale,
116
  strength=strength,
117
  generator=torch.Generator(device="cuda").manual_seed(seed),
 
 
118
  ).images[0]
119
 
120
+ # Si preserve_unmasked está activado, hacer blending en espacio de píxeles
121
+ if preserve_unmasked:
122
+ # Convertir a arrays numpy
123
+ result_array = np.array(result).astype(np.float32)
124
+ original_array = np.array(original_resized).astype(np.float32)
125
+
126
+ # Expandir máscara a 3 canales (RGB)
127
+ mask_3channel = np.stack([mask_blurred] * 3, axis=-1)
128
+
129
+ # Blending: donde mask=1 (blanco) usamos result, donde mask=0 (negro) usamos original
130
+ blended = result_array * mask_3channel + original_array * (1 - mask_3channel)
131
+
132
+ # Convertir de vuelta a imagen
133
+ result = Image.fromarray(blended.astype(np.uint8), mode='RGB')
134
+
135
  return result.convert("RGBA"), prompt, seed
136
 
137
  def inpaint_api(