rich-text-to-image

Runtime error

App Files Files Community

Songwei Ge committed on Mar 27, 2023

Commit

ab7db7f

1 Parent(s): b41079f

demo

Browse files

Files changed (2) hide show

app.py +4 -22
utils/attention_utils.py +11 -22

app.py CHANGED Viewed

@@ -23,25 +23,6 @@ Instructions placeholder.
 """
-example_instructions = [
-    "Make it a picasso painting",
-    "as if it were by modigliani",
-    "convert to a bronze statue",
-    "Turn it into an anime.",
-    "have it look like a graphic novel",
-    "make him gain weight",
-    "what would he look like bald?",
-    "Have him smile",
-    "Put him in a cocktail party.",
-    "move him at the beach.",
-    "add dramatic lighting",
-    "Convert to black and white",
-    "What if it were snowing?",
-    "Give him a leather jacket",
-    "Turn him into a cyborg!",
-    "make him wear a beanie",
-]
 def main():
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     model = RegionDiffusion(device)
@@ -90,9 +71,9 @@ def main():
                                             height=height, width=width, num_inference_steps=steps,
                                             guidance_scale=guidance_weight)
         print('time lapses to get attention maps: %.4f' % (time.time()-begin_time))
-        color_obj_masks = get_token_maps(
             model.attention_maps, run_dir, width//8, height//8, color_target_token_ids, seed)
-        model.masks = get_token_maps(
             model.attention_maps, run_dir, width//8, height//8, region_target_token_ids, seed, base_tokens)
         color_obj_masks = [transforms.functional.resize(color_obj_mask, (height, width),
                                                         interpolation=transforms.InterpolationMode.BICUBIC,
@@ -110,7 +91,8 @@ def main():
                                     text_format_dict=text_format_dict)
         print('time lapses to generate image from rich text: %.4f' %
             (time.time()-begin_time))
-        return [plain_img[0], rich_img[0]]
     with gr.Blocks() as demo:
         gr.HTML("""<h1 style="font-weight: 900; margin-bottom: 7px;">Expressive Text-to-Image Generation with Rich Text</h1>

 """
 def main():
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     model = RegionDiffusion(device)
                                             height=height, width=width, num_inference_steps=steps,
                                             guidance_scale=guidance_weight)
         print('time lapses to get attention maps: %.4f' % (time.time()-begin_time))
+        color_obj_masks, _ = get_token_maps(
             model.attention_maps, run_dir, width//8, height//8, color_target_token_ids, seed)
+        model.masks, token_maps = get_token_maps(
             model.attention_maps, run_dir, width//8, height//8, region_target_token_ids, seed, base_tokens)
         color_obj_masks = [transforms.functional.resize(color_obj_mask, (height, width),
                                                         interpolation=transforms.InterpolationMode.BICUBIC,
                                     text_format_dict=text_format_dict)
         print('time lapses to generate image from rich text: %.4f' %
             (time.time()-begin_time))
+        cat_img = np.concatenate([plain_img[0], rich_img[0]], 1)
+        return [cat_img, token_maps]
     with gr.Blocks() as demo:
         gr.HTML("""<h1 style="font-weight: 900; margin-bottom: 7px;">Expressive Text-to-Image Generation with Rich Text</h1>

utils/attention_utils.py CHANGED Viewed

@@ -76,15 +76,19 @@ def plot_attention_maps(atten_map_list, obj_tokens, save_dir, seed, tokens_vis=N
         norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
         sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
         fig.colorbar(sm, cax=axs[-1])
         fig.tight_layout()
         plt.savefig(os.path.join(
             save_dir, 'token_mapes_seed%d_%s.png' % (seed, atten_names[i])), dpi=100)
         plt.close('all')
-def get_token_maps(attention_maps, save_dir, width, height, obj_tokens, seed=0, tokens_vis=None,
-                   preprocess=False):
     r"""Function to visualize attention maps.
     Args:
         save_dir (str): Path to save attention maps
@@ -177,23 +181,8 @@ def get_token_maps(attention_maps, save_dir, width, height, obj_tokens, seed=0,
     attention_maps_averaged_normalized = [
         attention_maps_averaged_normalized[i:i+1] for i in range(attention_maps_averaged_normalized.shape[0])]
-    if preprocess:
-        # it is possible to preprocess the attention maps here
-        selem = square(5)
-        attention_maps_averaged_eroded = [erosion(skimage.img_as_float(
-            map[0].numpy()*255), selem) for map in attention_maps_averaged_normalized[:2]]
-        attention_maps_averaged_eroded = [(torch.from_numpy(map).unsqueeze(
-            0)/255. > 0.8).float() for map in attention_maps_averaged_eroded]
-        attention_maps_averaged_eroded.append(
-            1 - torch.cat(attention_maps_averaged_eroded).sum(0, True))
-        plot_attention_maps([attention_maps_averaged, attention_maps_averaged_normalized,
-                            attention_maps_averaged_eroded], obj_tokens, save_dir, seed, tokens_vis)
-        attention_maps_averaged_eroded = [attn_mask.unsqueeze(1).repeat(
-            [1, 4, 1, 1]).cuda() for attn_mask in attention_maps_averaged_eroded]
-        return attention_maps_averaged_eroded
-    else:
-        plot_attention_maps([attention_maps_averaged, attention_maps_averaged_normalized],
-                            obj_tokens, save_dir, seed, tokens_vis)
-        attention_maps_averaged_normalized = [attn_mask.unsqueeze(1).repeat(
-            [1, 4, 1, 1]).cuda() for attn_mask in attention_maps_averaged_normalized]
-        return attention_maps_averaged_normalized

         norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
         sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
         fig.colorbar(sm, cax=axs[-1])
+        canvas = fig.canvas
+        canvas.draw()
+        width, height = canvas.get_width_height()
+        img = np.frombuffer(canvas.tostring_rgb(), dtype='uint8').reshape((height, width, 3))
         fig.tight_layout()
         plt.savefig(os.path.join(
             save_dir, 'token_mapes_seed%d_%s.png' % (seed, atten_names[i])), dpi=100)
         plt.close('all')
+    return img
+def get_token_maps(attention_maps, save_dir, width, height, obj_tokens, seed=0, tokens_vis=None):
     r"""Function to visualize attention maps.
     Args:
         save_dir (str): Path to save attention maps
     attention_maps_averaged_normalized = [
         attention_maps_averaged_normalized[i:i+1] for i in range(attention_maps_averaged_normalized.shape[0])]
+    token_maps_vis = plot_attention_maps([attention_maps_averaged, attention_maps_averaged_normalized],
+                        obj_tokens, save_dir, seed, tokens_vis)
+    attention_maps_averaged_normalized = [attn_mask.unsqueeze(1).repeat(
+        [1, 4, 1, 1]).cuda() for attn_mask in attention_maps_averaged_normalized]
+    return attention_maps_averaged_normalized, token_maps_vis