Spaces:

shi-labs
/

Versatile-Diffusion

Runtime error

App Files Files Community

3v324v23 committed on Feb 7, 2023

Commit

d5cd13d

1 Parent(s): 3c9b64a

new app

Browse files

Files changed (5) hide show

.gitignore +2 -0
app.py +46 -15
configs/model/optimus.yaml +2 -1
lib/model_zoo/common/get_model.py +8 -21
requirements.txt +1 -1

.gitignore CHANGED Viewed

@@ -7,3 +7,5 @@ log/
 log
 pretrained/
 pretrained

 log
 pretrained/
 pretrained
+gradio_cached_examples/
+gradio_cached_examples

app.py CHANGED Viewed

@@ -252,10 +252,6 @@ class vd_inference(object):
             assert False, 'Model type not supported'
         net = get_model()(cfgm)
-        if self.which == 'v1.0':
-            sd = torch.load('pretrained/vd-four-flow-v1-0.pth', map_location='cpu')
-        net.load_state_dict(sd, strict=False)
         if fp16:
             highlight_print('Running in FP16')
             if self.which == 'v1.0':
@@ -266,6 +262,20 @@ class vd_inference(object):
         else:
             self.dtype = torch.float32
         self.use_cuda = torch.cuda.is_available()
         if self.use_cuda:
             net.to('cuda')
@@ -855,9 +865,11 @@ def tcg_interface(with_example=False):
             cache_examples=cache_examples, )
     gr.HTML('<br><p id=myinst>&nbsp How to add mask: Please see the following instructions.</p><br>'+
-            '<img src="file/assets/demo/misc/mask_inst1.gif" style="float:left;max-width:450px;">'+
-            '<img src="file/assets/demo/misc/mask_inst2.gif" style="float:left;max-width:450px;">'+
-            '<img src="file/assets/demo/misc/mask_inst3.gif" style="float:left;max-width:450px;">',)
 def mcg_interface(with_example=False):
     num_img_input = 4
@@ -917,9 +929,11 @@ def mcg_interface(with_example=False):
             cache_examples=cache_examples, )
     gr.HTML('<br><p id=myinst>&nbsp How to add mask: Please see the following instructions.</p><br>'+
-            '<img src="file/assets/demo/misc/mask_inst1.gif" style="float:left;max-width:450px;">'+
-            '<img src="file/assets/demo/misc/mask_inst2.gif" style="float:left;max-width:450px;">'+
-            '<img src="file/assets/demo/misc/mask_inst3.gif" style="float:left;max-width:450px;">',)
 ###########
 # Example #
@@ -1017,6 +1031,21 @@ css = """
         margin: 0rem;
         color: #6B7280;
     }
 """
 if True:
@@ -1025,7 +1054,7 @@ if True:
             """
             <div style="text-align: center; max-width: 1200px; margin: 20px auto;">
             <h1 style="font-weight: 900; font-size: 3rem; margin: 0rem">
-                Versatile Diffusion{}
             </h1>
             <h2 style="font-weight: 450; font-size: 1rem; margin-top: 0.5rem; margin-bottom: 0.5rem">
             We built <b>Versatile Diffusion (VD), the first unified multi-flow multimodal diffusion framework</b>, as a step towards <b>Universal Generative AI</b>.
@@ -1041,8 +1070,7 @@ if True:
             [<a href="https://github.com/SHI-Labs/Versatile-Diffusion" style="color:blue;">GitHub</a>]
             </h3>
             </div>
-            """.format(' '+vd_inference.which))
-            # .format('')) #
         with gr.Tab('Text-to-Image'):
             t2i_interface(with_example=True)
@@ -1061,7 +1089,10 @@ if True:
         gr.HTML(
             """
-            <div style="text-align: center; max-width: 1200px; margin: 20px auto;">
             <h3 style="font-weight: 450; font-size: 0.8rem; margin: 0rem">
             <b>Caution</b>:
             We would like the raise the awareness of users of this demo of its potential issues and concerns.
@@ -1077,7 +1108,7 @@ if True:
             VD in this demo is meant only for research purposes.
             </h3>
             </div>
-            """)
     demo.launch(share=True)
     # demo.launch(debug=True)

             assert False, 'Model type not supported'
         net = get_model()(cfgm)
         if fp16:
             highlight_print('Running in FP16')
             if self.which == 'v1.0':
         else:
             self.dtype = torch.float32
+        if self.which == 'v1.0':
+            # if fp16:
+            #     sd = torch.load('pretrained/vd-four-flow-v1-0-fp16.pth', map_location='cpu')
+            # else:
+            #     sd = torch.load('pretrained/vd-four-flow-v1-0.pth', map_location='cpu')
+            from huggingface_hub import hf_hub_download
+            if fp16:
+                temppath = hf_hub_download('shi-labs/versatile-diffusion-model', 'pretrained_pth/vd-four-flow-v1-0-fp16.pth')
+            else:
+                temppath = hf_hub_download('shi-labs/versatile-diffusion-model', 'pretrained_pth/vd-four-flow-v1-0.pth')
+            sd = torch.load(temppath, map_location='cpu')
+        net.load_state_dict(sd, strict=False)
         self.use_cuda = torch.cuda.is_available()
         if self.use_cuda:
             net.to('cuda')
             cache_examples=cache_examples, )
     gr.HTML('<br><p id=myinst>&nbsp How to add mask: Please see the following instructions.</p><br>'+
+            '<div id="maskinst">'+
+                '<img src="file/assets/demo/misc/mask_inst1.gif">'+
+                '<img src="file/assets/demo/misc/mask_inst2.gif">'+
+                '<img src="file/assets/demo/misc/mask_inst3.gif">'+
+            '</div>')
 def mcg_interface(with_example=False):
     num_img_input = 4
             cache_examples=cache_examples, )
     gr.HTML('<br><p id=myinst>&nbsp How to add mask: Please see the following instructions.</p><br>'+
+            '<div id="maskinst">'+
+                '<img src="file/assets/demo/misc/mask_inst1.gif">'+
+                '<img src="file/assets/demo/misc/mask_inst2.gif">'+
+                '<img src="file/assets/demo/misc/mask_inst3.gif">'+
+            '</div>')
 ###########
 # Example #
         margin: 0rem;
         color: #6B7280;
     }
+    #maskinst {
+        text-align: justify;
+        min-width: 1200px;
+    }
+    #maskinst>img {
+        min-width:399px;
+        max-width:450px;
+        vertical-align: top;
+        display: inline-block;
+    }
+    #maskinst:after {
+        content: "";
+        width: 100%;
+        display: inline-block;
+    }
 """
 if True:
             """
             <div style="text-align: center; max-width: 1200px; margin: 20px auto;">
             <h1 style="font-weight: 900; font-size: 3rem; margin: 0rem">
+                Versatile Diffusion
             </h1>
             <h2 style="font-weight: 450; font-size: 1rem; margin-top: 0.5rem; margin-bottom: 0.5rem">
             We built <b>Versatile Diffusion (VD), the first unified multi-flow multimodal diffusion framework</b>, as a step towards <b>Universal Generative AI</b>.
             [<a href="https://github.com/SHI-Labs/Versatile-Diffusion" style="color:blue;">GitHub</a>]
             </h3>
             </div>
+            """)
         with gr.Tab('Text-to-Image'):
             t2i_interface(with_example=True)
         gr.HTML(
             """
+            <div style="text-align: justify; max-width: 1200px; margin: 20px auto;">
+            <h3 style="font-weight: 450; font-size: 0.8rem; margin: 0rem">
+            <b>Version</b>: {}
+            </h3>
             <h3 style="font-weight: 450; font-size: 0.8rem; margin: 0rem">
             <b>Caution</b>:
             We would like the raise the awareness of users of this demo of its potential issues and concerns.
             VD in this demo is meant only for research purposes.
             </h3>
             </div>
+            """.format(' '+vd_inference.which))
     demo.launch(share=True)
     # demo.launch(debug=True)

configs/model/optimus.yaml CHANGED Viewed

@@ -92,7 +92,6 @@ optimus_gpt2_tokenizer:
 optimus_v1:
   super_cfg: optimus
   type: optimus_vae_next
-  pth: pretrained/optimus-vae.pth
   args:
     encoder: MODEL(optimus_bert_encoder)
     decoder: MODEL(optimus_gpt2_decoder)
@@ -100,3 +99,5 @@ optimus_v1:
     tokenizer_decoder: MODEL(optimus_gpt2_tokenizer)
     args:
       latent_size: 768

 optimus_v1:
   super_cfg: optimus
   type: optimus_vae_next
   args:
     encoder: MODEL(optimus_bert_encoder)
     decoder: MODEL(optimus_gpt2_decoder)
     tokenizer_decoder: MODEL(optimus_gpt2_tokenizer)
     args:
       latent_size: 768
+  # pth: pretrained/optimus-vae.pth
+  hfm: ['shi-labs/versatile-diffusion-model', 'pretrained_pth/optimus-vae.pth']

lib/model_zoo/common/get_model.py CHANGED Viewed

@@ -8,27 +8,6 @@ from .utils import \
     get_total_param, get_total_param_sum, \
     get_unit
-# def load_state_dict(net, model_path):
-#     if isinstance(net, dict):
-#         for ni, neti in net.items():
-#             paras = torch.load(model_path[ni], map_location=torch.device('cpu'))
-#             new_paras = neti.state_dict()
-#             new_paras.update(paras)
-#             neti.load_state_dict(new_paras)
-#     else:
-#         paras = torch.load(model_path, map_location=torch.device('cpu'))
-#         new_paras = net.state_dict()
-#         new_paras.update(paras)
-#         net.load_state_dict(new_paras)
-#     return
-# def save_state_dict(net, path):
-#     if isinstance(net, (torch.nn.DataParallel,
-#                         torch.nn.parallel.DistributedDataParallel)):
-#         torch.save(net.module.state_dict(), path)
-#     else:
-#         torch.save(net.state_dict(), path)
 def singleton(class_):
     instances = {}
     def getinstance(*args, **kwargs):
@@ -94,6 +73,14 @@ class get_model(object):
             net.load_state_dict(sd, strict=strict_sd)
             if verbose:
                 print_log('Load pth from {}'.format(cfg.pth))
         # display param_num & param_sum
         if verbose:

     get_total_param, get_total_param_sum, \
     get_unit
 def singleton(class_):
     instances = {}
     def getinstance(*args, **kwargs):
             net.load_state_dict(sd, strict=strict_sd)
             if verbose:
                 print_log('Load pth from {}'.format(cfg.pth))
+        elif 'hfm' in cfg:
+            from huggingface_hub import hf_hub_download
+            temppath = hf_hub_download(cfg.hfm[0], cfg.hfm[1])
+            sd = torch.load(temppath, map_location='cpu')
+            strict_sd = cfg.get('strict_sd', True)
+            net.load_state_dict(sd, strict=strict_sd)
+            if verbose:
+                print_log('Load hfm from {}/{}'.format(*cfg.hfm))
         # display param_num & param_sum
         if verbose:

requirements.txt CHANGED Viewed

@@ -12,5 +12,5 @@ torchmetrics==0.7.3
 einops==0.3.0
 omegaconf==2.1.1
-huggingface-hub==0.10.1
 gradio==3.17.1

 einops==0.3.0
 omegaconf==2.1.1
+huggingface-hub==0.11.1
 gradio==3.17.1