Spaces: Running on Zero
fix temp files cache and move to ZeroGPU
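
The ZeroGPU half of this commit is the `import spaces` and the `@spaces.GPU` decorator on `edit()`: on a ZeroGPU Space the process starts without a GPU, and a device is attached only for the duration of a decorated call. The `SPACES_ZERO_GPU` shim at the top of the file (normalizing `"true"` to `"1"` before anything else is imported) is the temporary workaround the "Will be fixed soon" comment refers to. A minimal sketch of the ZeroGPU pattern, assuming a generic diffusers pipeline; the model id, function name, and parameters below are illustrative, not this Space's actual code:

# Minimal ZeroGPU sketch (illustrative; not this Space's code).
import spaces
import torch
from diffusers import DiffusionPipeline

# Load once at startup; under ZeroGPU the CUDA move is deferred
# until a GPU is actually attached to a @spaces.GPU call.
pipe = DiffusionPipeline.from_pretrained("cvssp/audioldm2-music",
                                         torch_dtype=torch.float16)
pipe.to('cuda')

@spaces.GPU  # a GPU is allocated only while this function runs
def generate(prompt: str):
    return pipe(prompt, num_inference_steps=50).audios[0]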
app.py CHANGED
@@ -1,12 +1,18 @@
+# Will be fixed soon, but meanwhile:
+import os
+if os.getenv('SPACES_ZERO_GPU') == "true":
+    os.environ['SPACES_ZERO_GPU'] = "1"
+
 import gradio as gr
 import random
 import torch
-import os
 from torch import inference_mode
-from tempfile import NamedTemporaryFile
+# from tempfile import NamedTemporaryFile
+from typing import Optional
 import numpy as np
 from models import load_model
 import utils
+import spaces
 from inversion_utils import inversion_forward_process, inversion_reverse_process
 
 
@@ -31,7 +37,7 @@ def randomize_seed_fn(seed, randomize_seed):
 
 
 def invert(ldm_stable, x0, prompt_src, num_diffusion_steps, cfg_scale_src): # , ldm_stable):
-    ldm_stable.model.scheduler.set_timesteps(num_diffusion_steps, device=device)
+    # ldm_stable.model.scheduler.set_timesteps(num_diffusion_steps, device=device)
 
     with inference_mode():
         w0 = ldm_stable.vae_encode(x0)
@@ -67,21 +73,22 @@ def sample(ldm_stable, zs, wts, steps, prompt_tar, tstart, cfg_scale_tar): # ,
 
     return (16000, audio.squeeze().cpu().numpy())
 
-
-def edit(cache_dir,
-         input_audio,
-         model_id: str,
-         do_inversion: bool,
-         wtszs_file: str,
-         # wts: Optional[torch.Tensor], zs: Optional[torch.Tensor],
-         saved_inv_model: str,
-         source_prompt="",
-         target_prompt="",
-         steps=200,
-         cfg_scale_src=3.5,
-         cfg_scale_tar=12,
-         t_start=45,
-         randomize_seed=True):
+@spaces.GPU
+def edit(
+        # cache_dir,
+        input_audio,
+        model_id: str,
+        do_inversion: bool,
+        # wtszs_file: str,
+        wts: Optional[torch.Tensor], zs: Optional[torch.Tensor],
+        saved_inv_model: str,
+        source_prompt="",
+        target_prompt="",
+        steps=200,
+        cfg_scale_src=3.5,
+        cfg_scale_tar=12,
+        t_start=45,
+        randomize_seed=True):
 
     print(model_id)
     if model_id == LDM2:
@@ -89,7 +96,9 @@ def edit(cache_dir,
     elif model_id == LDM2_LARGE:
         ldm_stable = ldm2_large
     else: # MUSIC
-        ldm_stable = ldm2_music
+        ldm_stable = ldm2_music
+
+    ldm_stable.model.scheduler.set_timesteps(steps, device=device)
 
     # If the inversion was done for a different model, we need to re-run the inversion
     if not do_inversion and (saved_inv_model is None or saved_inv_model != model_id):
@@ -99,29 +108,35 @@ def edit(cache_dir,
         raise gr.Error('Input audio missing!')
     x0 = utils.load_audio(input_audio, ldm_stable.get_fn_STFT(), device=device)
 
-    if not (do_inversion or randomize_seed):
-        if not os.path.exists(wtszs_file):
-            do_inversion = True
+    # if not (do_inversion or randomize_seed):
+    #     if not os.path.exists(wtszs_file):
+    #         do_inversion = True
             # Too much time has passed
+    if wts is None or zs is None:
+        do_inversion = True
 
     if do_inversion or randomize_seed: # always re-run inversion
         zs_tensor, wts_tensor = invert(ldm_stable=ldm_stable, x0=x0, prompt_src=source_prompt,
                                        num_diffusion_steps=steps,
                                        cfg_scale_src=cfg_scale_src)
-        f = NamedTemporaryFile("wb", dir=cache_dir, suffix=".pth", delete=False)
-        torch.save({'wts': wts_tensor, 'zs': zs_tensor}, f.name)
-        wtszs_file = f.name
+        # f = NamedTemporaryFile("wb", dir=cache_dir, suffix=".pth", delete=False)
+        # torch.save({'wts': wts_tensor, 'zs': zs_tensor}, f.name)
+        # wtszs_file = f.name
         # wtszs_file = gr.State(value=f.name)
         # wts = gr.State(value=wts_tensor)
+        wts = wts_tensor
+        zs = zs_tensor
         # zs = gr.State(value=zs_tensor)
         # demo.move_resource_to_block_cache(f.name)
         saved_inv_model = model_id
         do_inversion = False
     else:
-        wtszs = torch.load(wtszs_file, map_location=device)
-        # wtszs = torch.load(wtszs_file.f, map_location=device)
-        wts_tensor = wtszs['wts']
-        zs_tensor = wtszs['zs']
+        # wtszs = torch.load(wtszs_file, map_location=device)
+        # # wtszs = torch.load(wtszs_file.f, map_location=device)
+        # wts_tensor = wtszs['wts']
+        # zs_tensor = wtszs['zs']
+        wts_tensor = wts.to(device)
+        zs_tensor = zs.to(device)
 
     # make sure t_start is in the right limit
     # t_start = change_tstart_range(t_start, steps)
@@ -129,7 +144,8 @@ def edit(cache_dir,
     output = sample(ldm_stable, zs_tensor, wts_tensor, steps, prompt_tar=target_prompt,
                     tstart=int(t_start / 100 * steps), cfg_scale_tar=cfg_scale_tar)
 
-    return output, wtszs_file, saved_inv_model, do_inversion
+    return output, wts.cpu(), zs.cpu(), saved_inv_model, do_inversion
+    # return output, wtszs_file, saved_inv_model, do_inversion
 
 
 def get_example():
@@ -208,7 +224,7 @@ change <code style="display:inline; background-color: lightgrey; ">duration = mi
 
 """
 
-with gr.Blocks(css='style.css', delete_cache=(3600, 3600)) as demo:
+with gr.Blocks(css='style.css') as demo: #, delete_cache=(3600, 3600)) as demo:
     def reset_do_inversion(do_inversion_user, do_inversion):
         # do_inversion = gr.State(value=True)
         do_inversion = True
@@ -219,18 +235,18 @@ with gr.Blocks(css='style.css', delete_cache=(3600, 3600)) as demo:
     def clear_do_inversion_user(do_inversion_user):
         do_inversion_user = False
         return do_inversion_user
+
     def post_match_do_inversion(do_inversion_user, do_inversion):
         if do_inversion_user:
             do_inversion = True
             do_inversion_user = False
         return do_inversion_user, do_inversion
 
-
     gr.HTML(intro)
-    # wts = gr.State()
-    # zs = gr.State()
+    wts = gr.State()
+    zs = gr.State()
     wtszs = gr.State()
-    cache_dir = gr.State(demo.GRADIO_CACHE)
+    # cache_dir = gr.State(demo.GRADIO_CACHE)
    saved_inv_model = gr.State()
    # current_loaded_model = gr.State(value="cvssp/audioldm2-music")
    # ldm_stable = load_model("cvssp/audioldm2-music", device, 200)
@@ -293,13 +309,13 @@ with gr.Blocks(css='style.css', delete_cache=(3600, 3600)) as demo:
             outputs=[seed], queue=False).then(
             fn=clear_do_inversion_user, inputs=[do_inversion_user], outputs=[do_inversion_user]).then(
             fn=edit,
-            inputs=[cache_dir,
+            inputs=[#cache_dir,
                     input_audio,
                     model_id,
                     do_inversion,
                     # current_loaded_model, ldm_stable,
-                    # wts, zs,
-                    wtszs,
+                    wts, zs,
+                    # wtszs,
                     saved_inv_model,
                     src_prompt,
                     tar_prompt,
@@ -309,7 +325,7 @@ with gr.Blocks(css='style.css', delete_cache=(3600, 3600)) as demo:
                     t_start,
                     randomize_seed
                     ],
-            outputs=[output_audio, wtszs,
+            outputs=[output_audio, wts, zs, # wtszs,
                      saved_inv_model, do_inversion] # , current_loaded_model, ldm_stable],
             ).then(post_match_do_inversion, inputs=[do_inversion_user, do_inversion], outputs=[do_inversion_user, do_inversion]
             ).then(lambda x: (demo.temp_file_sets.append(set([str(gr.utils.abspath(x))])) if type(x) is str else None),
@@ -332,4 +348,4 @@ with gr.Blocks(css='style.css', delete_cache=(3600, 3600)) as demo:
     )
 
 demo.queue()
-demo.launch()
+demo.launch(state_session_capacity=15)
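
The temp-files half of the fix: the inversion latents `wts`/`zs` were previously written with `torch.save` to a `NamedTemporaryFile` under the Gradio cache, which `delete_cache=(3600, 3600)` could reap while a session was still alive (the "Too much time has passed" case). The diff instead keeps the latents in per-session `gr.State` values holding CPU tensors, and bounds memory with `state_session_capacity`. A minimal sketch of that caching pattern, with a toy computation standing in for the inversion; all names below are illustrative, not this Space's code:

# Per-session tensor caching via gr.State (illustrative sketch).
import gradio as gr
import torch

def heavy_step(text, cached):
    if cached is None:                  # first call in this session: recompute
        cached = torch.randn(1000)      # stands in for the expensive inversion
    total = cached.sum().item()
    return f"{text}: sum={total:.3f}", cached.cpu()  # hand a CPU tensor back to the State

with gr.Blocks() as demo:
    cache = gr.State()                  # starts as None, kept per browser session
    inp = gr.Textbox(label="prompt")
    out = gr.Textbox(label="result")
    gr.Button("Run").click(heavy_step, inputs=[inp, cache], outputs=[out, cache])

demo.queue()
# Cap how many sessions' State tensors stay resident, as in the diff above.
demo.launch(state_session_capacity=15)

Because the cached value never touches the filesystem, nothing is left for the cache reaper to delete mid-session, which is why the `delete_cache` argument and the `cache_dir` state could be commented out.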