Upload 2 files
- demo/MotionDirector_gradio.py +35 -12
- demo/motiondirector.py +17 -17
demo/MotionDirector_gradio.py
CHANGED
@@ -21,12 +21,14 @@ with gr.Blocks() as demo:
             </a>
             <div>
                 <h1 >MotionDirector: Motion Customization of Text-to-Video Diffusion Models</h1>
-                <h5 style="margin: 0;">More MotionDirectors are on the way. Stay tuned
+                <h5 style="margin: 0;">More MotionDirectors are on the way. Stay tuned 🔥!</h5>
+                <h5 style="margin: 0;"> If you like our project, please give us a star ✨ on Github for the latest update.</h5>
                 </br>
                 <div style="display: flex; justify-content: center; align-items: center; text-align: center;>
-                    <a href="https://arxiv.org/abs/2310.08465"
-                    <a href=
-                    <a href=
+                    <a href="https://arxiv.org/abs/2310.08465"></a>
+                    <a href="https://arxiv.org/abs/2310.08465"><img src="https://img.shields.io/badge/arXiv-2310.08465-b31b1b.svg"></a>
+                    <a href="https://showlab.github.io/MotionDirector"><img src="https://img.shields.io/badge/Project_Page-MotionDirector-green"></a>
+                    <a href="https://github.com/showlab/MotionDirector"><img src="https://img.shields.io/badge/Github-Code-blue"></a>
                 </div>
             </div>
         </div>
@@ -43,15 +45,24 @@ with gr.Blocks() as demo:
 
     with gr.Row():
         model_select = gr.Dropdown(
-            ["1-1: [Cinematic Shots] --
+            ["1-1: [Cinematic Shots] -- Zoom Out",
             "1-2: [Cinematic Shots] -- Zoom In",
             "1-3: [Cinematic Shots] -- Zoom Out",
+            "1-3: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom) 1",
+            "1-4: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom) 2",
+            "1-5: [Cinematic Shots] -- Follow",
+            "1-6: [Cinematic Shots] -- Reverse Follow",
+            "1-7: [Cinematic Shots] -- Chest Transition",
+            "1-8: [Cinematic Shots] -- Mini Jib Reveal",
+            "1-9: [Cinematic Shots] -- Orbit",
+            "1-10: [Cinematic Shots] -- Pull Back",
             "2-1: [Object Trajectory] -- Right to Left",
             "2-2: [Object Trajectory] -- Left to Right",
             "3-1: [Sports Concepts] -- Riding Bicycle",
             "3-2: [Sports Concepts] -- Riding Horse",
             "3-3: [Sports Concepts] -- Lifting Weights",
-            "3-4: [Sports Concepts] -- Playing Golf"
+            "3-4: [Sports Concepts] -- Playing Golf",
+            "3-5: [Sports Concepts] -- Skateboarding",
             ],
             label="MotionDirector",
             info="Which MotionDirector would you like to use!"
@@ -74,19 +85,31 @@ with gr.Blocks() as demo:
     gr.Examples(
         fn=motiondirector,
         examples=[
-            ["1-1: [Cinematic Shots] --
-
-            ["1-
-            ["
+            ["1-1: [Cinematic Shots] -- Zoom Out", "A spaceman standing on the moon captured with a zoom out.",
+             8323920],
+            ["1-2: [Cinematic Shots] -- Zoom In", "A polar bear standing at the top of a snowy mountain captured with a zoom in.", 7938587],
+            ["1-3: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom) 1", "A panda standing in front of an ancient Chinese temple captured with a dolly zoom.", 8238823],
+            ["1-4: [Cinematic Shots] -- Dolly Zoom (Hitchcockian Zoom) 2", "A lion sitting on top of a cliff captured with a dolly zoom.", 1675932],
+            ["1-5: [Cinematic Shots] -- Follow", "A fireman is walking through fire captured with a follow cinematic shot.", 2927089],
+            ["1-6: [Cinematic Shots] -- Reverse Follow", "A fireman is walking through fire captured with a reverse follow cinematic shot.", 9759630],
+            ["1-7: [Cinematic Shots] -- Chest Transition", "An ancient Roman soldier walks through the crowd on the street captured with a chest transition cinematic shot.", 3982271],
+            ["1-8: [Cinematic Shots] -- Mini Jib Reveal",
+             "A British Redcoat soldier is walking through the mountains captured with a mini jib reveal cinematic shot.",
+             566917],
+            ["1-9: [Cinematic Shots] -- Orbit", "A spaceman on the moon captured with an orbit cinematic shot.", 5899496],
+            ["1-10: [Cinematic Shots] -- Pull Back", "A spaceman on the moon looking at a lunar rover captured with a pull back cinematic shot.",
+             5585865],
+            ["2-1: [Object Trajectory] -- Right to Left", "A tank is running on the moon.", 2047046],
             ["2-2: [Object Trajectory] -- Left to Right", "A tiger is running in the forest.", 3463673],
             ["3-1: [Sports Concepts] -- Riding Bicycle", "An astronaut is riding a bicycle past the pyramids Mars 4K high quailty highly detailed.", 4422954],
             ["3-2: [Sports Concepts] -- Riding Horse", "A man riding an elephant through the jungle.", 6230765],
             ["3-3: [Sports Concepts] -- Lifting Weights", "A panda is lifting weights in a garden.", 1699276],
-            ["3-4: [Sports Concepts] -- Playing Golf", "A
+            ["3-4: [Sports Concepts] -- Playing Golf", "A monkey is playing golf on a field full of flowers.", 4156856],
+            ["3-5: [Sports Concepts] -- Skateboarding", "An astronaut is skateboarding on Mars.", 6615212],
         ],
         inputs=[model_select, text_pormpt, random_seed],
         outputs=generated_video,
     )
 
     demo.queue(max_size=15)
-    demo.launch(share=
+    demo.launch(share=True)
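For orientation, a minimal sketch of how the components touched by this diff plug together is shown below. The component names (`model_select`, `text_pormpt`, `random_seed`, `generated_video`) and the `gr.Examples` wiring come from the hunks above; the `motiondirector` callback body, the `gr.Button`, and its `click` hookup are assumptions about parts of the file not shown here, not the repository's exact code.

```python
# Hypothetical sketch of the Gradio wiring implied by the hunks above.
# Only the component names and the Examples/queue/launch calls mirror the diff;
# the Button/click handler and the stub callback are illustrative assumptions.
import gradio as gr

def motiondirector(model_select: str, text_pormpt: str, random_seed: int) -> str:
    # Placeholder: the real function (demo/motiondirector.py) loads the selected
    # MotionDirector LoRA, samples a video, and returns the path of the .mp4.
    return "outputs/example.mp4"

with gr.Blocks() as demo:
    with gr.Row():
        model_select = gr.Dropdown(
            ["1-1: [Cinematic Shots] -- Zoom Out",
             "3-5: [Sports Concepts] -- Skateboarding"],  # trimmed choice list
            label="MotionDirector",
        )
        text_pormpt = gr.Textbox(label="Text prompt")  # identifier kept as spelled in the repo
        random_seed = gr.Number(label="Random seed", value=0)
    generated_video = gr.Video(label="Generated video")

    run_btn = gr.Button("Generate")  # assumed; not shown in this diff
    run_btn.click(motiondirector,
                  inputs=[model_select, text_pormpt, random_seed],
                  outputs=generated_video)

    gr.Examples(fn=motiondirector,
                examples=[["3-5: [Sports Concepts] -- Skateboarding",
                           "An astronaut is skateboarding on Mars.", 6615212]],
                inputs=[model_select, text_pormpt, random_seed],
                outputs=generated_video)

demo.queue(max_size=15)
demo.launch(share=True)
```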
demo/motiondirector.py
CHANGED
@@ -85,11 +85,25 @@ def prepare_input_latents(
         height: int,
         width: int,
         latents_path:str,
-
+        model_select: str,
+        random_seed: int,
 ):
     # initialize with random gaussian noise
     scale = pipe.vae_scale_factor
     shape = (batch_size, pipe.unet.config.in_channels, num_frames, height // scale, width // scale)
+    if random_seed > 1000:
+        torch.manual_seed(random_seed)
+    else:
+        random_seed = random.randint(100, 10000000)
+        torch.manual_seed(random_seed)
+    if '1-' in model_select:
+        noise_prior = 0.3
+    elif '2-' in model_select:
+        noise_prior = 0.5
+    elif '3-' in model_select:
+        noise_prior = 0.
+    else:
+        noise_prior = 0.
     if noise_prior > 0.:
         cached_latents = torch.load(latents_path)
         if 'inversion_noise' not in cached_latents:
@@ -139,20 +153,6 @@ class MotionDirector():
         latents_path = f"{latents_folder}/{random.choice(os.listdir(latents_folder))}"
         assert os.path.exists(lora_path)
 
-        if '1-' in model_select:
-            noise_prior = 0.3
-        elif '2-' in model_select:
-            noise_prior = 0.5
-        elif '3-' in model_select:
-            noise_prior = 0.
-        else:
-            noise_prior = 0.
-
-        if random_seed > 1000:
-            torch.manual_seed(random_seed)
-        else:
-            random_seed = random.randint(100, 10000000)
-            torch.manual_seed(random_seed)
         device = "cuda"
         with torch.autocast(device, dtype=torch.half):
             # prepare input latents
@@ -164,7 +164,8 @@ class MotionDirector():
                 height=384,
                 width=384,
                 latents_path=latents_path,
-
+                model_select=model_select,
+                random_seed=random_seed
             )
             video_frames = self.pipe(
                 prompt=text_pormpt,
@@ -177,7 +178,6 @@ class MotionDirector():
                 latents=init_latents
            ).frames
 
-
             out_file = f"{out_name}_{random_seed}.mp4"
             os.makedirs(os.path.dirname(out_file), exist_ok=True)
             export_to_video(video_frames, out_file, 8)
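For readers following the refactor, here is a minimal, self-contained sketch of what `prepare_input_latents` looks like after this change, with the seed handling and the per-category `noise_prior` selection moved inside it. The signature, seeding, and `noise_prior` branches mirror the diff; the final blending of cached inversion noise with fresh Gaussian noise, the fallback when the key is missing, and the return value are not visible in these hunks, so those parts below are assumptions rather than the repository's exact implementation.

```python
# Hedged sketch of the refactored prepare_input_latents (demo/motiondirector.py).
# Lines marked "assumed" are illustrative and not taken from the diff.
import random
import torch

def prepare_input_latents(
        pipe,                  # text-to-video pipeline exposing vae_scale_factor / unet.config
        batch_size: int,
        num_frames: int,
        height: int,
        width: int,
        latents_path: str,
        model_select: str,
        random_seed: int,
):
    # initialize with random gaussian noise
    scale = pipe.vae_scale_factor
    shape = (batch_size, pipe.unet.config.in_channels, num_frames, height // scale, width // scale)

    # Seed handling, moved here from the MotionDirector class (see the removed hunk above).
    if random_seed > 1000:
        torch.manual_seed(random_seed)
    else:
        random_seed = random.randint(100, 10000000)
        torch.manual_seed(random_seed)

    # Per-category noise prior, keyed on the dropdown label prefix.
    if '1-' in model_select:      # Cinematic Shots
        noise_prior = 0.3
    elif '2-' in model_select:    # Object Trajectory
        noise_prior = 0.5
    else:                         # Sports Concepts and fallback
        noise_prior = 0.

    noise = torch.randn(shape)
    if noise_prior > 0.:
        cached_latents = torch.load(latents_path)
        # Assumed: the cached file provides 'inversion_noise'; the repo also has a
        # fallback branch for the missing-key case that this diff does not show.
        inversion_noise = cached_latents['inversion_noise']
        # Assumed mixing rule: interpolate cached inversion noise with fresh noise
        # according to noise_prior (the exact formula is not in this diff).
        noise = (noise_prior ** 0.5) * inversion_noise + ((1 - noise_prior) ** 0.5) * noise

    # Assumed return: the initial latents plus the (possibly re-drawn) seed,
    # which the caller uses to name the output file.
    return noise, random_seed
```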