Commit 836b387
Parent(s): 3a89a07
huggingface app
- .gitignore +2 -1
- __pycache__/example.cpython-310.pyc +0 -0
- __pycache__/merge_config_gradio.cpython-310.pyc +0 -0
- __pycache__/test.cpython-310.pyc +0 -0
- app.py +302 -0
- assets/teaser/run_two_man.mp4 +0 -0
- config/demo_config.yaml +2 -2
- example.py +19 -0
- video_diffusion/data/__pycache__/dataset.cpython-310.pyc +0 -0
- video_diffusion/pipelines/__pycache__/ddim_spatial_temporal.cpython-310.pyc +0 -0
- webui/__pycache__/merge_config_gradio.cpython-310.pyc +0 -0
- webui/merge_config_gradio.py +112 -0
.gitignore
CHANGED
@@ -3,4 +3,5 @@ annotator/annotator_ckpts.tar.gz
 result/**
 trash/**
 data/**
-videograin_data.tar.gz
+videograin_data.tar.gz
+off_app.sh
__pycache__/example.cpython-310.pyc
ADDED
Binary file (534 Bytes).

__pycache__/merge_config_gradio.cpython-310.pyc
ADDED
Binary file (2.93 kB).

__pycache__/test.cpython-310.pyc
ADDED
Binary file (10 kB).
app.py
ADDED
@@ -0,0 +1,302 @@
#!/usr/bin/env python

from __future__ import annotations

import os
import shutil

import gradio as gr
import huggingface_hub

from webui.merge_config_gradio import merge_config_then_run

HF_TOKEN = os.getenv('HF_TOKEN')
pipe = merge_config_then_run()


ARTICLE = r"""
If VideoGrain is helpful, please help to ⭐ the <a href='https://github.com/knightyxp/VideoGrain' target='_blank'>Github Repo</a>. Thanks!
[](https://github.com/knightyxp/VideoGrain)
---
📝 **Citation**
If our work is useful for your research, please consider citing:
```bibtex
@article{yang2025videograin,
  title={VideoGrain: Modulating Space-Time Attention for Multi-grained Video Editing},
  author={Yang, Xiangpeng and Zhu, Linchao and Fan, Hehe and Yang, Yi},
  journal={ICLR},
  year={2025}
}
```
📋 **License**
This project is licensed under <a rel="license" href="https://github.com/knightyxp/VideoGrain?tab=License-1-ov-file#readme">ReLER-Lab License 1.0</a>.
Redistribution and use for non-commercial purposes should follow this license.
📧 **Contact**
If you have any questions, please feel free to reach out to me at <b>[email protected]</b>.
"""

def update_layout_visibility(num):
    """
    Given the user's selection (string) in ["2","3","4","5"],
    return visibility updates for each of the 5 layout video inputs.
    """
    n = int(num)
    # Show layout_file1 if n >= 1, layout_file2 if n >= 2, etc.
    return [
        gr.update(visible=(n >= 1)),
        gr.update(visible=(n >= 2)),
        gr.update(visible=(n >= 3)),
        gr.update(visible=(n >= 4)),
        gr.update(visible=(n >= 5))
    ]

with gr.Blocks(css='style.css') as demo:
    # gr.Markdown(TITLE)

    gr.HTML(
        """
        <div style="text-align: center; max-width: 1200px; margin: 20px auto;">
        <h1 style="font-weight: 900; font-size: 2rem; margin: 0rem">
            VideoGrain: Modulating Space-Time Attention for Multi-Grained Video Editing
        </h1>
        <h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
            <a href="https://github.com/knightyxp">Xiangpeng Yang</a>
        </h2>
        <h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
        <!-- arXiv link -->
        <span class="link-block">
            [<a href="https://arxiv.org/abs/2502.17258" target="_blank" class="external-link">
                <span class="icon"><i class="ai ai-arxiv"></i></span>
                <span>arXiv</span>
            </a>]
        </span>
        <!-- Github link -->
        <span class="link-block">
            [<a href="https://github.com/knightyxp/VideoGrain" target="_blank" class="external-link">
                <span class="icon"><i class="fab fa-github"></i></span>
                <span>Code</span>
            </a>]
        </span>
        <!-- Homepage link -->
        <span class="link-block">
            [<a href="https://knightyxp.github.io/VideoGrain_project_page" target="_blank" class="external-link">
                <span class="icon"><i class="fab fa-github"></i></span>
                <span>Homepage</span>
            </a>]
        </span>
        <!-- YouTube link -->
        <span class="link-block">
            [<a href="https://www.youtube.com/watch?v=XEM4Pex7F9E" target="_blank" class="external-link">
                <span class="icon"><i class="fab fa-youtube"></i></span>
                <span>YouTube Video</span>
            </a>]
        </span>
        </h2>
        <h2 style="font-weight: 450; font-size: 1rem; margin-top: 0.5rem; margin-bottom: 0.5rem">
            📕 TL;DR: VideoGrain is a zero-shot method for class-level, instance-level, and part-level video editing
        </h2>
        <h2 style="font-weight: 450; font-size: 1rem;">
            Note that this page is a limited demo of VideoGrain. To run with more configurations, please check out our <a href="https://github.com/knightyxp/VideoGrain">github page</a>.
        </h2>
        </div>
        """)


    gr.HTML("""
        <p>We provide an <a href="https://github.com/knightyxp/VideoGrain?tab=readme-ov-file#editing-guidance-for-your-video">Editing Guidance</a> to help users choose hyperparameters when editing in-the-wild videos.</p>
        <p>To remove the limitations or avoid queues on your own hardware, you may <a href="https://huggingface.co/spaces/XiangpengYang/VideoGrain?duplicate=true" style="display: inline-block; vertical-align: middle;"><img style="margin-top: 0em; margin-bottom: 0em; display: inline-block;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></p>
        """)

    with gr.Row():
        with gr.Column():
            with gr.Accordion('Input Video', open=True):
                # user_input_video = gr.File(label='Input Source Video')
                user_input_video = gr.Video(label='Input Source Video', source='upload', type='numpy', format="mp4", visible=True).style(height="auto")

            # Radio to choose how many layout videos to show
            num_layouts = gr.Radio(
                choices=["2", "3", "4", "5"],
                label="Select Number of Editing Areas",
                value="2",  # default
                info="Please select the number of editing areas"
            )

            # Put all layout-video components in one Row to display them horizontally.
            with gr.Row():
                layout_file1 = gr.Video(
                    label="Layout Video 1",
                    type="numpy",
                    format="mp4",
                    visible=True
                )
                layout_file2 = gr.Video(
                    label="Layout Video 2",
                    type="numpy",
                    format="mp4",
                    visible=True
                )
                layout_file3 = gr.Video(
                    label="Layout Video 3",
                    type="numpy",
                    format="mp4",
                    visible=False
                )
                layout_file4 = gr.Video(
                    label="Layout Video 4",
                    type="numpy",
                    format="mp4",
                    visible=False
                )
                layout_file5 = gr.Video(
                    label="Layout Video 5",
                    type="numpy",
                    format="mp4",
                    visible=False
                )

            # Toggle visibility of the layout videos based on user selection
            num_layouts.change(
                fn=update_layout_visibility,
                inputs=num_layouts,
                outputs=[
                    layout_file1,
                    layout_file2,
                    layout_file3,
                    layout_file4,
                    layout_file5
                ]
            )

            prompt = gr.Textbox(
                label='Prompt',
                info='Change the prompt, and extract each local prompt in the editing prompt. '
                     'The local prompt order should match the layout mask order.',
            )

            model_id = gr.Dropdown(
                label='Model ID',
                choices=[
                    'stable-diffusion-v1-5/stable-diffusion-v1-5',
                    # add shape editing ckpt here
                ],
                value='stable-diffusion-v1-5/stable-diffusion-v1-5')

            run_button = gr.Button('Generate')

        with gr.Column():
            result = gr.Video(label='Result')
            # result.style(height=512, width=512)
            with gr.Row():
                control_list = ['dwpose', 'depth_zoe', 'depth_midas']
                control_type = gr.Dropdown(
                    choices=control_list,
                    label='Control type',
                    value='dwpose'
                )

                # Checkbox group for "dwpose" options; default: hand selected, face not selected.
                dwpose_options = gr.CheckboxGroup(
                    choices=["hand", "face"],
                    label="DW Pose Options",
                    value=["hand"],
                    visible=True  # Initially visible since default control_type is "dwpose"
                )

                # Update the visibility of the dwpose_options based on the selected control type
                control_type.change(
                    fn=lambda x: gr.update(visible=(x == "dwpose")),
                    inputs=control_type,
                    outputs=dwpose_options
                )

                controlnet_conditioning_scale = gr.Slider(label='ControlNet conditioning scale',
                                                          minimum=0.0,
                                                          maximum=1.0,
                                                          value=1.0,
                                                          step=0.1)

            with gr.Accordion('Editing config for VideoGrain', open=True):
                use_pnp = gr.Checkbox(
                    label="Use PnP",
                    value=False,
                    info="Check to enable PnP functionality."
                )

                pnp_inject_steps = gr.Slider(label='pnp inject steps',
                                             info='PnP inject steps for temporal consistency',
                                             minimum=0,
                                             maximum=10,
                                             step=1,
                                             value=0)

                flatten_res = gr.CheckboxGroup(
                    choices=["1", "2", "4", "8"],
                    label="Flatten Resolution",
                    value=["1"],
                    info="Select one or more flatten resolution factors. Mapping: 1 -> 64, 2 -> 32 (64/2), 4 -> 16 (64/4), 8 -> 8 (64/8)."
                )


    with gr.Row():
        from example import style_example
        examples = style_example

        # gr.Examples(examples=examples,
        #             inputs=[
        #                 model_id,
        #                 user_input_video,
        #                 layout_files,
        #                 prompt,
        #                 model_id,
        #                 control_type,
        #                 dwpose_options,
        #                 controlnet_conditioning_scale,
        #                 use_pnp,
        #                 pnp_inject_steps,
        #                 flatten_res,
        #             ],
        #             outputs=result,
        #             fn=pipe.run,
        #             cache_examples=True,
        #             # cache_examples=os.getenv('SYSTEM') == 'spaces'
        #             )
    gr.Markdown(ARTICLE)
    inputs = [
        model_id,
        user_input_video,
        num_layouts,
        layout_file1,
        layout_file2,
        layout_file3,
        layout_file4,
        layout_file5,
        prompt,
        model_id,
        control_type,
        dwpose_options,
        controlnet_conditioning_scale,
        use_pnp,
        pnp_inject_steps,
        flatten_res,
    ]
    prompt.submit(fn=pipe.run, inputs=inputs, outputs=result)
    run_button.click(fn=pipe.run, inputs=inputs, outputs=result)

demo.queue().launch()
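Aside: the `num_layouts.change(...)` wiring above follows the standard Gradio pattern of returning one `gr.update(...)` per output component, in the same order as the `outputs=` list. A minimal self-contained sketch of that pattern (a hypothetical stand-alone script, independent of VideoGrain's checkpoints):

```python
import gradio as gr

def toggle(num: str):
    # Return one gr.update per output component, ordered like `outputs=` below.
    n = int(num)
    return [gr.update(visible=(n >= i)) for i in range(1, 6)]

with gr.Blocks() as sketch:
    num = gr.Radio(choices=["2", "3", "4", "5"], value="2", label="Editing areas")
    videos = [gr.Video(label=f"Layout Video {i}", visible=(i <= 2)) for i in range(1, 6)]
    num.change(fn=toggle, inputs=num, outputs=videos)

if __name__ == "__main__":
    sketch.launch()
```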
assets/teaser/run_two_man.mp4
ADDED
Binary file (149 kB).
config/demo_config.yaml
CHANGED
@@ -1,4 +1,4 @@
-pretrained_model_path: "
+pretrained_model_path: "/home/xianyang/Data/code/FateZero/ckpt/stable-diffusion-v1-5"
 logdir: ./result/run_two_man/instance_level/3cls_spider_polar_vis_cross_attn
 
 dataset_config:
@@ -13,7 +13,7 @@ dataset_config:
 
 control_config:
   control_type: "dwpose"
-  pretrained_controlnet_path: "
+  pretrained_controlnet_path: "/home/xianyang/Data/code/FateZero/ckpt/control_v11p_sd15_openpose"
   controlnet_conditioning_scale: 1.0
   hand: True
   face: False
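The absolute checkpoint paths written into this config are machine-specific; at request time, `webui/merge_config_gradio.py` (below) deep-copies the loaded YAML and overrides the dataset and editing fields. A rough sketch of that load-and-override pattern with OmegaConf (the override values here are illustrative only):

```python
import copy
from omegaconf import OmegaConf

base = OmegaConf.load("config/demo_config.yaml")
cfg = copy.deepcopy(base)  # keep the on-disk defaults untouched
# Per-request overrides, mirroring what merge_config_then_run.run() does:
cfg["dataset_config"]["path"] = "data/run_two_man/run_two_man.mp4"
cfg["editing_config"]["num_inference_steps"] = 15
print(OmegaConf.to_yaml(cfg))
```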
example.py
ADDED
@@ -0,0 +1,19 @@
num_steps = 15
style_example = [
    [
        'CompVis/stable-diffusion-v1-5',
        'data/run_two_man/run_two_man.mp4',
        'Man in red hoodie and man in gray shirt are jogging in forest',
        'left man → Spiderman, right man → Polar Bear + Sunglasses, ground → grassy meadow, trees → cherry blossoms',
        0.8,
        0.8,
        "instance+part",
        10,
        num_steps,
        7.5,
        # input video arguments
        None, 0, 8, 1, 0, 0, 0, 0,
    ],
]
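The example row is purely positional. Read against the signature of `merge_config_then_run.run()` in webui/merge_config_gradio.py (added below), its 18 entries appear to line up one-to-one with that method's parameters; a small sketch to make the pairing visible (the pairing is an inference from the two signatures, not documented in the commit):

```python
from example import style_example

# Parameter names taken from merge_config_then_run.run(); the positional
# pairing below is an inferred reading of the example row, not part of the commit.
PARAMS = (
    "model_id", "data_path", "source_prompt", "target_prompt",
    "cross_replace_steps", "self_replace_steps", "enhance_words",
    "enhance_words_value", "num_steps", "guidance_scale",
    "user_input_video", "start_sample_frame", "n_sample_frame",
    "stride", "left_crop", "right_crop", "top_crop", "bottom_crop",
)

for name, value in zip(PARAMS, style_example[0]):
    print(f"{name} = {value!r}")
```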
video_diffusion/data/__pycache__/dataset.cpython-310.pyc
CHANGED
Binary files a/video_diffusion/data/__pycache__/dataset.cpython-310.pyc and b/video_diffusion/data/__pycache__/dataset.cpython-310.pyc differ
video_diffusion/pipelines/__pycache__/ddim_spatial_temporal.cpython-310.pyc
CHANGED
Binary files a/video_diffusion/pipelines/__pycache__/ddim_spatial_temporal.cpython-310.pyc and b/video_diffusion/pipelines/__pycache__/ddim_spatial_temporal.cpython-310.pyc differ
webui/__pycache__/merge_config_gradio.cpython-310.pyc
ADDED
Binary file (2.68 kB).
webui/merge_config_gradio.py
ADDED
@@ -0,0 +1,112 @@
# The star-import from test.py supplies AutoTokenizer, CLIPTextModel, AutoencoderKL,
# UNetPseudo3DConditionModel, model_config, OmegaConf, get_time_string, os, and test().
from test import *

import copy
import gradio as gr

class merge_config_then_run():
    def __init__(self) -> None:
        # Load the tokenizer
        pretrained_model_path = '/home/xianyang/Data/code/FateZero/ckpt/stable-diffusion-v1-5'
        self.tokenizer = None
        self.text_encoder = None
        self.vae = None
        self.unet = None

        cache_ckpt = True
        if cache_ckpt:
            self.tokenizer = AutoTokenizer.from_pretrained(
                pretrained_model_path,
                # 'FateZero/ckpt/stable-diffusion-v1-4',
                subfolder="tokenizer",
                use_fast=False,
            )

            # Load models and create wrapper for stable diffusion
            self.text_encoder = CLIPTextModel.from_pretrained(
                pretrained_model_path,
                subfolder="text_encoder",
            )

            self.vae = AutoencoderKL.from_pretrained(
                pretrained_model_path,
                subfolder="vae",
            )

            self.unet = UNetPseudo3DConditionModel.from_2d_model(
                os.path.join(pretrained_model_path, "unet"), model_config=model_config
            )

    def run(
        self,
        model_id,
        data_path,
        source_prompt,
        target_prompt,
        cross_replace_steps,
        self_replace_steps,
        enhance_words,
        enhance_words_value,
        num_steps,
        guidance_scale,
        user_input_video=None,

        # Temporal and spatial crop of the video
        start_sample_frame=0,
        n_sample_frame=8,
        stride=1,
        left_crop=0,
        right_crop=0,
        top_crop=0,
        bottom_crop=0,
    ):
        default_edit_config = 'config/demo_config.yaml'
        Omegadict_default_edit_config = OmegaConf.load(default_edit_config)

        dataset_time_string = get_time_string()
        config_now = copy.deepcopy(Omegadict_default_edit_config)
        print(f"config_now['pretrained_model_path'] = model_id {model_id}")
        # config_now['pretrained_model_path'] = model_id
        config_now['dataset_config']['prompt'] = source_prompt
        config_now['dataset_config']['path'] = data_path
        offset_dict = {
            "left": left_crop,
            "right": right_crop,
            "top": top_crop,
            "bottom": bottom_crop,
        }
        ImageSequenceDataset_dict = {
            "start_sample_frame": start_sample_frame,
            "n_sample_frame": n_sample_frame,
            "sampling_rate": stride,
            "offset": offset_dict,
        }
        config_now['dataset_config'].update(ImageSequenceDataset_dict)
        # Raise if neither an uploaded video nor a provided example path is available
        if user_input_video is None and data_path is None:
            raise gr.Error('You need to upload a video or choose a provided video')
        if user_input_video is not None:
            if isinstance(user_input_video, str):
                config_now['dataset_config']['path'] = user_input_video
            elif hasattr(user_input_video, 'name') and user_input_video.name is not None:
                config_now['dataset_config']['path'] = user_input_video.name

        # editing config
        config_now['editing_config']['prompts'] = [target_prompt]
        config_now['editing_config']['guidance_scale'] = guidance_scale
        config_now['editing_config']['num_inference_steps'] = num_steps

        logdir = default_edit_config.replace('config', 'result').replace('.yml', '').replace('.yaml', '') + f'_{dataset_time_string}'
        config_now['logdir'] = logdir
        print(f'Saving at {logdir}')
        save_path = test(tokenizer=self.tokenizer,
                         text_encoder=self.text_encoder,
                         vae=self.vae,
                         unet=self.unet,
                         config=default_edit_config, **config_now)
        mp4_path = save_path.replace('_0.gif', '_0_0_0.mp4')
        return mp4_path
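Because `__init__` eagerly loads the tokenizer, text encoder, VAE, and pseudo-3D U-Net, app.py constructs the class once at import time (`pipe = merge_config_then_run()`) and reuses it for every request. A hedged usage sketch, assuming the checkpoints referenced in `__init__` exist locally (argument values are taken from the example row above, for illustration only):

```python
from webui.merge_config_gradio import merge_config_then_run

pipe = merge_config_then_run()  # loads all sub-models once, at startup
mp4_path = pipe.run(
    model_id="stable-diffusion-v1-5/stable-diffusion-v1-5",
    data_path="data/run_two_man/run_two_man.mp4",
    source_prompt="Man in red hoodie and man in gray shirt are jogging in forest",
    target_prompt="left man → Spiderman, right man → Polar Bear + Sunglasses",
    cross_replace_steps=0.8,
    self_replace_steps=0.8,
    enhance_words="instance+part",
    enhance_words_value=10,
    num_steps=15,
    guidance_scale=7.5,
)
print(mp4_path)  # path of the rendered .mp4 result
```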