Spaces: Running on Zero

Update app.py
app.py CHANGED
@@ -32,6 +32,14 @@ def set_tts():
 def create_kd_talker():
     return Inferencer()
 
+example_folder = "example"
+example_choices = ["Example 1", "Example 2", "Example 3"]
+example_mapping = {
+    "Example 1": {"audio": os.path.join(example_folder, "example1.wav"), "image": os.path.join(example_folder, "example1.png")},
+    "Example 2": {"audio": os.path.join(example_folder, "example2.wav"), "image": os.path.join(example_folder, "example2.png")},
+    "Example 3": {"audio": os.path.join(example_folder, "example3.wav"), "image": os.path.join(example_folder, "example3.png")},
+}
+
 @spaces.GPU
 def predict(prompt, upload_reference_audio, microphone_reference_audio, reference_audio_type):
     global result_dir
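The first hunk adds a module-level table of bundled demo assets: each display name maps to a .wav/.png pair under example/. A minimal sketch of the same layout, plus a hypothetical startup check (not in the Space's code) that warns about missing assets before a user can select them:

import os

example_folder = "example"
# Same shape as the mapping in the hunk above: display name -> asset paths.
example_mapping = {
    f"Example {i}": {
        "audio": os.path.join(example_folder, f"example{i}.wav"),
        "image": os.path.join(example_folder, f"example{i}.png"),
    }
    for i in range(1, 4)
}

# Hypothetical check, not part of app.py: warn loudly at startup rather than
# handing the UI a path that does not exist.
for name, assets in example_mapping.items():
    for kind, path in assets.items():
        if not os.path.isfile(path):
            print(f"warning: {name} {kind} asset missing at {path}")

Keeping the table at module level means both the UI wiring added further down and any callback can read it without re-deriving paths.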
@@ -60,28 +68,26 @@ def main():
         device = "cuda"
     else:
         device = "cpu"
-
     with gr.Blocks(analytics_enabled=False) as interface:
-        gr.…
-        …
-        …
-        <…
-        …
-        <…
-        …
+        with gr.Row():
+            gr.HTML(
+                """
+                <div align='center'>
+                    <h2> Unlock Pose Diversity: Accurate and Efficient Implicit Keypoint-based Spatiotemporal Diffusion for Audio-driven Talking Portrait </h2>
+                    <div style="display: flex; justify-content: center; align-items: center; gap: 20px;">
+                        <img src='https://newstatic.dukekunshan.edu.cn/mainsite/2021/08/07161629/large_dku-Logo-e1649298929570.png' alt='Logo' width='150'/>
+                        <img src='https://www.xjtlu.edu.cn/wp-content/uploads/2023/12/7c52fd62e9cf26cb493faa7f91c2782.png' width='250'/>
+                    </div>
                 </div>
-        …
-        …
-        …
-        …
-        reference_audio_type = gr.Textbox(value="upload", visible=False)
-
+                """
+            )
+        driven_audio_type = gr.Textbox(value="upload", visible=False)
+        reference_audio_type = gr.Textbox(value="upload", visible=False)
         with gr.Row():
             with gr.Column(variant="panel"):
                 with gr.Tabs(elem_id="kdtalker_source_image"):
                     with gr.TabItem("Upload image"):
                         source_image = gr.Image(label="Source image", sources="upload", type="filepath", scale=256)
-
                 with gr.Tabs(elem_id="kdtalker_driven_audio"):
                     with gr.TabItem("Upload"):
                         upload_driven_audio = gr.Audio(label="Upload audio", sources="upload", type="filepath")
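The second hunk replaces the old header block (its removed lines did not survive in full, shown elided above) with a gr.HTML banner wrapped in a gr.Row. gr.HTML injects the markup verbatim, so the institutional logos are plain <img> tags. A self-contained sketch of the pattern, assuming only the title from the diff (logo URLs omitted here):

import gradio as gr

# Banner markup as in the hunk above; gr.HTML renders it verbatim.
BANNER = """
<div align='center'>
    <h2>Unlock Pose Diversity: Accurate and Efficient Implicit Keypoint-based
    Spatiotemporal Diffusion for Audio-driven Talking Portrait</h2>
</div>
"""

with gr.Blocks(analytics_enabled=False) as demo:
    with gr.Row():
        gr.HTML(BANNER)

if __name__ == "__main__":
    demo.queue().launch()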
@@ -117,8 +123,23 @@ def main():
                 ],
                 outputs=[gen_video]
             )
+            with gr.TabItem("Example"):
+                example_choice = gr.Dropdown(choices=example_choices, label="Choose an example")
+                def load_example(choice):
+                    example = example_mapping.get(choice, {})
+                    audio_path = example.get("audio", "")
+                    image_path = example.get("image", "")
+                    return [audio_path, image_path]
+                example_choice.change(
+                    fn=load_example,
+                    inputs=[example_choice],
+                    outputs=[upload_driven_audio, source_image]
+                )
+                example_choice.change(set_upload, outputs=driven_audio_type)
+
+
     return interface
 
 
 demo = main()
-demo.queue().launch()
+demo.queue().launch(share=True)
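The third hunk wires the new Example tab: a Dropdown.change handler returns a two-element list that Gradio assigns positionally to outputs, so the audio path must come first to line up with [upload_driven_audio, source_image]. A runnable sketch of just that wiring (stub asset paths; the Space's set_upload helper is omitted):

import gradio as gr

# Stub mapping standing in for the module-level example_mapping added above.
example_mapping = {
    "Example 1": {"audio": "example/example1.wav", "image": "example/example1.png"},
}

def load_example(choice):
    example = example_mapping.get(choice, {})
    # Returned values are matched positionally against `outputs` below:
    # audio first, image second.
    return [example.get("audio", ""), example.get("image", "")]

with gr.Blocks() as demo:
    source_image = gr.Image(label="Source image", type="filepath")
    upload_driven_audio = gr.Audio(label="Upload audio", type="filepath")
    example_choice = gr.Dropdown(choices=list(example_mapping), label="Choose an example")
    example_choice.change(
        fn=load_example,
        inputs=[example_choice],
        outputs=[upload_driven_audio, source_image],
    )

if __name__ == "__main__":
    demo.queue().launch()

As for the final change, share=True is generally redundant on Spaces, where the app is already served publicly; recent Gradio versions typically warn and skip creating a share tunnel in that environment.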