savvyf committed
Commit 4f66ca8 · verified · 1 Parent(s): 991dfe0

Upload 4 files

Files changed (4):
  1. README.md +48 -7
  2. app.py +142 -0
  3. gitmodules +3 -0
  4. requirements.txt +8 -0
README.md CHANGED
@@ -1,12 +1,53 @@
  ---
- title: Appkelvo
- emoji: 🌍
- colorFrom: blue
- colorTo: pink
  sdk: gradio
- sdk_version: 5.49.0
  app_file: app.py
- pinned: false
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
  ---
+ title: CogVideoX-2B
+ emoji: 🎥
+ colorFrom: yellow
+ colorTo: green
  sdk: gradio
+ sdk_version: 5.34.2
+ suggested_hardware: a10g-large
+ suggested_storage: large
+ app_port: 7860
  app_file: app.py
+ models:
+ - THUDM/CogVideoX-2b
+ tags:
+ - cogvideox
+ - video-generation
+ - thudm
+ short_description: Text-to-Video
+ disable_embedding: false
  ---

+ # CogVideoX HF Space
+
+ ## How to run this space
+
+ CogVideoX does not rely on any external API models.
+ However, it was trained on relatively long prompts, so short prompts tend to produce
+ weaker results. To help users get good renders from short prompts, the app integrates
+ an LLM that refines them. This step is optional, but we recommend it.
+
+ ### Using the GLM-4 model
+
+ ```shell
+ OPENAI_BASE_URL=https://open.bigmodel.cn/api/paas/v4/ OPENAI_API_KEY="ZHIPUAI_API_KEY" python app.py
+ ```
+
+ ### Using the OpenAI GPT-4o model
+
+ ```shell
+ OPENAI_API_KEY="OPENAI_API_KEY" python app.py
+ ```
+
+ Then change the model name in `app.py`:
+
+ ```python
+ model="glm-4-0520"  # change to "gpt-4o"
+ ```
+
+ ### Running without an LLM
+
+ ```shell
+ python app.py
+ ```
app.py ADDED
@@ -0,0 +1,142 @@
+ import os
+ import threading
+ import time
+ import gradio as gr
+ import torch
+ from diffusers import CogVideoXPipeline
+ from diffusers.utils import export_to_video
+ from datetime import datetime, timedelta
+ from openai import OpenAI
+ import spaces
+ import moviepy as mp
+
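+ # Load the CogVideoX-2B weights in fp16 and move the pipeline to GPU when available.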
+ dtype = torch.float16
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=dtype).to(device)
+
+ os.makedirs("./output", exist_ok=True)
+ os.makedirs("./gradio_tmp", exist_ok=True)
+
+ sys_prompt = """You are part of a team of bots that creates videos. You work with an assistant bot that will draw anything you say in square brackets.
+
+ For example, outputting "a beautiful morning in the woods with the sun peeking through the trees" will trigger your partner bot to output a video of a forest morning, as described. You will be prompted by people looking to create detailed, amazing videos.
+
+ You will only ever output a single video description per user request.
+ When modifications are requested, refactor the entire description to integrate suggestions.
+ Other times the user will not want modifications but a new video. In that case, ignore previous conversation history.
+ """
+
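+ # Expand a short user prompt into a detailed video caption via the chat API.
+ # Returns the original prompt unchanged when OPENAI_API_KEY is not set.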
+ def convert_prompt(prompt: str, retry_times: int = 3) -> str:
+     if not os.environ.get("OPENAI_API_KEY"):
+         return prompt
+     client = OpenAI()
+     text = prompt.strip()
+     for _ in range(retry_times):
+         response = client.chat.completions.create(
+             messages=[
+                 {"role": "system", "content": sys_prompt},
+                 {"role": "user", "content": f'Create a detailed imaginative video caption for: "{text}"'},
+             ],
+             model="glm-4-0520",
+             temperature=0.01,
+             top_p=0.7,
+             stream=False,
+             max_tokens=250,
+         )
+         if response.choices:
+             return response.choices[0].message.content
+     return prompt
+
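+ # Request a GPU slot (up to 240 s) and run the diffusion pipeline for 49 frames.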
+ @spaces.GPU(duration=240)
+ def infer(prompt: str, num_inference_steps: int, guidance_scale: float, progress=gr.Progress(track_tqdm=True)):
+     torch.cuda.empty_cache()
+     video = pipe(
+         prompt=prompt,
+         num_videos_per_prompt=1,
+         num_inference_steps=num_inference_steps,
+         num_frames=49,
+         guidance_scale=guidance_scale,
+     ).frames[0]
+     return video
+
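+ # Export the generated frames to a timestamped MP4 under ./output.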
+ def save_video(tensor):
+     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+     video_path = f"./output/{timestamp}.mp4"
+     os.makedirs(os.path.dirname(video_path), exist_ok=True)
+     export_to_video(tensor, video_path)
+     return video_path
+
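+ # Build a small 240-px, 8-fps GIF preview next to the MP4.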
+ def convert_to_gif(video_path):
+     clip = mp.VideoFileClip(video_path)
+     clip = clip.with_fps(8)
+     clip = clip.resized(height=240)
+     gif_path = video_path.replace(".mp4", ".gif")
+     clip.write_gif(gif_path, fps=8)
+     return gif_path
+
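+ # Housekeeping loop: every 10 minutes, remove generated files older than 10 minutes.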
+ def delete_old_files():
+     while True:
+         now = datetime.now()
+         cutoff = now - timedelta(minutes=10)
+         for directory in ["./output", "./gradio_tmp"]:
+             for filename in os.listdir(directory):
+                 file_path = os.path.join(directory, filename)
+                 if os.path.isfile(file_path):
+                     file_mtime = datetime.fromtimestamp(os.path.getmtime(file_path))
+                     if file_mtime < cutoff:
+                         os.remove(file_path)
+         time.sleep(600)
+
+ threading.Thread(target=delete_old_files, daemon=True).start()
+
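+ # Gradio UI: prompt entry with optional LLM enhancement, generation settings, and outputs.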
+ with gr.Blocks() as demo:
+     with gr.Row():
+         with gr.Column():
+             prompt = gr.Textbox(
+                 label="Prompt (Less than 200 Words)",
+                 placeholder="Enter your prompt here",
+                 lines=5
+             )
+
+             with gr.Row():
+                 gr.Markdown("✨ Click enhance to polish your prompt with GLM-4.")
+                 enhance_button = gr.Button("✨ Enhance Prompt (Optional)")
+
+         with gr.Column():
+             gr.Markdown("**Optional Parameters:** Default values are recommended.")
+             with gr.Row():
+                 num_inference_steps = gr.Number(label="Inference Steps", value=50)
+                 guidance_scale = gr.Number(label="Guidance Scale", value=6.0)
+             generate_button = gr.Button("🎬 Generate Video")
+
+         with gr.Column():
+             video_output = gr.Video(label="Generated Video", width=720, height=480)
+             with gr.Row():
+                 download_video_button = gr.File(label="📥 Download Video", visible=False)
+                 download_gif_button = gr.File(label="📥 Download GIF", visible=False)
+
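+     # Run inference, save the MP4 and GIF, and reveal the download widgets.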
+     def generate(prompt, num_inference_steps, guidance_scale, progress=gr.Progress(track_tqdm=True)):
+         tensor = infer(prompt, num_inference_steps, guidance_scale, progress=progress)
+         video_path = save_video(tensor)
+         video_update = gr.update(visible=True, value=video_path)
+         gif_path = convert_to_gif(video_path)
+         gif_update = gr.update(visible=True, value=gif_path)
+         return video_path, video_update, gif_update
+
+     def enhance_prompt_func(prompt):
+         return convert_prompt(prompt, retry_times=1)
+
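+     # Hook up the buttons: generate produces the video; enhance rewrites the prompt in place.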
+     generate_button.click(
+         generate,
+         inputs=[prompt, num_inference_steps, guidance_scale],
+         outputs=[video_output, download_video_button, download_gif_button]
+     )
+
+     enhance_button.click(
+         enhance_prompt_func,
+         inputs=[prompt],
+         outputs=[prompt]
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
gitmodules ADDED
@@ -0,0 +1,3 @@
+ [submodule "CogVideo"]
+     path = CogVideo
+     url = https://github.com/THUDM/CogVideo
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ imageio-ffmpeg==0.5.1
+ diffusers==0.30.1
+ numpy==1.26.0
+ transformers==4.44.2
+ moviepy==2.2.1
+ openai==1.42.0
+ git+https://github.com/huggingface/accelerate.git@main#egg=accelerate
+ sentencepiece==0.2.0