zR
committed on
Commit
·
15b8c5a
1
Parent(s):
57c28fa
update
Browse files- .idea/.gitignore +8 -0
- .idea/CogVideoX-2b.iml +8 -0
- .idea/inspectionProfiles/Project_Default.xml +21 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +7 -0
- .idea/modules.xml +8 -0
- .idea/vcs.xml +6 -0
- README.md +16 -14
- README_zh.md +5 -3
.idea/.gitignore
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Default ignored files
|
| 2 |
+
/shelf/
|
| 3 |
+
/workspace.xml
|
| 4 |
+
# Editor-based HTTP Client requests
|
| 5 |
+
/httpRequests/
|
| 6 |
+
# Datasource local storage ignored files
|
| 7 |
+
/dataSources/
|
| 8 |
+
/dataSources.local.xml
|
.idea/CogVideoX-2b.iml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<module type="PYTHON_MODULE" version="4">
|
| 3 |
+
<component name="NewModuleRootManager">
|
| 4 |
+
<content url="file://$MODULE_DIR$" />
|
| 5 |
+
<orderEntry type="inheritedJdk" />
|
| 6 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
| 7 |
+
</component>
|
| 8 |
+
</module>
|
.idea/inspectionProfiles/Project_Default.xml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<profile version="1.0">
|
| 3 |
+
<option name="myName" value="Project Default" />
|
| 4 |
+
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
| 5 |
+
<option name="ignoredPackages">
|
| 6 |
+
<value>
|
| 7 |
+
<list size="8">
|
| 8 |
+
<item index="0" class="java.lang.String" itemvalue="openai" />
|
| 9 |
+
<item index="1" class="java.lang.String" itemvalue="sse_starlette" />
|
| 10 |
+
<item index="2" class="java.lang.String" itemvalue="fastapi" />
|
| 11 |
+
<item index="3" class="java.lang.String" itemvalue="timm" />
|
| 12 |
+
<item index="4" class="java.lang.String" itemvalue="gradio" />
|
| 13 |
+
<item index="5" class="java.lang.String" itemvalue="uvicorn" />
|
| 14 |
+
<item index="6" class="java.lang.String" itemvalue="diffusers" />
|
| 15 |
+
<item index="7" class="java.lang.String" itemvalue="transformers" />
|
| 16 |
+
</list>
|
| 17 |
+
</value>
|
| 18 |
+
</option>
|
| 19 |
+
</inspection_tool>
|
| 20 |
+
</profile>
|
| 21 |
+
</component>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<component name="InspectionProjectProfileManager">
|
| 2 |
+
<settings>
|
| 3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
| 4 |
+
<version value="1.0" />
|
| 5 |
+
</settings>
|
| 6 |
+
</component>
|
.idea/misc.xml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="Black">
|
| 4 |
+
<option name="sdkName" value="Python 3.9" />
|
| 5 |
+
</component>
|
| 6 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
|
| 7 |
+
</project>
|
.idea/modules.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="ProjectModuleManager">
|
| 4 |
+
<modules>
|
| 5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/CogVideoX-2b.iml" filepath="$PROJECT_DIR$/.idea/CogVideoX-2b.iml" />
|
| 6 |
+
</modules>
|
| 7 |
+
</component>
|
| 8 |
+
</project>
|
.idea/vcs.xml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<project version="4">
|
| 3 |
+
<component name="VcsDirectoryMappings">
|
| 4 |
+
<mapping directory="" vcs="Git" />
|
| 5 |
+
</component>
|
| 6 |
+
</project>
|
README.md
CHANGED
|
@@ -87,18 +87,18 @@ inference: false
|
|
| 87 |
CogVideoX is an open-source video generation model that shares the same origins as [清影](https://chatglm.cn/video).
|
| 88 |
The table below provides a list of the video generation models we currently offer, along with their basic information.
|
| 89 |
|
| 90 |
-
| Model Name | CogVideoX-2B (Current Repos)
|
| 91 |
-
|
| 92 |
-
| Supported Prompt Language | English
|
| 93 |
-
| GPU Memory Required for Inference |
|
| 94 |
-
| GPU Memory Required for Fine-tuning (bs=1) | 46.2GB
|
| 95 |
-
| Prompt Length | 226 Tokens
|
| 96 |
-
| Video Length | 6 seconds
|
| 97 |
-
| Frames Per Second | 8 frames
|
| 98 |
-
| Resolution | 720 * 480
|
| 99 |
-
| Positional Embeddings | Sinusoidal
|
| 100 |
-
| Quantized Inference | Not Supported
|
| 101 |
-
| Multi-card Inference | Not Supported
|
| 102 |
|
| 103 |
## Quick Start 🤗
|
| 104 |
|
|
@@ -111,7 +111,7 @@ optimizations and conversions to get a better experience.**
|
|
| 111 |
|
| 112 |
```shell
|
| 113 |
pip install --upgrade opencv-python transformers
|
| 114 |
-
pip install git+https://github.com/huggingface/diffusers.git@
|
| 115 |
```
|
| 116 |
|
| 117 |
2. Run the code
|
|
@@ -121,7 +121,7 @@ import torch
|
|
| 121 |
from diffusers import CogVideoXPipeline
|
| 122 |
from diffusers.utils import export_to_video
|
| 123 |
|
| 124 |
-
prompt = "A
|
| 125 |
|
| 126 |
pipe = CogVideoXPipeline.from_pretrained(
|
| 127 |
"THUDM/CogVideoX-2b",
|
|
@@ -146,6 +146,8 @@ video = pipe(
|
|
| 146 |
export_to_video(video, "output.mp4", fps=8)
|
| 147 |
```
|
| 148 |
|
|
|
|
|
|
|
| 149 |
If the generated video appears “all green” and is not viewable in the default Mac player, this is normal (due to
|
| 150 |
OpenCV saving video issues). Simply use a different player to view the video.
|
| 151 |
|
|
|
|
| 87 |
CogVideoX is an open-source video generation model that shares the same origins as [清影](https://chatglm.cn/video).
|
| 88 |
The table below provides a list of the video generation models we currently offer, along with their basic information.
|
| 89 |
|
| 90 |
+
| Model Name | CogVideoX-2B (Current Repos) |
|
| 91 |
+
|--------------------------------------------|--------------------------------------------------|
|
| 92 |
+
| Supported Prompt Language | English |
|
| 93 |
+
| GPU Memory Required for Inference | 36GB (will be optimized before the PR is merged) |
|
| 94 |
+
| GPU Memory Required for Fine-tuning (bs=1) | 46.2GB |
|
| 95 |
+
| Prompt Length | 226 Tokens |
|
| 96 |
+
| Video Length | 6 seconds |
|
| 97 |
+
| Frames Per Second | 8 frames |
|
| 98 |
+
| Resolution | 720 * 480 |
|
| 99 |
+
| Positional Embeddings | Sinusoidal |
|
| 100 |
+
| Quantized Inference | Not Supported |
|
| 101 |
+
| Multi-card Inference | Not Supported |
|
| 102 |
|
| 103 |
## Quick Start 🤗
|
| 104 |
|
|
|
|
| 111 |
|
| 112 |
```shell
|
| 113 |
pip install --upgrade opencv-python transformers
|
| 114 |
+
pip install git+https://github.com/huggingface/diffusers.git@878f609aa5ce4a78fea0f048726889debde1d7e8#egg=diffusers # Still in PR
|
| 115 |
```
|
| 116 |
|
| 117 |
2. Run the code
|
|
|
|
| 121 |
from diffusers import CogVideoXPipeline
|
| 122 |
from diffusers.utils import export_to_video
|
| 123 |
|
| 124 |
+
prompt = "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical performance."
|
| 125 |
|
| 126 |
pipe = CogVideoXPipeline.from_pretrained(
|
| 127 |
"THUDM/CogVideoX-2b",
|
|
|
|
| 146 |
export_to_video(video, "output.mp4", fps=8)
|
| 147 |
```
|
| 148 |
|
| 149 |
+
**Using a single A100 GPU, generating a video with the above configuration takes approximately 90 seconds**
|
| 150 |
+
|
| 151 |
If the generated video appears “all green” and is not viewable in the default Mac player, this is normal (due to
|
| 152 |
OpenCV saving video issues). Simply use a different player to view the video.
|
| 153 |
|
README_zh.md
CHANGED
|
@@ -76,7 +76,7 @@ CogVideoX是 [清影](https://chatglm.cn/video) 同源的开源版本视频生
|
|
| 76 |
| Model Name | CogVideoX-2B (当前仓库) |
|
| 77 |
|---------------|---------------------|
|
| 78 |
| 提示词语言 | English |
|
| 79 |
-
| 推理显存消耗 |
|
| 80 |
| 微调显存消耗 (bs=1) | 46.2GB |
|
| 81 |
| 提示词长度上限 | 226 Tokens |
|
| 82 |
| 视频生成长度 | 6 seconds |
|
|
@@ -95,8 +95,8 @@ CogVideoX是 [清影](https://chatglm.cn/video) 同源的开源版本视频生
|
|
| 95 |
1. 安装对应的依赖
|
| 96 |
|
| 97 |
```shell
|
| 98 |
-
pip install --upgrade opencv-python transformers
|
| 99 |
-
pip install git+https://github.com/huggingface/diffusers.git@
|
| 100 |
```
|
| 101 |
|
| 102 |
2. 运行代码
|
|
@@ -131,6 +131,8 @@ video = pipe(
|
|
| 131 |
export_to_video(video, "output.mp4", fps=8)
|
| 132 |
```
|
| 133 |
|
|
|
|
|
|
|
| 134 |
如果您生成的视频在 MAC 默认播放器上表现为 "全绿" 无法正常观看,属于正常现象 (OpenCV保存视频问题),仅需更换一个播放器观看。
|
| 135 |
|
| 136 |
## 深入研究
|
|
|
|
| 76 |
| Model Name | CogVideoX-2B (当前仓库) |
|
| 77 |
|---------------|---------------------|
|
| 78 |
| 提示词语言 | English |
|
| 79 |
+
| 推理显存消耗 | 36GB(会在PR合并之前优化) |
|
| 80 |
| 微调显存消耗 (bs=1) | 46.2GB |
|
| 81 |
| 提示词长度上限 | 226 Tokens |
|
| 82 |
| 视频生成长度 | 6 seconds |
|
|
|
|
| 95 |
1. 安装对应的依赖
|
| 96 |
|
| 97 |
```shell
|
| 98 |
+
pip install --upgrade opencv-python transformers accelerate
|
| 99 |
+
pip install git+https://github.com/huggingface/diffusers.git@878f609aa5ce4a78fea0f048726889debde1d7e8#egg=diffusers # Still in PR
|
| 100 |
```
|
| 101 |
|
| 102 |
2. 运行代码
|
|
|
|
| 131 |
export_to_video(video, "output.mp4", fps=8)
|
| 132 |
```
|
| 133 |
|
| 134 |
+
**使用单卡A100按照上述配置生成一次视频大约需要90秒**。
|
| 135 |
+
|
| 136 |
如果您生成的视频在 MAC 默认播放器上表现为 "全绿" 无法正常观看,属于正常现象 (OpenCV保存视频问题),仅需更换一个播放器观看。
|
| 137 |
|
| 138 |
## 深入研究
|