update

Files changed (9) hide show

.idea/.gitignore +8 -0
.idea/CogVideoX-2b.iml +8 -0
.idea/inspectionProfiles/Project_Default.xml +21 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/misc.xml +7 -0
.idea/modules.xml +8 -0
.idea/vcs.xml +6 -0
README.md +16 -14
README_zh.md +5 -3

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

.idea/CogVideoX-2b.iml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/inspectionProfiles/Project_Default.xml ADDED Viewed

	@@ -0,0 +1,21 @@

+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredPackages">
+        <value>
+          <list size="8">
+            <item index="0" class="java.lang.String" itemvalue="openai" />
+            <item index="1" class="java.lang.String" itemvalue="sse_starlette" />
+            <item index="2" class="java.lang.String" itemvalue="fastapi" />
+            <item index="3" class="java.lang.String" itemvalue="timm" />
+            <item index="4" class="java.lang.String" itemvalue="gradio" />
+            <item index="5" class="java.lang.String" itemvalue="uvicorn" />
+            <item index="6" class="java.lang.String" itemvalue="diffusers" />
+            <item index="7" class="java.lang.String" itemvalue="transformers" />
+          </list>
+        </value>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,7 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="Python 3.9" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/CogVideoX-2b.iml" filepath="$PROJECT_DIR$/.idea/CogVideoX-2b.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

README.md CHANGED Viewed

@@ -87,18 +87,18 @@ inference: false
 CogVideoX is an open-source video generation model that shares the same origins as [清影](https://chatglm.cn/video).
 The table below provides a list of the video generation models we currently offer, along with their basic information.
-| Model Name                                 | CogVideoX-2B (Current Repos) |
-|--------------------------------------------|------------------------------|
-| Supported Prompt Language                  | English                      |
-| GPU Memory Required for Inference          | 21.6GB                       |
-| GPU Memory Required for Fine-tuning (bs=1) | 46.2GB                       |
-| Prompt Length                              | 226 Tokens                   |
-| Video Length                               | 6 seconds                    |
-| Frames Per Second                          | 8 frames                     |
-| Resolution                                 | 720 * 480                    |
-| Positional Embeddings                      | Sinusoidal                   |
-| Quantized Inference                        | Not Supported                |
-| Multi-card Inference                       | Not Supported                |
 ## Quick Start 🤗
@@ -111,7 +111,7 @@ optimizations and conversions to get a better experience.**
 ```shell
 pip install --upgrade opencv-python transformers
-pip install git+https://github.com/huggingface/diffusers.git@32da2e7673cfe0475a47c41b859f5fbd8bf17a40#egg=diffusers # Still in PR
 ```
 2. Run the code
@@ -121,7 +121,7 @@ import torch
 from diffusers import CogVideoXPipeline
 from diffusers.utils import export_to_video
-prompt = "A girl ridding a bike"
 pipe = CogVideoXPipeline.from_pretrained(
     "THUDM/CogVideoX-2b",
@@ -146,6 +146,8 @@ video = pipe(
 export_to_video(video, "output.mp4", fps=8)
 ```
 If the generated video appears “all green” and cannot be viewed in the default macOS player, this is expected behavior
 (caused by how OpenCV saves the video). Simply use a different player to view the video.

 CogVideoX is an open-source video generation model that shares the same origins as [清影](https://chatglm.cn/video).
 The table below provides a list of the video generation models we currently offer, along with their basic information.
+| Model Name                                 | CogVideoX-2B (Current Repo)                      |
+|--------------------------------------------|--------------------------------------------------|
+| Supported Prompt Language                  | English                                          |
+| GPU Memory Required for Inference          | 36GB (will be optimized before the PR is merged) |
+| GPU Memory Required for Fine-tuning (bs=1) | 46.2GB                                           |
+| Prompt Length                              | 226 Tokens                                       |
+| Video Length                               | 6 seconds                                        |
+| Frames Per Second                          | 8 frames                                         |
+| Resolution                                 | 720 * 480                                        |
+| Positional Embeddings                      | Sinusoidal                                       |
+| Quantized Inference                        | Not Supported                                    |
+| Multi-card Inference                       | Not Supported                                    |
 ## Quick Start 🤗
 ```shell
 pip install --upgrade opencv-python transformers
+pip install git+https://github.com/huggingface/diffusers.git@878f609aa5ce4a78fea0f048726889debde1d7e8#egg=diffusers # Still in PR
 ```
 2. Run the code
 from diffusers import CogVideoXPipeline
 from diffusers.utils import export_to_video
+prompt = "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical performance."
 pipe = CogVideoXPipeline.from_pretrained(
     "THUDM/CogVideoX-2b",
 export_to_video(video, "output.mp4", fps=8)
 ```
+**Using a single A100 GPU, generating a video with the above configuration takes approximately 90 seconds.**
 If the generated video appears “all green” and cannot be viewed in the default macOS player, this is expected behavior
 (caused by how OpenCV saves the video). Simply use a different player to view the video.

README_zh.md CHANGED Viewed

@@ -76,7 +76,7 @@ CogVideoX是 [清影](https://chatglm.cn/video) 同源的开源版本视频生
 | Model Name    | CogVideoX-2B (当前仓库) |
 |---------------|---------------------|
 | 提示词语言         | English             |
-| 推理显存消耗        | 21.6GB              |
 | 微调显存消耗 (bs=1) | 46.2GB              |
 | 提示词长度上限       | 226 Tokens          |
 | 视频生成长度        | 6 seconds           |
@@ -95,8 +95,8 @@ CogVideoX是 [清影](https://chatglm.cn/video) 同源的开源版本视频生
 1. 安装对应的依赖
 ```shell
-pip install --upgrade opencv-python transformers
-pip install git+https://github.com/huggingface/diffusers.git@32da2e7673cfe0475a47c41b859f5fbd8bf17a40#egg=diffusers # Still in PR
 ```
 2. 运行代码
@@ -131,6 +131,8 @@ video = pipe(
 export_to_video(video, "output.mp4", fps=8)
 ```
 如果您生成的视频在 macOS 默认播放器上表现为 "全绿" 无法正常观看，属于正常现象 (OpenCV保存视频问题)，仅需更换一个播放器观看。
 ## 深入研究

 | Model Name    | CogVideoX-2B (当前仓库) |
 |---------------|---------------------|
 | 提示词语言         | English             |
+| 推理显存消耗        | 36GB (会在PR合并之前优化)    |
 | 微调显存消耗 (bs=1) | 46.2GB              |
 | 提示词长度上限       | 226 Tokens          |
 | 视频生成长度        | 6 seconds           |
 1. 安装对应的依赖
 ```shell
+pip install --upgrade opencv-python transformers accelerate
+pip install git+https://github.com/huggingface/diffusers.git@878f609aa5ce4a78fea0f048726889debde1d7e8#egg=diffusers # Still in PR
 ```
 2. 运行代码
 export_to_video(video, "output.mp4", fps=8)
 ```
+**使用单卡A100按照上述配置生成一次视频大约需要90秒**。
 如果您生成的视频在 macOS 默认播放器上表现为 "全绿" 无法正常观看，属于正常现象 (OpenCV保存视频问题)，仅需更换一个播放器观看。
 ## 深入研究