Upload folder using huggingface_hub
Browse files- .gitattributes +7 -0
- README.md +53 -0
- README_from_modelscope.md +82 -0
- assets/image_1_full.jpg +3 -0
- assets/image_1_original.jpg +0 -0
- assets/image_1_ours.jpg +3 -0
- assets/image_2_full.jpg +3 -0
- assets/image_2_original.jpg +0 -0
- assets/image_2_ours.jpg +3 -0
- assets/image_3_full.jpg +3 -0
- assets/image_3_original.jpg +3 -0
- assets/image_3_ours.jpg +3 -0
- assets/prompts.txt +4 -0
- configuration.json +1 -0
- model.safetensors +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
assets/image_1_full.jpg filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
assets/image_1_ours.jpg filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
assets/image_2_full.jpg filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
assets/image_2_ours.jpg filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
assets/image_3_full.jpg filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
assets/image_3_original.jpg filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
assets/image_3_ours.jpg filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
---
|
| 4 |
+
# Qwen-Image LoRA Distillation Acceleration Model
|
| 5 |
+
|
| 6 |
+
## Model Introduction
|
| 7 |
+
|
| 8 |
+
This model is a distilled and accelerated LoRA version of [Qwen-Image](https://www.modelscope.cn/models/Qwen/Qwen-Image). We follow the same training procedure as used in [DiffSynth-Studio/Qwen-Image-Distill-Full](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Distill-Full), but replace the trainable model parameters with LoRA, making it easier to integrate into various image generation frameworks.
|
| 9 |
+
|
| 10 |
+
The training framework is built on [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio). The training data consists of 16,000 images generated by the original model using randomly sampled prompts from [DiffusionDB](https://www.modelscope.cn/datasets/AI-ModelScope/diffusiondb). The training process ran for approximately one day on 8 * MI308X GPUs.
|
| 11 |
+
|
| 12 |
+
## Performance Comparison
|
| 13 |
+
|
| 14 |
+
||Original Model|Original Model|Accelerated Model|
|
| 15 |
+
|-|-|-|-|
|
| 16 |
+
|Inference Steps|40|15|15|
|
| 17 |
+
|CFG Scale|4|1|1|
|
| 18 |
+
|Forward Passes|80|15|15|
|
| 19 |
+
|Example 1||||
|
| 20 |
+
|Example 2||||
|
| 21 |
+
|Example 3||||
|
| 22 |
+
|
| 23 |
+
## Inference Code
|
| 24 |
+
|
| 25 |
+
```shell
|
| 26 |
+
git clone https://github.com/modelscope/DiffSynth-Studio.git
|
| 27 |
+
cd DiffSynth-Studio
|
| 28 |
+
pip install -e .
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
```python
|
| 32 |
+
from diffsynth.pipelines.qwen_image import QwenImagePipeline, ModelConfig
|
| 33 |
+
from modelscope import snapshot_download
|
| 34 |
+
import torch
|
| 35 |
+
|
| 36 |
+
snapshot_download("DiffSynth-Studio/Qwen-Image-Distill-LoRA", local_dir="models/DiffSynth-Studio/Qwen-Image-Distill-LoRA")
|
| 37 |
+
pipe = QwenImagePipeline.from_pretrained(
|
| 38 |
+
torch_dtype=torch.bfloat16,
|
| 39 |
+
device="cuda",
|
| 40 |
+
model_configs=[
|
| 41 |
+
ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="transformer/diffusion_pytorch_model*.safetensors"),
|
| 42 |
+
ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="text_encoder/model*.safetensors"),
|
| 43 |
+
ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
| 44 |
+
],
|
| 45 |
+
tokenizer_config=ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="tokenizer/"),
|
| 46 |
+
)
|
| 47 |
+
pipe.load_lora(pipe.dit, "models/DiffSynth-Studio/Qwen-Image-Distill-LoRA/model.safetensors")
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
prompt = "Exquisite portrait, underwater girl, flowing blue dress, gently floating hair, translucent lighting, surrounded by bubbles, serene expression, intricate details, dreamy and ethereal."
|
| 51 |
+
image = pipe(prompt, seed=0, num_inference_steps=15, cfg_scale=1)
|
| 52 |
+
image.save("image.jpg")
|
| 53 |
+
```
|
README_from_modelscope.md
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
base_model: Qwen/Qwen-Image
|
| 3 |
+
frameworks:
|
| 4 |
+
- Pytorch
|
| 5 |
+
license: Apache License 2.0
|
| 6 |
+
tags:
|
| 7 |
+
- LoRA
|
| 8 |
+
vision_foundation: QWEN_IMAGE_20_B
|
| 9 |
+
|
| 10 |
+
#model-type:
|
| 11 |
+
##如 gpt、phi、llama、chatglm、baichuan 等
|
| 12 |
+
#- gpt
|
| 13 |
+
|
| 14 |
+
#domain:
|
| 15 |
+
##如 nlp、cv、audio、multi-modal
|
| 16 |
+
#- nlp
|
| 17 |
+
|
| 18 |
+
#language:
|
| 19 |
+
##语言代码列表 https://help.aliyun.com/document_detail/215387.html?spm=a2c4g.11186623.0.0.9f8d7467kni6Aa
|
| 20 |
+
#- cn
|
| 21 |
+
|
| 22 |
+
#metrics:
|
| 23 |
+
##如 CIDEr、BLEU、ROUGE 等
|
| 24 |
+
#- CIDEr
|
| 25 |
+
|
| 26 |
+
#tags:
|
| 27 |
+
##各种自定义,包括 pretrained、fine-tuned、instruction-tuned、RL-tuned 等训练方法和其他
|
| 28 |
+
#- pretrained
|
| 29 |
+
|
| 30 |
+
#tools:
|
| 31 |
+
##如 vllm、fastchat、llamacpp、AdaSeq 等
|
| 32 |
+
#- vllm
|
| 33 |
+
---
|
| 34 |
+
# Qwen-Image LoRA 蒸馏加速模型
|
| 35 |
+
|
| 36 |
+
## 模型介绍
|
| 37 |
+
|
| 38 |
+
本模型是 [Qwen-Image](https://www.modelscope.cn/models/Qwen/Qwen-Image) 的蒸馏加速 LoRA,我们沿用了模型 [DiffSynth-Studio/Qwen-Image-Distill-Full](https://modelscope.cn/models/DiffSynth-Studio/Qwen-Image-Distill-Full) 的训练流程,将可训练模型参数改为 LoRA,从而更方便地集成到各类图像生成框架中。
|
| 39 |
+
|
| 40 |
+
训练框架基于 [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio) 构建,训练数据是由原模型根据 [DiffusionDB](https://www.modelscope.cn/datasets/AI-ModelScope/diffusiondb) 中随机抽取的提示词生成的 1.6 万张图,训练程序在 8 * MI308X GPU 上运行了约 1 天。
|
| 41 |
+
|
| 42 |
+
## 效果展示
|
| 43 |
+
|
| 44 |
+
||原版模型|原版模型|加速模型|
|
| 45 |
+
|-|-|-|-|
|
| 46 |
+
|推理步数|40|15|15|
|
| 47 |
+
|CFG scale|4|1|1|
|
| 48 |
+
|前向推理次数|80|15|15|
|
| 49 |
+
|样例1||||
|
| 50 |
+
|样例2||||
|
| 51 |
+
|样例3||||
|
| 52 |
+
|
| 53 |
+
## 推理代码
|
| 54 |
+
|
| 55 |
+
```shell
|
| 56 |
+
git clone https://github.com/modelscope/DiffSynth-Studio.git
|
| 57 |
+
cd DiffSynth-Studio
|
| 58 |
+
pip install -e .
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
```python
|
| 62 |
+
from diffsynth.pipelines.qwen_image import QwenImagePipeline, ModelConfig
|
| 63 |
+
from modelscope import snapshot_download
|
| 64 |
+
import torch
|
| 65 |
+
|
| 66 |
+
snapshot_download("DiffSynth-Studio/Qwen-Image-Distill-LoRA", local_dir="models/DiffSynth-Studio/Qwen-Image-Distill-LoRA")
|
| 67 |
+
pipe = QwenImagePipeline.from_pretrained(
|
| 68 |
+
torch_dtype=torch.bfloat16,
|
| 69 |
+
device="cuda",
|
| 70 |
+
model_configs=[
|
| 71 |
+
ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="transformer/diffusion_pytorch_model*.safetensors"),
|
| 72 |
+
ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="text_encoder/model*.safetensors"),
|
| 73 |
+
ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
|
| 74 |
+
],
|
| 75 |
+
tokenizer_config=ModelConfig(model_id="Qwen/Qwen-Image", origin_file_pattern="tokenizer/"),
|
| 76 |
+
)
|
| 77 |
+
pipe.load_lora(pipe.dit, "models/DiffSynth-Studio/Qwen-Image-Distill-LoRA/model.safetensors")
|
| 78 |
+
|
| 79 |
+
prompt = "精致肖像,水下少女,蓝裙飘逸,发丝轻扬,光影透澈,气泡环绕,面容恬静,细节精致,梦幻唯美。"
|
| 80 |
+
image = pipe(prompt, seed=0, num_inference_steps=15, cfg_scale=1)
|
| 81 |
+
image.save("image.jpg")
|
| 82 |
+
```
|
assets/image_1_full.jpg
ADDED
|
Git LFS Details
|
assets/image_1_original.jpg
ADDED
|
assets/image_1_ours.jpg
ADDED
|
Git LFS Details
|
assets/image_2_full.jpg
ADDED
|
Git LFS Details
|
assets/image_2_original.jpg
ADDED
|
assets/image_2_ours.jpg
ADDED
|
Git LFS Details
|
assets/image_3_full.jpg
ADDED
|
Git LFS Details
|
assets/image_3_original.jpg
ADDED
|
Git LFS Details
|
assets/image_3_ours.jpg
ADDED
|
Git LFS Details
|
assets/prompts.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
动漫风格,一个漂亮的少女在教室里,身后右边的黑板上写着“Qwen-Image-Distill 更快速的生图”以及“DiffSynth-Studio Team”
|
| 2 |
+
精致肖像,水下少女,蓝裙飘逸,发丝轻扬,光影透澈,气泡环绕,面容恬静,细节精致,梦幻唯美。
|
| 3 |
+
唯美动漫画面,一位二次元美少女,坐在公园的长椅上,落日的霞光洒在少女脸上,少女露出动人的微笑,整体色调为橙色
|
| 4 |
+
绿意盎然的森林间,皮克斯风2.5D渲染,一辆小车悠然驶过辽阔草原,光影柔和,画面温暖梦幻。
|
configuration.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"aigc_model":true,"framework":"Pytorch","model_file_location":"model.safetensors"}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5c81b6f8d8560634481d6ac05b4a0ac49b099cb718cd204591affb7bc2aee65
|
| 3 |
+
size 472047152
|