Spaces:

USF00
/

Text_to_Video_Demo

Configuration error

App Files Files Community

USF00 commited on 18 days ago

Commit

2b67076

0 Parent(s):

Initial commit

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitignore +47 -0
Custom Resolutions Instructions.txt +16 -0
Dockerfile +92 -0
LICENSE.txt +46 -0
README.md +266 -0
defaults/ReadMe.txt +13 -0
defaults/animate.json +17 -0
defaults/fantasy.json +11 -0
defaults/flf2v_720p.json +16 -0
defaults/flux.json +15 -0
defaults/flux_chroma.json +17 -0
defaults/flux_dev_kontext.json +16 -0
defaults/flux_dev_umo.json +23 -0
defaults/flux_dev_uso.json +16 -0
defaults/flux_krea.json +15 -0
defaults/flux_schnell.json +16 -0
defaults/flux_srpo.json +14 -0
defaults/flux_srpo_uso.json +16 -0
defaults/fun_inp.json +13 -0
defaults/fun_inp_1.3B.json +11 -0
defaults/hunyuan.json +12 -0
defaults/hunyuan_avatar.json +12 -0
defaults/hunyuan_custom.json +12 -0
defaults/hunyuan_custom_audio.json +12 -0
defaults/hunyuan_custom_edit.json +12 -0
defaults/hunyuan_i2v.json +12 -0
defaults/hunyuan_t2v_accvideo.json +30 -0
defaults/hunyuan_t2v_fast.json +32 -0
defaults/i2v.json +13 -0
defaults/i2v_2_2.json +25 -0
defaults/i2v_2_2_multitalk.json +18 -0
defaults/i2v_720p.json +14 -0
defaults/i2v_fusionix.json +11 -0
defaults/i2v_palingenesis_2_2.json +18 -0
defaults/infinitetalk.json +16 -0
defaults/infinitetalk_multi.json +16 -0
defaults/ltxv_13B.json +19 -0
defaults/ltxv_distilled.json +15 -0
defaults/lucy_edit.json +20 -0
defaults/lucy_edit_fastwan.json +17 -0
defaults/lynx.json +18 -0
defaults/moviigen.json +16 -0
defaults/multitalk.json +15 -0
defaults/multitalk_720p.json +13 -0
defaults/phantom_1.3B.json +11 -0
defaults/phantom_14B.json +13 -0
defaults/qwen_image_20B.json +21 -0
defaults/qwen_image_edit_20B.json +18 -0
defaults/qwen_image_edit_plus_20B.json +17 -0
defaults/recam_1.3B.json +11 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,47 @@

+.*
+*.py[cod]
+# *.jpg
+*.jpeg
+# *.png
+*.gif
+*.bmp
+*.mp4
+*.mov
+*.mkv
+*.log
+*.zip
+*.pt
+*.pth
+*.ckpt
+*.safetensors
+#*.json
+# *.txt
+*.backup
+*.pkl
+*.html
+*.pdf
+*.whl
+*.exe
+cache
+__pycache__/
+storage/
+samples/
+!.gitignore
+!requirements.txt
+.DS_Store
+*DS_Store
+google/
+Wan2.1-T2V-14B/
+Wan2.1-T2V-1.3B/
+Wan2.1-I2V-14B-480P/
+Wan2.1-I2V-14B-720P/
+outputs/
+outputs2/
+gradio_outputs/
+ckpts/
+loras/
+loras_i2v/
+settings/
+wgp_config.json

Custom Resolutions Instructions.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+You can override the choice of Resolutions offered by WanGP, if you create a file "resolutions.json" in the main WanGP folder.
+This file is composed of a list of two-element sublists. Each two-element sublist should have the format ["Label", "WxH"], where W and H are respectively the Width and Height of the resolution. Please make sure that W and H are multiples of 16. The letter "x" should be placed between these two dimensions.
+Here is below a sample "resolutions.json" file :
+[
+	["1280x720 (16:9, 720p)", "1280x720"],
+	["720x1280 (9:16, 720p)", "720x1280"],
+	["1024x1024 (1:1, 720p)", "1024x1024"],
+	["1280x544 (21:9, 720p)", "1280x544"],
+	["544x1280 (9:21, 720p)", "544x1280"],
+	["1104x832 (4:3, 720p)", "1104x832"],
+	["832x1104 (3:4, 720p)", "832x1104"],
+    ["960x960 (1:1, 720p)", "960x960"],
+    ["832x480 (16:9, 480p)", "832x480"]
+]

Dockerfile ADDED Viewed

	@@ -0,0 +1,92 @@

+# syntax=docker/dockerfile:1
+# The syntax directive above pins the BuildKit Dockerfile frontend; this file relies on
+# a heredoc COPY further down, which the legacy builder does not understand.
+FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
+# Build arg for GPU architectures - specify which CUDA compute capabilities to compile for
+# Common values:
+#   7.0  - Tesla V100
+#   7.5  - RTX 2060, 2070, 2080, Titan RTX
+#   8.0  - A100, A800 (Ampere data center)
+#   8.6  - RTX 3060, 3070, 3080, 3090 (Ampere consumer)
+#   8.9  - RTX 4070, 4080, 4090 (Ada Lovelace)
+#   9.0  - H100, H800 (Hopper data center)
+#   12.0 - RTX 5070, 5080, 5090 (Blackwell) - Note: sm_120 architecture
+#          NOTE(review): sm_120 presumably needs a newer CUDA toolkit than the 12.4.1
+#          base image used here - confirm before building for Blackwell.
+#
+# Examples:
+#   RTX 3060: --build-arg CUDA_ARCHITECTURES="8.6"
+#   RTX 4090: --build-arg CUDA_ARCHITECTURES="8.9"
+#   Multiple: --build-arg CUDA_ARCHITECTURES="8.0;8.6;8.9"
+#
+# Note: Including 8.9 or 9.0 may cause compilation issues on some setups
+# Default includes 8.0 and 8.6 for broad Ampere compatibility
+ARG CUDA_ARCHITECTURES="8.0;8.6"
+# Build-time only: stops apt from prompting during the build without leaving the
+# variable in the environment of the running container.
+ARG DEBIAN_FRONTEND=noninteractive
+# Install system dependencies.
+# apt-get is used instead of apt because apt's command line is not stable for scripts.
+# The package lists are deleted in the same layer so they never reach the image.
+# Recommended packages are kept on purpose: the SageAttention build later in this file
+# may rely on toolchain packages pulled in that way (TODO confirm before adding
+# --no-install-recommends).
+RUN apt-get update && \
+    apt-get install -y \
+        cmake \
+        curl \
+        ffmpeg \
+        git \
+        libgl1 \
+        libglib2.0-0 \
+        ninja-build \
+        python3 \
+        python3-pip \
+        wget && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+WORKDIR /workspace
+COPY requirements.txt .
+# Upgrade the packaging tools first. --no-cache-dir keeps pip's download cache out of
+# the image layers.
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel
+# Install the application requirements (the COPY above fails the build if the file
+# is missing, so it is always present here)
+RUN pip install --no-cache-dir -r requirements.txt
+# Install PyTorch with CUDA 12.4 support.
+# NOTE(review): this runs after requirements.txt, presumably so the pinned +cu124 wheels
+# win over whatever torch build the requirements resolved - keep the order unless verified.
+RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cu124 \
+    torch==2.6.0+cu124 torchvision==0.21.0+cu124
+# Install SageAttention from git (patch GPU detection)
+# The target architectures are passed explicitly through TORCH_CUDA_ARCH_LIST; the patch
+# script below writes them into SageAttention's setup.py in place of its
+# torch.cuda-based GPU detection loop.
+ENV TORCH_CUDA_ARCH_LIST="${CUDA_ARCHITECTURES}"
+ENV FORCE_CUDA="1"
+# A single compile job at a time (presumably to bound memory use during the build).
+ENV MAX_JOBS="1"
+COPY <<EOF /tmp/patch_setup.py
+"""Replace SageAttention's GPU auto-detection in setup.py with a fixed arch set.
+
+Run from the SageAttention checkout. Reads TORCH_CUDA_ARCH_LIST, a ';'-separated
+list such as "8.0;8.6", and rewrites ./setup.py in place.
+"""
+import os
+import sys
+
+with open('setup.py', 'r') as f:
+    content = f.read()
+
+# Get architectures from environment variable; blank entries (for example from a
+# trailing ';') and surrounding spaces are dropped so they cannot become bogus
+# compute capabilities.
+arch_list = os.environ.get('TORCH_CUDA_ARCH_LIST', '')
+archs = [arch.strip() for arch in arch_list.split(';') if arch.strip()]
+if not archs:
+    sys.exit('patch_setup.py: TORCH_CUDA_ARCH_LIST is empty; '
+             'pass e.g. --build-arg CUDA_ARCHITECTURES="8.0;8.6"')
+arch_set = '{' + ', '.join([f'"{arch}"' for arch in archs]) + '}'
+
+# Replace the GPU detection section. This text must match upstream setup.py exactly.
+old_section = '''compute_capabilities = set()
+device_count = torch.cuda.device_count()
+for i in range(device_count):
+    major, minor = torch.cuda.get_device_capability(i)
+    if major < 8:
+        warnings.warn(f"skipping GPU {i} with compute capability {major}.{minor}")
+        continue
+    compute_capabilities.add(f"{major}.{minor}")'''
+new_section = 'compute_capabilities = ' + arch_set + '''
+print(f"Manually set compute capabilities: {compute_capabilities}")'''
+
+if old_section in content:
+    content = content.replace(old_section, new_section)
+    with open('setup.py', 'w') as f:
+        f.write(content)
+else:
+    # Upstream setup.py has changed. Say so loudly instead of silently doing nothing,
+    # but do not abort: the unpatched setup.py may still be able to build.
+    print('WARNING: patch_setup.py did not find the GPU detection section in setup.py; '
+          'the file was left unpatched.', file=sys.stderr)
+EOF
+# Clone, patch and build SageAttention in a single layer, then delete the checkout so
+# the source tree and build artefacts do not stay in the image. A shallow clone is
+# enough because only the current tree is built.
+# NOTE(review): the clone tracks the default branch; pin a tag or commit for
+# reproducible builds (the setup.py patch depends on the exact upstream text).
+RUN git clone --depth 1 https://github.com/thu-ml/SageAttention.git /tmp/sageattention && \
+    cd /tmp/sageattention && \
+    python3 /tmp/patch_setup.py && \
+    pip install --no-cache-dir --no-build-isolation . && \
+    cd / && \
+    rm -rf /tmp/sageattention
+# Create the unprivileged user (UID 1000) and give it the workspace and its cache
+# directory, all in one layer.
+RUN useradd -u 1000 -ms /bin/bash user && \
+    mkdir -p /home/user/.cache && \
+    chown -R user:user /workspace /home/user/.cache
+# --chmod guarantees the script is executable even if the build context lost its exec bit.
+COPY --chmod=755 entrypoint.sh /workspace/entrypoint.sh
+# NOTE(review): there is no USER instruction, so the entrypoint starts as root unless the
+# container is run with --user or the script drops privileges itself - confirm.
+ENTRYPOINT ["/workspace/entrypoint.sh"]

LICENSE.txt ADDED Viewed

	@@ -0,0 +1,46 @@

+WanGP NON-COMMERCIAL EVALUATION LICENSE 1.0
+Definitions
+1.1 “Software” means the source code, binaries, libraries, utilities and UI released under this license.
+1.2 “Output” means images, videos or other media produced by running the Software.
+1.3 “Commercial Use” means:
+a) selling, sublicensing, renting, leasing, or otherwise distributing the Software, in whole or in part, for a fee or other consideration; or
+b) offering the Software (or any derivative) as part of a paid product or hosted service; or
+c) using the Software (or any derivative) to provide cloud-based or backend services, where end users access or pay for those services.
+License Grant
+Subject to Section 3:
+a) You are granted a worldwide, non-exclusive, royalty-free, revocable license to use, reproduce, modify and distribute the Software for non-commercial purposes only.
+b) You are granted a worldwide, non-exclusive, royalty-free, irrevocable license to use, reproduce, modify and distribute the Output for any purpose, including commercial sale, provided that any commercial distribution of the Output includes a clear notice that the Output was produced (in whole or in part) using WanGP, along with a hyperlink to the WanGP application’s About tab or repository.
+Restrictions
+3.1 You MAY NOT distribute, sublicense or otherwise make available the Software (or any derivative) for Commercial Use.
+3.2 You MAY sell, license or otherwise commercially exploit the Output without restriction.
+3.3 If you wish to use the Software for Commercial Use, you must obtain a separate commercial license from the Licensor.
+Third-Party Components 4.1 The Software includes components licensed under various open-source licenses (e.g., Apache 2.0, MIT, BSD). 4.2 You must comply with all applicable terms of those third-party licenses, including preservation of copyright notices, inclusion of required license texts, and patent-grant provisions. 4.3 You can find the full text of each third-party license via the “About” tab in the WanGP application, which provides links to their original GitHub repositories.
+Attribution
+5.1 You must give appropriate credit by including:
+• a copy of this license (or a link to it), and
+• a notice that your use is based on “WanGP”.
+5.2 You may do so in any reasonable manner, but not in any way that suggests the Licensor endorses you or your use.
+Disclaimer of Warranty & Liability
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE.
+Commercial Licensing The Licensor may offer commercial licenses for the Software, which grant rights to use the Software for Commercial Use. Please contact [[email protected]] for terms and pricing.
+Effective Date & Previous Versions
+8.1 This license is effective as of the date the LICENSE file is updated in the WanGP repository.
+8.2 Any copies of the Software obtained under prior license terms before this Effective Date remain governed by those prior terms; such granted rights are irrevocable.
+8.3 Use of the Software after the release of any subsequent version by the Licensor is subject to the terms of the then-current license, unless a separate agreement is in place.
+Acceptable Use / Moral Clause
+9.1 You MAY NOT use the Software or the Output to facilitate or produce content that is illegal, harmful, violent, harassing, defamatory, fraudulent, or otherwise violates applicable laws or fundamental human rights.
+9.2 You MAY NOT deploy the Software or Output in contexts that promote hate speech, extremist ideology, human rights abuses, or other actions that could foreseeably cause significant harm to individuals or groups.
+9.3 The Licensor reserves the right to terminate the rights granted under this license if a licensee materially breaches this Acceptable Use clause.
+END OF LICENSE

README.md ADDED Viewed

	@@ -0,0 +1,266 @@

+# WanGP
+-----
+<p align="center">
+<b>WanGP by DeepBeepMeep : The best Open Source Video Generative Models Accessible to the GPU Poor</b>
+</p>
+WanGP supports the Wan (and derived models), Hunyuan Video and LTX Video models with:
+- Low VRAM requirements (as low as 6 GB of VRAM is sufficient for certain models)
+- Support for old Nvidia GPUs (RTX 10XX, 20xx, ...)
+- Support for AMD GPUs Radeon RX 76XX, 77XX, 78XX & 79XX, instructions in the Installation Section Below.
+- Very Fast on the latest GPUs
+- Easy to use Full Web based interface
+- Auto download of the required model adapted to your specific architecture
+- Tools integrated to facilitate Video Generation : Mask Editor, Prompt Enhancer, Temporal and Spatial Generation, MMAudio, Video Browser, Pose / Depth / Flow extractor
+- Loras Support to customize each model
+- Queuing system : make your shopping list of videos to generate and come back later
+**Discord Server to get Help from Other Users and show your Best Videos:** https://discord.gg/g7efUW9jGV
+**Follow DeepBeepMeep on Twitter/X to get the Latest News**: https://x.com/deepbeepmeep
+## 🔥 Latest Updates :
+### October 6 2025: WanGP v8.994 - A few last things before the Big Unknown ...
+This new version hasn't any new model...
+...but temptation to upgrade will be high as it contains a few Loras related features that may change your Life:
+- **Ready to use Loras Accelerators Profiles** per type of model that you can apply on your current *Generation Settings*. Next time I will recommend a *Lora Accelerator*, it will be only one click away. And best of all of the required Loras will be downloaded automatically. When you apply an *Accelerator Profile*, input fields like the *Number of Denoising Steps* *Activated Loras*, *Loras Multipliers* (such as "1;0 0;1" ...) will be automatically filled. However your video specific fields will be preserved, so it will be easy to switch between Profiles to experiment. With *WanGP 8.993*, the *Accelerator Loras* are now merged with *Non Accelerator Loras". Things are getting too easy...
+- **Embedded Loras URL** : WanGP will now try to remember every Lora URLs it sees. For instance if someone sends you some settings that contain Loras URLs or you extract the Settings of Video generated by a friend with Loras URLs, these URLs will be automatically added to *WanGP URL Cache*. Conversely everything you will share (Videos, Settings, Lset files) will contain the download URLs if they are known. You can also download directly a Lora in WanGP by using the *Download Lora* button a the bottom. The Lora will be immediatly available and added to WanGP lora URL cache. This will work with *Hugging Face* as a repository. Support for CivitAi will come as soon as someone will nice enough to post a GitHub PR ...
+- **.lset file** supports embedded Loras URLs. It has never been easier to share a Lora with a friend. As a reminder a .lset file can be created directly from *WanGP Web Interface* and it contains a list of Loras and their multipliers, a Prompt and Instructions how to use these loras (like the Lora's *Trigger*). So with embedded Loras URL, you can send an .lset file by email or share it on discord: it is just a 1 KB tiny text, but with it other people will be able to use Gigabytes Loras as these will be automatically downloaded.
+I have created the new Discord Channel **share-your-settings** where you can post your *Settings* or *Lset files*. I will be pleased to add new Loras Accelerators to the list of WanGP *Accelerators Profiles* if you post some good ones there.
+*With the 8.993 update*, I have added support for **Scaled FP8 format**. As a sample case, I have created finetunes for the **Wan 2.2 PalinGenesis** Finetune which is quite popular recently. You will find it in 3 flavors : *t2v*, *i2v* and *Lightning Accelerated for t2v*.
+The *Scaled FP8 format* is widely used as it is the format used by ... *ComfyUI*. So I expect a flood of Finetunes in the *share-your-finetune* channel. If not it means this feature was useless and I will remove it &#x1F608;&#x1F608;&#x1F608;
+Not enough Space left on your SSD to download more models ? Would like to reuse Scaled FP8 files in your ComfyUI Folder without duplicating them ? Here comes *WanGP 8.994* **Multiple Checkpoints Folders** : you just need to move the files into different folders / hard drives or reuse existing folders and let know WanGP about it in the *Config Tab* and WanGP will be able to put all the parts together.
+Last but not least the Lora's documentation has been updated.
+*update 8.991*: full power of *Vace Lynx* unleashed with new combinations such as Landscape + Face / Clothes + Face / Injected Frame (Start/End frames/...) + Face
+*update 8.992*: optimized gen with Lora, should be 10% faster if many loras
+*update 8.993*: Support for *Scaled FP8* format and sample *PalinGenesis* finetunes, merged Loras Accelerators and Non Accelerators
+*update 8.994*: Added custom checkpoints folders
+### September 30 2025: WanGP v8.9 - Combinatorics
+This new version of WanGP introduces **Wan 2.1 Lynx**, the best Control Net so far to transfer *Facial Identity*. You will be amazed to recognize your friends even with a completely different hair style. Congrats to the *Byte Dance team* for this achievement. Lynx works quite well with *Fusionix t2v* 10 steps.
+*WanGP 8.9* also illustrate how existing WanGP features can be easily combined with new models. For instance with *Lynx* you will get out of the box *Video to Video* and *Image/Text to Image*.
+Another fun combination is *Vace* + *Lynx*, which works much better than *Vace StandIn*. I have added sliders to change the weight of Vace & Lynx to allow you to tune the effects.
+### September 28 2025: WanGP v8.76 - ~~Here Are Two Three New Contenders in the Vace Arena !~~ The Never Ending Release
+So in ~~today's~~ this release you will find two Wannabe Vace that covers each only a subset of Vace features but offers some interesting advantages:
+- **Wan 2.2 Animate**: this model is specialized in *Body Motion* and *Facial Motion transfers*. It does that very well. You can use this model to either *Replace* a person in an in Video or *Animate* the person of your choice using an existing *Pose Video* (remember *Animate Anyone* ?). By default it will keep the original soundtrack. *Wan 2.2 Animate* seems to be under the hood a derived i2v model and should support the corresponding Loras Accelerators (for instance *FusioniX t2v*). Also as a WanGP exclusivity, you will find support for *Outpainting*.
+In order to use Wan 2.2 Animate you will need first to stop by the *Mat Anyone* embedded tool, to extract the *Video Mask* of the person from which you want to extract the motion.
+With version WanGP 8.74, there is an extra option that allows you to apply *Relighting* when Replacing a person. Also, you can now Animate a person without providing a Video Mask to target the source of the motion (with the risk it will be less precise)
+For those of you who have a mask halo effect when Animating a character I recommend trying *SDPA attention* and to use the *FusioniX i2v* lora. If this issue persists (this will depend on the control video) you have now a choice of the two *Animate Mask Options* in *WanGP 8.76*. The old masking option which was a WanGP exclusive has been renamed *See Through Mask* because the background behind the animated character was preserved but this creates sometime visual artifacts. The new option which has the shorter name is what you may find elsewhere online. As it uses internally a much larger mask, there is no halo. However the immediate background behind the character is not preserved and may end completely different.
+- **Lucy Edit**: this one claims to be a *Nano Banana* for Videos. Give it a video and ask it to change it (it is specialized in clothes changing) and voila ! The nice thing about it is that it is based on the *Wan 2.2 5B* model and therefore is very fast, especially if you use the *FastWan* finetune that is also part of the package.
+Also because I wanted to spoil you:
+- **Qwen Edit Plus**: also known as the *Qwen Edit 25th September Update* which is specialized in combining multiple Objects / People. There is also a new support for *Pose transfer* & *Recolorisation*. All of this made easy to use in WanGP. You will find right now only the quantized version since HF crashes when uploading the unquantized version.
+- **T2V Video 2 Video Masking**: ever wanted to apply a Lora, a process (for instance Upsampling) or a Text Prompt on only a (moving) part of a Source Video ? Look no further, I have added *Masked Video 2 Video* (which works also in image2image) in the *Text 2 Video* models. As usual you just need to use *Matanyone* to create the mask.
+*Update 8.71*: fixed Fast Lucy Edit that didnt contain the lora
+*Update 8.72*: shadow drop of Qwen Edit Plus
+*Update 8.73*: Qwen Preview & InfiniteTalk Start image
+*Update 8.74*: Animate Relighting / Nomask mode , t2v Masked Video to Video
+*Update 8.75*: REDACTED
+*Update 8.76*: Alternate Animate masking that fixes the mask halo effect that some users have
+### September 15 2025: WanGP v8.6 - Attack of the Clones
+- The long awaited **Vace for Wan 2.2** is at last here or maybe not: it has been released by the *Fun Team* of *Alibaba* and it is not official. You can play with the vanilla version (**Vace Fun**) or with the one accelerated with Loras (**Vace Fan Cocktail**)
+- **First Frame / Last Frame for Vace** : Vace models are so powerful that they could do *First frame / Last frame* since day one using the *Injected Frames* feature. However this required to compute by hand the locations of each end frame since this feature expects frames positions. I made it easier to compute these locations by using the "L" alias :
+For a video Gen from scratch *"1 L L L"* means the 4 Injected Frames will be injected like this: frame no 1 at the first position, the next frame at the end of the first window, then the following frame at the end of the next window, and so on ....
+If you *Continue a Video* , you just need *"L L L"* since the first frame is the last frame of the *Source Video*. In any case remember that numeral frames positions (like "1") are aligned by default to the beginning of the source window, so low values such as 1 will be considered in the past unless you change this behaviour in *Sliding Window Tab/ Control Video, Injected Frames aligment*.
+- **Qwen Edit Inpainting** exists now in two versions: the original version of the previous release and a Lora based version. Each version has its pros and cons. For instance the Lora version supports also **Outpainting** ! However it tends to change slightly the original image even outside the outpainted area.
+- **Better Lipsync with all the Audio to Video models**: you probably noticed that *Multitalk*, *InfiniteTalk* or *Hunyuan Avatar* had so-so lipsync when the audio provided contained some background music. The problem should be solved now thanks to an automated background music removal all done by AI. Don't worry you will still hear the music as it is added back in the generated Video.
+### September 11 2025: WanGP v8.5/8.55 - Wanna be a Cropper or a Painter ?
+I have done some intensive internal refactoring of the generation pipeline to ease support of existing models or add new models. Nothing really visible but this makes WanGP a little more future proof.
+Otherwise in the news:
+- **Cropped Input Image Prompts**: as quite often most *Image Prompts* provided (*Start Image, Input Video, Reference Image,  Control Video, ...*) rarely matched your requested *Output Resolution*. In that case I used the resolution you gave either as a *Pixels Budget* or as an *Outer Canvas* for the Generated Video. However in some occasion you really want the requested Output Resolution and nothing else. Besides some models deliver much better Generations if you stick to one of their supported resolutions. In order to address this need I have added a new Output Resolution choice in the *Configuration Tab*:  **Dimensions Correspond to the Ouput Weight & Height as the Prompt Images will be Cropped to fit Exactly these dimensins**. In short if needed the *Input Prompt Images* will be cropped (centered cropped for the moment). You will see this can make quite a difference for some models
+- *Qwen Edit* has now a new sub Tab called **Inpainting**, that lets you target with a brush which part of the *Image Prompt* you want to modify. This is quite convenient if you find that Qwen Edit modifies usually too many things. Of course, as there are more constraints for Qwen Edit don't be surprised if sometime it will return the original image unchanged. A piece of advise: describe in your *Text Prompt* where (for instance *left to the man*, *top*, ...) the parts that you want to modify are located.
+The mask inpainting is fully compatible with *Matanyone Mask generator*: generate first an *Image Mask* with Matanyone, transfer it to the current Image Generator and modify the mask with the *Paint Brush*. Talking about matanyone I have fixed a bug that caused a mask degradation with long videos (now WanGP Matanyone is as good as the original app and still requires 3 times less VRAM)
+- This **Inpainting Mask Editor** has been added also to *Vace Image Mode*. Vace is probably still one of best Image Editor today. Here is a very simple & efficient workflow that do marvels with Vace:
+Select *Vace Cocktail > Control Image Process = Perform Inpainting & Area Processed = Masked Area > Upload a Control Image, then draw your mask directly on top of the image & enter a text Prompt that describes the expected change > Generate > Below the Video Gallery click 'To Control Image' > Keep on doing more changes*.
+Doing more sophisticated thing Vace Image Editor works very well too: try Image Outpainting, Pose transfer, ...
+For the best quality I recommend to set in *Quality Tab* the option: "*Generate a 9 Frames Long video...*"
+**update 8.55**: Flux Festival
+- **Inpainting Mode** also added for *Flux Kontext*
+- **Flux SRPO** : new finetune with x3 better quality vs Flux Dev according to its authors. I have also created a *Flux SRPO USO* finetune which is certainly the best open source *Style Transfer* tool available
+- **Flux UMO**: model specialized in combining multiple reference objects / people together. Works quite well at 768x768
+Good luck with finding your way through all the Flux models names !
+### September 5 2025: WanGP v8.4 - Take me to Outer Space
+You have probably seen these short AI generated movies created using *Nano Banana* and the *First Frame - Last Frame* feature of *Kling 2.0*. The idea is to generate an image, modify a part of it with Nano Banana and give these two images to Kling, which will generate the Video between them. Then use the previous Last Frame as the new First Frame, rinse and repeat and you get a full movie.
+I have made it easier to do just that with *Qwen Edit* and *Wan*:
+- **End Frames can now be combined with Continue a Video** (and not just a Start Frame)
+- **Multiple End Frames can be inputed**, each End Frame will be used for a different Sliding Window
+You can plan in advance all your shots (one shot = one Sliding Window) : I recommend using Wan 2.2 Image to Image with multiple End Frames (one for each shot / Sliding Window), and a different Text Prompt for each shot / Sliding Window (remember to enable *Sliding Windows/Text Prompts Will be used for a new Sliding Window of the same Video Generation*)
+The results can be quite impressive. However, Wan 2.1 & 2.2 Image 2 Image are restricted to a single overlap frame when using Sliding Windows, which means only one frame is reused for the motion. This may be insufficient if you are trying to connect two shots with fast movement.
+This is where *InfiniteTalk* comes into play. Besides being one of the best models to generate animated audio driven avatars, InfiniteTalk uses internally more than one motion frame. It is quite good at maintaining the motion between two shots. I have tweaked InfiniteTalk so that **its motion engine can be used even if no audio is provided**.
+So here is how to use InfiniteTalk: enable *Sliding Windows/Text Prompts Will be used for a new Sliding Window of the same Video Generation*), and if you continue an existing Video  *Misc/Override Frames per Second" should be set to "Source Video*. Each Reference Frame inputed will play the same role as the End Frame except it wont be exactly an End Frame (it will correspond more to a middle frame, the actual End Frame will differ but will be close)
+You will find below a 33s movie I have created using these two methods. Quality could be much better as I havent tuned at all the settings (I couldn't bother, I used 10 steps generation without Loras Accelerators for most of the gens).
+### September 2 2025: WanGP v8.31 - At last the pain stops
+- This single new feature should give you the strength to face all the potential bugs of this new release:
+**Images Management (multiple additions or deletions, reordering) for Start Images / End Images / Images References.**
+- Unofficial **Video to Video (Non Sparse this time) for InfinitTalk**. Use the Strength Noise slider to decide how much motion of the original window you want to keep. I have also *greatly reduced the VRAM requirements for Multitalk / Infinitalk* (especially the multispeakers version & when generating at 1080p).
+- **Experimental Sage 3 Attention support**: you will need to deserve this one, first you need a Blackwell GPU (RTX50xx) and request an access to Sage 3 Github repo, then you will have to compile Sage 3, install it and cross your fingers ...
+*update 8.31: one shouldnt talk about bugs if one doesn't want to attract bugs*
+See full changelog: **[Changelog](docs/CHANGELOG.md)**
+## 📋 Table of Contents
+- [🚀 Quick Start](#-quick-start)
+- [📦 Installation](#-installation)
+- [🎯 Usage](#-usage)
+- [📚 Documentation](#-documentation)
+- [🔗 Related Projects](#-related-projects)
+## 🚀 Quick Start
+**One-click installation:**
+- Get started instantly with [Pinokio App](https://pinokio.computer/)
+- Use Redtash1 [One Click Install with Sage](https://github.com/Redtash1/Wan2GP-Windows-One-Click-Install-With-Sage)
+**Manual installation:**
+```bash
+git clone https://github.com/deepbeepmeep/Wan2GP.git
+cd Wan2GP
+conda create -n wan2gp python=3.10.9
+conda activate wan2gp
+pip install torch==2.7.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu128
+pip install -r requirements.txt
+```
+**Run the application:**
+```bash
+python wgp.py
+```
+**Update the application:**
+If using Pinokio use Pinokio to update otherwise:
+Get in the directory where WanGP is installed and:
+```bash
+git pull
+pip install -r requirements.txt
+```
+## 🐳 Docker:
+**For Debian-based systems (Ubuntu, Debian, etc.):**
+```bash
+./run-docker-cuda-deb.sh
+```
+This automated script will:
+- Detect your GPU model and VRAM automatically
+- Select optimal CUDA architecture for your GPU
+- Install NVIDIA Docker runtime if needed
+- Build a Docker image with all dependencies
+- Run WanGP with optimal settings for your hardware
+**Docker environment includes:**
+- NVIDIA CUDA 12.4.1 with cuDNN support
+- PyTorch 2.6.0 with CUDA 12.4 support
+- SageAttention compiled for your specific GPU architecture
+- Optimized environment variables for performance (TF32, threading, etc.)
+- Automatic cache directory mounting for faster subsequent runs
+- Current directory mounted in container - all downloaded models, loras, generated videos and files are saved locally
+**Supported GPUs:** RTX 40XX, RTX 30XX, RTX 20XX, GTX 16XX, GTX 10XX, Tesla V100, A100, H100, and more.
+## 📦 Installation
+### Nvidia
+For detailed installation instructions for different GPU generations:
+- **[Installation Guide](docs/INSTALLATION.md)** - Complete setup instructions for RTX 10XX to RTX 50XX
+### AMD
+For detailed installation instructions for different GPU generations:
+- **[Installation Guide](docs/AMD-INSTALLATION.md)** - Complete setup instructions for Radeon RX 76XX, 77XX, 78XX & 79XX
+## 🎯 Usage
+### Basic Usage
+- **[Getting Started Guide](docs/GETTING_STARTED.md)** - First steps and basic usage
+- **[Models Overview](docs/MODELS.md)** - Available models and their capabilities
+### Advanced Features
+- **[Loras Guide](docs/LORAS.md)** - Using and managing Loras for customization
+- **[Finetunes](docs/FINETUNES.md)** - Add manually new models to WanGP
+- **[VACE ControlNet](docs/VACE.md)** - Advanced video control and manipulation
+- **[Command Line Reference](docs/CLI.md)** - All available command line options
+## 📚 Documentation
+- **[Changelog](docs/CHANGELOG.md)** - Latest updates and version history
+- **[Troubleshooting](docs/TROUBLESHOOTING.md)** - Common issues and solutions
+## 📚 Video Guides
+- Nice Video that explain how to use Vace:\
+https://www.youtube.com/watch?v=FMo9oN2EAvE
+- Another Vace guide:\
+https://www.youtube.com/watch?v=T5jNiEhf9xk
+## 🔗 Related Projects
+### Other Models for the GPU Poor
+- **[HunyuanVideoGP](https://github.com/deepbeepmeep/HunyuanVideoGP)** - One of the best open source Text to Video generators
+- **[Hunyuan3D-2GP](https://github.com/deepbeepmeep/Hunyuan3D-2GP)** - Image to 3D and text to 3D tool
+- **[FluxFillGP](https://github.com/deepbeepmeep/FluxFillGP)** - Inpainting/outpainting tools based on Flux
+- **[Cosmos1GP](https://github.com/deepbeepmeep/Cosmos1GP)** - Text to world generator and image/video to world
+- **[OminiControlGP](https://github.com/deepbeepmeep/OminiControlGP)** - Flux-derived application for object transfer
+- **[YuE GP](https://github.com/deepbeepmeep/YuEGP)** - Song generator with instruments and singer's voice
+---
+<p align="center">
+Made with ❤️ by DeepBeepMeep
+</p>

defaults/ReadMe.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+Please do not modify any file in this folder.
+If you want to change a property of a default model, copy the corresponding model file into the ./finetunes folder and modify the properties you want to change in the new file.
+If a property is not in the new file, it will be inherited automatically from the default file that has the same file name.
+For instance to hide a model:
+{
+	"model":
+	{
+		"visible": false
+	}
+}

defaults/animate.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+    "model": {
+        "name": "Wan2.2 Animate 14B",
+        "architecture": "animate",
+        "description": "Wan-Animate takes a video and a character image as input, and generates a video in either 'Animation' or 'Replacement' mode. A Sliding Window of at least 81 frames is recommended to obtain the best Style continuity.",
+        "URLs": [
+            "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_bf16.safetensors",
+            "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_quanto_fp16_int8.safetensors",
+            "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_14B_quanto_bf16_int8.safetensors"
+        ],
+		"preload_URLs" :
+		[
+			"https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_animate_relighting_lora.safetensors"
+		],
+        "group": "wan2_2"
+    }
+}

defaults/fantasy.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+	"model":
+	{
+		"name": "Fantasy Talking 720p 14B",
+		"architecture" : "fantasy",
+		"modules": [ ["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_fantasy_speaking_14B_bf16.safetensors"]],
+		"description": "The Fantasy Talking model corresponds to the original Wan image 2 video model combined with the Fantasy Speaking module to process an audio Input.",
+		"URLs": "i2v_720p"
+	},
+    "resolution": "1280x720"
+}

defaults/flf2v_720p.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+	"model":
+	{
+		"name": "First Last Frame to Video 720p (FLF2V) 14B",
+		"architecture" : "flf2v_720p",
+		"visible" : true,
+		"description": "The First Last Frame 2 Video model is the official Image 2 Video model that supports Start and End frames.",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_mbf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_quanto_mbf16_int8.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_FLF2V_720p_14B_quanto_mfp16_int8.safetensors"
+		],
+		"auto_quantize": true
+	},
+    "resolution": "1280x720"
+}

defaults/flux.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+    "model": {
+        "name": "Flux 1 Dev 12B",
+        "architecture": "flux",
+        "description": "FLUX.1 Dev is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions.",
+        "URLs": [
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev_bf16.safetensors",
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev_quanto_bf16_int8.safetensors"
+        ],
+        "image_outputs": true
+    },
+    "prompt": "draw a hat",
+    "resolution": "1280x720",
+    "batch_size": 1
+}

defaults/flux_chroma.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+    "model": {
+        "name": "Flux 1 Chroma 1 HD 8.9B",
+        "architecture": "flux_chroma",
+        "description": "FLUX.1 Chroma is an 8.9 billion parameter model. As a base model, Chroma1 is intentionally designed to be an excellent starting point for finetuning. It provides a strong, neutral foundation for developers, researchers, and artists to create specialized models.",
+        "URLs": [
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_hd_bf16.safetensors",
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-chroma_hd_quanto_bf16_int8.safetensors"
+        ],
+        "image_outputs": true
+    },
+    "prompt": "draw a hat",
+    "resolution": "1280x720",
+	"guidance_scale": 3.0,
+    "num_inference_steps": 20,
+    "batch_size": 1
+}

defaults/flux_dev_kontext.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "model": {
+        "name": "Flux 1 Dev Kontext 12B",
+        "architecture": "flux_dev_kontext",
+        "description": "FLUX.1 Kontext is a 12 billion parameter rectified flow transformer capable of editing images based on instructions stored in the Prompt. Please be aware that Flux Kontext is picky on the resolution of the input image and the output dimensions may not match the dimensions of the input image.",
+        "URLs": [
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_bf16.safetensors",
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1_kontext_dev_quanto_bf16_int8.safetensors"
+        ]
+    },
+	"prompt": "add a hat",
+    "resolution": "1280x720",
+    "batch_size": 1
+}

defaults/flux_dev_umo.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+    "model": {
+        "name": "Flux 1 UMO Dev 12B",
+        "architecture": "flux_dev_umo",
+        "description": "FLUX.1 UMO Dev is a model that can Edit Images with a specialization in combining multiple image references (resized internally at 512x512 max) to produce an Image output. Best Image preservation at 768x768 Resolution Output.",
+        "URLs": "flux",
+		"loras": ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-UMO_dit_lora_bf16.safetensors"],
+		"resolutions":  [ ["1024x1024 (1:1)", "1024x1024"],
+						["768x1024 (3:4)", "768x1024"],
+						["1024x768 (4:3)", "1024x768"],
+						["512x1024 (1:2)", "512x1024"],
+						["1024x512 (2:1)", "1024x512"],
+						["768x768 (1:1)", "768x768"],
+						["768x512 (3:2)", "768x512"],
+						["512x768 (2:3)", "512x768"]]
+    },
+	"prompt": "the man is wearing a hat",
+	"embedded_guidance_scale": 4,
+    "resolution": "768x768",
+    "batch_size": 1
+}

defaults/flux_dev_uso.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "model": {
+        "name": "Flux 1 USO Dev 12B",
+        "architecture": "flux_dev_uso",
+        "description": "FLUX.1 USO Dev is a model that can Edit Images with a specialization in Style Transfers (up to two).",
+		"modules": [ ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-USO_projector_bf16.safetensors"]],
+        "URLs": "flux",
+		"loras": ["https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-dev-USO_dit_lora_bf16.safetensors"]
+    },
+	"prompt": "the man is wearing a hat",
+	"embedded_guidance_scale": 4,
+    "resolution": "1024x1024",
+    "batch_size": 1
+}

defaults/flux_krea.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+    "model": {
+        "name": "Flux 1 Dev Krea 12B",
+        "architecture": "flux",
+        "description": "Cutting-edge output quality, with a focus on aesthetic photography.",
+        "URLs": [
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-krea-dev_bf16.safetensors",
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-krea-dev_quanto_bf16_int8.safetensors"
+        ],
+        "image_outputs": true
+    },
+    "prompt": "draw a hat",
+    "resolution": "1280x720",
+    "batch_size": 1
+}

defaults/flux_schnell.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "model": {
+        "name": "Flux 1 Schnell 12B",
+        "architecture": "flux_schnell",
+        "description": "FLUX.1 Schnell is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions. As a distilled model it requires fewer denoising steps.",
+        "URLs": [
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-schnell_bf16.safetensors",
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-schnell_quanto_bf16_int8.safetensors"
+        ],
+        "image_outputs": true
+    },
+    "prompt": "draw a hat",
+    "resolution": "1280x720",
+    "num_inference_steps": 10,
+    "batch_size": 1
+}

defaults/flux_srpo.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+    "model": {
+        "name": "Flux 1 Dev SRPO 12B",
+        "architecture": "flux",
+        "description": "By fine-tuning the FLUX.1.dev model with optimized denoising and online reward adjustment, SRPO improves its human-evaluated realism and aesthetic quality by over 3x.",
+        "URLs": [
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-srpo-dev_bf16.safetensors",
+            "https://huggingface.co/DeepBeepMeep/Flux/resolve/main/flux1-srpo-dev_quanto_bf16_int8.safetensors"
+        ]
+    },
+    "prompt": "draw a hat",
+    "resolution": "1024x1024",
+    "batch_size": 1
+}

defaults/flux_srpo_uso.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "model": {
+        "name": "Flux 1 USO SRPO 12B",
+        "architecture": "flux_dev_uso",
+        "description": "FLUX.1 USO SRPO is a model that can Edit Images with a specialization in Style Transfers (up to two). It leverages the improved Image quality brought by the SRPO process",
+		"modules": [ "flux_dev_uso"],
+        "URLs": "flux_srpo",
+		"loras": "flux_dev_uso"
+    },
+	"prompt": "the man is wearing a hat",
+	"embedded_guidance_scale": 4,
+    "resolution": "1024x1024",
+    "batch_size": 1
+}

defaults/fun_inp.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+	"model":
+	{
+		"name": "Fun InP image2video 14B",
+		"architecture" : "fun_inp",
+		"description": "The Fun model is an alternative image 2 video model that supports End Image fixing out of the box (contrary to the original Wan image 2 video model).",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_bf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_quanto_int8.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_14B_quanto_fp16_int8.safetensors"
+		]
+	}
+}

defaults/fun_inp_1.3B.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+	"model":
+	{
+		"name": "Fun InP image2video 1.3B",
+		"architecture" : "fun_inp_1.3B",
+		"description": "The Fun model is an alternative image 2 video model that supports End Image fixing out of the box (contrary to the original Wan image 2 video model). This version also adds image 2 video capability to the 1.3B model.",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_Fun_InP_1.3B_bf16.safetensors"
+		]
+	}
+}

defaults/hunyuan.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+	"model":
+	{
+		"name": "Hunyuan Video Text2video 720p 13B",
+		"architecture" : "hunyuan",
+		"description": "Probably the best text 2 video model available.",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_720_bf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_720_quanto_int8.safetensors"
+		]
+	}
+}

defaults/hunyuan_avatar.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+	"model":
+	{
+		"name": "Hunyuan Video Avatar 720p 13B",
+		"architecture" : "hunyuan_avatar",
+		"description": "With the Hunyuan Video Avatar model you can animate a person based on the content of an audio input. Please note that the video generator works by processing 128 frames segment at a time (even if you ask less). The good news is that it will concatenate multiple segments for long video generation (max 3 segments recommended as the quality will get worse).",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_avatar_720_bf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_avatar_720_quanto_bf16_int8.safetensors"
+		]
+	}
+}

defaults/hunyuan_custom.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+	"model":
+	{
+		"name": "Hunyuan Video Custom 720p 13B",
+		"architecture" : "hunyuan_custom",
+		"description": "The Hunyuan Video Custom model is probably the best model to transfer people (only people for the moment) as it is quite good to keep their identity. However it is slow as to get good results, you need to generate 720p videos with 30 steps.",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_720_bf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_720_quanto_bf16_int8.safetensors"
+		]
+	}
+}

defaults/hunyuan_custom_audio.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+	"model":
+	{
+		"name": "Hunyuan Video Custom Audio 720p 13B",
+		"architecture" : "hunyuan_custom_audio",
+		"description": "The Hunyuan Video Custom Audio model can be used to generate scenes of a person speaking given a Reference Image and a Recorded Voice or Song. The reference image is not a start image and therefore one can represent the person in a different context. The video length can be anything up to 10s. It is also quite good to generate no sound Video based on a person.",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_audio_720_bf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_audio_720_quanto_bf16_int8.safetensors"
+		]
+	}
+}

defaults/hunyuan_custom_edit.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+	"model":
+	{
+		"name": "Hunyuan Video Custom Edit 720p 13B",
+		"architecture" : "hunyuan_custom_edit",
+		"description": "The Hunyuan Video Custom Edit model can be used to do Video inpainting on a person (add accessories or completely replace the person). You will need in any case to define a Video Mask which will indicate which area of the Video should be edited.",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_edit_720_bf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_custom_edit_720_quanto_bf16_int8.safetensors"
+		]
+	}
+}

defaults/hunyuan_i2v.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+	"model":
+	{
+		"name": "Hunyuan Video Image2video 720p 13B",
+		"architecture" : "hunyuan_i2v",
+		"description": "A good looking image 2 video model, but not so good in prompt adherence.",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_i2v_720_bf16v2.safetensors",
+			"https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/hunyuan_video_i2v_720_quanto_int8v2.safetensors"
+		]
+	}
+}

defaults/hunyuan_t2v_accvideo.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "model": {
+    "name": "Hunyuan Video Text2video 720p AccVideo 13B",
+    "architecture": "hunyuan",
+    "description": "AccVideo is a novel efficient distillation method to accelerate video diffusion models with synthetic dataset. Our method is 8.5x faster than HunyuanVideo.",
+    "URLs": [
+      "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/accvideo_hunyuan_video_720_quanto_int8.safetensors"
+    ],
+    "preload_URLs": [
+    ],
+    "auto_quantize": true
+  },
+  "negative_prompt": "",
+  "resolution": "832x480",
+  "video_length": 81,
+  "seed": 42,
+  "num_inference_steps": 5,
+  "flow_shift": 7,
+  "embedded_guidance_scale": 6,
+  "repeat_generation": 1,
+  "loras_multipliers": "",
+  "temporal_upsampling": "",
+  "spatial_upsampling": "",
+  "RIFLEx_setting": 0,
+  "slg_start_perc": 10,
+  "slg_end_perc": 90,
+  "prompt_enhancer": "",
+  "activated_loras": [
+  ]
+}

defaults/hunyuan_t2v_fast.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "model": {
+    "name": "Hunyuan Video Text2video 720p FastHunyuan 13B",
+    "architecture": "hunyuan",
+    "description": "Fast Hunyuan is an accelerated HunyuanVideo model. It can sample high quality videos with 6 diffusion steps.",
+	"settings_dir": [ "" ],
+    "URLs": [
+      "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/fast_hunyuan_video_720_quanto_int8.safetensors"
+    ],
+    "preload_URLs": [
+      "https://huggingface.co/DeepBeepMeep/HunyuanVideo/resolve/main/fast_hunyuan_video_720_quanto_int8_map.json"
+    ],
+    "auto_quantize": true
+  },
+  "negative_prompt": "",
+  "resolution": "832x480",
+  "video_length": 81,
+  "seed": 42,
+  "num_inference_steps": 6,
+  "flow_shift": 17,
+  "embedded_guidance_scale": 6,
+  "repeat_generation": 1,
+  "loras_multipliers": "",
+  "temporal_upsampling": "",
+  "spatial_upsampling": "",
+  "RIFLEx_setting": 0,
+  "slg_start_perc": 10,
+  "slg_end_perc": 90,
+  "prompt_enhancer": "",
+  "activated_loras": [
+  ]
+}

defaults/i2v.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+	"model":
+	{
+		"name": "Wan2.1 Image2video 480p 14B",
+		"architecture" : "i2v",
+		"description": "The standard Wan Image 2 Video specialized to generate 480p images. It also offers Start and End Image support (End Image is not supported in the original model but seems to work well)",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_480p_14B_mbf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_480p_14B_quanto_mbf16_int8.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_480p_14B_quanto_mfp16_int8.safetensors"
+		]
+	}
+}

defaults/i2v_2_2.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+	"model":
+	{
+		"name": "Wan2.2 Image2video 14B",
+		"architecture" : "i2v_2_2",
+		"description": "Wan 2.2 Image 2 Video model. Contrary to the Wan Image2video 2.1 this model is structurally close to the t2v model. You will need consequently to store Loras for this model in the t2v Lora Folder.",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_high_mbf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_high_quanto_mbf16_int8.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_high_quanto_mfp16_int8.safetensors"
+		],
+		"URLs2": [
+			"https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_low_mbf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_low_quanto_mbf16_int8.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_image2video_14B_low_quanto_mfp16_int8.safetensors"
+		],
+		"group": "wan2_2"
+	},
+	"guidance_phases": 2,
+	"switch_threshold" : 900,
+	"guidance_scale" : 3.5,
+	"guidance2_scale" : 3.5,
+	"flow_shift" : 5
+}

defaults/i2v_2_2_multitalk.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+	"model":
+	{
+		"name": "Wan2.2 Multitalk 14B",
+		"architecture" : "i2v_2_2_multitalk",
+		"description": "The Multitalk module of Wan 2.1 has been combined with the Wan 2.2 image 2 video. It lets you have up to two people have a conversation.",
+		"modules": ["multitalk"],
+		"URLs": "i2v_2_2",
+		"URLs2": "i2v_2_2",
+		"group": "wan2_2",
+		"visible": false
+	},
+	"switch_threshold" : 900,
+	"guidance_scale" : 3.5,
+	"guidance2_scale" : 3.5,
+	"flow_shift" : 5
+}

defaults/i2v_720p.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+	"model":
+	{
+		"name": "Wan2.1 Image2video 720p 14B",
+		"architecture" : "i2v",
+		"description": "The standard Wan Image 2 Video specialized to generate 720p images. It also offers Start and End Image support (End Image is not supported in the original model but seems to work well).",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_720p_14B_mbf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_720p_14B_quanto_mbf16_int8.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_image2video_720p_14B_quanto_mfp16_int8.safetensors"
+		]
+	},
+    "resolution": "1280x720"
+}

defaults/i2v_fusionix.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+	"model":
+	{
+		"name": "Wan2.1 Image2video 480p FusioniX 14B",
+		"architecture" : "i2v",
+		"description": "A powerful merged image-to-video model based on the original WAN 2.1 I2V model, enhanced using multiple open-source components and LoRAs to boost motion realism, temporal consistency, and expressive detail.",
+		"URLs": "i2v",
+		"settings_dir": [ "" ],
+		"loras": ["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/loras_accelerators/Wan2.1_I2V_14B_FusionX_LoRA.safetensors"]
+	}
+}

defaults/i2v_palingenesis_2_2.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+	"model":
+	{
+		"name": "Wan2.2 Image2video Palingenesis 14B",
+		"architecture" : "i2v_2_2",
+		"description": "Wan 2.2 Image 2 Video model. Contrary to the Wan Image2video 2.1 this model is structurally close to the t2v model. Palingenesis is a finetune praised for its high quality.",
+		"URLs": [ "https://huggingface.co/eddy1111111/WAN22.XX_Palingenesis/resolve/main/WAN22.XX_Palingenesis_high_i2v_fix.safetensors"],
+		"URLs2": [ "https://huggingface.co/eddy1111111/WAN22.XX_Palingenesis/resolve/main/WAN22.XX_Palingenesis_low_i2v_fix.safetensors"],
+		"group": "wan2_2"
+	},
+	"ignore_unused_weights": true,
+	"guidance_phases": 2,
+	"switch_threshold" : 900,
+	"guidance_scale" : 3.5,
+	"guidance2_scale" : 3.5,
+	"flow_shift" : 5
+}

defaults/infinitetalk.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "model": {
+        "name": "Infinitetalk Single Speaker 480p 14B",
+        "architecture": "infinitetalk",
+        "modules": [
+            [
+                "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_single_14B_mbf16.safetensors",
+                "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_single_14B_quanto_mbf16_int8.safetensors",
+                "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_single_14B_quanto_mfp16_int8.safetensors"
+            ]
+        ],
+        "description": "The Infinitetalk model is an improved version of Multitalk that supports very long videos. This is the single speaker version. Sliding Window size must be 81 frames to get smooth transitions between shots.",
+		"one_speaker_only": true,
+        "URLs": "i2v"
+    }
+}

defaults/infinitetalk_multi.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "model": {
+        "name": "Infinitetalk Multi Speakers 480p 14B",
+        "architecture": "infinitetalk",
+        "modules": [
+            [
+                "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_multi_14B_mbf16.safetensors",
+                "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_multi_14B_quanto_mfp16_int8.safetensors",
+                "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_infinitetalk_multi_14B_quanto_mbf16_int8.safetensors"
+            ]
+        ],
+        "description": "The Infinitetalk model is an improved version of Multitalk that supports very long videos. This is the multi speakers version. Sliding Window size must be 81 frames to get smooth transitions between shots.",
+		"multi_speakers_only": true,
+        "URLs": "i2v"
+    }
+}

defaults/ltxv_13B.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+	"model":
+	{
+		"name": "LTX Video 0.9.8 13B",
+		"architecture" : "ltxv_13B",
+		"description": "LTX Video is a fast model that can be used to generate very long videos (up to 1800 frames !). It is recommended to keep the number of steps to 30 or you will need to update the file 'ltxv_video/configs/ltxv-13b-0.9.8-dev.yaml'. The LTX Video model expects very long prompts, so don't hesitate to use the Prompt Enhancer.",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_dev_bf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_dev_quanto_bf16_int8.safetensors"
+		],
+		"preload_URLs" : [
+			"https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv-097-ic-lora-pose-control-diffusers.safetensors",
+			"https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv-097-ic-lora-depth-control-diffusers.safetensors",
+			"https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv-097-ic-lora-canny-control-diffusers.safetensors"
+		],
+		"LTXV_config": "models/ltx_video/configs/ltxv-13b-0.9.8-dev.yaml"
+	},
+	"num_inference_steps": 30
+}

defaults/ltxv_distilled.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+	"model":
+	{
+		"name": "LTX Video 0.9.8 Distilled 13B",
+		"architecture" : "ltxv_13B",
+		"description": "LTX Video is a fast model that can be used to generate very long videos (up to 1800 frames !). This distilled version is a very fast version and retains a high level of quality. The LTX Video model expects very long prompts, so don't hesitate to use the Prompt Enhancer.",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_distilled_bf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/LTX_Video/resolve/main/ltxv_0.9.8_13B_distilled_quanto_bf16_int8.safetensors"
+			],
+		"preload_URLs" : "ltxv_13B",
+		"LTXV_config": "models/ltx_video/configs/ltxv-13b-0.9.8-distilled.yaml"
+	},
+	"num_inference_steps": 6
+}

defaults/lucy_edit.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+    "model": {
+        "name": "Wan2.2 Lucy Edit 5B",
+        "architecture": "lucy_edit",
+        "description": "Lucy Edit is a video editing model that performs instruction-guided edits on videos using free-text prompts. It supports a variety of edits, such as clothing & accessory changes, character changes, object insertions, and scene replacements while preserving the motion and composition perfectly.",
+        "URLs": [
+            "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_lucy_edit_mbf16.safetensors",
+            "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_lucy_edit_quanto_mbf16_int8.safetensors",
+            "https://huggingface.co/DeepBeepMeep/Wan2.2/resolve/main/wan2.2_lucy_edit_quanto_mfp16_int8.safetensors"
+        ],
+		"settings_dir": "ti2v_2_2",
+        "group": "wan2_2"
+    },
+	"prompt": "change the clothes to red",
+    "video_length": 81,
+    "guidance_scale": 5,
+    "flow_shift": 5,
+    "num_inference_steps": 30,
+    "resolution": "1280x720"
+}

defaults/lucy_edit_fastwan.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+    "model": {
+        "name": "Wan2.2 Lucy Edit FastWan 5B",
+        "architecture": "lucy_edit",
+        "description": "Lucy Edit is a video editing model that performs instruction-guided edits on videos using free-text prompts. It supports a variety of edits, such as clothing & accessory changes, character changes, object insertions, and scene replacements while preserving the motion and composition perfectly. This is the FastWan version for faster generation.",
+        "URLs": "lucy_edit",
+        "group": "wan2_2",
+		"settings_dir": [ "" ],
+		"loras": "ti2v_2_2_fastwan"
+    },
+	"prompt": "change the clothes to red",
+    "video_length": 81,
+    "guidance_scale": 1,
+    "flow_shift": 3,
+    "num_inference_steps": 5,
+    "resolution": "1280x720"
+}

defaults/lynx.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "model": {
+        "name": "Wan2.1 Lynx 14B",
+        "modules": [
+            [
+                "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_lynx_full_module_14B_bf16.safetensors",
+                "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_lynx_full_module_14B_quanto_bf16_int8.safetensors",
+                "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_lynx_full_module_14B_quanto_fp16_int8.safetensors"
+            ]
+        ],
+        "architecture": "lynx",
+        "description": "The Lynx ControlNet offers State of the Art Identity Preservation. You need to provide a Reference Image which is a close up of a person's face to transfer this person in the Video.",
+        "URLs": "t2v",
+        "preload_URLs": [
+            "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_lynx_full_arc_resampler.safetensors"
+        ]
+    }
+}

defaults/moviigen.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+	"model":
+	{
+		"name": "MoviiGen 1080p 14B",
+		"architecture" : "t2v",
+		"description": "MoviiGen 1.1, a cutting-edge video generation model that excels in cinematic aesthetics and visual quality. Use it to generate videos in 720p or 1080p in the 21:9 ratio.",
+		"URLs": [
+             "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_moviigen1.1_14B_mbf16.safetensors",
+			 "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_moviigen1.1_14B_quanto_mbf16_int8.safetensors",
+			 "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_moviigen1.1_14B_quanto_mfp16_int8.safetensors"
+		],
+		"auto_quantize": true
+	},
+    "resolution": "1280x720",
+    "video_length": 81
+}

defaults/multitalk.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+	"model":
+	{
+		"name": "Multitalk 480p 14B",
+		"architecture" : "multitalk",
+		"modules": [
+					["https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_multitalk_14B_mbf16.safetensors",
+					 "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_multitalk_14B_quanto_mbf16_int8.safetensors",
+					 "https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_multitalk_14B_quanto_mfp16_int8.safetensors"]
+		],
+		"description": "The Multitalk model corresponds to the original Wan image 2 video model combined with the Multitalk module. It lets you have up to two people have a conversation.",
+		"URLs": "i2v",
+		"teacache_coefficients" : [-3.02331670e+02,  2.23948934e+02, -5.25463970e+01,  5.87348440e+00, -2.01973289e-01]
+	}
+}

defaults/multitalk_720p.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+	"model":
+	{
+		"name": "Multitalk 720p 14B",
+		"architecture" : "multitalk",
+		"modules": ["multitalk"],
+		"description": "The Multitalk model corresponds to the original Wan image 2 video 720p model combined with the Multitalk module. It lets you have up to two people have a conversation.",
+		"URLs": "i2v_720p",
+		"teacache_coefficients" : [-114.36346466,   65.26524496,  -18.82220707,    4.91518089,   -0.23412683],
+		"auto_quantize": true
+	},
+    "resolution": "1280x720"
+}

defaults/phantom_1.3B.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+	"model":
+	{
+		"name": "Phantom 1.3B",
+		"architecture" : "phantom_1.3B",
+		"description": "The Phantom model is specialized in transferring people or objects of your choice into a generated Video. It produces very nice results when used at 720p.",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2_1_phantom_1.3B_mbf16.safetensors"
+		]
+	}
+}

defaults/phantom_14B.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+	"model":
+	{
+		"name": "Phantom 14B",
+		"architecture" : "phantom_14B",
+		"description": "The Phantom model is specialized in transferring people or objects of your choice into a generated Video. It produces very nice results when used at 720p.",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_phantom_14B_mbf16.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_phantom_14B_quanto_mbf16_int8.safetensors",
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_phantom_14B_quanto_mfp16_int8.safetensors"
+		]
+	}
+}

defaults/qwen_image_20B.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+    "model": {
+        "name": "Qwen Image 20B",
+        "architecture": "qwen_image_20B",
+        "description": "Qwen Image is a generative model that will generate very high quality images. It is one of the few models capable of generating very long texts in the image.",
+        "URLs": [
+            "https://huggingface.co/DeepBeepMeep/Qwen_image/resolve/main/qwen_image_20B_bf16.safetensors",
+            "https://huggingface.co/DeepBeepMeep/Qwen_image/resolve/main/qwen_image_20B_quanto_bf16_int8.safetensors"
+        ],
+		"xresolutions":  [ ["1328x1328 (1:1)", "1328x1328"],
+						["1664x928 (16:9)", "1664x928"],
+						["928x1664 (9:16)", "928x1664"],
+						["1472x1140 (4:3)", "1472x1140"],
+						["1140x1472 (3:4)", "1140x1472"]],
+		"attention": {"<89" : "sdpa"},
+        "image_outputs": true
+    },
+    "prompt": "draw a hat",
+    "resolution": "1280x720",
+    "batch_size": 1
+}

defaults/qwen_image_edit_20B.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "model": {
+        "name": "Qwen Image Edit 20B",
+        "architecture": "qwen_image_edit_20B",
+        "description": "Qwen Image Edit is a generative model that can generate very high quality images containing long text. Best results will be at 720p. Use it to edit a Subject or combine multiple Subjects.",
+        "URLs": [
+            "https://huggingface.co/DeepBeepMeep/Qwen_image/resolve/main/qwen_image_edit_20B_bf16.safetensors",
+            "https://huggingface.co/DeepBeepMeep/Qwen_image/resolve/main/qwen_image_edit_20B_quanto_bf16_int8.safetensors"
+        ],
+		"preload_URLs": ["https://huggingface.co/DeepBeepMeep/Qwen_image/resolve/main/qwen_image_edit_inpainting.safetensors"],
+        "attention": {
+            "<89": "sdpa"
+        }
+    },
+    "prompt": "add a hat",
+    "resolution": "1280x720",
+    "batch_size": 1
+}

defaults/qwen_image_edit_plus_20B.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+    "model": {
+        "name": "Qwen Image Edit Plus 20B",
+        "architecture": "qwen_image_edit_plus_20B",
+        "description": "Qwen Image Edit Plus is a generative model that can generate very high quality images containing long text. Best results will be at 720p. This model is optimized to combine multiple Subjects & Objects.",
+        "URLs": [
+            "https://huggingface.co/DeepBeepMeep/Qwen_image/resolve/main/qwen_image_edit_plus_20B_quanto_bf16_int8.safetensors"
+        ],
+        "preload_URLs": "qwen_image_edit_20B",
+        "attention": {
+            "<89": "sdpa"
+        }
+    },
+    "prompt": "add a hat",
+    "resolution": "1024x1024",
+    "batch_size": 1
+}

defaults/recam_1.3B.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+	"model":
+	{
+		"name": "ReCamMaster 1.3B",
+		"architecture" : "recam_1.3B",
+		"description": "The Recam Master should, in theory, allow you to replay a video with a different camera movement applied. The model only supports videos that are at least 81 frames long (any frames beyond the 81st will be ignored).",
+		"URLs": [
+			"https://huggingface.co/DeepBeepMeep/Wan2.1/resolve/main/wan2.1_recammaster_1.3B_bf16.safetensors"
+		]
+	}
+}