--find-links https://download.pytorch.org/whl/torch_stable.html

# we have an issue with Torch 2.8:
# it seems to run fine, but it appears to be incompatible with older weights formats,
# presumably because the checkpoint serialization format changed between releases
# python3.10/site-packages/torch/distributed/checkpoint/default_planner.py", line 471, in create_default_local_load_plan
# RuntimeError: Missing key in checkpoint state_dict: lr_scheduler._is_initial.
#
#torch==2.8.0
#torchvision==0.23.0
#torchdata==0.11.0
#torchao==0.12.0
#torchcodec
# flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.8cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
#
# if we revert to torch 2.7, we get another error:
#  Missing key in checkpoint state_dict: optimizer.param_groups.scale_shift_table.decoupled_weight_decay.
#
#torch==2.7.1
#torchvision==0.22.1
#torchdata==0.11.0
#torchao==0.12.0
#torchcodec==0.5.0
#flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.7cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
#
# so in the end, we have to revert to torch 2.6:
#
torch==2.6.0
torchvision==0.21.0
torchdata==0.10.1
torchao==0.9.0

# for torch 2.6, we must use torchcodec 0.2
torchcodec==0.2.1 --index-url=https://download.pytorch.org/whl/cu128
flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
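
# quick sanity check after install (a minimal sketch; 'torch', 'torchvision'
# and 'torchcodec' are the exact distribution names pinned above):
#   python -c "import importlib.metadata as m; print(m.version('torch'), m.version('torchvision'), m.version('torchcodec'))"
# if resuming training still fails with a "Missing key in checkpoint state_dict"
# error like the ones above, the installed torch version probably does not match
# the one that wrote the checkpoint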

# something broke in Transformers > 4.55.4
transformers==4.55.4

# For GPU monitoring of NVIDIA chipsets
pynvml
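
# minimal pynvml usage sketch (nvmlInit / nvmlDeviceGetMemoryInfo are the
# standard NVML bindings; device index 0 is an assumption):
#   import pynvml
#   pynvml.nvmlInit()
#   handle = pynvml.nvmlDeviceGetHandleByIndex(0)
#   print(pynvml.nvmlDeviceGetMemoryInfo(handle).used)
#   pynvml.nvmlShutdown()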

# Pin datasets to 3.6.0 to avoid VideoDecoder issues with 4.0.0
# see https://github.com/huggingface/finetrainers/issues/424#issuecomment-3255342554
datasets==3.6.0

# we are waiting for the next PyPI release
#finetrainers==0.1.0
finetrainers @ git+https://github.com/huggingface/finetrainers.git@main
# temporary fix for pip install bug:
#finetrainers @ git+https://github.com/jbilcke-hf/finetrainers-patches.git@fix_missing_sft_trainer_files

# it is recommended to always use the latest version
diffusers @ git+https://github.com/huggingface/diffusers.git@main

imageio
imageio-ffmpeg
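
# typical usage sketch for reading video frames (imageio's v3 API; the file
# path is a placeholder):
#   import imageio.v3 as iio
#   frames = iio.imread("input.mp4")  # numpy array of shape (n_frames, h, w, 3)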

# for YouTube video download
pytube
pytubefix
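
# usage sketch (pytubefix keeps the classic pytube API; the URL and output
# path are placeholders):
#   from pytubefix import YouTube
#   yt = YouTube("https://www.youtube.com/watch?v=...")
#   yt.streams.get_highest_resolution().download(output_path="videos/")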

# for scene splitting
scenedetect[opencv]
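
# usage sketch with scenedetect's high-level API (detect / ContentDetector
# are part of scenedetect >= 0.6; the input path is a placeholder):
#   from scenedetect import detect, ContentDetector
#   scene_list = detect("input.mp4", ContentDetector())
#   for start, end in scene_list:
#       print(start.get_timecode(), end.get_timecode())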

# for LLaVA video / captioning
pillow
pillow-avif-plugin
polars
einops
open_clip_torch
av==14.1.0
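
# open_clip usage sketch for embedding frames (the model / pretrained tags
# are examples from the open_clip model zoo, not a recommendation):
#   import open_clip
#   model, _, preprocess = open_clip.create_model_and_transforms("ViT-B-32", pretrained="laion2b_s34b_b79k")
#   tokenizer = open_clip.get_tokenizer("ViT-B-32")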

# for some reason LLaVA-NeXT has ceased to work,
# but I think it is due to a breaking change in Transformers
git+https://github.com/LLaVA-VL/LLaVA-NeXT.git

# for our frontend
gradio==5.33.1
gradio_toggle
gradio_modal

# used for the monitor
matplotlib