# Python dependencies, pip requirements-file format.
# One requirement specifier per line; a line starting with "#" is a comment.

# Core AI stack
diffusers==0.34.0
transformers==4.53.1
tokenizers>=0.20.3
accelerate>=1.1.1
tqdm
imageio
imageio-ffmpeg
einops
sentencepiece
open_clip_torch>=2.29.0

# Video & media
moviepy==1.0.3
av
ffmpeg-python
pygame>=2.1.0
sounddevice>=0.4.0
soundfile
mutagen
pyloudnorm
librosa==0.11.0
speechbrain==1.0.3
audio-separator==0.36.1

# UI & interaction
gradio==5.29.0
dashscope
loguru

# Vision & segmentation
opencv-python>=4.12.0.88
segment-anything
rembg[gpu]==2.0.65
onnxruntime-gpu==1.22
decord
timm
# insightface: prebuilt wheel on Windows + Python 3.10, PyPI release on Linux.
# NOTE(review): no insightface entry matches other platform/Python combinations
# (e.g. Windows with Python != 3.10, macOS) - confirm this is intended.
insightface @ https://github.com/deepbeepmeep/insightface/raw/refs/heads/master/wheels/insightface-0.7.3-cp310-cp310-win_amd64.whl ; sys_platform == "win32" and python_version == "3.10"
insightface==0.7.3 ; sys_platform == "linux"
facexlib==0.3.0

# Config & orchestration
omegaconf
hydra-core
easydict
pydantic==2.10.6

# Math & modeling
torchdiffeq>=0.2.5
tensordict>=0.6.1
mmgp==3.6.1
peft==0.15.0
matplotlib

# Utilities
ftfy
piexif
nvidia-ml-py
misaki

# Optional / commented out
# transformers==4.46.3  # for llamallava pre-patch
# rembg==2.0.65  # non-GPU fallback
# huggingface_hub[hf_xet]  # slows down everything
# num2words
# spacy