# ================================================================
# Makefile — AI Story Server (Python 3.11)
# ================================================================
# Common usage:
# make help
# make install # CPU-friendly install
# make install-cuda # build llama-cpp-python with CUDA/cuBLAS offload
# make precache # download models + compute voice latents once
# make run # run the Gradio app (prefers GPU if available)
# make clean # clean caches (keeps venv)
# make deepclean # remove venv + caches
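# make run PORT=8080 # any of the variables below can be overridden per invocation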
# ---------------------------------------------------------------
# ---- Configurable vars ----
PYTHON ?= python3.11
VENV ?= .venv
PY := $(VENV)/bin/python
PIP := $(VENV)/bin/pip
APP ?= app.py
PORT ?= 7860
# Core runtime deps (CPU-safe). Torch comes via transitive deps where needed;
# you may pin torch externally if required by your environment.
# REQS_NO_LLAMA is shared by install-no-llama and install-cuda.
REQS_NO_LLAMA = \
    "numpy<2" \
    "gradio==4.27.0" \
    "python-dotenv" \
    "huggingface_hub" \
    "ffmpeg-python" \
    "nltk" \
    "emoji" \
    "langid" \
    "noisereduce" \
    "TTS"
REQS = $(REQS_NO_LLAMA) "llama-cpp-python>=0.2.90"
# Dev tools (optional)
DEV_REQS = \
"ruff" \
"black" \
"pip-tools"
# ================================================================
# Meta
# ================================================================
.PHONY: help venv install install-no-llama install-cuda install-dev \
        precache run run-gpu check-ffmpeg check-python lint format \
        freeze deps-update clean deepclean
help:
@echo "Targets:"
@echo " install - Create venv (Python 3.11) and install CPU-safe deps"
@echo " install-cuda - Build llama-cpp-python with CUDA/cuBLAS offload + install deps"
@echo " install-dev - Install dev tools (ruff, black, pip-tools)"
@echo " precache - Download models & compute voice latents once (no UI)"
@echo " run - Run Gradio app on PORT=$(PORT) (prefers native GPU if present)"
@echo " run-gpu - Run app forcing CUDA_VISIBLE_DEVICES (default 0)"
@echo " lint - Run ruff"
@echo " format - Run black and ruff --fix"
@echo " freeze - Write requirements.txt from current venv"
@echo " deps-update - Upgrade runtime deps"
@echo " check-ffmpeg - Verify ffmpeg is installed"
@echo " check-python - Verify Python 3.11 is available"
@echo " clean - Clear caches/artifacts (keeps venv)"
@echo " deepclean - Remove venv and caches"
# ================================================================
# Environment / setup
# ================================================================
check-python:
	@command -v $(PYTHON) >/dev/null 2>&1 || \
		{ echo "ERROR: $(PYTHON) not found. Please install Python 3.11 and retry."; exit 1; }
	@echo "OK: $(PYTHON) found."
venv: check-python
	$(PYTHON) -m venv $(VENV)
	@echo "Virtual environment created at $(VENV)"
install-no-llama: venv
	$(PIP) install --upgrade pip setuptools wheel
	$(PIP) install $(REQS_NO_LLAMA)
install: venv
	$(PIP) install --upgrade pip setuptools wheel
# CPU-friendly install of all deps, including llama-cpp-python
	$(PIP) install $(REQS)
# CUDA build of llama-cpp-python (requires the CUDA toolkit and a C/C++ compiler).
# Note: recent llama.cpp releases use -DGGML_CUDA=on; the old -DLLAMA_CUBLAS=on
# flag is deprecated.
install-cuda: venv
	$(PIP) install --upgrade pip setuptools wheel
	@echo "Building llama-cpp-python with CUDA support…"
	CMAKE_ARGS="-DGGML_CUDA=on" $(PIP) install --no-binary=llama-cpp-python --force-reinstall "llama-cpp-python>=0.2.90"
# Install the rest of the deps (excluding llama-cpp-python, which we just built)
	$(MAKE) install-no-llama
	@echo "CUDA install complete."
install-dev: venv
	$(PIP) install --upgrade pip
	$(PIP) install $(DEV_REQS)
# ================================================================
# Utility checks
# ================================================================
check-ffmpeg:
	@command -v ffmpeg >/dev/null 2>&1 || { echo "ERROR: ffmpeg not found. Install ffmpeg and retry."; exit 1; }
	@ffmpeg -version | head -n 1
# ================================================================
# Workflow targets
# ================================================================
# Pre-download model assets and compute voice latents (calls your app's functions)
precache: install check-ffmpeg
	$(PY) -c "from app import precache_assets, init_models_and_latents; precache_assets(); init_models_and_latents(); print('Precache complete.')"
run: install
@echo "Starting app on port $(PORT)…"
PORT=$(PORT) $(PY) $(APP)
# Run, preferring a specific GPU (default GPU 0). App itself auto-detects CUDA.
run-gpu: install
@echo "Starting app with CUDA_VISIBLE_DEVICES=$${CUDA_VISIBLE_DEVICES:-0} on port $(PORT)…"
CUDA_VISIBLE_DEVICES=$${CUDA_VISIBLE_DEVICES:-0} PORT=$(PORT) $(PY) $(APP)
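# e.g. CUDA_VISIBLE_DEVICES=1 make run-gpu PORT=7861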
# Lint / format
lint: install-dev
	$(VENV)/bin/ruff check .
format: install-dev
	$(VENV)/bin/black .
	$(VENV)/bin/ruff check --fix .
# Freeze dependency snapshot
freeze:
@echo "Writing requirements.txt from current venv…"
$(VENV)/bin/pip freeze > requirements.txt
@echo "requirements.txt updated."
# Upgrade runtime deps (keeps the numpy<2 guard via $(REQS))
deps-update: venv
	$(PIP) install --upgrade pip
	$(PIP) install --upgrade $(REQS)
# ================================================================
# Cleanup
# ================================================================
clean:
@echo "Cleaning caches…"
@rm -rf __pycache__ */__pycache__
@rm -rf .pytest_cache .ruff_cache
@rm -rf voices/*.tmp
@rm -rf ~/.cache/huggingface/hub/tmp
@rm -rf ~/.cache/huggingface/transformers
@rm -rf ~/.cache/torch
@rm -rf ~/.cache/pip
@rm -rf ~/.local/share/tts/tmp
@echo "Done."
deepclean: clean
@echo "Removing venv and model caches…"
@rm -rf $(VENV)
@rm -rf ~/.local/share/tts
@rm -rf voices
@echo "Done."