# Spaces: ruslanmv / ai-story-server-cpu
# Running on Zero
# File size: 6,070 Bytes

# ================================================================
# Makefile — AI Story Server (Python 3.11)
# ================================================================
# Common usage:
#   make help
#   make install           # CPU-friendly install
#   make install-cuda      # build llama-cpp-python with CUDA/cuBLAS offload
#   make precache          # download models + compute voice latents once
#   make run               # run the Gradio app (prefers GPU if available)
#   make clean             # clean caches (keeps venv)
#   make deepclean         # remove venv + caches
# ---------------------------------------------------------------

# ---- User-tunable settings (override on the command line, e.g. `make PORT=8000 run`) ----
# Interpreter used to create the virtual environment.
PYTHON ?= python3.11
# Directory that holds the virtual environment.
VENV ?= .venv
# Script started by the run targets, and the port passed to it via the
# PORT environment variable.
APP ?= app.py
PORT ?= 7860

# ---- Derived paths: executables inside the virtual environment ----
PY := ${VENV}/bin/python
PIP := ${VENV}/bin/pip

# Core runtime dependencies (CPU-safe). Torch is not listed here: it comes
# in as a transitive dependency where needed, and may be pinned externally
# if your environment requires it.
# Every entry is double-quoted so that specifiers containing "<" or ">"
# reach pip intact instead of being parsed by the shell as redirections.
REQS := "numpy<2" "gradio==4.27.0" "python-dotenv" "huggingface_hub"
REQS += "ffmpeg-python" "nltk" "emoji" "langid" "noisereduce" "TTS"
REQS += "llama-cpp-python>=0.2.90"

# Optional developer tooling: linter, formatter and pip-tools.
DEV_REQS := "ruff" "black" "pip-tools"

# ================================================================
# Meta
# ================================================================
# None of these targets produces a file of the same name. Declaring them
# phony keeps a stray file (for example one called "clean") from masking
# the target. Grouped by purpose; multiple .PHONY lines accumulate.
.PHONY: help
.PHONY: check-python check-ffmpeg
.PHONY: venv install install-no-llama install-cuda install-dev
.PHONY: precache run run-gpu
.PHONY: lint format freeze deps-update
.PHONY: clean deepclean

# Print a one-line summary of every target declared in this Makefile.
# Keep this list in sync with the .PHONY declaration.
help:
	@echo "Targets:"
	@echo "  venv             - Create the virtual environment only"
	@echo "  install          - Create venv (Python 3.11) and install CPU-safe deps"
	@echo "  install-no-llama - Install runtime deps except llama-cpp-python"
	@echo "  install-cuda     - Build llama-cpp-python with CUDA/cuBLAS offload + install deps"
	@echo "  install-dev      - Install dev tools (ruff, black, pip-tools)"
	@echo "  precache         - Download models & compute voice latents once (no UI)"
	@echo "  run              - Run Gradio app on PORT=$(PORT) (prefers native GPU if present)"
	@echo "  run-gpu          - Run app forcing CUDA_VISIBLE_DEVICES (default 0)"
	@echo "  lint             - Run ruff"
	@echo "  format           - Run black and ruff --fix"
	@echo "  freeze           - Write requirements.txt from current venv"
	@echo "  deps-update      - Upgrade runtime deps"
	@echo "  check-ffmpeg     - Verify ffmpeg is installed"
	@echo "  check-python     - Verify Python 3.11 is available"
	@echo "  clean            - Clear caches/artifacts (keeps venv)"
	@echo "  deepclean        - Remove venv and caches"

# ================================================================
# Environment / setup
# ================================================================
# Fail early with a readable message when the interpreter named by PYTHON
# is not on PATH; otherwise confirm that it was found.
check-python:
	@if ! command -v ${PYTHON} >/dev/null 2>&1; then \
		echo "ERROR: ${PYTHON} not found. Please install Python 3.11 and retry."; \
		exit 1; \
	fi
	@echo "OK: ${PYTHON} found."

# Create the virtual environment in VENV using the PYTHON interpreter.
# The target is phony, so this recipe runs on every invocation.
venv: check-python
	${PYTHON} -m venv ${VENV}
	@echo "Virtual environment created at ${VENV}"

# Install every runtime dependency except llama-cpp-python. `install-cuda`
# invokes this after building llama-cpp-python itself.
# The package list is derived from REQS so the two cannot drift apart; the
# two patterns cover both the unquoted and the quoted spelling of the entry.
install-no-llama: venv
	$(PIP) install --upgrade pip setuptools wheel
	$(PIP) install $(filter-out llama-cpp-python% "llama-cpp-python%,$(REQS))

# CPU-friendly install of all runtime deps, including llama-cpp-python.
# (Kept as a Make comment: a tab-indented "#" line inside the recipe would
# be handed to the shell and echoed on every run.)
install: venv
	$(PIP) install --upgrade pip setuptools wheel
	$(PIP) install $(REQS)

# CUDA build of llama-cpp-python (requires the CUDA toolkit and a compiler).
#
# LLAMA_CUDA_CMAKE_ARGS is passed to the llama-cpp-python build through the
# CMAKE_ARGS environment variable. Releases in the ">=0.2.90" range select
# the CUDA backend with -DGGML_CUDA=on; the older -DLLAMA_CUBLAS=on switch
# has been retired upstream. Override the variable to add further flags.
LLAMA_CUDA_CMAKE_ARGS ?= -DGGML_CUDA=on

# Steps:
#   1. Upgrade the packaging tools.
#   2. Build llama-cpp-python from source with CUDA enabled.
#        --no-binary is limited to llama-cpp-python so that its dependencies
#          are still installed from wheels rather than compiled.
#        --no-cache-dir stops pip from reusing a wheel cached by an earlier
#          CPU-only build.
#        The requirement specifier is taken from REQS so the version bound
#          stays in one place.
#   3. Install the remaining runtime deps via `install-no-llama`.
install-cuda: venv
	$(PIP) install --upgrade pip setuptools wheel
	@echo "Building llama-cpp-python with CUDA/cuBLAS…"
	CMAKE_ARGS="$(LLAMA_CUDA_CMAKE_ARGS)" \
	$(PIP) install --no-cache-dir --no-binary=llama-cpp-python --force-reinstall \
		$(filter llama-cpp-python% "llama-cpp-python%,$(REQS))
	$(MAKE) install-no-llama
	@echo "CUDA install complete."

# Install the optional developer tools listed in DEV_REQS into the venv.
install-dev: venv
	${PIP} install --upgrade pip
	${PIP} install ${DEV_REQS}

# ================================================================
# Utility checks
# ================================================================
# Abort with an error when ffmpeg is not on PATH; otherwise print the first
# line of its version banner.
check-ffmpeg:
	@if ! command -v ffmpeg >/dev/null 2>&1; then \
		echo "ERROR: ffmpeg not found. Install ffmpeg and retry."; \
		exit 1; \
	fi
	@ffmpeg -version | head -n 1

# ================================================================
# Workflow targets
# ================================================================
# Pre-download model assets and compute voice latents by calling the app's
# own precache_assets() and init_models_and_latents() functions, without
# starting the UI.
#
# The Python snippet is passed with -c on a single recipe line. Make runs
# each recipe line in its own shell, so a here-document spread over several
# recipe lines never reaches the interpreter: Python would read empty stdin
# and the Python statements would be executed as shell commands.
precache: install check-ffmpeg
	$(PY) -c 'from app import precache_assets, init_models_and_latents; precache_assets(); init_models_and_latents(); print("Precache complete.")'

# Start the app with the venv interpreter. The chosen port is handed to the
# process through the PORT environment variable.
run: install
	@echo "Starting app on port ${PORT}…"
	PORT=${PORT} ${PY} ${APP}

# Same as `run`, but with CUDA_VISIBLE_DEVICES set for the process. A value
# already present in the caller's environment is kept; otherwise it falls
# back to 0. ($$ passes a literal $ to the shell so the shell, not Make,
# resolves the variable.)
run-gpu: install
	@echo "Starting app with CUDA_VISIBLE_DEVICES=$${CUDA_VISIBLE_DEVICES:-0} on port ${PORT}…"
	CUDA_VISIBLE_DEVICES=$${CUDA_VISIBLE_DEVICES:-0} PORT=${PORT} ${PY} ${APP}

# Static analysis: run the venv's ruff over the current directory.
lint: install-dev
	${VENV}/bin/ruff check .

# Reformat with black, then let ruff apply its automatic fixes.
format: install-dev
	${VENV}/bin/black .
	${VENV}/bin/ruff check --fix .

# Freeze dependency snapshot: write the venv's installed packages to
# requirements.txt.
# The output goes to a temporary file first and is moved into place only
# when `pip freeze` succeeds. Redirecting straight into requirements.txt
# would truncate the existing file even when pip fails (for example when
# the venv does not exist yet).
freeze:
	@echo "Writing requirements.txt from current venv…"
	$(PIP) freeze > requirements.txt.tmp \
		&& mv -f requirements.txt.tmp requirements.txt \
		|| { rm -f requirements.txt.tmp; exit 1; }
	@echo "requirements.txt updated."

# Upgrade runtime deps to the newest versions allowed by REQS (so the
# numpy<2 guard and the gradio pin stay in force). The list comes from REQS
# rather than a hand-copied duplicate, so new dependencies are picked up
# automatically.
deps-update: venv
	$(PIP) install --upgrade pip
	$(PIP) install --upgrade $(REQS)

# ================================================================
# Cleanup
# ================================================================
# Remove caches and temporary artifacts; the virtual environment is kept.
# Besides project-local caches this also deletes per-user caches under the
# home directory (Hugging Face, torch, pip, TTS temp files).
clean:
	@echo "Cleaning caches…"
	@rm -rf __pycache__ */__pycache__ && \
		rm -rf .pytest_cache .ruff_cache && \
		rm -rf voices/*.tmp
	@rm -rf ~/.cache/huggingface/hub/tmp && \
		rm -rf ~/.cache/huggingface/transformers && \
		rm -rf ~/.cache/torch && \
		rm -rf ~/.cache/pip && \
		rm -rf ~/.local/share/tts/tmp
	@echo "Done."

# Everything `clean` does, plus removal of the virtual environment, the
# per-user TTS data directory and the local voices directory.
deepclean: clean
	@echo "Removing venv and model caches…"
	@rm -rf ${VENV} && \
		rm -rf ~/.local/share/tts && \
		rm -rf voices
	@echo "Done."