# ================================================================
# Makefile — AI Story Server (Python 3.11)
# ================================================================
# Common usage:
# make help
# make install # CPU-friendly install
# make install-cuda # build llama-cpp-python with CUDA/cuBLAS offload
# make precache # download models + compute voice latents once
# make run # run the Gradio app (prefers GPU if available)
# make clean # clean caches (keeps venv)
# make deepclean # remove venv + caches
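# make run PORT=8080 # any of the variables below can be overridden per invocation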
# ---------------------------------------------------------------
# ---- Configurable vars ----
PYTHON ?= python3.11
VENV ?= .venv
PY := $(VENV)/bin/python
PIP := $(VENV)/bin/pip
APP ?= app.py
PORT ?= 7860
# Core runtime deps (CPU-safe). Torch comes via transitive deps where needed;
# you may pin torch externally if required by your environment.
# REQS_NO_LLAMA is shared by install-no-llama and install-cuda.
REQS_NO_LLAMA = \
    "numpy<2" \
    "gradio==4.27.0" \
    "python-dotenv" \
    "huggingface_hub" \
    "ffmpeg-python" \
    "nltk" \
    "emoji" \
    "langid" \
    "noisereduce" \
    "TTS"
REQS = $(REQS_NO_LLAMA) "llama-cpp-python>=0.2.90"
# Dev tools (optional)
DEV_REQS = \
"ruff" \
"black" \
"pip-tools"
# ================================================================
# Meta
# ================================================================
.PHONY: help venv install install-no-llama install-cuda install-dev \
        precache run run-gpu check-ffmpeg check-python lint format \
        freeze deps-update clean deepclean
help:
@echo "Targets:"
@echo " install - Create venv (Python 3.11) and install CPU-safe deps"
@echo " install-cuda - Build llama-cpp-python with CUDA/cuBLAS offload + install deps"
@echo " install-dev - Install dev tools (ruff, black, pip-tools)"
@echo " precache - Download models & compute voice latents once (no UI)"
@echo " run - Run Gradio app on PORT=$(PORT) (prefers native GPU if present)"
@echo " run-gpu - Run app forcing CUDA_VISIBLE_DEVICES (default 0)"
@echo " lint - Run ruff"
@echo " format - Run black and ruff --fix"
@echo " freeze - Write requirements.txt from current venv"
@echo " deps-update - Upgrade runtime deps"
@echo " check-ffmpeg - Verify ffmpeg is installed"
@echo " check-python - Verify Python 3.11 is available"
@echo " clean - Clear caches/artifacts (keeps venv)"
@echo " deepclean - Remove venv and caches"
# ================================================================
# Environment / setup
# ================================================================
check-python:
	@command -v $(PYTHON) >/dev/null 2>&1 || \
		{ echo "ERROR: $(PYTHON) not found. Please install Python 3.11 and retry."; exit 1; }
	@echo "OK: $(PYTHON) found."
venv: check-python
	$(PYTHON) -m venv $(VENV)
	@echo "Virtual environment created at $(VENV)"
install-no-llama: venv
	$(PIP) install --upgrade pip setuptools wheel
	$(PIP) install $(REQS_NO_LLAMA)
install: venv
	$(PIP) install --upgrade pip setuptools wheel
# CPU-friendly install of all deps, including llama-cpp-python
	$(PIP) install $(REQS)
# CUDA build of llama-cpp-python (requires the CUDA toolkit and a C/C++ compiler).
# Note: recent llama.cpp releases use -DGGML_CUDA=on; the old -DLLAMA_CUBLAS=on
# flag is deprecated.
install-cuda: venv
	$(PIP) install --upgrade pip setuptools wheel
	@echo "Building llama-cpp-python with CUDA support…"
	CMAKE_ARGS="-DGGML_CUDA=on" $(PIP) install --no-binary=llama-cpp-python --force-reinstall "llama-cpp-python>=0.2.90"
# Install the rest of the deps (excluding llama-cpp-python, which we just built)
	$(MAKE) install-no-llama
	@echo "CUDA install complete."
install-dev: venv
	$(PIP) install --upgrade pip
	$(PIP) install $(DEV_REQS)
# ================================================================
# Utility checks
# ================================================================
check-ffmpeg:
	@command -v ffmpeg >/dev/null 2>&1 || { echo "ERROR: ffmpeg not found. Install ffmpeg and retry."; exit 1; }
	@ffmpeg -version | head -n 1
# ================================================================
# Workflow targets
# ================================================================
# Pre-download model assets and compute voice latents (calls your app's functions)
precache: install check-ffmpeg
	$(PY) -c "from app import precache_assets, init_models_and_latents; precache_assets(); init_models_and_latents(); print('Precache complete.')"
run: install
@echo "Starting app on port $(PORT)…"
PORT=$(PORT) $(PY) $(APP)
# Run, preferring a specific GPU (default GPU 0). App itself auto-detects CUDA.
run-gpu: install
@echo "Starting app with CUDA_VISIBLE_DEVICES=$${CUDA_VISIBLE_DEVICES:-0} on port $(PORT)…"
CUDA_VISIBLE_DEVICES=$${CUDA_VISIBLE_DEVICES:-0} PORT=$(PORT) $(PY) $(APP)
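# e.g. CUDA_VISIBLE_DEVICES=1 make run-gpu PORT=7861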
# Lint / format
lint: install-dev
	$(VENV)/bin/ruff check .
format: install-dev
	$(VENV)/bin/black .
	$(VENV)/bin/ruff check --fix .
# Freeze dependency snapshot
freeze:
@echo "Writing requirements.txt from current venv…"
$(VENV)/bin/pip freeze > requirements.txt
@echo "requirements.txt updated."
# Upgrade runtime deps (keeps the numpy<2 guard via $(REQS))
deps-update: venv
	$(PIP) install --upgrade pip
	$(PIP) install --upgrade $(REQS)
# ================================================================
# Cleanup
# ================================================================
clean:
@echo "Cleaning caches…"
@rm -rf __pycache__ */__pycache__
@rm -rf .pytest_cache .ruff_cache
@rm -rf voices/*.tmp
@rm -rf ~/.cache/huggingface/hub/tmp
@rm -rf ~/.cache/huggingface/transformers
@rm -rf ~/.cache/torch
@rm -rf ~/.cache/pip
@rm -rf ~/.local/share/tts/tmp
@echo "Done."
deepclean: clean
@echo "Removing venv and model caches…"
@rm -rf $(VENV)
@rm -rf ~/.local/share/tts
@rm -rf voices
@echo "Done."