Spaces:
Running
Running
ming
committed on
Commit
·
3a7a125
1
Parent(s):
e6b70e4
Add Hugging Face Spaces configuration and deployment files
Browse files
- Dockerfile +33 -8
- HUGGINGFACE_DEPLOYMENT.md +220 -0
- README.md +110 -413
- env.hf +25 -0
Dockerfile
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
-
#
|
| 2 |
-
FROM python:3.
|
| 3 |
|
| 4 |
# Set environment variables
|
| 5 |
ENV PYTHONDONTWRITEBYTECODE=1 \
|
|
@@ -14,8 +14,13 @@ RUN apt-get update \
|
|
| 14 |
&& apt-get install -y --no-install-recommends \
|
| 15 |
curl \
|
| 16 |
ca-certificates \
|
|
|
|
|
|
|
| 17 |
&& rm -rf /var/lib/apt/lists/*
|
| 18 |
|
|
|
|
|
|
|
|
|
|
| 19 |
# Copy requirements first for better caching
|
| 20 |
COPY requirements.txt .
|
| 21 |
|
|
@@ -30,14 +35,34 @@ COPY pytest.ini .
|
|
| 30 |
# Create non-root user for security
|
| 31 |
RUN groupadd -r appuser && useradd -r -g appuser appuser \
|
| 32 |
&& chown -R appuser:appuser /app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
USER appuser
|
| 34 |
|
| 35 |
-
# Expose port
|
| 36 |
-
EXPOSE
|
| 37 |
|
| 38 |
# Health check
|
| 39 |
-
HEALTHCHECK --interval=30s --timeout=30s --start-period=
|
| 40 |
-
CMD curl -f http://localhost:
|
| 41 |
|
| 42 |
-
# Run the
|
| 43 |
-
CMD ["
|
|
|
|
| 1 |
+
# Hugging Face Spaces compatible Dockerfile
|
| 2 |
+
FROM python:3.9-slim
|
| 3 |
|
| 4 |
# Set environment variables
|
| 5 |
ENV PYTHONDONTWRITEBYTECODE=1 \
|
|
|
|
| 14 |
&& apt-get install -y --no-install-recommends \
|
| 15 |
curl \
|
| 16 |
ca-certificates \
|
| 17 |
+
wget \
|
| 18 |
+
git \
|
| 19 |
&& rm -rf /var/lib/apt/lists/*
|
| 20 |
|
| 21 |
+
# Install Ollama
|
| 22 |
+
RUN curl -fsSL https://ollama.ai/install.sh | sh
|
| 23 |
+
|
| 24 |
# Copy requirements first for better caching
|
| 25 |
COPY requirements.txt .
|
| 26 |
|
|
|
|
| 35 |
# Create non-root user for security
|
| 36 |
RUN groupadd -r appuser && useradd -r -g appuser appuser \
|
| 37 |
&& chown -R appuser:appuser /app
|
| 38 |
+
|
| 39 |
+
# Create startup script
|
| 40 |
+
RUN echo '#!/bin/bash\n\
|
| 41 |
+
# Start Ollama in background\n\
|
| 42 |
+
ollama serve &\n\
|
| 43 |
+
\n\
|
| 44 |
+
# Wait for Ollama to be ready\n\
|
| 45 |
+
echo "Waiting for Ollama to start..."\n\
|
| 46 |
+
sleep 10\n\
|
| 47 |
+
\n\
|
| 48 |
+
# Pull the model (this will take a few minutes on first run)\n\
|
| 49 |
+
echo "Pulling model..."\n\
|
| 50 |
+
ollama pull mistral:7b\n\
|
| 51 |
+
\n\
|
| 52 |
+
# Start the FastAPI app\n\
|
| 53 |
+
echo "Starting FastAPI app..."\n\
|
| 54 |
+
exec uvicorn app.main:app --host 0.0.0.0 --port 7860' > /app/start.sh \
|
| 55 |
+
&& chmod +x /app/start.sh \
|
| 56 |
+
&& chown appuser:appuser /app/start.sh
|
| 57 |
+
|
| 58 |
USER appuser
|
| 59 |
|
| 60 |
+
# Expose port (Hugging Face Spaces uses port 7860)
|
| 61 |
+
EXPOSE 7860
|
| 62 |
|
| 63 |
# Health check
|
| 64 |
+
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
|
| 65 |
+
CMD curl -f http://localhost:7860/health || exit 1
|
| 66 |
|
| 67 |
+
# Run the startup script
|
| 68 |
+
CMD ["/app/start.sh"]
|
HUGGINGFACE_DEPLOYMENT.md
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# π Hugging Face Spaces Deployment Guide
|
| 2 |
+
|
| 3 |
+
This guide will help you deploy your SummarizerApp to Hugging Face Spaces for **FREE**!
|
| 4 |
+
|
| 5 |
+
## 🎯 Why Hugging Face Spaces?
|
| 6 |
+
|
| 7 |
+
- ✅ **100% Free** - No credit card required
|
| 8 |
+
- ✅ **16GB RAM** - Perfect for Mistral 7B model
|
| 9 |
+
- ✅ **Docker Support** - Easy deployment
|
| 10 |
+
- ✅ **Auto HTTPS** - Secure connections
|
| 11 |
+
- ✅ **Built for AI** - Designed for ML/AI applications
|
| 12 |
+
- ✅ **GitHub Integration** - Automatic deployments
|
| 13 |
+
|
| 14 |
+
## π Prerequisites
|
| 15 |
+
|
| 16 |
+
1. **Hugging Face Account** - Sign up at [huggingface.co](https://huggingface.co)
|
| 17 |
+
2. **GitHub Repository** - Your code should be on GitHub
|
| 18 |
+
3. **Docker Knowledge** - Basic understanding helpful but not required
|
| 19 |
+
|
| 20 |
+
## 🛠️ Step-by-Step Deployment
|
| 21 |
+
|
| 22 |
+
### Step 1: Create a New Space
|
| 23 |
+
|
| 24 |
+
1. Go to [huggingface.co/spaces](https://huggingface.co/spaces)
|
| 25 |
+
2. Click **"Create new Space"**
|
| 26 |
+
3. Fill in the details:
|
| 27 |
+
- **Space name**: `summarizer-app` (or your preferred name)
|
| 28 |
+
- **License**: MIT
|
| 29 |
+
- **SDK**: **Docker** (important!)
|
| 30 |
+
- **Hardware**: CPU (free tier)
|
| 31 |
+
- **Visibility**: Public or Private
|
| 32 |
+
|
| 33 |
+
### Step 2: Configure Your Repository
|
| 34 |
+
|
| 35 |
+
You need to make these changes to your GitHub repository:
|
| 36 |
+
|
| 37 |
+
#### A. Rename Files
|
| 38 |
+
```bash
|
| 39 |
+
# Rename the Hugging Face specific files
|
| 40 |
+
mv Dockerfile.hf Dockerfile
|
| 41 |
+
mv README_HF.md README.md
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
#### B. Update Dockerfile (if needed)
|
| 45 |
+
The `Dockerfile.hf` is already optimized for Hugging Face Spaces, but verify it uses:
|
| 46 |
+
- Port `7860` (required by HF Spaces)
|
| 47 |
+
- `mistral:7b` model (smaller, faster)
|
| 48 |
+
- Proper startup script
|
| 49 |
+
|
| 50 |
+
#### C. Push Changes to GitHub
|
| 51 |
+
```bash
|
| 52 |
+
git add .
|
| 53 |
+
git commit -m "Add Hugging Face Spaces configuration"
|
| 54 |
+
git push origin main
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
### Step 3: Connect GitHub to Hugging Face
|
| 58 |
+
|
| 59 |
+
1. In your Hugging Face Space settings
|
| 60 |
+
2. Go to **"Repository"** tab
|
| 61 |
+
3. Click **"Connect to GitHub"**
|
| 62 |
+
4. Select your `SummarizerApp` repository
|
| 63 |
+
5. Choose the `main` branch
|
| 64 |
+
|
| 65 |
+
### Step 4: Configure Environment Variables
|
| 66 |
+
|
| 67 |
+
In your Hugging Face Space settings:
|
| 68 |
+
|
| 69 |
+
1. Go to **"Settings"** tab
|
| 70 |
+
2. Scroll to **"Environment Variables"**
|
| 71 |
+
3. Add these variables:
|
| 72 |
+
|
| 73 |
+
```
|
| 74 |
+
OLLAMA_MODEL=mistral:7b
|
| 75 |
+
OLLAMA_HOST=http://localhost:11434
|
| 76 |
+
OLLAMA_TIMEOUT=30
|
| 77 |
+
SERVER_HOST=0.0.0.0
|
| 78 |
+
SERVER_PORT=7860
|
| 79 |
+
LOG_LEVEL=INFO
|
| 80 |
+
MAX_TEXT_LENGTH=32000
|
| 81 |
+
MAX_TOKENS_DEFAULT=256
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
### Step 5: Deploy
|
| 85 |
+
|
| 86 |
+
1. Go to the **"Deploy"** tab in your Space
|
| 87 |
+
2. Click **"Deploy"**
|
| 88 |
+
3. Wait for the build to complete (5-10 minutes)
|
| 89 |
+
|
| 90 |
+
**What happens during deployment:**
|
| 91 |
+
- Docker image builds
|
| 92 |
+
- Ollama installs
|
| 93 |
+
- Mistral 7B model downloads (~4GB)
|
| 94 |
+
- FastAPI app starts
|
| 95 |
+
- Health checks run
|
| 96 |
+
|
| 97 |
+
## π Verification
|
| 98 |
+
|
| 99 |
+
### Check Your Deployment
|
| 100 |
+
|
| 101 |
+
1. **Visit your Space URL**: `https://your-username-summarizer-app.hf.space`
|
| 102 |
+
2. **Test Health Endpoint**: `https://your-username-summarizer-app.hf.space/health`
|
| 103 |
+
3. **View API Docs**: `https://your-username-summarizer-app.hf.space/docs`
|
| 104 |
+
|
| 105 |
+
### Test the API
|
| 106 |
+
|
| 107 |
+
```bash
|
| 108 |
+
# Test summarization
|
| 109 |
+
curl -X POST "https://your-username-summarizer-app.hf.space/api/v1/summarize" \
|
| 110 |
+
-H "Content-Type: application/json" \
|
| 111 |
+
-d '{
|
| 112 |
+
"text": "This is a long article about artificial intelligence and machine learning. It discusses various topics including natural language processing, computer vision, and deep learning techniques. The article covers the history of AI, current applications, and future prospects.",
|
| 113 |
+
"max_tokens": 100
|
| 114 |
+
}'
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
## 🚨 Troubleshooting
|
| 118 |
+
|
| 119 |
+
### Common Issues
|
| 120 |
+
|
| 121 |
+
#### 1. Build Fails
|
| 122 |
+
- **Check Dockerfile**: Ensure it's named `Dockerfile` (not `Dockerfile.hf`)
|
| 123 |
+
- **Check README**: Ensure it has the proper frontmatter
|
| 124 |
+
- **Check logs**: View build logs in Hugging Face interface
|
| 125 |
+
|
| 126 |
+
#### 2. Model Not Loading
|
| 127 |
+
- **Wait longer**: Model download takes 5-10 minutes on first run
|
| 128 |
+
- **Check logs**: Look for Ollama-related errors
|
| 129 |
+
- **Verify model name**: Ensure `mistral:7b` is correct
|
| 130 |
+
|
| 131 |
+
#### 3. Out of Memory
|
| 132 |
+
- **Use smaller model**: Switch to `mistral:7b` (already configured)
|
| 133 |
+
- **Check hardware**: Ensure you're using CPU tier, not GPU
|
| 134 |
+
|
| 135 |
+
#### 4. Port Issues
|
| 136 |
+
- **Verify port**: Must use port `7860` for Hugging Face Spaces
|
| 137 |
+
- **Check SERVER_PORT**: Environment variable should be `7860`
|
| 138 |
+
|
| 139 |
+
### Debugging Commands
|
| 140 |
+
|
| 141 |
+
If you need to debug locally with HF configuration:
|
| 142 |
+
|
| 143 |
+
```bash
|
| 144 |
+
# Test with HF settings
|
| 145 |
+
cp env.hf .env
|
| 146 |
+
docker build -f Dockerfile.hf -t summarizer-hf .
|
| 147 |
+
docker run -p 7860:7860 summarizer-hf
|
| 148 |
+
```
|
| 149 |
+
|
| 150 |
+
## π Performance Expectations
|
| 151 |
+
|
| 152 |
+
### Startup Time
|
| 153 |
+
- **First deployment**: 8-12 minutes (includes model download)
|
| 154 |
+
- **Subsequent deployments**: 3-5 minutes
|
| 155 |
+
- **Cold start**: 30-60 seconds
|
| 156 |
+
|
| 157 |
+
### Runtime Performance
|
| 158 |
+
- **Memory usage**: ~7-8GB RAM
|
| 159 |
+
- **Response time**: 2-5 seconds per request
|
| 160 |
+
- **Concurrent requests**: 1-2 (CPU limitation)
|
| 161 |
+
|
| 162 |
+
### Limitations
|
| 163 |
+
- **No GPU**: CPU-only inference
|
| 164 |
+
- **Shared resources**: May be slower during peak usage
|
| 165 |
+
- **Sleep mode**: Space may sleep after 48 hours of inactivity
|
| 166 |
+
|
| 167 |
+
## 🔧 Customization Options
|
| 168 |
+
|
| 169 |
+
### Use Different Model
|
| 170 |
+
Edit environment variables:
|
| 171 |
+
```
|
| 172 |
+
OLLAMA_MODEL=llama3.1:7b # Smaller than 8b
|
| 173 |
+
OLLAMA_MODEL=mistral:7b # Default, fastest
|
| 174 |
+
```
|
| 175 |
+
|
| 176 |
+
### Enable Security Features
|
| 177 |
+
```
|
| 178 |
+
API_KEY_ENABLED=true
|
| 179 |
+
API_KEY=your-secret-key
|
| 180 |
+
RATE_LIMIT_ENABLED=true
|
| 181 |
+
```
|
| 182 |
+
|
| 183 |
+
### Custom Domain
|
| 184 |
+
1. Go to Space settings
|
| 185 |
+
2. Add custom domain in "Settings" tab
|
| 186 |
+
3. Configure DNS as instructed
|
| 187 |
+
|
| 188 |
+
## π Monitoring
|
| 189 |
+
|
| 190 |
+
### View Logs
|
| 191 |
+
1. Go to your Space
|
| 192 |
+
2. Click **"Logs"** tab
|
| 193 |
+
3. Monitor startup and runtime logs
|
| 194 |
+
|
| 195 |
+
### Health Monitoring
|
| 196 |
+
- **Health endpoint**: `/health`
|
| 197 |
+
- **Metrics**: Built-in Hugging Face monitoring
|
| 198 |
+
- **Uptime**: Check Space status page
|
| 199 |
+
|
| 200 |
+
## π Success!
|
| 201 |
+
|
| 202 |
+
Once deployed, your SummarizerApp will be available at:
|
| 203 |
+
`https://your-username-summarizer-app.hf.space`
|
| 204 |
+
|
| 205 |
+
### What You Get
|
| 206 |
+
- ✅ **Free hosting** forever
|
| 207 |
+
- ✅ **HTTPS endpoint** for your API
|
| 208 |
+
- ✅ **16GB RAM** for AI models
|
| 209 |
+
- ✅ **Automatic deployments** from GitHub
|
| 210 |
+
- ✅ **Built-in monitoring** and logs
|
| 211 |
+
|
| 212 |
+
### Next Steps
|
| 213 |
+
1. **Share your API** with others
|
| 214 |
+
2. **Integrate with apps** using the REST API
|
| 215 |
+
3. **Monitor usage** and performance
|
| 216 |
+
4. **Upgrade to GPU** if needed (paid tier)
|
| 217 |
+
|
| 218 |
+
---
|
| 219 |
+
|
| 220 |
+
**Congratulations! Your text summarization service is now live on Hugging Face Spaces! π**
|
README.md
CHANGED
|
@@ -1,473 +1,170 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
- ✅ **Comprehensive Testing** - 30+ tests with >90% coverage
|
| 12 |
-
- 🐳 **Docker Ready** - Containerized deployment support
|
| 13 |
-
- ☁️ **Cloud Extensible** - Easy migration to cloud hosting
|
| 14 |
-
|
| 15 |
-
## Quick Start
|
| 16 |
-
|
| 17 |
-
### Prerequisites
|
| 18 |
-
|
| 19 |
-
- Python 3.7+
|
| 20 |
-
- [Ollama](https://ollama.ai) installed and running
|
| 21 |
-
- A compatible language model (e.g., `llama3.1:8b`)
|
| 22 |
-
|
| 23 |
-
### Installation
|
| 24 |
-
|
| 25 |
-
1. **Clone the repository**
|
| 26 |
-
```bash
|
| 27 |
-
git clone https://github.com/MingLu0/SummarizerBackend.git
|
| 28 |
-
cd SummarizerBackend
|
| 29 |
-
```
|
| 30 |
-
|
| 31 |
-
2. **Set up Ollama**
|
| 32 |
-
```bash
|
| 33 |
-
# Install Ollama (macOS)
|
| 34 |
-
brew install ollama
|
| 35 |
-
|
| 36 |
-
# Start Ollama service
|
| 37 |
-
ollama serve
|
| 38 |
-
|
| 39 |
-
# Pull a model (in another terminal)
|
| 40 |
-
ollama pull llama3.1:8b
|
| 41 |
-
```
|
| 42 |
-
|
| 43 |
-
3. **Set up Python environment**
|
| 44 |
-
```bash
|
| 45 |
-
# Create virtual environment
|
| 46 |
-
python3 -m venv .venv
|
| 47 |
-
source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
| 48 |
-
|
| 49 |
-
# Install dependencies
|
| 50 |
-
pip install -r requirements.txt
|
| 51 |
-
```
|
| 52 |
-
|
| 53 |
-
4. **Start the server (Recommended)**
|
| 54 |
-
```bash
|
| 55 |
-
# Use the automated startup script (checks everything for you)
|
| 56 |
-
./start-server.sh
|
| 57 |
-
```
|
| 58 |
-
|
| 59 |
-
**OR manually:**
|
| 60 |
-
```bash
|
| 61 |
-
# Start the server manually
|
| 62 |
-
uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload
|
| 63 |
-
```
|
| 64 |
-
|
| 65 |
-
## Configuration
|
| 66 |
-
|
| 67 |
-
The server uses environment variables for configuration. A `.env` file is automatically created with sensible defaults:
|
| 68 |
-
|
| 69 |
-
```bash
|
| 70 |
-
# Ollama Configuration
|
| 71 |
-
OLLAMA_HOST=http://127.0.0.1:11434
|
| 72 |
-
OLLAMA_MODEL=llama3.2:latest
|
| 73 |
-
OLLAMA_TIMEOUT=30
|
| 74 |
-
|
| 75 |
-
# Server Configuration
|
| 76 |
-
SERVER_HOST=0.0.0.0
|
| 77 |
-
SERVER_PORT=8000
|
| 78 |
-
LOG_LEVEL=INFO
|
| 79 |
-
```
|
| 80 |
-
|
| 81 |
-
**Common Issues & Solutions:**
|
| 82 |
-
|
| 83 |
-
- **Port already in use**: The startup script automatically handles this
|
| 84 |
-
- **Ollama connection failed**: Ensure Ollama is running (`ollama serve`)
|
| 85 |
-
- **Model not found**: Install the model (`ollama pull llama3.2:latest`)
|
| 86 |
-
- **Wrong host configuration**: The `.env` file ensures correct localhost settings
|
| 87 |
-
|
| 88 |
-
## API Usage
|
| 89 |
-
```
|
| 90 |
|
| 91 |
-
|
| 92 |
-
```bash
|
| 93 |
-
# Health check
|
| 94 |
-
curl http://127.0.0.1:8000/health
|
| 95 |
-
|
| 96 |
-
# Summarize text
|
| 97 |
-
curl -X POST http://127.0.0.1:8000/api/v1/summarize/ \
|
| 98 |
-
-H "Content-Type: application/json" \
|
| 99 |
-
-d '{"text": "Your long text to summarize here..."}'
|
| 100 |
-
```
|
| 101 |
|
| 102 |
-
|
| 103 |
|
| 104 |
-
|
| 105 |
-
- **Swagger UI**: http://127.0.0.1:8000/docs
|
| 106 |
-
- **ReDoc**: http://127.0.0.1:8000/redoc
|
| 107 |
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
-
|
| 111 |
-
Health check endpoint.
|
| 112 |
|
| 113 |
-
|
| 114 |
-
```json
|
| 115 |
-
{
|
| 116 |
-
"status": "ok",
|
| 117 |
-
"service": "text-summarizer-api",
|
| 118 |
-
"version": "1.0.0"
|
| 119 |
-
}
|
| 120 |
```
|
| 121 |
-
|
| 122 |
-
#### `POST /api/v1/summarize/`
|
| 123 |
-
Summarize text using Ollama.
|
| 124 |
-
|
| 125 |
-
**Request:**
|
| 126 |
-
```json
|
| 127 |
-
{
|
| 128 |
-
"text": "Your text to summarize...",
|
| 129 |
-
"max_tokens": 256,
|
| 130 |
-
"prompt": "Summarize the following text concisely:"
|
| 131 |
-
}
|
| 132 |
```
|
| 133 |
|
| 134 |
-
|
| 135 |
-
```json
|
| 136 |
-
{
|
| 137 |
-
"summary": "Generated summary text",
|
| 138 |
-
"model": "llama3.1:8b",
|
| 139 |
-
"tokens_used": 150,
|
| 140 |
-
"latency_ms": 1234.5
|
| 141 |
-
}
|
| 142 |
```
|
|
|
|
|
|
|
| 143 |
|
| 144 |
-
**Error Response:**
|
| 145 |
-
```json
|
| 146 |
{
|
| 147 |
-
"
|
| 148 |
-
"
|
| 149 |
-
"
|
| 150 |
-
}
|
| 151 |
-
```
|
| 152 |
-
|
| 153 |
-
## Configuration
|
| 154 |
-
|
| 155 |
-
Configure the API using environment variables:
|
| 156 |
-
|
| 157 |
-
```bash
|
| 158 |
-
# Ollama Configuration
|
| 159 |
-
export OLLAMA_MODEL=llama3.1:8b
|
| 160 |
-
export OLLAMA_HOST=http://127.0.0.1:11434
|
| 161 |
-
export OLLAMA_TIMEOUT=30
|
| 162 |
-
|
| 163 |
-
# Server Configuration
|
| 164 |
-
export SERVER_HOST=127.0.0.1
|
| 165 |
-
export SERVER_PORT=8000
|
| 166 |
-
export LOG_LEVEL=INFO
|
| 167 |
-
|
| 168 |
-
# Optional: API Security
|
| 169 |
-
export API_KEY_ENABLED=false
|
| 170 |
-
export API_KEY=your-secret-key
|
| 171 |
-
|
| 172 |
-
# Optional: Rate Limiting
|
| 173 |
-
export RATE_LIMIT_ENABLED=false
|
| 174 |
-
export RATE_LIMIT_REQUESTS=60
|
| 175 |
-
export RATE_LIMIT_WINDOW=60
|
| 176 |
-
```
|
| 177 |
-
|
| 178 |
-
## Android Integration
|
| 179 |
-
|
| 180 |
-
### Retrofit Example
|
| 181 |
-
|
| 182 |
-
```kotlin
|
| 183 |
-
// API Interface
|
| 184 |
-
interface SummarizerApi {
|
| 185 |
-
@POST("api/v1/summarize/")
|
| 186 |
-
suspend fun summarize(@Body request: SummarizeRequest): SummarizeResponse
|
| 187 |
}
|
| 188 |
-
|
| 189 |
-
// Data Classes
|
| 190 |
-
data class SummarizeRequest(
|
| 191 |
-
val text: String,
|
| 192 |
-
val max_tokens: Int = 256,
|
| 193 |
-
val prompt: String = "Summarize the following text concisely:"
|
| 194 |
-
)
|
| 195 |
-
|
| 196 |
-
data class SummarizeResponse(
|
| 197 |
-
val summary: String,
|
| 198 |
-
val model: String,
|
| 199 |
-
val tokens_used: Int?,
|
| 200 |
-
val latency_ms: Double?
|
| 201 |
-
)
|
| 202 |
-
|
| 203 |
-
// Usage
|
| 204 |
-
val retrofit = Retrofit.Builder()
|
| 205 |
-
.baseUrl("http://127.0.0.1:8000/")
|
| 206 |
-
.addConverterFactory(GsonConverterFactory.create())
|
| 207 |
-
.build()
|
| 208 |
-
|
| 209 |
-
val api = retrofit.create(SummarizerApi::class.java)
|
| 210 |
-
val response = api.summarize(SummarizeRequest(text = "Your text here"))
|
| 211 |
```
|
| 212 |
|
| 213 |
-
###
|
|
|
|
|
|
|
| 214 |
|
| 215 |
-
|
| 216 |
-
val client = OkHttpClient()
|
| 217 |
-
val json = JSONObject().apply {
|
| 218 |
-
put("text", "Your text to summarize")
|
| 219 |
-
put("max_tokens", 256)
|
| 220 |
-
}
|
| 221 |
|
| 222 |
-
|
| 223 |
-
.url("http://127.0.0.1:8000/api/v1/summarize/")
|
| 224 |
-
.post(json.toString().toRequestBody("application/json".toMediaType()))
|
| 225 |
-
.build()
|
| 226 |
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
## Development
|
| 234 |
|
| 235 |
-
|
| 236 |
|
|
|
|
| 237 |
```bash
|
| 238 |
-
#
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
# Run with coverage
|
| 242 |
-
pytest --cov=app --cov-report=html --cov-report=term
|
| 243 |
-
|
| 244 |
-
# Run tests in Docker
|
| 245 |
-
./scripts/run-tests.sh
|
| 246 |
|
| 247 |
-
#
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
# Run tests and stop on first failure
|
| 251 |
-
pytest -x
|
| 252 |
```
|
| 253 |
|
| 254 |
-
###
|
|
|
|
| 255 |
|
| 256 |
-
|
| 257 |
-
# Format code
|
| 258 |
-
black app/ tests/
|
| 259 |
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
| 262 |
|
| 263 |
-
|
| 264 |
-
flake8 app/ tests/
|
| 265 |
-
```
|
| 266 |
-
|
| 267 |
-
### Project Structure
|
| 268 |
-
|
| 269 |
-
```
|
| 270 |
-
app/
|
| 271 |
-
├── main.py # FastAPI app entry point
|
| 272 |
-
├── api/
|
| 273 |
-
│   └── v1/
|
| 274 |
-
│       ├── routes.py # API route definitions
|
| 275 |
-
│       ├── schemas.py # Pydantic models
|
| 276 |
-
│       └── summarize.py # Summarization endpoint
|
| 277 |
-
├── services/
|
| 278 |
-
│   └── summarizer.py # Ollama integration
|
| 279 |
-
└── core/
|
| 280 |
-
    ├── config.py # Configuration management
|
| 281 |
-
    ├── logging.py # Logging setup
|
| 282 |
-
    ├── middleware.py # Request middleware
|
| 283 |
-
    └── errors.py # Error handling
|
| 284 |
-
tests/
|
| 285 |
-
├── test_api.py # API endpoint tests
|
| 286 |
-
├── test_services.py # Service layer tests
|
| 287 |
-
├── test_schemas.py # Pydantic model tests
|
| 288 |
-
├── test_config.py # Configuration tests
|
| 289 |
-
└── conftest.py # Test configuration
|
| 290 |
-
```
|
| 291 |
-
|
| 292 |
-
## Docker Deployment
|
| 293 |
-
|
| 294 |
-
### Quick Start with Docker
|
| 295 |
|
|
|
|
| 296 |
```bash
|
| 297 |
-
#
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
# 2. Download a model (first time only)
|
| 301 |
-
./scripts/setup-ollama.sh llama3.1:8b
|
| 302 |
-
|
| 303 |
-
# 3. Start the API
|
| 304 |
-
docker-compose up api -d
|
| 305 |
|
| 306 |
-
#
|
| 307 |
-
|
| 308 |
```
|
| 309 |
|
| 310 |
-
###
|
| 311 |
-
|
| 312 |
-
```bash
|
| 313 |
-
# Use development compose file
|
| 314 |
-
docker-compose -f docker-compose.dev.yml up --build
|
| 315 |
-
```
|
| 316 |
-
|
| 317 |
-
### Production with Nginx
|
| 318 |
-
|
| 319 |
```bash
|
| 320 |
-
#
|
| 321 |
-
|
| 322 |
-
```
|
| 323 |
-
|
| 324 |
-
### Manual Build
|
| 325 |
-
|
| 326 |
-
```bash
|
| 327 |
-
# Build the image
|
| 328 |
-
docker build -t summarizer-backend .
|
| 329 |
|
| 330 |
-
# Run with
|
| 331 |
-
|
| 332 |
-
-e OLLAMA_HOST=http://host.docker.internal:11434 \
|
| 333 |
-
summarizer-backend
|
| 334 |
```
|
| 335 |
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
1. **Build the image**
|
| 339 |
-
```bash
|
| 340 |
-
docker build -t your-registry/summarizer-backend:latest .
|
| 341 |
-
```
|
| 342 |
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
docker push your-registry/summarizer-backend:latest
|
| 347 |
-
|
| 348 |
-
# Deploy to your cloud provider
|
| 349 |
-
# (AWS ECS, Google Cloud Run, Azure Container Instances, etc.)
|
| 350 |
-
```
|
| 351 |
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
|
|
|
| 359 |
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
railway init
|
| 363 |
-
railway up
|
| 364 |
```
|
| 365 |
|
| 366 |
-
|
| 367 |
-
- ✅ Supports Docker Compose with Ollama
|
| 368 |
-
- ✅ Persistent volumes for models
|
| 369 |
-
- ✅ Automatic HTTPS
|
| 370 |
-
- ✅ Easy environment management
|
| 371 |
-
|
| 372 |
-
### π **Other Options**
|
| 373 |
-
|
| 374 |
-
- **Google Cloud Run**: Serverless with auto-scaling
|
| 375 |
-
- **AWS ECS**: Full container orchestration
|
| 376 |
-
- **DigitalOcean App Platform**: Simple deployment
|
| 377 |
-
- **Render**: GitHub integration
|
| 378 |
-
|
| 379 |
-
### π **Detailed Deployment Guide**
|
| 380 |
-
|
| 381 |
-
See [DEPLOYMENT.md](DEPLOYMENT.md) for comprehensive deployment instructions for all platforms.
|
| 382 |
-
|
| 383 |
-
### ⚠️ **Important Notes**
|
| 384 |
-
|
| 385 |
-
- **Memory Requirements**: llama3.1:8b needs ~8GB RAM
|
| 386 |
-
- **Model Download**: Models are downloaded after deployment
|
| 387 |
-
- **Cost Optimization**: Start with smaller models (mistral:7b)
|
| 388 |
-
- **Security**: Enable API keys for production use
|
| 389 |
-
|
| 390 |
-
## Monitoring and Logging
|
| 391 |
-
|
| 392 |
-
### Request Tracking
|
| 393 |
-
Every request gets a unique ID for tracking:
|
| 394 |
```bash
|
| 395 |
-
curl -
|
| 396 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
```
|
| 398 |
|
| 399 |
-
|
| 400 |
-
```
|
| 401 |
-
2025-09-29 20:47:46,949 - app.core.middleware - INFO - Request abc123: POST /api/v1/summarize/
|
| 402 |
-
2025-09-29 20:47:46,987 - app.core.middleware - INFO - Response abc123: 200 (38.48ms)
|
| 403 |
-
```
|
| 404 |
|
| 405 |
-
|
|
|
|
|
|
|
|
|
|
| 406 |
|
| 407 |
-
|
| 408 |
-
- **llama3.1:8b** - Good balance of speed and quality
|
| 409 |
-
- **mistral:7b** - Faster, good for real-time apps
|
| 410 |
-
- **llama3.1:70b** - Higher quality, slower inference
|
| 411 |
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
|
| 418 |
-
## Troubleshooting
|
| 419 |
|
| 420 |
### Common Issues
|
| 421 |
|
| 422 |
-
**Ollama
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
```
|
| 430 |
|
| 431 |
-
|
| 432 |
-
```bash
|
| 433 |
-
# List available models
|
| 434 |
-
ollama list
|
| 435 |
|
| 436 |
-
|
| 437 |
-
ollama pull llama3.1:8b
|
| 438 |
-
```
|
| 439 |
-
|
| 440 |
-
**Port already in use**
|
| 441 |
-
```bash
|
| 442 |
-
# Use a different port
|
| 443 |
-
uvicorn app.main:app --port 8001
|
| 444 |
-
```
|
| 445 |
|
| 446 |
-
|
| 447 |
-
```bash
|
| 448 |
-
# Enable debug logging
|
| 449 |
-
export LOG_LEVEL=DEBUG
|
| 450 |
-
uvicorn app.main:app --reload
|
| 451 |
-
```
|
| 452 |
-
|
| 453 |
-
## Contributing
|
| 454 |
|
| 455 |
1. Fork the repository
|
| 456 |
-
2. Create a feature branch
|
| 457 |
-
3.
|
| 458 |
-
4.
|
| 459 |
-
5.
|
| 460 |
-
|
| 461 |
-
## License
|
| 462 |
-
|
| 463 |
-
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
| 464 |
-
|
| 465 |
-
## Support
|
| 466 |
-
|
| 467 |
-
- 📧 **Email**: [email protected]
|
| 468 |
-
- π **Issues**: [GitHub Issues](https://github.com/MingLu0/SummarizerBackend/issues)
|
| 469 |
-
- π **Documentation**: [API Docs](http://127.0.0.1:8000/docs)
|
| 470 |
|
| 471 |
---
|
| 472 |
|
| 473 |
-
**
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Text Summarizer API
|
| 3 |
+
emoji: π
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
+
app_port: 7860
|
| 10 |
+
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# Text Summarizer API
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
+
A FastAPI-based text summarization service powered by Ollama and Mistral 7B model.
|
| 15 |
|
| 16 |
+
## π Features
|
|
|
|
|
|
|
| 17 |
|
| 18 |
+
- **Fast text summarization** using local LLM inference
|
| 19 |
+
- **RESTful API** with FastAPI
|
| 20 |
+
- **Health monitoring** and logging
|
| 21 |
+
- **Docker containerized** for easy deployment
|
| 22 |
+
- **Free deployment** on Hugging Face Spaces
|
| 23 |
|
| 24 |
+
## 📡 API Endpoints
|
|
|
|
| 25 |
|
| 26 |
+
### Health Check
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
```
|
| 28 |
+
GET /health
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
```
|
| 30 |
|
| 31 |
+
### Summarize Text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
```
|
| 33 |
+
POST /api/v1/summarize
|
| 34 |
+
Content-Type: application/json
|
| 35 |
|
|
|
|
|
|
|
| 36 |
{
|
| 37 |
+
"text": "Your long text to summarize here...",
|
| 38 |
+
"max_tokens": 256,
|
| 39 |
+
"temperature": 0.7
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
```
|
| 42 |
|
| 43 |
+
### API Documentation
|
| 44 |
+
- **Swagger UI**: `/docs`
|
| 45 |
+
- **ReDoc**: `/redoc`
|
| 46 |
|
| 47 |
+
## 🔧 Configuration
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
+
The service uses the following environment variables:
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
+
- `OLLAMA_MODEL`: Model to use (default: `mistral:7b`)
|
| 52 |
+
- `OLLAMA_HOST`: Ollama service host (default: `http://localhost:11434`)
|
| 53 |
+
- `OLLAMA_TIMEOUT`: Request timeout in seconds (default: `30`)
|
| 54 |
+
- `SERVER_HOST`: Server host (default: `0.0.0.0`)
|
| 55 |
+
- `SERVER_PORT`: Server port (default: `7860`)
|
| 56 |
+
- `LOG_LEVEL`: Logging level (default: `INFO`)
|
|
|
|
| 57 |
|
| 58 |
+
## 🐳 Docker Deployment
|
| 59 |
|
| 60 |
+
### Local Development
|
| 61 |
```bash
|
| 62 |
+
# Build and run with docker-compose
|
| 63 |
+
docker-compose up --build
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
+
# Or run directly
|
| 66 |
+
docker build -f Dockerfile.hf -t summarizer-app .
|
| 67 |
+
docker run -p 7860:7860 summarizer-app
|
|
|
|
|
|
|
| 68 |
```
|
| 69 |
|
| 70 |
+
### Hugging Face Spaces
|
| 71 |
+
This app is configured for deployment on Hugging Face Spaces using Docker SDK.
|
| 72 |
|
| 73 |
+
## π Performance
|
|
|
|
|
|
|
| 74 |
|
| 75 |
+
- **Model**: Mistral 7B (7GB RAM requirement)
|
| 76 |
+
- **Startup time**: ~2-3 minutes (includes model download)
|
| 77 |
+
- **Inference speed**: ~2-5 seconds per request
|
| 78 |
+
- **Memory usage**: ~8GB RAM
|
| 79 |
|
| 80 |
+
## 🛠️ Development
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
+
### Setup
|
| 83 |
```bash
|
| 84 |
+
# Install dependencies
|
| 85 |
+
pip install -r requirements.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
+
# Run locally
|
| 88 |
+
uvicorn app.main:app --host 0.0.0.0 --port 7860
|
| 89 |
```
|
| 90 |
|
| 91 |
+
### Testing
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
```bash
|
| 93 |
+
# Run tests
|
| 94 |
+
pytest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
+
# Run with coverage
|
| 97 |
+
pytest --cov=app
|
|
|
|
|
|
|
| 98 |
```
|
| 99 |
|
| 100 |
+
## π Usage Examples
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
+
### Python
|
| 103 |
+
```python
|
| 104 |
+
import requests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
+
# Summarize text
|
| 107 |
+
response = requests.post(
|
| 108 |
+
"https://your-space.hf.space/api/v1/summarize",
|
| 109 |
+
json={
|
| 110 |
+
"text": "Your long article or text here...",
|
| 111 |
+
"max_tokens": 256
|
| 112 |
+
}
|
| 113 |
+
)
|
| 114 |
|
| 115 |
+
result = response.json()
|
| 116 |
+
print(result["summary"])
|
|
|
|
|
|
|
| 117 |
```
|
| 118 |
|
| 119 |
+
### cURL
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
```bash
|
| 121 |
+
curl -X POST "https://your-space.hf.space/api/v1/summarize" \
|
| 122 |
+
-H "Content-Type: application/json" \
|
| 123 |
+
-d '{
|
| 124 |
+
"text": "Your text to summarize...",
|
| 125 |
+
"max_tokens": 256
|
| 126 |
+
}'
|
| 127 |
```
|
| 128 |
|
| 129 |
+
## π Security
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
+
- Non-root user execution
|
| 132 |
+
- Input validation and sanitization
|
| 133 |
+
- Rate limiting (configurable)
|
| 134 |
+
- API key authentication (optional)
|
| 135 |
|
| 136 |
+
## π Monitoring
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
+
The service includes:
|
| 139 |
+
- Health check endpoint
|
| 140 |
+
- Request logging
|
| 141 |
+
- Error tracking
|
| 142 |
+
- Performance metrics
|
| 143 |
|
| 144 |
+
## π Troubleshooting
|
| 145 |
|
| 146 |
### Common Issues
|
| 147 |
|
| 148 |
+
1. **Model not loading**: Check if Ollama is running and model is pulled
|
| 149 |
+
2. **Out of memory**: Ensure sufficient RAM (8GB+) for Mistral 7B
|
| 150 |
+
3. **Slow startup**: Normal on first run due to model download
|
| 151 |
+
4. **API errors**: Check the application logs, and use the `/docs` endpoint (Swagger UI) to verify the request format
|
| 152 |
|
| 153 |
+
### Logs
|
| 154 |
+
View application logs in the Hugging Face Spaces interface or check the health endpoint for service status.
|
|
|
|
| 155 |
|
| 156 |
+
## π License
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
+
MIT License - see LICENSE file for details.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
+
## 🤝 Contributing
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
1. Fork the repository
|
| 163 |
+
2. Create a feature branch
|
| 164 |
+
3. Make your changes
|
| 165 |
+
4. Add tests
|
| 166 |
+
5. Submit a pull request
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
---
|
| 169 |
|
| 170 |
+
**Deployed on Hugging Face Spaces** π
|
env.hf
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hugging Face Spaces Environment Configuration
|
| 2 |
+
# Copy this to .env for local development
|
| 3 |
+
|
| 4 |
+
# Ollama Configuration
|
| 5 |
+
OLLAMA_MODEL=mistral:7b
|
| 6 |
+
OLLAMA_HOST=http://localhost:11434
|
| 7 |
+
OLLAMA_TIMEOUT=30
|
| 8 |
+
|
| 9 |
+
# Server Configuration
|
| 10 |
+
SERVER_HOST=0.0.0.0
|
| 11 |
+
SERVER_PORT=7860
|
| 12 |
+
LOG_LEVEL=INFO
|
| 13 |
+
|
| 14 |
+
# Optional: API Security
|
| 15 |
+
API_KEY_ENABLED=false
|
| 16 |
+
API_KEY=your-secret-key-here
|
| 17 |
+
|
| 18 |
+
# Optional: Rate Limiting
|
| 19 |
+
RATE_LIMIT_ENABLED=false
|
| 20 |
+
RATE_LIMIT_REQUESTS=60
|
| 21 |
+
RATE_LIMIT_WINDOW=60
|
| 22 |
+
|
| 23 |
+
# Input validation
|
| 24 |
+
MAX_TEXT_LENGTH=32000
|
| 25 |
+
MAX_TOKENS_DEFAULT=256
|