Spaces:
Sleeping
Sleeping
ming
committed on
Commit
·
d3f36f7
1
Parent(s):
f2cff39
Fix 504 Gateway Timeout: Increase timeout values
Browse files
- Increase Ollama timeout from 30s to 60s in config
- Update Nginx timeouts: connect/send 60s, read 90s
- Update dynamic timeout cap from 60s to 90s in summarizer
- Update Docker Compose and Hugging Face deployment configs
- Update timeout optimization tests to reflect new values
This should resolve the 504 Gateway Timeout errors on Hugging Face Spaces
by providing more time for CPU-bound inference in the shared environment.
- HUGGINGFACE_DEPLOYMENT.md +1 -1
- app/core/config.py +1 -1
- app/services/summarizer.py +2 -2
- docker-compose.yml +1 -1
- env.hf +1 -1
- nginx.conf +3 -3
- tests/test_timeout_optimization.py +21 -21
HUGGINGFACE_DEPLOYMENT.md
CHANGED
|
@@ -73,7 +73,7 @@ In your Hugging Face Space settings:
|
|
| 73 |
```
|
| 74 |
OLLAMA_MODEL=mistral:7b
|
| 75 |
OLLAMA_HOST=http://localhost:11434
|
| 76 |
-
OLLAMA_TIMEOUT=
|
| 77 |
SERVER_HOST=0.0.0.0
|
| 78 |
SERVER_PORT=7860
|
| 79 |
LOG_LEVEL=INFO
|
|
|
|
| 73 |
```
|
| 74 |
OLLAMA_MODEL=mistral:7b
|
| 75 |
OLLAMA_HOST=http://localhost:11434
|
| 76 |
+
OLLAMA_TIMEOUT=60
|
| 77 |
SERVER_HOST=0.0.0.0
|
| 78 |
SERVER_PORT=7860
|
| 79 |
LOG_LEVEL=INFO
|
app/core/config.py
CHANGED
|
@@ -13,7 +13,7 @@ class Settings(BaseSettings):
|
|
| 13 |
# Ollama Configuration
|
| 14 |
ollama_model: str = Field(default="llama3.2:1b", env="OLLAMA_MODEL")
|
| 15 |
ollama_host: str = Field(default="http://0.0.0.0:11434", env="OLLAMA_HOST")
|
| 16 |
-
ollama_timeout: int = Field(default=
|
| 17 |
|
| 18 |
# Server Configuration
|
| 19 |
server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")
|
|
|
|
| 13 |
# Ollama Configuration
|
| 14 |
ollama_model: str = Field(default="llama3.2:1b", env="OLLAMA_MODEL")
|
| 15 |
ollama_host: str = Field(default="http://0.0.0.0:11434", env="OLLAMA_HOST")
|
| 16 |
+
ollama_timeout: int = Field(default=60, env="OLLAMA_TIMEOUT", ge=1)
|
| 17 |
|
| 18 |
# Server Configuration
|
| 19 |
server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")
|
app/services/summarizer.py
CHANGED
|
@@ -55,9 +55,9 @@ class OllamaService:
|
|
| 55 |
"""
|
| 56 |
start_time = time.time()
|
| 57 |
|
| 58 |
-
# Optimized timeout: base + 3s per extra 1000 chars (cap
|
| 59 |
text_length = len(text)
|
| 60 |
-
dynamic_timeout = min(self.timeout + max(0, (text_length - 1000) // 1000 * 3),
|
| 61 |
|
| 62 |
# Preprocess text to reduce input size for faster processing
|
| 63 |
if text_length > 4000:
|
|
|
|
| 55 |
"""
|
| 56 |
start_time = time.time()
|
| 57 |
|
| 58 |
+
# Optimized timeout: base + 3s per extra 1000 chars (cap 90s)
|
| 59 |
text_length = len(text)
|
| 60 |
+
dynamic_timeout = min(self.timeout + max(0, (text_length - 1000) // 1000 * 3), 90)
|
| 61 |
|
| 62 |
# Preprocess text to reduce input size for faster processing
|
| 63 |
if text_length > 4000:
|
docker-compose.yml
CHANGED
|
@@ -28,7 +28,7 @@ services:
|
|
| 28 |
environment:
|
| 29 |
- OLLAMA_HOST=http://ollama:11434
|
| 30 |
- OLLAMA_MODEL=llama3.1:8b
|
| 31 |
-
- OLLAMA_TIMEOUT=
|
| 32 |
- SERVER_HOST=0.0.0.0
|
| 33 |
- SERVER_PORT=8000
|
| 34 |
- LOG_LEVEL=INFO
|
|
|
|
| 28 |
environment:
|
| 29 |
- OLLAMA_HOST=http://ollama:11434
|
| 30 |
- OLLAMA_MODEL=llama3.1:8b
|
| 31 |
+
- OLLAMA_TIMEOUT=60
|
| 32 |
- SERVER_HOST=0.0.0.0
|
| 33 |
- SERVER_PORT=8000
|
| 34 |
- LOG_LEVEL=INFO
|
env.hf
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
# Ollama Configuration
|
| 5 |
OLLAMA_MODEL=mistral:7b
|
| 6 |
OLLAMA_HOST=http://localhost:11434
|
| 7 |
-
OLLAMA_TIMEOUT=
|
| 8 |
|
| 9 |
# Server Configuration
|
| 10 |
SERVER_HOST=0.0.0.0
|
|
|
|
| 4 |
# Ollama Configuration
|
| 5 |
OLLAMA_MODEL=mistral:7b
|
| 6 |
OLLAMA_HOST=http://localhost:11434
|
| 7 |
+
OLLAMA_TIMEOUT=60
|
| 8 |
|
| 9 |
# Server Configuration
|
| 10 |
SERVER_HOST=0.0.0.0
|
nginx.conf
CHANGED
|
@@ -30,9 +30,9 @@ http {
|
|
| 30 |
proxy_set_header X-Forwarded-Proto $scheme;
|
| 31 |
|
| 32 |
# Timeouts
|
| 33 |
-
proxy_connect_timeout
|
| 34 |
-
proxy_send_timeout
|
| 35 |
-
proxy_read_timeout
|
| 36 |
}
|
| 37 |
|
| 38 |
# Health check endpoint (no rate limiting)
|
|
|
|
| 30 |
proxy_set_header X-Forwarded-Proto $scheme;
|
| 31 |
|
| 32 |
# Timeouts
|
| 33 |
+
proxy_connect_timeout 60s;
|
| 34 |
+
proxy_send_timeout 60s;
|
| 35 |
+
proxy_read_timeout 90s;
|
| 36 |
}
|
| 37 |
|
| 38 |
# Health check endpoint (no rate limiting)
|
tests/test_timeout_optimization.py
CHANGED
|
@@ -33,7 +33,7 @@ class TestTimeoutOptimization:
|
|
| 33 |
# Test the optimized formula directly
|
| 34 |
base_timeout = 60 # Optimized base timeout
|
| 35 |
scaling_factor = 5 # Optimized scaling factor
|
| 36 |
-
max_cap =
|
| 37 |
|
| 38 |
# Test cases: (text_length, expected_timeout)
|
| 39 |
test_cases = [
|
|
@@ -42,8 +42,8 @@ class TestTimeoutOptimization:
|
|
| 42 |
(1500, 60), # 1500 chars: 60 + (500//1000)*5 = 60 + 0*5 = 60
|
| 43 |
(2000, 65), # 2000 chars: 60 + (1000//1000)*5 = 60 + 1*5 = 65
|
| 44 |
(5000, 80), # 5000 chars: 60 + (4000//1000)*5 = 60 + 4*5 = 80
|
| 45 |
-
(10000,
|
| 46 |
-
(50000,
|
| 47 |
]
|
| 48 |
|
| 49 |
for text_length, expected_timeout in test_cases:
|
|
@@ -72,17 +72,17 @@ class TestTimeoutOptimization:
|
|
| 72 |
very_large_text_length = 100000 # 100,000 characters
|
| 73 |
base_timeout = 60
|
| 74 |
scaling_factor = 5
|
| 75 |
-
max_cap =
|
| 76 |
|
| 77 |
# Calculate what the timeout would be without cap
|
| 78 |
uncapped_timeout = base_timeout + max(0, (very_large_text_length - 1000) // 1000 * scaling_factor)
|
| 79 |
|
| 80 |
-
# Should be much higher than
|
| 81 |
-
assert uncapped_timeout >
|
| 82 |
|
| 83 |
-
# With cap, should be exactly
|
| 84 |
capped_timeout = min(uncapped_timeout, max_cap)
|
| 85 |
-
assert capped_timeout ==
|
| 86 |
|
| 87 |
def test_timeout_optimization_prevents_excessive_waits(self):
|
| 88 |
"""Test that optimized timeouts prevent excessive waits like 100+ seconds."""
|
|
@@ -97,15 +97,15 @@ class TestTimeoutOptimization:
|
|
| 97 |
dynamic_timeout = base_timeout + max(0, (text_length - 1000) // 1000 * scaling_factor)
|
| 98 |
dynamic_timeout = min(dynamic_timeout, max_cap)
|
| 99 |
|
| 100 |
-
# No timeout should exceed
|
| 101 |
-
assert dynamic_timeout <=
|
| 102 |
-
f"Timeout for {text_length} chars should not exceed
|
| 103 |
|
| 104 |
# No timeout should be excessively long (like 100+ seconds for typical text)
|
| 105 |
if text_length <= 20000: # Typical text sizes
|
| 106 |
-
# Allow up to
|
| 107 |
-
assert dynamic_timeout <=
|
| 108 |
-
f"Timeout for typical text size {text_length} should not exceed
|
| 109 |
|
| 110 |
def test_timeout_optimization_performance_improvement(self):
|
| 111 |
"""Test that timeout optimization provides better performance characteristics."""
|
|
@@ -122,13 +122,13 @@ class TestTimeoutOptimization:
|
|
| 122 |
# New calculation (after optimization)
|
| 123 |
new_base = 60
|
| 124 |
new_scaling = 5
|
| 125 |
-
new_cap =
|
| 126 |
new_timeout = new_base + max(0, (text_length - 1000) // 1000 * new_scaling) # 60 + 9*5 = 105
|
| 127 |
-
new_timeout = min(new_timeout, new_cap) # Capped at
|
| 128 |
|
| 129 |
# New timeout should be significantly better
|
| 130 |
assert new_timeout < old_timeout, f"New timeout {new_timeout}s should be less than old {old_timeout}s"
|
| 131 |
-
assert new_timeout ==
|
| 132 |
assert old_timeout == 210, f"Old timeout should be 210s for 10k chars, got {old_timeout}"
|
| 133 |
|
| 134 |
def test_timeout_optimization_edge_cases(self):
|
|
@@ -166,14 +166,14 @@ class TestTimeoutOptimization:
|
|
| 166 |
dynamic_timeout = base_timeout + max(0, (problematic_text_length - 1000) // 1000 * scaling_factor)
|
| 167 |
dynamic_timeout = min(dynamic_timeout, max_cap)
|
| 168 |
|
| 169 |
-
# Should be 60 + (19000//1000)*5 = 60 + 19*5 = 155, capped at
|
| 170 |
-
expected_timeout =
|
| 171 |
assert dynamic_timeout == expected_timeout, \
|
| 172 |
f"Problematic text length should have capped timeout {expected_timeout}s, got {dynamic_timeout}"
|
| 173 |
|
| 174 |
# Should not be 100+ seconds
|
| 175 |
-
assert dynamic_timeout <=
|
| 176 |
-
f"Optimized timeout should not exceed
|
| 177 |
|
| 178 |
# Should be much better than the old calculation
|
| 179 |
old_timeout = 120 + max(0, (problematic_text_length - 1000) // 1000 * 10) # 120 + 19*10 = 310
|
|
|
|
| 33 |
# Test the optimized formula directly
|
| 34 |
base_timeout = 60 # Optimized base timeout
|
| 35 |
scaling_factor = 5 # Optimized scaling factor
|
| 36 |
+
max_cap = 90 # Optimized maximum cap
|
| 37 |
|
| 38 |
# Test cases: (text_length, expected_timeout)
|
| 39 |
test_cases = [
|
|
|
|
| 42 |
(1500, 60), # 1500 chars: 60 + (500//1000)*5 = 60 + 0*5 = 60
|
| 43 |
(2000, 65), # 2000 chars: 60 + (1000//1000)*5 = 60 + 1*5 = 65
|
| 44 |
(5000, 80), # 5000 chars: 60 + (4000//1000)*5 = 60 + 4*5 = 80
|
| 45 |
+
(10000, 90), # 10000 chars: 60 + (9000//1000)*5 = 60 + 9*5 = 105, capped at 90
|
| 46 |
+
(50000, 90), # Very large: should be capped at 90
|
| 47 |
]
|
| 48 |
|
| 49 |
for text_length, expected_timeout in test_cases:
|
|
|
|
| 72 |
very_large_text_length = 100000 # 100,000 characters
|
| 73 |
base_timeout = 60
|
| 74 |
scaling_factor = 5
|
| 75 |
+
max_cap = 90 # Optimized cap
|
| 76 |
|
| 77 |
# Calculate what the timeout would be without cap
|
| 78 |
uncapped_timeout = base_timeout + max(0, (very_large_text_length - 1000) // 1000 * scaling_factor)
|
| 79 |
|
| 80 |
+
# Should be much higher than 90 without cap
|
| 81 |
+
assert uncapped_timeout > 90, f"Uncapped timeout should be > 90s, got {uncapped_timeout}"
|
| 82 |
|
| 83 |
+
# With cap, should be exactly 90
|
| 84 |
capped_timeout = min(uncapped_timeout, max_cap)
|
| 85 |
+
assert capped_timeout == 90, f"Capped timeout should be 90s, got {capped_timeout}"
|
| 86 |
|
| 87 |
def test_timeout_optimization_prevents_excessive_waits(self):
|
| 88 |
"""Test that optimized timeouts prevent excessive waits like 100+ seconds."""
|
|
|
|
| 97 |
dynamic_timeout = base_timeout + max(0, (text_length - 1000) // 1000 * scaling_factor)
|
| 98 |
dynamic_timeout = min(dynamic_timeout, max_cap)
|
| 99 |
|
| 100 |
+
# No timeout should exceed 90 seconds
|
| 101 |
+
assert dynamic_timeout <= 90, \
|
| 102 |
+
f"Timeout for {text_length} chars should not exceed 90s, got {dynamic_timeout}"
|
| 103 |
|
| 104 |
# No timeout should be excessively long (like 100+ seconds for typical text)
|
| 105 |
if text_length <= 20000: # Typical text sizes
|
| 106 |
+
# Allow up to 90 seconds for 20k chars (which is reasonable and capped)
|
| 107 |
+
assert dynamic_timeout <= 90, \
|
| 108 |
+
f"Timeout for typical text size {text_length} should not exceed 90s, got {dynamic_timeout}"
|
| 109 |
|
| 110 |
def test_timeout_optimization_performance_improvement(self):
|
| 111 |
"""Test that timeout optimization provides better performance characteristics."""
|
|
|
|
| 122 |
# New calculation (after optimization)
|
| 123 |
new_base = 60
|
| 124 |
new_scaling = 5
|
| 125 |
+
new_cap = 90
|
| 126 |
new_timeout = new_base + max(0, (text_length - 1000) // 1000 * new_scaling) # 60 + 9*5 = 105
|
| 127 |
+
new_timeout = min(new_timeout, new_cap) # Capped at 90
|
| 128 |
|
| 129 |
# New timeout should be significantly better
|
| 130 |
assert new_timeout < old_timeout, f"New timeout {new_timeout}s should be less than old {old_timeout}s"
|
| 131 |
+
assert new_timeout == 90, f"New timeout should be 90s for 10k chars (capped), got {new_timeout}"
|
| 132 |
assert old_timeout == 210, f"Old timeout should be 210s for 10k chars, got {old_timeout}"
|
| 133 |
|
| 134 |
def test_timeout_optimization_edge_cases(self):
|
|
|
|
| 166 |
dynamic_timeout = base_timeout + max(0, (problematic_text_length - 1000) // 1000 * scaling_factor)
|
| 167 |
dynamic_timeout = min(dynamic_timeout, max_cap)
|
| 168 |
|
| 169 |
+
# Should be 60 + (19000//1000)*5 = 60 + 19*5 = 155, capped at 90
|
| 170 |
+
expected_timeout = 90 # Capped at 90
|
| 171 |
assert dynamic_timeout == expected_timeout, \
|
| 172 |
f"Problematic text length should have capped timeout {expected_timeout}s, got {dynamic_timeout}"
|
| 173 |
|
| 174 |
# Should not be 100+ seconds
|
| 175 |
+
assert dynamic_timeout <= 90, \
|
| 176 |
+
f"Optimized timeout should not exceed 90s, got {dynamic_timeout}"
|
| 177 |
|
| 178 |
# Should be much better than the old calculation
|
| 179 |
old_timeout = 120 + max(0, (problematic_text_length - 1000) // 1000 * 10) # 120 + 19*10 = 310
|