ming committed on
Commit
d3f36f7
·
1 Parent(s): f2cff39

Fix 504 Gateway Timeout: Increase timeout values

Browse files

- Increase Ollama timeout from 30s to 60s in config
- Update Nginx timeouts: connect/send 60s, read 90s
- Update dynamic timeout cap from 60s to 90s in summarizer
- Update Docker Compose and Hugging Face deployment configs
- Update timeout optimization tests to reflect new values

This should resolve the 504 Gateway Timeout errors on Hugging Face Spaces
by providing more time for CPU-bound inference in the shared environment.

HUGGINGFACE_DEPLOYMENT.md CHANGED
@@ -73,7 +73,7 @@ In your Hugging Face Space settings:
73
  ```
74
  OLLAMA_MODEL=mistral:7b
75
  OLLAMA_HOST=http://localhost:11434
76
- OLLAMA_TIMEOUT=30
77
  SERVER_HOST=0.0.0.0
78
  SERVER_PORT=7860
79
  LOG_LEVEL=INFO
 
73
  ```
74
  OLLAMA_MODEL=mistral:7b
75
  OLLAMA_HOST=http://localhost:11434
76
+ OLLAMA_TIMEOUT=60
77
  SERVER_HOST=0.0.0.0
78
  SERVER_PORT=7860
79
  LOG_LEVEL=INFO
app/core/config.py CHANGED
@@ -13,7 +13,7 @@ class Settings(BaseSettings):
13
  # Ollama Configuration
14
  ollama_model: str = Field(default="llama3.2:1b", env="OLLAMA_MODEL")
15
  ollama_host: str = Field(default="http://0.0.0.0:11434", env="OLLAMA_HOST")
16
- ollama_timeout: int = Field(default=30, env="OLLAMA_TIMEOUT", ge=1)
17
 
18
  # Server Configuration
19
  server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")
 
13
  # Ollama Configuration
14
  ollama_model: str = Field(default="llama3.2:1b", env="OLLAMA_MODEL")
15
  ollama_host: str = Field(default="http://0.0.0.0:11434", env="OLLAMA_HOST")
16
+ ollama_timeout: int = Field(default=60, env="OLLAMA_TIMEOUT", ge=1)
17
 
18
  # Server Configuration
19
  server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")
app/services/summarizer.py CHANGED
@@ -55,9 +55,9 @@ class OllamaService:
55
  """
56
  start_time = time.time()
57
 
58
- # Optimized timeout: base + 3s per extra 1000 chars (cap 60s)
59
  text_length = len(text)
60
- dynamic_timeout = min(self.timeout + max(0, (text_length - 1000) // 1000 * 3), 60)
61
 
62
  # Preprocess text to reduce input size for faster processing
63
  if text_length > 4000:
 
55
  """
56
  start_time = time.time()
57
 
58
+ # Optimized timeout: base + 3s per extra 1000 chars (cap 90s)
59
  text_length = len(text)
60
+ dynamic_timeout = min(self.timeout + max(0, (text_length - 1000) // 1000 * 3), 90)
61
 
62
  # Preprocess text to reduce input size for faster processing
63
  if text_length > 4000:
docker-compose.yml CHANGED
@@ -28,7 +28,7 @@ services:
28
  environment:
29
  - OLLAMA_HOST=http://ollama:11434
30
  - OLLAMA_MODEL=llama3.1:8b
31
- - OLLAMA_TIMEOUT=30
32
  - SERVER_HOST=0.0.0.0
33
  - SERVER_PORT=8000
34
  - LOG_LEVEL=INFO
 
28
  environment:
29
  - OLLAMA_HOST=http://ollama:11434
30
  - OLLAMA_MODEL=llama3.1:8b
31
+ - OLLAMA_TIMEOUT=60
32
  - SERVER_HOST=0.0.0.0
33
  - SERVER_PORT=8000
34
  - LOG_LEVEL=INFO
env.hf CHANGED
@@ -4,7 +4,7 @@
4
  # Ollama Configuration
5
  OLLAMA_MODEL=mistral:7b
6
  OLLAMA_HOST=http://localhost:11434
7
- OLLAMA_TIMEOUT=30
8
 
9
  # Server Configuration
10
  SERVER_HOST=0.0.0.0
 
4
  # Ollama Configuration
5
  OLLAMA_MODEL=mistral:7b
6
  OLLAMA_HOST=http://localhost:11434
7
+ OLLAMA_TIMEOUT=60
8
 
9
  # Server Configuration
10
  SERVER_HOST=0.0.0.0
nginx.conf CHANGED
@@ -30,9 +30,9 @@ http {
30
  proxy_set_header X-Forwarded-Proto $scheme;
31
 
32
  # Timeouts
33
- proxy_connect_timeout 30s;
34
- proxy_send_timeout 30s;
35
- proxy_read_timeout 30s;
36
  }
37
 
38
  # Health check endpoint (no rate limiting)
 
30
  proxy_set_header X-Forwarded-Proto $scheme;
31
 
32
  # Timeouts
33
+ proxy_connect_timeout 60s;
34
+ proxy_send_timeout 60s;
35
+ proxy_read_timeout 90s;
36
  }
37
 
38
  # Health check endpoint (no rate limiting)
tests/test_timeout_optimization.py CHANGED
@@ -33,7 +33,7 @@ class TestTimeoutOptimization:
33
  # Test the optimized formula directly
34
  base_timeout = 60 # Optimized base timeout
35
  scaling_factor = 5 # Optimized scaling factor
36
- max_cap = 120 # Optimized maximum cap
37
 
38
  # Test cases: (text_length, expected_timeout)
39
  test_cases = [
@@ -42,8 +42,8 @@ class TestTimeoutOptimization:
42
  (1500, 60), # 1500 chars: 60 + (500//1000)*5 = 60 + 0*5 = 60
43
  (2000, 65), # 2000 chars: 60 + (1000//1000)*5 = 60 + 1*5 = 65
44
  (5000, 80), # 5000 chars: 60 + (4000//1000)*5 = 60 + 4*5 = 80
45
- (10000, 105), # 10000 chars: 60 + (9000//1000)*5 = 60 + 9*5 = 105
46
- (50000, 120), # Very large: should be capped at 120
47
  ]
48
 
49
  for text_length, expected_timeout in test_cases:
@@ -72,17 +72,17 @@ class TestTimeoutOptimization:
72
  very_large_text_length = 100000 # 100,000 characters
73
  base_timeout = 60
74
  scaling_factor = 5
75
- max_cap = 120 # Optimized cap
76
 
77
  # Calculate what the timeout would be without cap
78
  uncapped_timeout = base_timeout + max(0, (very_large_text_length - 1000) // 1000 * scaling_factor)
79
 
80
- # Should be much higher than 120 without cap
81
- assert uncapped_timeout > 120, f"Uncapped timeout should be > 120s, got {uncapped_timeout}"
82
 
83
- # With cap, should be exactly 120
84
  capped_timeout = min(uncapped_timeout, max_cap)
85
- assert capped_timeout == 120, f"Capped timeout should be 120s, got {capped_timeout}"
86
 
87
  def test_timeout_optimization_prevents_excessive_waits(self):
88
  """Test that optimized timeouts prevent excessive waits like 100+ seconds."""
@@ -97,15 +97,15 @@ class TestTimeoutOptimization:
97
  dynamic_timeout = base_timeout + max(0, (text_length - 1000) // 1000 * scaling_factor)
98
  dynamic_timeout = min(dynamic_timeout, max_cap)
99
 
100
- # No timeout should exceed 120 seconds
101
- assert dynamic_timeout <= 120, \
102
- f"Timeout for {text_length} chars should not exceed 120s, got {dynamic_timeout}"
103
 
104
  # No timeout should be excessively long (like 100+ seconds for typical text)
105
  if text_length <= 20000: # Typical text sizes
106
- # Allow up to 120 seconds for 20k chars (which is reasonable and capped)
107
- assert dynamic_timeout <= 120, \
108
- f"Timeout for typical text size {text_length} should not exceed 120s, got {dynamic_timeout}"
109
 
110
  def test_timeout_optimization_performance_improvement(self):
111
  """Test that timeout optimization provides better performance characteristics."""
@@ -122,13 +122,13 @@ class TestTimeoutOptimization:
122
  # New calculation (after optimization)
123
  new_base = 60
124
  new_scaling = 5
125
- new_cap = 120
126
  new_timeout = new_base + max(0, (text_length - 1000) // 1000 * new_scaling) # 60 + 9*5 = 105
127
- new_timeout = min(new_timeout, new_cap) # Capped at 120
128
 
129
  # New timeout should be significantly better
130
  assert new_timeout < old_timeout, f"New timeout {new_timeout}s should be less than old {old_timeout}s"
131
- assert new_timeout == 105, f"New timeout should be 105s for 10k chars, got {new_timeout}"
132
  assert old_timeout == 210, f"Old timeout should be 210s for 10k chars, got {old_timeout}"
133
 
134
  def test_timeout_optimization_edge_cases(self):
@@ -166,14 +166,14 @@ class TestTimeoutOptimization:
166
  dynamic_timeout = base_timeout + max(0, (problematic_text_length - 1000) // 1000 * scaling_factor)
167
  dynamic_timeout = min(dynamic_timeout, max_cap)
168
 
169
- # Should be 60 + (19000//1000)*5 = 60 + 19*5 = 155, capped at 120
170
- expected_timeout = 120 # Capped at 120
171
  assert dynamic_timeout == expected_timeout, \
172
  f"Problematic text length should have capped timeout {expected_timeout}s, got {dynamic_timeout}"
173
 
174
  # Should not be 100+ seconds
175
- assert dynamic_timeout <= 120, \
176
- f"Optimized timeout should not exceed 120s, got {dynamic_timeout}"
177
 
178
  # Should be much better than the old calculation
179
  old_timeout = 120 + max(0, (problematic_text_length - 1000) // 1000 * 10) # 120 + 19*10 = 310
 
33
  # Test the optimized formula directly
34
  base_timeout = 60 # Optimized base timeout
35
  scaling_factor = 5 # Optimized scaling factor
36
+ max_cap = 90 # Optimized maximum cap
37
 
38
  # Test cases: (text_length, expected_timeout)
39
  test_cases = [
 
42
  (1500, 60), # 1500 chars: 60 + (500//1000)*5 = 60 + 0*5 = 60
43
  (2000, 65), # 2000 chars: 60 + (1000//1000)*5 = 60 + 1*5 = 65
44
  (5000, 80), # 5000 chars: 60 + (4000//1000)*5 = 60 + 4*5 = 80
45
+ (10000, 90), # 10000 chars: 60 + (9000//1000)*5 = 60 + 9*5 = 105, capped at 90
46
+ (50000, 90), # Very large: should be capped at 90
47
  ]
48
 
49
  for text_length, expected_timeout in test_cases:
 
72
  very_large_text_length = 100000 # 100,000 characters
73
  base_timeout = 60
74
  scaling_factor = 5
75
+ max_cap = 90 # Optimized cap
76
 
77
  # Calculate what the timeout would be without cap
78
  uncapped_timeout = base_timeout + max(0, (very_large_text_length - 1000) // 1000 * scaling_factor)
79
 
80
+ # Should be much higher than 90 without cap
81
+ assert uncapped_timeout > 90, f"Uncapped timeout should be > 90s, got {uncapped_timeout}"
82
 
83
+ # With cap, should be exactly 90
84
  capped_timeout = min(uncapped_timeout, max_cap)
85
+ assert capped_timeout == 90, f"Capped timeout should be 90s, got {capped_timeout}"
86
 
87
  def test_timeout_optimization_prevents_excessive_waits(self):
88
  """Test that optimized timeouts prevent excessive waits like 100+ seconds."""
 
97
  dynamic_timeout = base_timeout + max(0, (text_length - 1000) // 1000 * scaling_factor)
98
  dynamic_timeout = min(dynamic_timeout, max_cap)
99
 
100
+ # No timeout should exceed 90 seconds
101
+ assert dynamic_timeout <= 90, \
102
+ f"Timeout for {text_length} chars should not exceed 90s, got {dynamic_timeout}"
103
 
104
  # No timeout should be excessively long (like 100+ seconds for typical text)
105
  if text_length <= 20000: # Typical text sizes
106
+ # Allow up to 90 seconds for 20k chars (which is reasonable and capped)
107
+ assert dynamic_timeout <= 90, \
108
+ f"Timeout for typical text size {text_length} should not exceed 90s, got {dynamic_timeout}"
109
 
110
  def test_timeout_optimization_performance_improvement(self):
111
  """Test that timeout optimization provides better performance characteristics."""
 
122
  # New calculation (after optimization)
123
  new_base = 60
124
  new_scaling = 5
125
+ new_cap = 90
126
  new_timeout = new_base + max(0, (text_length - 1000) // 1000 * new_scaling) # 60 + 9*5 = 105
127
+ new_timeout = min(new_timeout, new_cap) # Capped at 90
128
 
129
  # New timeout should be significantly better
130
  assert new_timeout < old_timeout, f"New timeout {new_timeout}s should be less than old {old_timeout}s"
131
+ assert new_timeout == 90, f"New timeout should be 90s for 10k chars (capped), got {new_timeout}"
132
  assert old_timeout == 210, f"Old timeout should be 210s for 10k chars, got {old_timeout}"
133
 
134
  def test_timeout_optimization_edge_cases(self):
 
166
  dynamic_timeout = base_timeout + max(0, (problematic_text_length - 1000) // 1000 * scaling_factor)
167
  dynamic_timeout = min(dynamic_timeout, max_cap)
168
 
169
+ # Should be 60 + (19000//1000)*5 = 60 + 19*5 = 155, capped at 90
170
+ expected_timeout = 90 # Capped at 90
171
  assert dynamic_timeout == expected_timeout, \
172
  f"Problematic text length should have capped timeout {expected_timeout}s, got {dynamic_timeout}"
173
 
174
  # Should not be 100+ seconds
175
+ assert dynamic_timeout <= 90, \
176
+ f"Optimized timeout should not exceed 90s, got {dynamic_timeout}"
177
 
178
  # Should be much better than the old calculation
179
  old_timeout = 120 + max(0, (problematic_text_length - 1000) // 1000 * 10) # 120 + 19*10 = 310