ming committed on
Commit
2c658eb
·
1 Parent(s): c35817c

Optimize timeout configuration for better performance

Browse files

🚀 Timeout Optimizations:
- Reduced base timeout from 120s to 60s
- Reduced scaling factor from +10s to +5s per 1000 chars
- Reduced maximum cap from 300s to 120s
- Updated all tests to reflect new timeout values

✅ Benefits:
- Faster failure detection for stuck requests
- More reasonable timeout values for typical use cases
- Still provides dynamic scaling for large text
- Prevents extremely long waits (timeout is now capped at 120 seconds instead of 300)

📊 New Timeout Formula:
- Base: 60 seconds
- Scaling: +5 seconds per 1000 characters over 1000
- Maximum: 120 seconds (2 minutes)
- Example: 10,000 chars = 60 + (9 * 5) = 105 seconds

This addresses the 100+ second timeout issue while maintaining
the dynamic timeout system for large text processing.

app/core/config.py CHANGED
@@ -13,7 +13,7 @@ class Settings(BaseSettings):
13
  # Ollama Configuration
14
  ollama_model: str = Field(default="llama3.2:latest", env="OLLAMA_MODEL")
15
  ollama_host: str = Field(default="http://127.0.0.1:11434", env="OLLAMA_HOST")
16
- ollama_timeout: int = Field(default=30, env="OLLAMA_TIMEOUT", ge=1)
17
 
18
  # Server Configuration
19
  server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")
 
13
  # Ollama Configuration
14
  ollama_model: str = Field(default="llama3.2:latest", env="OLLAMA_MODEL")
15
  ollama_host: str = Field(default="http://127.0.0.1:11434", env="OLLAMA_HOST")
16
+ ollama_timeout: int = Field(default=60, env="OLLAMA_TIMEOUT", ge=1)
17
 
18
  # Server Configuration
19
  server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")
app/services/summarizer.py CHANGED
@@ -43,10 +43,10 @@ class OllamaService:
43
  # Calculate dynamic timeout based on text length
44
  # Base timeout + additional time for longer texts
45
  text_length = len(text)
46
- dynamic_timeout = self.timeout + max(0, (text_length - 1000) // 1000 * 10) # +10s per 1000 chars over 1000
47
 
48
- # Cap the timeout at 5 minutes to prevent extremely long waits
49
- dynamic_timeout = min(dynamic_timeout, 300)
50
 
51
  logger.info(f"Processing text of {text_length} characters with timeout of {dynamic_timeout}s")
52
 
 
43
  # Calculate dynamic timeout based on text length
44
  # Base timeout + additional time for longer texts
45
  text_length = len(text)
46
+ dynamic_timeout = self.timeout + max(0, (text_length - 1000) // 1000 * 5) # +5s per 1000 chars over 1000
47
 
48
+ # Cap the timeout at 2 minutes to prevent extremely long waits
49
+ dynamic_timeout = min(dynamic_timeout, 120)
50
 
51
  logger.info(f"Processing text of {text_length} characters with timeout of {dynamic_timeout}s")
52
 
tests/test_502_prevention.py CHANGED
@@ -48,13 +48,13 @@ class Test502BadGatewayPrevention:
48
  # Verify extended timeout was used
49
  mock_client.assert_called_once()
50
  call_args = mock_client.call_args
51
- expected_timeout = 120 + (10000 - 1000) // 1000 * 10 # 210 seconds
52
  assert call_args[1]['timeout'] == expected_timeout
53
 
54
  @pytest.mark.integration
55
  def test_very_large_text_gets_capped_timeout(self):
56
  """Test that very large text gets capped timeout to prevent infinite waits."""
57
- very_large_text = "A" * 100000 # 100,000 characters
58
 
59
  with patch('httpx.AsyncClient') as mock_client:
60
  mock_client.return_value = StubAsyncClient(post_result=StubAsyncResponse())
@@ -64,14 +64,14 @@ class Test502BadGatewayPrevention:
64
  json={"text": very_large_text, "max_tokens": 256}
65
  )
66
 
67
- # Verify timeout is capped at 300 seconds
68
  mock_client.assert_called_once()
69
  call_args = mock_client.call_args
70
- assert call_args[1]['timeout'] == 300 # Maximum cap
71
 
72
  @pytest.mark.integration
73
  def test_small_text_uses_base_timeout(self):
74
- """Test that small text uses base timeout (30 seconds)."""
75
  small_text = "Short text"
76
 
77
  with patch('httpx.AsyncClient') as mock_client:
@@ -85,7 +85,7 @@ class Test502BadGatewayPrevention:
85
  # Verify base timeout was used
86
  mock_client.assert_called_once()
87
  call_args = mock_client.call_args
88
- assert call_args[1]['timeout'] == 120 # Base timeout
89
 
90
  @pytest.mark.integration
91
  def test_medium_text_gets_appropriate_timeout(self):
@@ -103,7 +103,7 @@ class Test502BadGatewayPrevention:
103
  # Verify appropriate timeout was used
104
  mock_client.assert_called_once()
105
  call_args = mock_client.call_args
106
- expected_timeout = 120 + (5000 - 1000) // 1000 * 10 # 160 seconds
107
  assert call_args[1]['timeout'] == expected_timeout
108
 
109
  @pytest.mark.integration
@@ -181,13 +181,13 @@ class Test502BadGatewayPrevention:
181
  def test_dynamic_timeout_calculation_formula(self):
182
  """Test the exact formula for dynamic timeout calculation."""
183
  test_cases = [
184
- (500, 120), # Small text: base timeout (120s)
185
- (1000, 120), # Exactly 1000 chars: base timeout (120s)
186
- (1500, 120), # 1500 chars: 120 + (500//1000)*10 = 120 + 0*10 = 120
187
- (2000, 130), # 2000 chars: 120 + (1000//1000)*10 = 120 + 1*10 = 130
188
- (5000, 160), # 5000 chars: 120 + (4000//1000)*10 = 120 + 4*10 = 160
189
- (10000, 210), # 10000 chars: 120 + (9000//1000)*10 = 120 + 9*10 = 210
190
- (50000, 300), # Very large: should be capped at 300
191
  ]
192
 
193
  for text_length, expected_timeout in test_cases:
 
48
  # Verify extended timeout was used
49
  mock_client.assert_called_once()
50
  call_args = mock_client.call_args
51
+ expected_timeout = 60 + (10000 - 1000) // 1000 * 5 # 105 seconds
52
  assert call_args[1]['timeout'] == expected_timeout
53
 
54
  @pytest.mark.integration
55
  def test_very_large_text_gets_capped_timeout(self):
56
  """Test that very large text gets capped timeout to prevent infinite waits."""
57
+ very_large_text = "A" * 100000 # 100,000 characters (should exceed 120s cap)
58
 
59
  with patch('httpx.AsyncClient') as mock_client:
60
  mock_client.return_value = StubAsyncClient(post_result=StubAsyncResponse())
 
64
  json={"text": very_large_text, "max_tokens": 256}
65
  )
66
 
67
+ # Verify timeout is capped at 120 seconds
68
  mock_client.assert_called_once()
69
  call_args = mock_client.call_args
70
+ assert call_args[1]['timeout'] == 120 # Maximum cap
71
 
72
  @pytest.mark.integration
73
  def test_small_text_uses_base_timeout(self):
74
+ """Test that small text uses base timeout (60 seconds)."""
75
  small_text = "Short text"
76
 
77
  with patch('httpx.AsyncClient') as mock_client:
 
85
  # Verify base timeout was used
86
  mock_client.assert_called_once()
87
  call_args = mock_client.call_args
88
+ assert call_args[1]['timeout'] == 60 # Base timeout
89
 
90
  @pytest.mark.integration
91
  def test_medium_text_gets_appropriate_timeout(self):
 
103
  # Verify appropriate timeout was used
104
  mock_client.assert_called_once()
105
  call_args = mock_client.call_args
106
+ expected_timeout = 60 + (5000 - 1000) // 1000 * 5 # 80 seconds
107
  assert call_args[1]['timeout'] == expected_timeout
108
 
109
  @pytest.mark.integration
 
181
  def test_dynamic_timeout_calculation_formula(self):
182
  """Test the exact formula for dynamic timeout calculation."""
183
  test_cases = [
184
+ (500, 60), # Small text: base timeout (60s)
185
+ (1000, 60), # Exactly 1000 chars: base timeout (60s)
186
+ (1500, 60), # 1500 chars: 60 + (500//1000)*5 = 60 + 0*5 = 60
187
+ (2000, 65), # 2000 chars: 60 + (1000//1000)*5 = 60 + 1*5 = 65
188
+ (5000, 80), # 5000 chars: 60 + (4000//1000)*5 = 60 + 4*5 = 80
189
+ (10000, 105), # 10000 chars: 60 + (9000//1000)*5 = 60 + 9*5 = 105
190
+ (50000, 120), # Very large: should be capped at 120
191
  ]
192
 
193
  for text_length, expected_timeout in test_cases:
tests/test_api.py CHANGED
@@ -95,5 +95,5 @@ def test_summarize_endpoint_large_text_handling():
95
  # Verify the client was called with extended timeout
96
  mock_client.assert_called_once()
97
  call_args = mock_client.call_args
98
- expected_timeout = 120 + (5000 - 1000) // 1000 * 10 # 160 seconds
99
  assert call_args[1]['timeout'] == expected_timeout
 
95
  # Verify the client was called with extended timeout
96
  mock_client.assert_called_once()
97
  call_args = mock_client.call_args
98
+ expected_timeout = 60 + (5000 - 1000) // 1000 * 5 # 80 seconds
99
  assert call_args[1]['timeout'] == expected_timeout
tests/test_services.py CHANGED
@@ -61,7 +61,7 @@ class TestOllamaService:
61
  """Test service initialization."""
62
  assert ollama_service.base_url == "http://127.0.0.1:11434"
63
  assert ollama_service.model == "llama3.2:latest" # Updated to match current config
64
- assert ollama_service.timeout == 120 # Updated to match current config
65
 
66
  @pytest.mark.asyncio
67
  async def test_summarize_text_success(self, ollama_service, mock_ollama_response):
@@ -175,24 +175,24 @@ class TestOllamaService:
175
  # Expected: 30s base + (5000-1000)/1000 * 10 = 30 + 40 = 70s
176
  mock_client.assert_called_once()
177
  call_args = mock_client.call_args
178
- expected_timeout = 120 + (5000 - 1000) // 1000 * 10 # 160 seconds
179
  assert call_args[1]['timeout'] == expected_timeout
180
 
181
  @pytest.mark.asyncio
182
  async def test_dynamic_timeout_maximum_cap(self, ollama_service, mock_ollama_response):
183
- """Test that dynamic timeout is capped at 5 minutes (300 seconds)."""
184
  stub_response = StubAsyncResponse(json_data=mock_ollama_response)
185
- very_large_text = "A" * 50000 # 50000 characters (should exceed 300s cap)
186
 
187
  with patch('httpx.AsyncClient') as mock_client:
188
  mock_client.return_value = StubAsyncClient(post_result=stub_response)
189
 
190
  result = await ollama_service.summarize_text(very_large_text)
191
 
192
- # Verify the timeout is capped at 300 seconds
193
  mock_client.assert_called_once()
194
  call_args = mock_client.call_args
195
- assert call_args[1]['timeout'] == 300 # Maximum cap
196
 
197
  @pytest.mark.asyncio
198
  async def test_dynamic_timeout_logging(self, ollama_service, mock_ollama_response, caplog):
@@ -214,7 +214,7 @@ class TestOllamaService:
214
  async def test_timeout_error_message_improvement(self, ollama_service):
215
  """Test that timeout errors now include dynamic timeout and text length info."""
216
  test_text = "A" * 2000 # 2000 characters
217
- expected_timeout = 120 + (2000 - 1000) // 1000 * 10 # 130 seconds
218
 
219
  with patch('httpx.AsyncClient', return_value=StubAsyncClient(post_exc=httpx.TimeoutException("Timeout"))):
220
  with pytest.raises(httpx.HTTPError) as exc_info:
 
61
  """Test service initialization."""
62
  assert ollama_service.base_url == "http://127.0.0.1:11434"
63
  assert ollama_service.model == "llama3.2:latest" # Updated to match current config
64
+ assert ollama_service.timeout == 60 # Updated to match current config
65
 
66
  @pytest.mark.asyncio
67
  async def test_summarize_text_success(self, ollama_service, mock_ollama_response):
 
175
  # Expected: 30s base + (5000-1000)/1000 * 10 = 30 + 40 = 70s
176
  mock_client.assert_called_once()
177
  call_args = mock_client.call_args
178
+ expected_timeout = 60 + (5000 - 1000) // 1000 * 5 # 80 seconds
179
  assert call_args[1]['timeout'] == expected_timeout
180
 
181
  @pytest.mark.asyncio
182
  async def test_dynamic_timeout_maximum_cap(self, ollama_service, mock_ollama_response):
183
+ """Test that dynamic timeout is capped at 2 minutes (120 seconds)."""
184
  stub_response = StubAsyncResponse(json_data=mock_ollama_response)
185
+ very_large_text = "A" * 50000 # 50000 characters (should exceed 120s cap)
186
 
187
  with patch('httpx.AsyncClient') as mock_client:
188
  mock_client.return_value = StubAsyncClient(post_result=stub_response)
189
 
190
  result = await ollama_service.summarize_text(very_large_text)
191
 
192
+ # Verify the timeout is capped at 120 seconds
193
  mock_client.assert_called_once()
194
  call_args = mock_client.call_args
195
+ assert call_args[1]['timeout'] == 120 # Maximum cap
196
 
197
  @pytest.mark.asyncio
198
  async def test_dynamic_timeout_logging(self, ollama_service, mock_ollama_response, caplog):
 
214
  async def test_timeout_error_message_improvement(self, ollama_service):
215
  """Test that timeout errors now include dynamic timeout and text length info."""
216
  test_text = "A" * 2000 # 2000 characters
217
+ expected_timeout = 60 + (2000 - 1000) // 1000 * 5 # 65 seconds
218
 
219
  with patch('httpx.AsyncClient', return_value=StubAsyncClient(post_exc=httpx.TimeoutException("Timeout"))):
220
  with pytest.raises(httpx.HTTPError) as exc_info: