Spaces:
Running
Running
ming
committed on
Commit
·
2c658eb
1
Parent(s):
c35817c
Optimize timeout configuration for better performance
Browse files
Timeout Optimizations:
- Reduced base timeout from 120s to 60s
- Reduced scaling factor from +10s to +5s per 1000 chars
- Reduced maximum cap from 300s to 120s
- Updated all tests to reflect new timeout values
Benefits:
- Faster failure detection for stuck requests
- More reasonable timeout values for typical use cases
- Still provides dynamic scaling for large text
- Prevents extremely long waits (the previous cap allowed up to 300 seconds)
New Timeout Formula:
- Base: 60 seconds
- Scaling: +5 seconds per 1000 characters over 1000
- Maximum: 120 seconds (2 minutes)
- Example: 10,000 chars = 60 + (9 * 5) = 105 seconds
This addresses the 100+ second timeout issue while maintaining
the dynamic timeout system for large text processing.
- app/core/config.py +1 -1
- app/services/summarizer.py +3 -3
- tests/test_502_prevention.py +14 -14
- tests/test_api.py +1 -1
- tests/test_services.py +7 -7
app/core/config.py
CHANGED
|
@@ -13,7 +13,7 @@ class Settings(BaseSettings):
|
|
| 13 |
# Ollama Configuration
|
| 14 |
ollama_model: str = Field(default="llama3.2:latest", env="OLLAMA_MODEL")
|
| 15 |
ollama_host: str = Field(default="http://127.0.0.1:11434", env="OLLAMA_HOST")
|
| 16 |
-
ollama_timeout: int = Field(default=
|
| 17 |
|
| 18 |
# Server Configuration
|
| 19 |
server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")
|
|
|
|
| 13 |
# Ollama Configuration
|
| 14 |
ollama_model: str = Field(default="llama3.2:latest", env="OLLAMA_MODEL")
|
| 15 |
ollama_host: str = Field(default="http://127.0.0.1:11434", env="OLLAMA_HOST")
|
| 16 |
+
ollama_timeout: int = Field(default=60, env="OLLAMA_TIMEOUT", ge=1)
|
| 17 |
|
| 18 |
# Server Configuration
|
| 19 |
server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")
|
app/services/summarizer.py
CHANGED
|
@@ -43,10 +43,10 @@ class OllamaService:
|
|
| 43 |
# Calculate dynamic timeout based on text length
|
| 44 |
# Base timeout + additional time for longer texts
|
| 45 |
text_length = len(text)
|
| 46 |
-
dynamic_timeout = self.timeout + max(0, (text_length - 1000) // 1000 *
|
| 47 |
|
| 48 |
-
# Cap the timeout at
|
| 49 |
-
dynamic_timeout = min(dynamic_timeout,
|
| 50 |
|
| 51 |
logger.info(f"Processing text of {text_length} characters with timeout of {dynamic_timeout}s")
|
| 52 |
|
|
|
|
| 43 |
# Calculate dynamic timeout based on text length
|
| 44 |
# Base timeout + additional time for longer texts
|
| 45 |
text_length = len(text)
|
| 46 |
+
dynamic_timeout = self.timeout + max(0, (text_length - 1000) // 1000 * 5) # +5s per 1000 chars over 1000
|
| 47 |
|
| 48 |
+
# Cap the timeout at 2 minutes to prevent extremely long waits
|
| 49 |
+
dynamic_timeout = min(dynamic_timeout, 120)
|
| 50 |
|
| 51 |
logger.info(f"Processing text of {text_length} characters with timeout of {dynamic_timeout}s")
|
| 52 |
|
tests/test_502_prevention.py
CHANGED
|
@@ -48,13 +48,13 @@ class Test502BadGatewayPrevention:
|
|
| 48 |
# Verify extended timeout was used
|
| 49 |
mock_client.assert_called_once()
|
| 50 |
call_args = mock_client.call_args
|
| 51 |
-
expected_timeout =
|
| 52 |
assert call_args[1]['timeout'] == expected_timeout
|
| 53 |
|
| 54 |
@pytest.mark.integration
|
| 55 |
def test_very_large_text_gets_capped_timeout(self):
|
| 56 |
"""Test that very large text gets capped timeout to prevent infinite waits."""
|
| 57 |
-
very_large_text = "A" * 100000 # 100,000 characters
|
| 58 |
|
| 59 |
with patch('httpx.AsyncClient') as mock_client:
|
| 60 |
mock_client.return_value = StubAsyncClient(post_result=StubAsyncResponse())
|
|
@@ -64,14 +64,14 @@ class Test502BadGatewayPrevention:
|
|
| 64 |
json={"text": very_large_text, "max_tokens": 256}
|
| 65 |
)
|
| 66 |
|
| 67 |
-
# Verify timeout is capped at
|
| 68 |
mock_client.assert_called_once()
|
| 69 |
call_args = mock_client.call_args
|
| 70 |
-
assert call_args[1]['timeout'] ==
|
| 71 |
|
| 72 |
@pytest.mark.integration
|
| 73 |
def test_small_text_uses_base_timeout(self):
|
| 74 |
-
"""Test that small text uses base timeout (
|
| 75 |
small_text = "Short text"
|
| 76 |
|
| 77 |
with patch('httpx.AsyncClient') as mock_client:
|
|
@@ -85,7 +85,7 @@ class Test502BadGatewayPrevention:
|
|
| 85 |
# Verify base timeout was used
|
| 86 |
mock_client.assert_called_once()
|
| 87 |
call_args = mock_client.call_args
|
| 88 |
-
assert call_args[1]['timeout'] ==
|
| 89 |
|
| 90 |
@pytest.mark.integration
|
| 91 |
def test_medium_text_gets_appropriate_timeout(self):
|
|
@@ -103,7 +103,7 @@ class Test502BadGatewayPrevention:
|
|
| 103 |
# Verify appropriate timeout was used
|
| 104 |
mock_client.assert_called_once()
|
| 105 |
call_args = mock_client.call_args
|
| 106 |
-
expected_timeout =
|
| 107 |
assert call_args[1]['timeout'] == expected_timeout
|
| 108 |
|
| 109 |
@pytest.mark.integration
|
|
@@ -181,13 +181,13 @@ class Test502BadGatewayPrevention:
|
|
| 181 |
def test_dynamic_timeout_calculation_formula(self):
|
| 182 |
"""Test the exact formula for dynamic timeout calculation."""
|
| 183 |
test_cases = [
|
| 184 |
-
(500,
|
| 185 |
-
(1000,
|
| 186 |
-
(1500,
|
| 187 |
-
(2000,
|
| 188 |
-
(5000,
|
| 189 |
-
(10000,
|
| 190 |
-
(50000,
|
| 191 |
]
|
| 192 |
|
| 193 |
for text_length, expected_timeout in test_cases:
|
|
|
|
| 48 |
# Verify extended timeout was used
|
| 49 |
mock_client.assert_called_once()
|
| 50 |
call_args = mock_client.call_args
|
| 51 |
+
expected_timeout = 60 + (10000 - 1000) // 1000 * 5 # 105 seconds
|
| 52 |
assert call_args[1]['timeout'] == expected_timeout
|
| 53 |
|
| 54 |
@pytest.mark.integration
|
| 55 |
def test_very_large_text_gets_capped_timeout(self):
|
| 56 |
"""Test that very large text gets capped timeout to prevent infinite waits."""
|
| 57 |
+
very_large_text = "A" * 100000 # 100,000 characters (should exceed 120s cap)
|
| 58 |
|
| 59 |
with patch('httpx.AsyncClient') as mock_client:
|
| 60 |
mock_client.return_value = StubAsyncClient(post_result=StubAsyncResponse())
|
|
|
|
| 64 |
json={"text": very_large_text, "max_tokens": 256}
|
| 65 |
)
|
| 66 |
|
| 67 |
+
# Verify timeout is capped at 120 seconds
|
| 68 |
mock_client.assert_called_once()
|
| 69 |
call_args = mock_client.call_args
|
| 70 |
+
assert call_args[1]['timeout'] == 120 # Maximum cap
|
| 71 |
|
| 72 |
@pytest.mark.integration
|
| 73 |
def test_small_text_uses_base_timeout(self):
|
| 74 |
+
"""Test that small text uses base timeout (60 seconds)."""
|
| 75 |
small_text = "Short text"
|
| 76 |
|
| 77 |
with patch('httpx.AsyncClient') as mock_client:
|
|
|
|
| 85 |
# Verify base timeout was used
|
| 86 |
mock_client.assert_called_once()
|
| 87 |
call_args = mock_client.call_args
|
| 88 |
+
assert call_args[1]['timeout'] == 60 # Base timeout
|
| 89 |
|
| 90 |
@pytest.mark.integration
|
| 91 |
def test_medium_text_gets_appropriate_timeout(self):
|
|
|
|
| 103 |
# Verify appropriate timeout was used
|
| 104 |
mock_client.assert_called_once()
|
| 105 |
call_args = mock_client.call_args
|
| 106 |
+
expected_timeout = 60 + (5000 - 1000) // 1000 * 5 # 80 seconds
|
| 107 |
assert call_args[1]['timeout'] == expected_timeout
|
| 108 |
|
| 109 |
@pytest.mark.integration
|
|
|
|
| 181 |
def test_dynamic_timeout_calculation_formula(self):
|
| 182 |
"""Test the exact formula for dynamic timeout calculation."""
|
| 183 |
test_cases = [
|
| 184 |
+
(500, 60), # Small text: base timeout (60s)
|
| 185 |
+
(1000, 60), # Exactly 1000 chars: base timeout (60s)
|
| 186 |
+
(1500, 60), # 1500 chars: 60 + (500//1000)*5 = 60 + 0*5 = 60
|
| 187 |
+
(2000, 65), # 2000 chars: 60 + (1000//1000)*5 = 60 + 1*5 = 65
|
| 188 |
+
(5000, 80), # 5000 chars: 60 + (4000//1000)*5 = 60 + 4*5 = 80
|
| 189 |
+
(10000, 105), # 10000 chars: 60 + (9000//1000)*5 = 60 + 9*5 = 105
|
| 190 |
+
(50000, 120), # Very large: should be capped at 120
|
| 191 |
]
|
| 192 |
|
| 193 |
for text_length, expected_timeout in test_cases:
|
tests/test_api.py
CHANGED
|
@@ -95,5 +95,5 @@ def test_summarize_endpoint_large_text_handling():
|
|
| 95 |
# Verify the client was called with extended timeout
|
| 96 |
mock_client.assert_called_once()
|
| 97 |
call_args = mock_client.call_args
|
| 98 |
-
expected_timeout =
|
| 99 |
assert call_args[1]['timeout'] == expected_timeout
|
|
|
|
| 95 |
# Verify the client was called with extended timeout
|
| 96 |
mock_client.assert_called_once()
|
| 97 |
call_args = mock_client.call_args
|
| 98 |
+
expected_timeout = 60 + (5000 - 1000) // 1000 * 5 # 80 seconds
|
| 99 |
assert call_args[1]['timeout'] == expected_timeout
|
tests/test_services.py
CHANGED
|
@@ -61,7 +61,7 @@ class TestOllamaService:
|
|
| 61 |
"""Test service initialization."""
|
| 62 |
assert ollama_service.base_url == "http://127.0.0.1:11434"
|
| 63 |
assert ollama_service.model == "llama3.2:latest" # Updated to match current config
|
| 64 |
-
assert ollama_service.timeout ==
|
| 65 |
|
| 66 |
@pytest.mark.asyncio
|
| 67 |
async def test_summarize_text_success(self, ollama_service, mock_ollama_response):
|
|
@@ -175,24 +175,24 @@ class TestOllamaService:
|
|
| 175 |
# Expected: 30s base + (5000-1000)/1000 * 10 = 30 + 40 = 70s
|
| 176 |
mock_client.assert_called_once()
|
| 177 |
call_args = mock_client.call_args
|
| 178 |
-
expected_timeout =
|
| 179 |
assert call_args[1]['timeout'] == expected_timeout
|
| 180 |
|
| 181 |
@pytest.mark.asyncio
|
| 182 |
async def test_dynamic_timeout_maximum_cap(self, ollama_service, mock_ollama_response):
|
| 183 |
-
"""Test that dynamic timeout is capped at
|
| 184 |
stub_response = StubAsyncResponse(json_data=mock_ollama_response)
|
| 185 |
-
very_large_text = "A" * 50000 # 50000 characters (should exceed
|
| 186 |
|
| 187 |
with patch('httpx.AsyncClient') as mock_client:
|
| 188 |
mock_client.return_value = StubAsyncClient(post_result=stub_response)
|
| 189 |
|
| 190 |
result = await ollama_service.summarize_text(very_large_text)
|
| 191 |
|
| 192 |
-
# Verify the timeout is capped at
|
| 193 |
mock_client.assert_called_once()
|
| 194 |
call_args = mock_client.call_args
|
| 195 |
-
assert call_args[1]['timeout'] ==
|
| 196 |
|
| 197 |
@pytest.mark.asyncio
|
| 198 |
async def test_dynamic_timeout_logging(self, ollama_service, mock_ollama_response, caplog):
|
|
@@ -214,7 +214,7 @@ class TestOllamaService:
|
|
| 214 |
async def test_timeout_error_message_improvement(self, ollama_service):
|
| 215 |
"""Test that timeout errors now include dynamic timeout and text length info."""
|
| 216 |
test_text = "A" * 2000 # 2000 characters
|
| 217 |
-
expected_timeout =
|
| 218 |
|
| 219 |
with patch('httpx.AsyncClient', return_value=StubAsyncClient(post_exc=httpx.TimeoutException("Timeout"))):
|
| 220 |
with pytest.raises(httpx.HTTPError) as exc_info:
|
|
|
|
| 61 |
"""Test service initialization."""
|
| 62 |
assert ollama_service.base_url == "http://127.0.0.1:11434"
|
| 63 |
assert ollama_service.model == "llama3.2:latest" # Updated to match current config
|
| 64 |
+
assert ollama_service.timeout == 60 # Updated to match current config
|
| 65 |
|
| 66 |
@pytest.mark.asyncio
|
| 67 |
async def test_summarize_text_success(self, ollama_service, mock_ollama_response):
|
|
|
|
| 175 |
# Expected: 60s base + (5000-1000)//1000 * 5 = 60 + 20 = 80s
|
| 176 |
mock_client.assert_called_once()
|
| 177 |
call_args = mock_client.call_args
|
| 178 |
+
expected_timeout = 60 + (5000 - 1000) // 1000 * 5 # 80 seconds
|
| 179 |
assert call_args[1]['timeout'] == expected_timeout
|
| 180 |
|
| 181 |
@pytest.mark.asyncio
|
| 182 |
async def test_dynamic_timeout_maximum_cap(self, ollama_service, mock_ollama_response):
|
| 183 |
+
"""Test that dynamic timeout is capped at 2 minutes (120 seconds)."""
|
| 184 |
stub_response = StubAsyncResponse(json_data=mock_ollama_response)
|
| 185 |
+
very_large_text = "A" * 50000 # 50000 characters (should exceed 120s cap)
|
| 186 |
|
| 187 |
with patch('httpx.AsyncClient') as mock_client:
|
| 188 |
mock_client.return_value = StubAsyncClient(post_result=stub_response)
|
| 189 |
|
| 190 |
result = await ollama_service.summarize_text(very_large_text)
|
| 191 |
|
| 192 |
+
# Verify the timeout is capped at 120 seconds
|
| 193 |
mock_client.assert_called_once()
|
| 194 |
call_args = mock_client.call_args
|
| 195 |
+
assert call_args[1]['timeout'] == 120 # Maximum cap
|
| 196 |
|
| 197 |
@pytest.mark.asyncio
|
| 198 |
async def test_dynamic_timeout_logging(self, ollama_service, mock_ollama_response, caplog):
|
|
|
|
| 214 |
async def test_timeout_error_message_improvement(self, ollama_service):
|
| 215 |
"""Test that timeout errors now include dynamic timeout and text length info."""
|
| 216 |
test_text = "A" * 2000 # 2000 characters
|
| 217 |
+
expected_timeout = 60 + (2000 - 1000) // 1000 * 5 # 65 seconds
|
| 218 |
|
| 219 |
with patch('httpx.AsyncClient', return_value=StubAsyncClient(post_exc=httpx.TimeoutException("Timeout"))):
|
| 220 |
with pytest.raises(httpx.HTTPError) as exc_info:
|