Spaces:

colin730
/

SummarizerApp

Running

ming committed on Oct 4

Commit

2c658eb

1 Parent(s): c35817c

Optimize timeout configuration for better performance

🚀 Timeout Optimizations:
- Reduced base timeout from 120s to 60s
- Reduced scaling factor from +10s to +5s per 1000 chars
- Reduced maximum cap from 300s to 120s
- Updated all tests to reflect new timeout values

✅ Benefits:
- Faster failure detection for stuck requests
- More reasonable timeout values for typical use cases
- Still provides dynamic scaling for large text
- Prevents extremely long waits (maximum cap lowered from 300 to 120 seconds)

📊 New Timeout Formula:
- Base: 60 seconds
- Scaling: +5 seconds per full 1000 characters beyond the first 1000 (e.g. 1,500 chars still uses the base timeout)
- Maximum: 120 seconds (2 minutes)
- Example: 10,000 chars = 60 + (9 * 5) = 105 seconds

This addresses the 100+ second timeout issue while maintaining
the dynamic timeout system for large text processing.

Files changed (5) hide show

app/core/config.py +1 -1
app/services/summarizer.py +3 -3
tests/test_502_prevention.py +14 -14
tests/test_api.py +1 -1
tests/test_services.py +7 -7

app/core/config.py CHANGED Viewed

@@ -13,7 +13,7 @@ class Settings(BaseSettings):
     # Ollama Configuration
     ollama_model: str = Field(default="llama3.2:latest", env="OLLAMA_MODEL")
     ollama_host: str = Field(default="http://127.0.0.1:11434", env="OLLAMA_HOST")
-    ollama_timeout: int = Field(default=30, env="OLLAMA_TIMEOUT", ge=1)
     # Server Configuration
     server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")

     # Ollama Configuration
     ollama_model: str = Field(default="llama3.2:latest", env="OLLAMA_MODEL")
     ollama_host: str = Field(default="http://127.0.0.1:11434", env="OLLAMA_HOST")
+    ollama_timeout: int = Field(default=60, env="OLLAMA_TIMEOUT", ge=1)
     # Server Configuration
     server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")

app/services/summarizer.py CHANGED Viewed

@@ -43,10 +43,10 @@ class OllamaService:
         # Calculate dynamic timeout based on text length
         # Base timeout + additional time for longer texts
         text_length = len(text)
-        dynamic_timeout = self.timeout + max(0, (text_length - 1000) // 1000 * 10)  # +10s per 1000 chars over 1000
-        # Cap the timeout at 5 minutes to prevent extremely long waits
-        dynamic_timeout = min(dynamic_timeout, 300)
         logger.info(f"Processing text of {text_length} characters with timeout of {dynamic_timeout}s")

         # Calculate dynamic timeout based on text length
         # Base timeout + additional time for longer texts
         text_length = len(text)
+        dynamic_timeout = self.timeout + max(0, (text_length - 1000) // 1000 * 5)  # +5s per 1000 chars over 1000
+        # Cap the timeout at 2 minutes to prevent extremely long waits
+        dynamic_timeout = min(dynamic_timeout, 120)
         logger.info(f"Processing text of {text_length} characters with timeout of {dynamic_timeout}s")

tests/test_502_prevention.py CHANGED Viewed

@@ -48,13 +48,13 @@ class Test502BadGatewayPrevention:
             # Verify extended timeout was used
             mock_client.assert_called_once()
             call_args = mock_client.call_args
-            expected_timeout = 120 + (10000 - 1000) // 1000 * 10  # 210 seconds
             assert call_args[1]['timeout'] == expected_timeout
     @pytest.mark.integration
     def test_very_large_text_gets_capped_timeout(self):
         """Test that very large text gets capped timeout to prevent infinite waits."""
-        very_large_text = "A" * 100000  # 100,000 characters
         with patch('httpx.AsyncClient') as mock_client:
             mock_client.return_value = StubAsyncClient(post_result=StubAsyncResponse())
@@ -64,14 +64,14 @@ class Test502BadGatewayPrevention:
                 json={"text": very_large_text, "max_tokens": 256}
             )
-            # Verify timeout is capped at 300 seconds
             mock_client.assert_called_once()
             call_args = mock_client.call_args
-            assert call_args[1]['timeout'] == 300  # Maximum cap
     @pytest.mark.integration
     def test_small_text_uses_base_timeout(self):
-        """Test that small text uses base timeout (30 seconds)."""
         small_text = "Short text"
         with patch('httpx.AsyncClient') as mock_client:
@@ -85,7 +85,7 @@ class Test502BadGatewayPrevention:
             # Verify base timeout was used
             mock_client.assert_called_once()
             call_args = mock_client.call_args
-            assert call_args[1]['timeout'] == 120  # Base timeout
     @pytest.mark.integration
     def test_medium_text_gets_appropriate_timeout(self):
@@ -103,7 +103,7 @@ class Test502BadGatewayPrevention:
             # Verify appropriate timeout was used
             mock_client.assert_called_once()
             call_args = mock_client.call_args
-            expected_timeout = 120 + (5000 - 1000) // 1000 * 10  # 160 seconds
             assert call_args[1]['timeout'] == expected_timeout
     @pytest.mark.integration
@@ -181,13 +181,13 @@ class Test502BadGatewayPrevention:
     def test_dynamic_timeout_calculation_formula(self):
         """Test the exact formula for dynamic timeout calculation."""
         test_cases = [
-            (500, 120),     # Small text: base timeout (120s)
-            (1000, 120),    # Exactly 1000 chars: base timeout (120s)
-            (1500, 120),    # 1500 chars: 120 + (500//1000)*10 = 120 + 0*10 = 120
-            (2000, 130),    # 2000 chars: 120 + (1000//1000)*10 = 120 + 1*10 = 130
-            (5000, 160),    # 5000 chars: 120 + (4000//1000)*10 = 120 + 4*10 = 160
-            (10000, 210),   # 10000 chars: 120 + (9000//1000)*10 = 120 + 9*10 = 210
-            (50000, 300),   # Very large: should be capped at 300
         ]
         for text_length, expected_timeout in test_cases:

             # Verify extended timeout was used
             mock_client.assert_called_once()
             call_args = mock_client.call_args
+            expected_timeout = 60 + (10000 - 1000) // 1000 * 5  # 105 seconds
             assert call_args[1]['timeout'] == expected_timeout
     @pytest.mark.integration
     def test_very_large_text_gets_capped_timeout(self):
         """Test that very large text gets capped timeout to prevent infinite waits."""
+        very_large_text = "A" * 100000  # 100,000 characters (should exceed 120s cap)
         with patch('httpx.AsyncClient') as mock_client:
             mock_client.return_value = StubAsyncClient(post_result=StubAsyncResponse())
                 json={"text": very_large_text, "max_tokens": 256}
             )
+            # Verify timeout is capped at 120 seconds
             mock_client.assert_called_once()
             call_args = mock_client.call_args
+            assert call_args[1]['timeout'] == 120  # Maximum cap
     @pytest.mark.integration
     def test_small_text_uses_base_timeout(self):
+        """Test that small text uses base timeout (60 seconds)."""
         small_text = "Short text"
         with patch('httpx.AsyncClient') as mock_client:
             # Verify base timeout was used
             mock_client.assert_called_once()
             call_args = mock_client.call_args
+            assert call_args[1]['timeout'] == 60  # Base timeout
     @pytest.mark.integration
     def test_medium_text_gets_appropriate_timeout(self):
             # Verify appropriate timeout was used
             mock_client.assert_called_once()
             call_args = mock_client.call_args
+            expected_timeout = 60 + (5000 - 1000) // 1000 * 5  # 80 seconds
             assert call_args[1]['timeout'] == expected_timeout
     @pytest.mark.integration
     def test_dynamic_timeout_calculation_formula(self):
         """Test the exact formula for dynamic timeout calculation."""
         test_cases = [
+            (500, 60),      # Small text: base timeout (60s)
+            (1000, 60),     # Exactly 1000 chars: base timeout (60s)
+            (1500, 60),     # 1500 chars: 60 + (500//1000)*5 = 60 + 0*5 = 60
+            (2000, 65),     # 2000 chars: 60 + (1000//1000)*5 = 60 + 1*5 = 65
+            (5000, 80),     # 5000 chars: 60 + (4000//1000)*5 = 60 + 4*5 = 80
+            (10000, 105),   # 10000 chars: 60 + (9000//1000)*5 = 60 + 9*5 = 105
+            (50000, 120),   # Very large: should be capped at 120
         ]
         for text_length, expected_timeout in test_cases:

tests/test_api.py CHANGED Viewed

@@ -95,5 +95,5 @@ def test_summarize_endpoint_large_text_handling():
         # Verify the client was called with extended timeout
         mock_client.assert_called_once()
         call_args = mock_client.call_args
-        expected_timeout = 120 + (5000 - 1000) // 1000 * 10  # 160 seconds
         assert call_args[1]['timeout'] == expected_timeout

         # Verify the client was called with extended timeout
         mock_client.assert_called_once()
         call_args = mock_client.call_args
+        expected_timeout = 60 + (5000 - 1000) // 1000 * 5  # 80 seconds
         assert call_args[1]['timeout'] == expected_timeout

tests/test_services.py CHANGED Viewed

@@ -61,7 +61,7 @@ class TestOllamaService:
         """Test service initialization."""
         assert ollama_service.base_url == "http://127.0.0.1:11434"
         assert ollama_service.model == "llama3.2:latest"  # Updated to match current config
-        assert ollama_service.timeout == 120  # Updated to match current config
     @pytest.mark.asyncio
     async def test_summarize_text_success(self, ollama_service, mock_ollama_response):
@@ -175,24 +175,24 @@ class TestOllamaService:
             # Expected: 30s base + (5000-1000)/1000 * 10 = 30 + 40 = 70s
             mock_client.assert_called_once()
             call_args = mock_client.call_args
-            expected_timeout = 120 + (5000 - 1000) // 1000 * 10  # 160 seconds
             assert call_args[1]['timeout'] == expected_timeout
     @pytest.mark.asyncio
     async def test_dynamic_timeout_maximum_cap(self, ollama_service, mock_ollama_response):
-        """Test that dynamic timeout is capped at 5 minutes (300 seconds)."""
         stub_response = StubAsyncResponse(json_data=mock_ollama_response)
-        very_large_text = "A" * 50000  # 50000 characters (should exceed 300s cap)
         with patch('httpx.AsyncClient') as mock_client:
             mock_client.return_value = StubAsyncClient(post_result=stub_response)
             result = await ollama_service.summarize_text(very_large_text)
-            # Verify the timeout is capped at 300 seconds
             mock_client.assert_called_once()
             call_args = mock_client.call_args
-            assert call_args[1]['timeout'] == 300  # Maximum cap
     @pytest.mark.asyncio
     async def test_dynamic_timeout_logging(self, ollama_service, mock_ollama_response, caplog):
@@ -214,7 +214,7 @@ class TestOllamaService:
     async def test_timeout_error_message_improvement(self, ollama_service):
         """Test that timeout errors now include dynamic timeout and text length info."""
         test_text = "A" * 2000  # 2000 characters
-        expected_timeout = 120 + (2000 - 1000) // 1000 * 10  # 130 seconds
         with patch('httpx.AsyncClient', return_value=StubAsyncClient(post_exc=httpx.TimeoutException("Timeout"))):
             with pytest.raises(httpx.HTTPError) as exc_info:

         """Test service initialization."""
         assert ollama_service.base_url == "http://127.0.0.1:11434"
         assert ollama_service.model == "llama3.2:latest"  # Updated to match current config
+        assert ollama_service.timeout == 60  # Updated to match current config
     @pytest.mark.asyncio
     async def test_summarize_text_success(self, ollama_service, mock_ollama_response):
             # Expected: 30s base + (5000-1000)/1000 * 10 = 30 + 40 = 70s
             mock_client.assert_called_once()
             call_args = mock_client.call_args
+            expected_timeout = 60 + (5000 - 1000) // 1000 * 5  # 80 seconds
             assert call_args[1]['timeout'] == expected_timeout
     @pytest.mark.asyncio
     async def test_dynamic_timeout_maximum_cap(self, ollama_service, mock_ollama_response):
+        """Test that dynamic timeout is capped at 2 minutes (120 seconds)."""
         stub_response = StubAsyncResponse(json_data=mock_ollama_response)
+        very_large_text = "A" * 50000  # 50000 characters (should exceed 120s cap)
         with patch('httpx.AsyncClient') as mock_client:
             mock_client.return_value = StubAsyncClient(post_result=stub_response)
             result = await ollama_service.summarize_text(very_large_text)
+            # Verify the timeout is capped at 120 seconds
             mock_client.assert_called_once()
             call_args = mock_client.call_args
+            assert call_args[1]['timeout'] == 120  # Maximum cap
     @pytest.mark.asyncio
     async def test_dynamic_timeout_logging(self, ollama_service, mock_ollama_response, caplog):
     async def test_timeout_error_message_improvement(self, ollama_service):
         """Test that timeout errors now include dynamic timeout and text length info."""
         test_text = "A" * 2000  # 2000 characters
+        expected_timeout = 60 + (2000 - 1000) // 1000 * 5  # 65 seconds
         with patch('httpx.AsyncClient', return_value=StubAsyncClient(post_exc=httpx.TimeoutException("Timeout"))):
             with pytest.raises(httpx.HTTPError) as exc_info: