Spaces:

colin730
/

SummarizerApp

Running

ming committed 24 days ago

Commit

d3f36f7

1 Parent(s): f2cff39

Fix 504 Gateway Timeout: Increase timeout values

- Increase Ollama timeout from 30s to 60s in config
- Update Nginx timeouts: connect/send 60s, read 90s
- Update dynamic timeout cap from 60s to 90s in summarizer
- Update Docker Compose and Hugging Face deployment configs
- Update timeout optimization tests to reflect new values

This should resolve the 504 Gateway Timeout errors on Hugging Face Spaces
by providing more time for CPU-bound inference in the shared environment.

Files changed (7) hide show

HUGGINGFACE_DEPLOYMENT.md +1 -1
app/core/config.py +1 -1
app/services/summarizer.py +2 -2
docker-compose.yml +1 -1
env.hf +1 -1
nginx.conf +3 -3
tests/test_timeout_optimization.py +21 -21

HUGGINGFACE_DEPLOYMENT.md CHANGED Viewed

@@ -73,7 +73,7 @@ In your Hugging Face Space settings:
 ```
 OLLAMA_MODEL=mistral:7b
 OLLAMA_HOST=http://localhost:11434
-OLLAMA_TIMEOUT=30
 SERVER_HOST=0.0.0.0
 SERVER_PORT=7860
 LOG_LEVEL=INFO

 ```
 OLLAMA_MODEL=mistral:7b
 OLLAMA_HOST=http://localhost:11434
+OLLAMA_TIMEOUT=60
 SERVER_HOST=0.0.0.0
 SERVER_PORT=7860
 LOG_LEVEL=INFO

app/core/config.py CHANGED Viewed

@@ -13,7 +13,7 @@ class Settings(BaseSettings):
     # Ollama Configuration
     ollama_model: str = Field(default="llama3.2:1b", env="OLLAMA_MODEL")
     ollama_host: str = Field(default="http://0.0.0.0:11434", env="OLLAMA_HOST")
-    ollama_timeout: int = Field(default=30, env="OLLAMA_TIMEOUT", ge=1)
     # Server Configuration
     server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")

     # Ollama Configuration
     ollama_model: str = Field(default="llama3.2:1b", env="OLLAMA_MODEL")
     ollama_host: str = Field(default="http://0.0.0.0:11434", env="OLLAMA_HOST")
+    ollama_timeout: int = Field(default=60, env="OLLAMA_TIMEOUT", ge=1)
     # Server Configuration
     server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")

app/services/summarizer.py CHANGED Viewed

@@ -55,9 +55,9 @@ class OllamaService:
         """
         start_time = time.time()
-        # Optimized timeout: base + 3s per extra 1000 chars (cap 60s)
         text_length = len(text)
-        dynamic_timeout = min(self.timeout + max(0, (text_length - 1000) // 1000 * 3), 60)
         # Preprocess text to reduce input size for faster processing
         if text_length > 4000:

         """
         start_time = time.time()
+        # Optimized timeout: base + 3s per extra 1000 chars (cap 90s)
         text_length = len(text)
+        dynamic_timeout = min(self.timeout + max(0, (text_length - 1000) // 1000 * 3), 90)
         # Preprocess text to reduce input size for faster processing
         if text_length > 4000:

docker-compose.yml CHANGED Viewed

@@ -28,7 +28,7 @@ services:
     environment:
       - OLLAMA_HOST=http://ollama:11434
       - OLLAMA_MODEL=llama3.1:8b
-      - OLLAMA_TIMEOUT=30
       - SERVER_HOST=0.0.0.0
       - SERVER_PORT=8000
       - LOG_LEVEL=INFO

     environment:
       - OLLAMA_HOST=http://ollama:11434
       - OLLAMA_MODEL=llama3.1:8b
+      - OLLAMA_TIMEOUT=60
       - SERVER_HOST=0.0.0.0
       - SERVER_PORT=8000
       - LOG_LEVEL=INFO

env.hf CHANGED Viewed

@@ -4,7 +4,7 @@
 # Ollama Configuration
 OLLAMA_MODEL=mistral:7b
 OLLAMA_HOST=http://localhost:11434
-OLLAMA_TIMEOUT=30
 # Server Configuration
 SERVER_HOST=0.0.0.0

 # Ollama Configuration
 OLLAMA_MODEL=mistral:7b
 OLLAMA_HOST=http://localhost:11434
+OLLAMA_TIMEOUT=60
 # Server Configuration
 SERVER_HOST=0.0.0.0

nginx.conf CHANGED Viewed

@@ -30,9 +30,9 @@ http {
             proxy_set_header X-Forwarded-Proto $scheme;
             # Timeouts
-            proxy_connect_timeout 30s;
-            proxy_send_timeout 30s;
-            proxy_read_timeout 30s;
         }
         # Health check endpoint (no rate limiting)

             proxy_set_header X-Forwarded-Proto $scheme;
             # Timeouts
+            proxy_connect_timeout 60s;
+            proxy_send_timeout 60s;
+            proxy_read_timeout 90s;
         }
         # Health check endpoint (no rate limiting)

tests/test_timeout_optimization.py CHANGED Viewed

@@ -33,7 +33,7 @@ class TestTimeoutOptimization:
         # Test the optimized formula directly
         base_timeout = 60  # Optimized base timeout
         scaling_factor = 5  # Optimized scaling factor
-        max_cap = 120  # Optimized maximum cap
         # Test cases: (text_length, expected_timeout)
         test_cases = [
@@ -42,8 +42,8 @@ class TestTimeoutOptimization:
             (1500, 60),     # 1500 chars: 60 + (500//1000)*5 = 60 + 0*5 = 60
             (2000, 65),     # 2000 chars: 60 + (1000//1000)*5 = 60 + 1*5 = 65
             (5000, 80),     # 5000 chars: 60 + (4000//1000)*5 = 60 + 4*5 = 80
-            (10000, 105),   # 10000 chars: 60 + (9000//1000)*5 = 60 + 9*5 = 105
-            (50000, 120),   # Very large: should be capped at 120
         ]
         for text_length, expected_timeout in test_cases:
@@ -72,17 +72,17 @@ class TestTimeoutOptimization:
         very_large_text_length = 100000  # 100,000 characters
         base_timeout = 60
         scaling_factor = 5
-        max_cap = 120  # Optimized cap
         # Calculate what the timeout would be without cap
         uncapped_timeout = base_timeout + max(0, (very_large_text_length - 1000) // 1000 * scaling_factor)
-        # Should be much higher than 120 without cap
-        assert uncapped_timeout > 120, f"Uncapped timeout should be > 120s, got {uncapped_timeout}"
-        # With cap, should be exactly 120
         capped_timeout = min(uncapped_timeout, max_cap)
-        assert capped_timeout == 120, f"Capped timeout should be 120s, got {capped_timeout}"
     def test_timeout_optimization_prevents_excessive_waits(self):
         """Test that optimized timeouts prevent excessive waits like 100+ seconds."""
@@ -97,15 +97,15 @@ class TestTimeoutOptimization:
             dynamic_timeout = base_timeout + max(0, (text_length - 1000) // 1000 * scaling_factor)
             dynamic_timeout = min(dynamic_timeout, max_cap)
-            # No timeout should exceed 120 seconds
-            assert dynamic_timeout <= 120, \
-                f"Timeout for {text_length} chars should not exceed 120s, got {dynamic_timeout}"
             # No timeout should be excessively long (like 100+ seconds for typical text)
             if text_length <= 20000:  # Typical text sizes
-                # Allow up to 120 seconds for 20k chars (which is reasonable and capped)
-                assert dynamic_timeout <= 120, \
-                    f"Timeout for typical text size {text_length} should not exceed 120s, got {dynamic_timeout}"
     def test_timeout_optimization_performance_improvement(self):
         """Test that timeout optimization provides better performance characteristics."""
@@ -122,13 +122,13 @@ class TestTimeoutOptimization:
         # New calculation (after optimization)
         new_base = 60
         new_scaling = 5
-        new_cap = 120
         new_timeout = new_base + max(0, (text_length - 1000) // 1000 * new_scaling)  # 60 + 9*5 = 105
-        new_timeout = min(new_timeout, new_cap)  # Capped at 120
         # New timeout should be significantly better
         assert new_timeout < old_timeout, f"New timeout {new_timeout}s should be less than old {old_timeout}s"
-        assert new_timeout == 105, f"New timeout should be 105s for 10k chars, got {new_timeout}"
         assert old_timeout == 210, f"Old timeout should be 210s for 10k chars, got {old_timeout}"
     def test_timeout_optimization_edge_cases(self):
@@ -166,14 +166,14 @@ class TestTimeoutOptimization:
         dynamic_timeout = base_timeout + max(0, (problematic_text_length - 1000) // 1000 * scaling_factor)
         dynamic_timeout = min(dynamic_timeout, max_cap)
-        # Should be 60 + (19000//1000)*5 = 60 + 19*5 = 155, capped at 120
-        expected_timeout = 120  # Capped at 120
         assert dynamic_timeout == expected_timeout, \
             f"Problematic text length should have capped timeout {expected_timeout}s, got {dynamic_timeout}"
         # Should not be 100+ seconds
-        assert dynamic_timeout <= 120, \
-            f"Optimized timeout should not exceed 120s, got {dynamic_timeout}"
         # Should be much better than the old calculation
         old_timeout = 120 + max(0, (problematic_text_length - 1000) // 1000 * 10)  # 120 + 19*10 = 310

         # Test the optimized formula directly
         base_timeout = 60  # Optimized base timeout
         scaling_factor = 5  # Optimized scaling factor
+        max_cap = 90  # Optimized maximum cap
         # Test cases: (text_length, expected_timeout)
         test_cases = [
             (1500, 60),     # 1500 chars: 60 + (500//1000)*5 = 60 + 0*5 = 60
             (2000, 65),     # 2000 chars: 60 + (1000//1000)*5 = 60 + 1*5 = 65
             (5000, 80),     # 5000 chars: 60 + (4000//1000)*5 = 60 + 4*5 = 80
+            (10000, 90),    # 10000 chars: 60 + (9000//1000)*5 = 60 + 9*5 = 105, capped at 90
+            (50000, 90),    # Very large: should be capped at 90
         ]
         for text_length, expected_timeout in test_cases:
         very_large_text_length = 100000  # 100,000 characters
         base_timeout = 60
         scaling_factor = 5
+        max_cap = 90  # Optimized cap
         # Calculate what the timeout would be without cap
         uncapped_timeout = base_timeout + max(0, (very_large_text_length - 1000) // 1000 * scaling_factor)
+        # Should be much higher than 90 without cap
+        assert uncapped_timeout > 90, f"Uncapped timeout should be > 90s, got {uncapped_timeout}"
+        # With cap, should be exactly 90
         capped_timeout = min(uncapped_timeout, max_cap)
+        assert capped_timeout == 90, f"Capped timeout should be 90s, got {capped_timeout}"
     def test_timeout_optimization_prevents_excessive_waits(self):
         """Test that optimized timeouts prevent excessive waits like 100+ seconds."""
             dynamic_timeout = base_timeout + max(0, (text_length - 1000) // 1000 * scaling_factor)
             dynamic_timeout = min(dynamic_timeout, max_cap)
+            # No timeout should exceed 90 seconds
+            assert dynamic_timeout <= 90, \
+                f"Timeout for {text_length} chars should not exceed 90s, got {dynamic_timeout}"
             # No timeout should be excessively long (like 100+ seconds for typical text)
             if text_length <= 20000:  # Typical text sizes
+                # Allow up to 90 seconds for 20k chars (which is reasonable and capped)
+                assert dynamic_timeout <= 90, \
+                    f"Timeout for typical text size {text_length} should not exceed 90s, got {dynamic_timeout}"
     def test_timeout_optimization_performance_improvement(self):
         """Test that timeout optimization provides better performance characteristics."""
         # New calculation (after optimization)
         new_base = 60
         new_scaling = 5
+        new_cap = 90
         new_timeout = new_base + max(0, (text_length - 1000) // 1000 * new_scaling)  # 60 + 9*5 = 105
+        new_timeout = min(new_timeout, new_cap)  # Capped at 90
         # New timeout should be significantly better
         assert new_timeout < old_timeout, f"New timeout {new_timeout}s should be less than old {old_timeout}s"
+        assert new_timeout == 90, f"New timeout should be 90s for 10k chars (capped), got {new_timeout}"
         assert old_timeout == 210, f"Old timeout should be 210s for 10k chars, got {old_timeout}"
     def test_timeout_optimization_edge_cases(self):
         dynamic_timeout = base_timeout + max(0, (problematic_text_length - 1000) // 1000 * scaling_factor)
         dynamic_timeout = min(dynamic_timeout, max_cap)
+        # Should be 60 + (19000//1000)*5 = 60 + 19*5 = 155, capped at 90
+        expected_timeout = 90  # Capped at 90
         assert dynamic_timeout == expected_timeout, \
             f"Problematic text length should have capped timeout {expected_timeout}s, got {dynamic_timeout}"
         # Should not be 100+ seconds
+        assert dynamic_timeout <= 90, \
+            f"Optimized timeout should not exceed 90s, got {dynamic_timeout}"
         # Should be much better than the old calculation
         old_timeout = 120 + max(0, (problematic_text_length - 1000) // 1000 * 10)  # 120 + 19*10 = 310