Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	
		ming
		
	committed on
		
		
					Commit 
							
							·
						
						d3f36f7
	
1
								Parent(s):
							
							f2cff39
								
Fix 504 Gateway Timeout: Increase timeout values
Browse files
- Increase Ollama timeout from 30s to 60s in config
- Update Nginx timeouts: connect/send 60s, read 90s
- Update dynamic timeout cap from 60s to 90s in summarizer
- Update Docker Compose and Hugging Face deployment configs
- Update timeout optimization tests to reflect new values
This should resolve the 504 Gateway Timeout errors on Hugging Face Spaces
by providing more time for CPU-bound inference in the shared environment.
- HUGGINGFACE_DEPLOYMENT.md +1 -1
- app/core/config.py +1 -1
- app/services/summarizer.py +2 -2
- docker-compose.yml +1 -1
- env.hf +1 -1
- nginx.conf +3 -3
- tests/test_timeout_optimization.py +21 -21
    	
        HUGGINGFACE_DEPLOYMENT.md
    CHANGED
    
    | @@ -73,7 +73,7 @@ In your Hugging Face Space settings: | |
| 73 | 
             
            ```
         | 
| 74 | 
             
            OLLAMA_MODEL=mistral:7b
         | 
| 75 | 
             
            OLLAMA_HOST=http://localhost:11434
         | 
| 76 | 
            -
            OLLAMA_TIMEOUT= | 
| 77 | 
             
            SERVER_HOST=0.0.0.0
         | 
| 78 | 
             
            SERVER_PORT=7860
         | 
| 79 | 
             
            LOG_LEVEL=INFO
         | 
|  | |
| 73 | 
             
            ```
         | 
| 74 | 
             
            OLLAMA_MODEL=mistral:7b
         | 
| 75 | 
             
            OLLAMA_HOST=http://localhost:11434
         | 
| 76 | 
            +
            OLLAMA_TIMEOUT=60
         | 
| 77 | 
             
            SERVER_HOST=0.0.0.0
         | 
| 78 | 
             
            SERVER_PORT=7860
         | 
| 79 | 
             
            LOG_LEVEL=INFO
         | 
    	
        app/core/config.py
    CHANGED
    
    | @@ -13,7 +13,7 @@ class Settings(BaseSettings): | |
| 13 | 
             
                # Ollama Configuration
         | 
| 14 | 
             
                ollama_model: str = Field(default="llama3.2:1b", env="OLLAMA_MODEL")
         | 
| 15 | 
             
                ollama_host: str = Field(default="http://0.0.0.0:11434", env="OLLAMA_HOST")
         | 
| 16 | 
            -
                ollama_timeout: int = Field(default= | 
| 17 |  | 
| 18 | 
             
                # Server Configuration
         | 
| 19 | 
             
                server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")
         | 
|  | |
| 13 | 
             
                # Ollama Configuration
         | 
| 14 | 
             
                ollama_model: str = Field(default="llama3.2:1b", env="OLLAMA_MODEL")
         | 
| 15 | 
             
                ollama_host: str = Field(default="http://0.0.0.0:11434", env="OLLAMA_HOST")
         | 
| 16 | 
            +
                ollama_timeout: int = Field(default=60, env="OLLAMA_TIMEOUT", ge=1)
         | 
| 17 |  | 
| 18 | 
             
                # Server Configuration
         | 
| 19 | 
             
                server_host: str = Field(default="127.0.0.1", env="SERVER_HOST")
         | 
    	
        app/services/summarizer.py
    CHANGED
    
    | @@ -55,9 +55,9 @@ class OllamaService: | |
| 55 | 
             
                    """
         | 
| 56 | 
             
                    start_time = time.time()
         | 
| 57 |  | 
| 58 | 
            -
                    # Optimized timeout: base + 3s per extra 1000 chars (cap  | 
| 59 | 
             
                    text_length = len(text)
         | 
| 60 | 
            -
                    dynamic_timeout = min(self.timeout + max(0, (text_length - 1000) // 1000 * 3),  | 
| 61 |  | 
| 62 | 
             
                    # Preprocess text to reduce input size for faster processing
         | 
| 63 | 
             
                    if text_length > 4000:
         | 
|  | |
| 55 | 
             
                    """
         | 
| 56 | 
             
                    start_time = time.time()
         | 
| 57 |  | 
| 58 | 
            +
                    # Optimized timeout: base + 3s per extra 1000 chars (cap 90s)
         | 
| 59 | 
             
                    text_length = len(text)
         | 
| 60 | 
            +
                    dynamic_timeout = min(self.timeout + max(0, (text_length - 1000) // 1000 * 3), 90)
         | 
| 61 |  | 
| 62 | 
             
                    # Preprocess text to reduce input size for faster processing
         | 
| 63 | 
             
                    if text_length > 4000:
         | 
    	
        docker-compose.yml
    CHANGED
    
    | @@ -28,7 +28,7 @@ services: | |
| 28 | 
             
                environment:
         | 
| 29 | 
             
                  - OLLAMA_HOST=http://ollama:11434
         | 
| 30 | 
             
                  - OLLAMA_MODEL=llama3.1:8b
         | 
| 31 | 
            -
                  - OLLAMA_TIMEOUT= | 
| 32 | 
             
                  - SERVER_HOST=0.0.0.0
         | 
| 33 | 
             
                  - SERVER_PORT=8000
         | 
| 34 | 
             
                  - LOG_LEVEL=INFO
         | 
|  | |
| 28 | 
             
                environment:
         | 
| 29 | 
             
                  - OLLAMA_HOST=http://ollama:11434
         | 
| 30 | 
             
                  - OLLAMA_MODEL=llama3.1:8b
         | 
| 31 | 
            +
                  - OLLAMA_TIMEOUT=60
         | 
| 32 | 
             
                  - SERVER_HOST=0.0.0.0
         | 
| 33 | 
             
                  - SERVER_PORT=8000
         | 
| 34 | 
             
                  - LOG_LEVEL=INFO
         | 
    	
        env.hf
    CHANGED
    
    | @@ -4,7 +4,7 @@ | |
| 4 | 
             
            # Ollama Configuration
         | 
| 5 | 
             
            OLLAMA_MODEL=mistral:7b
         | 
| 6 | 
             
            OLLAMA_HOST=http://localhost:11434
         | 
| 7 | 
            -
            OLLAMA_TIMEOUT= | 
| 8 |  | 
| 9 | 
             
            # Server Configuration
         | 
| 10 | 
             
            SERVER_HOST=0.0.0.0
         | 
|  | |
| 4 | 
             
            # Ollama Configuration
         | 
| 5 | 
             
            OLLAMA_MODEL=mistral:7b
         | 
| 6 | 
             
            OLLAMA_HOST=http://localhost:11434
         | 
| 7 | 
            +
            OLLAMA_TIMEOUT=60
         | 
| 8 |  | 
| 9 | 
             
            # Server Configuration
         | 
| 10 | 
             
            SERVER_HOST=0.0.0.0
         | 
    	
        nginx.conf
    CHANGED
    
    | @@ -30,9 +30,9 @@ http { | |
| 30 | 
             
                        proxy_set_header X-Forwarded-Proto $scheme;
         | 
| 31 |  | 
| 32 | 
             
                        # Timeouts
         | 
| 33 | 
            -
                        proxy_connect_timeout  | 
| 34 | 
            -
                        proxy_send_timeout  | 
| 35 | 
            -
                        proxy_read_timeout  | 
| 36 | 
             
                    }
         | 
| 37 |  | 
| 38 | 
             
                    # Health check endpoint (no rate limiting)
         | 
|  | |
| 30 | 
             
                        proxy_set_header X-Forwarded-Proto $scheme;
         | 
| 31 |  | 
| 32 | 
             
                        # Timeouts
         | 
| 33 | 
            +
                        proxy_connect_timeout 60s;
         | 
| 34 | 
            +
                        proxy_send_timeout 60s;
         | 
| 35 | 
            +
                        proxy_read_timeout 90s;
         | 
| 36 | 
             
                    }
         | 
| 37 |  | 
| 38 | 
             
                    # Health check endpoint (no rate limiting)
         | 
    	
        tests/test_timeout_optimization.py
    CHANGED
    
    | @@ -33,7 +33,7 @@ class TestTimeoutOptimization: | |
| 33 | 
             
                    # Test the optimized formula directly
         | 
| 34 | 
             
                    base_timeout = 60  # Optimized base timeout
         | 
| 35 | 
             
                    scaling_factor = 5  # Optimized scaling factor
         | 
| 36 | 
            -
                    max_cap =  | 
| 37 |  | 
| 38 | 
             
                    # Test cases: (text_length, expected_timeout)
         | 
| 39 | 
             
                    test_cases = [
         | 
| @@ -42,8 +42,8 @@ class TestTimeoutOptimization: | |
| 42 | 
             
                        (1500, 60),     # 1500 chars: 60 + (500//1000)*5 = 60 + 0*5 = 60
         | 
| 43 | 
             
                        (2000, 65),     # 2000 chars: 60 + (1000//1000)*5 = 60 + 1*5 = 65
         | 
| 44 | 
             
                        (5000, 80),     # 5000 chars: 60 + (4000//1000)*5 = 60 + 4*5 = 80
         | 
| 45 | 
            -
                        (10000,  | 
| 46 | 
            -
                        (50000,  | 
| 47 | 
             
                    ]
         | 
| 48 |  | 
| 49 | 
             
                    for text_length, expected_timeout in test_cases:
         | 
| @@ -72,17 +72,17 @@ class TestTimeoutOptimization: | |
| 72 | 
             
                    very_large_text_length = 100000  # 100,000 characters
         | 
| 73 | 
             
                    base_timeout = 60
         | 
| 74 | 
             
                    scaling_factor = 5
         | 
| 75 | 
            -
                    max_cap =  | 
| 76 |  | 
| 77 | 
             
                    # Calculate what the timeout would be without cap
         | 
| 78 | 
             
                    uncapped_timeout = base_timeout + max(0, (very_large_text_length - 1000) // 1000 * scaling_factor)
         | 
| 79 |  | 
| 80 | 
            -
                    # Should be much higher than  | 
| 81 | 
            -
                    assert uncapped_timeout >  | 
| 82 |  | 
| 83 | 
            -
                    # With cap, should be exactly  | 
| 84 | 
             
                    capped_timeout = min(uncapped_timeout, max_cap)
         | 
| 85 | 
            -
                    assert capped_timeout ==  | 
| 86 |  | 
| 87 | 
             
                def test_timeout_optimization_prevents_excessive_waits(self):
         | 
| 88 | 
             
                    """Test that optimized timeouts prevent excessive waits like 100+ seconds."""
         | 
| @@ -97,15 +97,15 @@ class TestTimeoutOptimization: | |
| 97 | 
             
                        dynamic_timeout = base_timeout + max(0, (text_length - 1000) // 1000 * scaling_factor)
         | 
| 98 | 
             
                        dynamic_timeout = min(dynamic_timeout, max_cap)
         | 
| 99 |  | 
| 100 | 
            -
                        # No timeout should exceed  | 
| 101 | 
            -
                        assert dynamic_timeout <=  | 
| 102 | 
            -
                            f"Timeout for {text_length} chars should not exceed  | 
| 103 |  | 
| 104 | 
             
                        # No timeout should be excessively long (like 100+ seconds for typical text)
         | 
| 105 | 
             
                        if text_length <= 20000:  # Typical text sizes
         | 
| 106 | 
            -
                            # Allow up to  | 
| 107 | 
            -
                            assert dynamic_timeout <=  | 
| 108 | 
            -
                                f"Timeout for typical text size {text_length} should not exceed  | 
| 109 |  | 
| 110 | 
             
                def test_timeout_optimization_performance_improvement(self):
         | 
| 111 | 
             
                    """Test that timeout optimization provides better performance characteristics."""
         | 
| @@ -122,13 +122,13 @@ class TestTimeoutOptimization: | |
| 122 | 
             
                    # New calculation (after optimization)
         | 
| 123 | 
             
                    new_base = 60
         | 
| 124 | 
             
                    new_scaling = 5
         | 
| 125 | 
            -
                    new_cap =  | 
| 126 | 
             
                    new_timeout = new_base + max(0, (text_length - 1000) // 1000 * new_scaling)  # 60 + 9*5 = 105
         | 
| 127 | 
            -
                    new_timeout = min(new_timeout, new_cap)  # Capped at  | 
| 128 |  | 
| 129 | 
             
                    # New timeout should be significantly better
         | 
| 130 | 
             
                    assert new_timeout < old_timeout, f"New timeout {new_timeout}s should be less than old {old_timeout}s"
         | 
| 131 | 
            -
                    assert new_timeout ==  | 
| 132 | 
             
                    assert old_timeout == 210, f"Old timeout should be 210s for 10k chars, got {old_timeout}"
         | 
| 133 |  | 
| 134 | 
             
                def test_timeout_optimization_edge_cases(self):
         | 
| @@ -166,14 +166,14 @@ class TestTimeoutOptimization: | |
| 166 | 
             
                    dynamic_timeout = base_timeout + max(0, (problematic_text_length - 1000) // 1000 * scaling_factor)
         | 
| 167 | 
             
                    dynamic_timeout = min(dynamic_timeout, max_cap)
         | 
| 168 |  | 
| 169 | 
            -
                    # Should be 60 + (19000//1000)*5 = 60 + 19*5 = 155, capped at  | 
| 170 | 
            -
                    expected_timeout =  | 
| 171 | 
             
                    assert dynamic_timeout == expected_timeout, \
         | 
| 172 | 
             
                        f"Problematic text length should have capped timeout {expected_timeout}s, got {dynamic_timeout}"
         | 
| 173 |  | 
| 174 | 
             
                    # Should not be 100+ seconds
         | 
| 175 | 
            -
                    assert dynamic_timeout <=  | 
| 176 | 
            -
                        f"Optimized timeout should not exceed  | 
| 177 |  | 
| 178 | 
             
                    # Should be much better than the old calculation
         | 
| 179 | 
             
                    old_timeout = 120 + max(0, (problematic_text_length - 1000) // 1000 * 10)  # 120 + 19*10 = 310
         | 
|  | |
| 33 | 
             
                    # Test the optimized formula directly
         | 
| 34 | 
             
                    base_timeout = 60  # Optimized base timeout
         | 
| 35 | 
             
                    scaling_factor = 5  # Optimized scaling factor
         | 
| 36 | 
            +
                    max_cap = 90  # Optimized maximum cap
         | 
| 37 |  | 
| 38 | 
             
                    # Test cases: (text_length, expected_timeout)
         | 
| 39 | 
             
                    test_cases = [
         | 
|  | |
| 42 | 
             
                        (1500, 60),     # 1500 chars: 60 + (500//1000)*5 = 60 + 0*5 = 60
         | 
| 43 | 
             
                        (2000, 65),     # 2000 chars: 60 + (1000//1000)*5 = 60 + 1*5 = 65
         | 
| 44 | 
             
                        (5000, 80),     # 5000 chars: 60 + (4000//1000)*5 = 60 + 4*5 = 80
         | 
| 45 | 
            +
                        (10000, 90),    # 10000 chars: 60 + (9000//1000)*5 = 60 + 9*5 = 105, capped at 90
         | 
| 46 | 
            +
                        (50000, 90),    # Very large: should be capped at 90
         | 
| 47 | 
             
                    ]
         | 
| 48 |  | 
| 49 | 
             
                    for text_length, expected_timeout in test_cases:
         | 
|  | |
| 72 | 
             
                    very_large_text_length = 100000  # 100,000 characters
         | 
| 73 | 
             
                    base_timeout = 60
         | 
| 74 | 
             
                    scaling_factor = 5
         | 
| 75 | 
            +
                    max_cap = 90  # Optimized cap
         | 
| 76 |  | 
| 77 | 
             
                    # Calculate what the timeout would be without cap
         | 
| 78 | 
             
                    uncapped_timeout = base_timeout + max(0, (very_large_text_length - 1000) // 1000 * scaling_factor)
         | 
| 79 |  | 
| 80 | 
            +
                    # Should be much higher than 90 without cap
         | 
| 81 | 
            +
                    assert uncapped_timeout > 90, f"Uncapped timeout should be > 90s, got {uncapped_timeout}"
         | 
| 82 |  | 
| 83 | 
            +
                    # With cap, should be exactly 90
         | 
| 84 | 
             
                    capped_timeout = min(uncapped_timeout, max_cap)
         | 
| 85 | 
            +
                    assert capped_timeout == 90, f"Capped timeout should be 90s, got {capped_timeout}"
         | 
| 86 |  | 
| 87 | 
             
                def test_timeout_optimization_prevents_excessive_waits(self):
         | 
| 88 | 
             
                    """Test that optimized timeouts prevent excessive waits like 100+ seconds."""
         | 
|  | |
| 97 | 
             
                        dynamic_timeout = base_timeout + max(0, (text_length - 1000) // 1000 * scaling_factor)
         | 
| 98 | 
             
                        dynamic_timeout = min(dynamic_timeout, max_cap)
         | 
| 99 |  | 
| 100 | 
            +
                        # No timeout should exceed 90 seconds
         | 
| 101 | 
            +
                        assert dynamic_timeout <= 90, \
         | 
| 102 | 
            +
                            f"Timeout for {text_length} chars should not exceed 90s, got {dynamic_timeout}"
         | 
| 103 |  | 
| 104 | 
             
                        # No timeout should be excessively long (like 100+ seconds for typical text)
         | 
| 105 | 
             
                        if text_length <= 20000:  # Typical text sizes
         | 
| 106 | 
            +
                            # Allow up to 90 seconds for 20k chars (which is reasonable and capped)
         | 
| 107 | 
            +
                            assert dynamic_timeout <= 90, \
         | 
| 108 | 
            +
                                f"Timeout for typical text size {text_length} should not exceed 90s, got {dynamic_timeout}"
         | 
| 109 |  | 
| 110 | 
             
                def test_timeout_optimization_performance_improvement(self):
         | 
| 111 | 
             
                    """Test that timeout optimization provides better performance characteristics."""
         | 
|  | |
| 122 | 
             
                    # New calculation (after optimization)
         | 
| 123 | 
             
                    new_base = 60
         | 
| 124 | 
             
                    new_scaling = 5
         | 
| 125 | 
            +
                    new_cap = 90
         | 
| 126 | 
             
                    new_timeout = new_base + max(0, (text_length - 1000) // 1000 * new_scaling)  # 60 + 9*5 = 105
         | 
| 127 | 
            +
                    new_timeout = min(new_timeout, new_cap)  # Capped at 90
         | 
| 128 |  | 
| 129 | 
             
                    # New timeout should be significantly better
         | 
| 130 | 
             
                    assert new_timeout < old_timeout, f"New timeout {new_timeout}s should be less than old {old_timeout}s"
         | 
| 131 | 
            +
                    assert new_timeout == 90, f"New timeout should be 90s for 10k chars (capped), got {new_timeout}"
         | 
| 132 | 
             
                    assert old_timeout == 210, f"Old timeout should be 210s for 10k chars, got {old_timeout}"
         | 
| 133 |  | 
| 134 | 
             
                def test_timeout_optimization_edge_cases(self):
         | 
|  | |
| 166 | 
             
                    dynamic_timeout = base_timeout + max(0, (problematic_text_length - 1000) // 1000 * scaling_factor)
         | 
| 167 | 
             
                    dynamic_timeout = min(dynamic_timeout, max_cap)
         | 
| 168 |  | 
| 169 | 
            +
                    # Should be 60 + (19000//1000)*5 = 60 + 19*5 = 155, capped at 90
         | 
| 170 | 
            +
                    expected_timeout = 90  # Capped at 90
         | 
| 171 | 
             
                    assert dynamic_timeout == expected_timeout, \
         | 
| 172 | 
             
                        f"Problematic text length should have capped timeout {expected_timeout}s, got {dynamic_timeout}"
         | 
| 173 |  | 
| 174 | 
             
                    # Should not be 100+ seconds
         | 
| 175 | 
            +
                    assert dynamic_timeout <= 90, \
         | 
| 176 | 
            +
                        f"Optimized timeout should not exceed 90s, got {dynamic_timeout}"
         | 
| 177 |  | 
| 178 | 
             
                    # Should be much better than the old calculation
         | 
| 179 | 
             
                    old_timeout = 120 + max(0, (problematic_text_length - 1000) // 1000 * 10)  # 120 + 19*10 = 310
         |