""" Tests for V2 API endpoints. """ import json import pytest from unittest.mock import AsyncMock, patch, MagicMock from fastapi.testclient import TestClient from app.main import app class TestV2SummarizeStream: """Test V2 streaming summarization endpoint.""" @pytest.mark.integration def test_v2_stream_endpoint_exists(self, client: TestClient): """Test that V2 stream endpoint exists and returns proper response.""" response = client.post( "/api/v2/summarize/stream", json={ "text": "This is a test text to summarize.", "max_tokens": 50 } ) # Should return 200 with SSE content type assert response.status_code == 200 assert response.headers["content-type"] == "text/event-stream; charset=utf-8" assert "Cache-Control" in response.headers assert "Connection" in response.headers @pytest.mark.integration def test_v2_stream_endpoint_validation_error(self, client: TestClient): """Test V2 stream endpoint with validation error.""" response = client.post( "/api/v2/summarize/stream", json={ "text": "", # Empty text should fail validation "max_tokens": 50 } ) assert response.status_code == 422 # Validation error @pytest.mark.integration def test_v2_stream_endpoint_sse_format(self, client: TestClient): """Test that V2 stream endpoint returns proper SSE format.""" with patch('app.services.hf_streaming_summarizer.hf_streaming_service.summarize_text_stream') as mock_stream: # Mock the streaming response async def mock_generator(): yield {"content": "This is a", "done": False, "tokens_used": 1} yield {"content": " test summary.", "done": False, "tokens_used": 2} yield {"content": "", "done": True, "tokens_used": 2, "latency_ms": 100.0} mock_stream.return_value = mock_generator() response = client.post( "/api/v2/summarize/stream", json={ "text": "This is a test text to summarize.", "max_tokens": 50 } ) assert response.status_code == 200 # Check SSE format content = response.text lines = content.strip().split('\n') # Should have data lines data_lines = [line for line in lines if line.startswith('data: ')] assert len(data_lines) >= 3 # At least 3 chunks # Parse first data line first_data = json.loads(data_lines[0][6:]) # Remove 'data: ' prefix assert "content" in first_data assert "done" in first_data assert first_data["content"] == "This is a" assert first_data["done"] is False @pytest.mark.integration def test_v2_stream_endpoint_error_handling(self, client: TestClient): """Test V2 stream endpoint error handling.""" with patch('app.services.hf_streaming_summarizer.hf_streaming_service.summarize_text_stream') as mock_stream: # Mock an error in the stream async def mock_error_generator(): yield {"content": "", "done": True, "error": "Model not available"} mock_stream.return_value = mock_error_generator() response = client.post( "/api/v2/summarize/stream", json={ "text": "This is a test text to summarize.", "max_tokens": 50 } ) assert response.status_code == 200 # Check error is properly formatted in SSE content = response.text lines = content.strip().split('\n') data_lines = [line for line in lines if line.startswith('data: ')] # Parse error data line error_data = json.loads(data_lines[0][6:]) # Remove 'data: ' prefix assert "error" in error_data assert error_data["done"] is True assert "Model not available" in error_data["error"] @pytest.mark.integration def test_v2_stream_endpoint_uses_v1_schema(self, client: TestClient): """Test that V2 endpoint uses the same schema as V1 for compatibility.""" # Test with V1-style request response = client.post( "/api/v2/summarize/stream", json={ "text": "This is a test text to summarize.", "max_tokens": 50, "prompt": "Summarize this text:" } ) # Should accept V1 schema format assert response.status_code == 200 @pytest.mark.integration def test_v2_stream_endpoint_parameter_mapping(self, client: TestClient): """Test that V2 correctly maps V1 parameters to V2 service.""" with patch('app.services.hf_streaming_summarizer.hf_streaming_service.summarize_text_stream') as mock_stream: async def mock_generator(): yield {"content": "", "done": True} mock_stream.return_value = mock_generator() response = client.post( "/api/v2/summarize/stream", json={ "text": "Test text", "max_tokens": 100, # Should map to max_new_tokens "prompt": "Custom prompt" } ) assert response.status_code == 200 # Verify service was called with correct parameters mock_stream.assert_called_once() call_args = mock_stream.call_args # Check that max_tokens was mapped to max_new_tokens assert call_args[1]['max_new_tokens'] == 100 assert call_args[1]['prompt'] == "Custom prompt" assert call_args[1]['text'] == "Test text" @pytest.mark.integration def test_v2_adaptive_token_logic_short_text(self, client: TestClient): """Test adaptive token logic for short texts (<1500 chars).""" with patch('app.services.hf_streaming_summarizer.hf_streaming_service.summarize_text_stream') as mock_stream: async def mock_generator(): yield {"content": "", "done": True} mock_stream.return_value = mock_generator() # Short text (500 chars) short_text = "This is a short text. " * 20 # ~500 chars response = client.post( "/api/v2/summarize/stream", json={ "text": short_text, # Don't specify max_tokens to test adaptive logic } ) assert response.status_code == 200 # Verify service was called with adaptive max_new_tokens mock_stream.assert_called_once() call_args = mock_stream.call_args # For short text, should use 60-100 tokens max_new_tokens = call_args[1]['max_new_tokens'] assert 60 <= max_new_tokens <= 100 @pytest.mark.integration def test_v2_adaptive_token_logic_long_text(self, client: TestClient): """Test adaptive token logic for long texts (>1500 chars).""" with patch('app.services.hf_streaming_summarizer.hf_streaming_service.summarize_text_stream') as mock_stream: async def mock_generator(): yield {"content": "", "done": True} mock_stream.return_value = mock_generator() # Long text (2000 chars) long_text = "This is a longer text that should trigger adaptive token logic. " * 40 # ~2000 chars response = client.post( "/api/v2/summarize/stream", json={ "text": long_text, # Don't specify max_tokens to test adaptive logic } ) assert response.status_code == 200 # Verify service was called with adaptive max_new_tokens mock_stream.assert_called_once() call_args = mock_stream.call_args # For long text, should use proportional scaling but capped max_new_tokens = call_args[1]['max_new_tokens'] assert 100 <= max_new_tokens <= 400 @pytest.mark.integration def test_v2_temperature_and_top_p_parameters(self, client: TestClient): """Test that temperature and top_p parameters are passed correctly.""" with patch('app.services.hf_streaming_summarizer.hf_streaming_service.summarize_text_stream') as mock_stream: async def mock_generator(): yield {"content": "", "done": True} mock_stream.return_value = mock_generator() response = client.post( "/api/v2/summarize/stream", json={ "text": "Test text", "temperature": 0.5, "top_p": 0.8 } ) assert response.status_code == 200 # Verify service was called with correct parameters mock_stream.assert_called_once() call_args = mock_stream.call_args assert call_args[1]['temperature'] == 0.5 assert call_args[1]['top_p'] == 0.8 @pytest.mark.integration def test_v2_default_temperature_and_top_p(self, client: TestClient): """Test that default temperature and top_p values are used when not specified.""" with patch('app.services.hf_streaming_summarizer.hf_streaming_service.summarize_text_stream') as mock_stream: async def mock_generator(): yield {"content": "", "done": True} mock_stream.return_value = mock_generator() response = client.post( "/api/v2/summarize/stream", json={ "text": "Test text" # Don't specify temperature or top_p } ) assert response.status_code == 200 # Verify service was called with default parameters mock_stream.assert_called_once() call_args = mock_stream.call_args assert call_args[1]['temperature'] == 0.3 # Default temperature assert call_args[1]['top_p'] == 0.9 # Default top_p @pytest.mark.integration def test_v2_recursive_summarization_trigger(self, client: TestClient): """Test that recursive summarization is triggered for long texts.""" with patch('app.services.hf_streaming_summarizer.hf_streaming_service.summarize_text_stream') as mock_stream: async def mock_generator(): yield {"content": "", "done": True} mock_stream.return_value = mock_generator() # Very long text (>1500 chars) to trigger recursive summarization very_long_text = "This is a very long text that should definitely trigger recursive summarization logic. " * 30 # ~2000+ chars response = client.post( "/api/v2/summarize/stream", json={ "text": very_long_text } ) assert response.status_code == 200 # The service should be called, and internally it should detect long text # and use recursive summarization mock_stream.assert_called_once() class TestV2APICompatibility: """Test V2 API compatibility with V1.""" @pytest.mark.integration def test_v2_uses_same_schemas_as_v1(self): """Test that V2 imports and uses the same schemas as V1.""" from app.api.v2.schemas import SummarizeRequest, SummarizeResponse from app.api.v1.schemas import SummarizeRequest as V1SummarizeRequest, SummarizeResponse as V1SummarizeResponse # Should be the same classes assert SummarizeRequest is V1SummarizeRequest assert SummarizeResponse is V1SummarizeResponse @pytest.mark.integration def test_v2_endpoint_structure_matches_v1(self, client: TestClient): """Test that V2 endpoint structure matches V1.""" # V1 endpoints v1_response = client.post( "/api/v1/summarize/stream", json={"text": "Test", "max_tokens": 50} ) # V2 endpoints should have same structure v2_response = client.post( "/api/v2/summarize/stream", json={"text": "Test", "max_tokens": 50} ) # Both should return 200 (even if V2 fails due to missing dependencies) # The important thing is the endpoint structure is the same assert v1_response.status_code in [200, 502] # 502 if Ollama not running assert v2_response.status_code in [200, 502] # 502 if HF not available # Both should have same headers assert v1_response.headers.get("content-type") == v2_response.headers.get("content-type")