ming committed on
Commit
6e01ea3
·
1 Parent(s): 8aac05c

feat: Add text streaming support with SSE

Browse files

- Add summarize_text_stream() async generator method to OllamaService
- Implement /api/v1/summarize/stream endpoint with Server-Sent Events
- Add StreamChunk schema for streaming response documentation
- Comprehensive test coverage with TDD approach (11 new tests)
- Android-friendly SSE format for real-time text streaming
- Maintains backward compatibility with existing non-streaming endpoint
- Proper error handling with SSE error events
- Manual verification with curl confirms working implementation

Closes: Streaming text summarization feature request

app/api/v1/schemas.py CHANGED
@@ -42,6 +42,14 @@ class HealthResponse(BaseModel):
42
  ollama: Optional[str] = Field(None, description="Ollama service status")
43
 
44
 
 
 
 
 
 
 
 
 
45
  class ErrorResponse(BaseModel):
46
  """Error response schema."""
47
 
 
42
  ollama: Optional[str] = Field(None, description="Ollama service status")
43
 
44
 
45
class StreamChunk(BaseModel):
    """Schema for streaming response chunks.

    One instance describes a single Server-Sent-Events `data:` payload
    emitted by the /summarize/stream endpoint; the keys mirror the dicts
    yielded by OllamaService.summarize_text_stream().
    """

    # Text fragment produced by the model for this event (empty string on error events).
    content: str = Field(..., description="Content chunk from the stream")
    # True on the final chunk of the stream.
    done: bool = Field(..., description="Whether this is the final chunk")
    # Cumulative token count reported by Ollama (eval_count); None if absent.
    tokens_used: Optional[int] = Field(None, description="Number of tokens used so far")
51
+
52
+
53
  class ErrorResponse(BaseModel):
54
  """Error response schema."""
55
 
app/api/v1/summarize.py CHANGED
@@ -1,7 +1,9 @@
1
  """
2
  Summarization endpoints.
3
  """
 
4
  from fastapi import APIRouter, HTTPException
 
5
  import httpx
6
  from app.api.v1.schemas import SummarizeRequest, SummarizeResponse
7
  from app.services.summarizer import ollama_service
@@ -33,3 +35,60 @@ async def summarize(payload: SummarizeRequest) -> SummarizeResponse:
33
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
34
 
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  Summarization endpoints.
3
  """
4
+ import json
5
  from fastapi import APIRouter, HTTPException
6
+ from fastapi.responses import StreamingResponse
7
  import httpx
8
  from app.api.v1.schemas import SummarizeRequest, SummarizeResponse
9
  from app.services.summarizer import ollama_service
 
35
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
36
 
37
 
38
async def _stream_generator(payload: SummarizeRequest):
    """Yield Server-Sent-Events frames for a streaming summarization request.

    Wraps ollama_service.summarize_text_stream(); each chunk dict is JSON
    encoded into an SSE frame (``data: {...}\\n\\n``).  Once streaming has
    begun the HTTP status is already committed, so failures cannot surface
    as error status codes — they are delivered in-band as a final SSE event
    whose JSON carries an "error" key and ``done: true``.
    """

    def _error_event(message: str) -> str:
        # Terminal SSE frame shared by all failure branches (previously three
        # copy-pasted blocks).
        body = json.dumps({"content": "", "done": True, "error": message})
        return f"data: {body}\n\n"

    try:
        async for chunk in ollama_service.summarize_text_stream(
            text=payload.text,
            max_tokens=payload.max_tokens or 256,
            prompt=payload.prompt or "Summarize the following text concisely:",
        ):
            # Format as SSE event
            yield f"data: {json.dumps(chunk)}\n\n"

    # Order matters: TimeoutException is a subclass of HTTPError in httpx.
    except httpx.TimeoutException:
        yield _error_event(
            "Request timeout. The text may be too long or complex. "
            "Try reducing the text length or max_tokens."
        )
    except httpx.HTTPError as e:
        yield _error_event(f"Summarization failed: {str(e)}")
    except Exception as e:
        yield _error_event(f"Internal server error: {str(e)}")
80
+
81
+
82
@router.post("/stream")
async def summarize_stream(payload: SummarizeRequest):
    """Stream text summarization using Server-Sent Events (SSE).

    Responds 200 with media type ``text/event-stream``; the body is produced
    incrementally by ``_stream_generator``.  Errors are reported as SSE error
    events, never as HTTP error status codes.
    """
    # Headers that tell proxies/clients to treat this as a live event stream.
    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
    }
    event_source = _stream_generator(payload)
    return StreamingResponse(
        event_source,
        media_type="text/event-stream",
        headers=sse_headers,
    )
93
+
94
+
app/services/summarizer.py CHANGED
@@ -1,8 +1,9 @@
1
  """
2
  Ollama service integration for text summarization.
3
  """
 
4
  import time
5
- from typing import Dict, Any
6
  from urllib.parse import urljoin
7
 
8
  import httpx
@@ -123,6 +124,101 @@ class OllamaService:
123
  # Present a consistent error type to callers
124
  raise httpx.HTTPError(f"Ollama API error: {e}") from e
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  async def check_health(self) -> bool:
127
  """
128
  Verify Ollama is reachable and (optionally) that the model exists.
 
1
  """
2
  Ollama service integration for text summarization.
3
  """
4
+ import json
5
  import time
6
+ from typing import Dict, Any, AsyncGenerator
7
  from urllib.parse import urljoin
8
 
9
  import httpx
 
124
  # Present a consistent error type to callers
125
  raise httpx.HTTPError(f"Ollama API error: {e}") from e
126
 
127
+ async def summarize_text_stream(
128
+ self,
129
+ text: str,
130
+ max_tokens: int = 100,
131
+ prompt: str = "Summarize concisely:",
132
+ ) -> AsyncGenerator[Dict[str, Any], None]:
133
+ """
134
+ Stream text summarization using Ollama.
135
+ Yields chunks as they arrive from Ollama.
136
+ Raises httpx.HTTPError (and subclasses) on failure.
137
+ """
138
+ start_time = time.time()
139
+
140
+ # Optimized timeout: base + 3s per extra 1000 chars (cap 90s)
141
+ text_length = len(text)
142
+ dynamic_timeout = min(self.timeout + max(0, (text_length - 1000) // 1000 * 3), 90)
143
+
144
+ # Preprocess text to reduce input size for faster processing
145
+ if text_length > 4000:
146
+ # Truncate very long texts and add note
147
+ text = text[:4000] + "\n\n[Text truncated for faster processing]"
148
+ text_length = len(text)
149
+ logger.info(f"Text truncated from {len(text)} to {text_length} chars for faster processing")
150
+
151
+ logger.info(f"Processing text of {text_length} chars with timeout {dynamic_timeout}s")
152
+
153
+ full_prompt = f"{prompt}\n\n{text}"
154
+
155
+ payload = {
156
+ "model": self.model,
157
+ "prompt": full_prompt,
158
+ "stream": True, # Enable streaming
159
+ "options": {
160
+ "num_predict": max_tokens,
161
+ "temperature": 0.1, # Lower temperature for faster, more focused output
162
+ "top_p": 0.9, # Nucleus sampling for efficiency
163
+ "top_k": 40, # Limit vocabulary for speed
164
+ "repeat_penalty": 1.1, # Prevent repetition
165
+ "num_ctx": 2048, # Limit context window for speed
166
+ },
167
+ }
168
+
169
+ generate_url = urljoin(self.base_url, "api/generate")
170
+ logger.info(f"POST {generate_url} (streaming)")
171
+
172
+ try:
173
+ async with httpx.AsyncClient(timeout=dynamic_timeout) as client:
174
+ async with client.stream("POST", generate_url, json=payload) as response:
175
+ response.raise_for_status()
176
+
177
+ async for line in response.aiter_lines():
178
+ line = line.strip()
179
+ if not line:
180
+ continue
181
+
182
+ try:
183
+ data = json.loads(line)
184
+ chunk = {
185
+ "content": data.get("response", ""),
186
+ "done": data.get("done", False),
187
+ "tokens_used": data.get("eval_count", 0),
188
+ }
189
+ yield chunk
190
+
191
+ # Break if this is the final chunk
192
+ if data.get("done", False):
193
+ break
194
+
195
+ except json.JSONDecodeError:
196
+ # Skip malformed JSON lines
197
+ logger.warning(f"Skipping malformed JSON line: {line[:100]}")
198
+ continue
199
+
200
+ except httpx.TimeoutException:
201
+ logger.error(
202
+ f"Timeout calling Ollama after {dynamic_timeout}s "
203
+ f"(chars={text_length}, url={generate_url})"
204
+ )
205
+ raise
206
+ except httpx.RequestError as e:
207
+ # Network / connection errors (DNS, refused, TLS, etc.)
208
+ logger.error(f"Request error calling Ollama at {generate_url}: {e}")
209
+ raise
210
+ except httpx.HTTPStatusError as e:
211
+ # Non-2xx responses
212
+ body = e.response.text if e.response is not None else ""
213
+ logger.error(
214
+ f"HTTP {e.response.status_code if e.response else '??'} from Ollama at {generate_url}: {body[:400]}"
215
+ )
216
+ raise
217
+ except Exception as e:
218
+ logger.error(f"Unexpected error calling Ollama at {generate_url}: {e}")
219
+ # Present a consistent error type to callers
220
+ raise httpx.HTTPError(f"Ollama API error: {e}") from e
221
+
222
  async def check_health(self) -> bool:
223
  """
224
  Verify Ollama is reachable and (optionally) that the model exists.
tests/test_api.py CHANGED
@@ -1,8 +1,9 @@
1
  """
2
  Integration tests for API endpoints.
3
  """
 
4
  import pytest
5
- from unittest.mock import patch
6
  from starlette.testclient import TestClient
7
  from app.main import app
8
 
@@ -96,4 +97,203 @@ def test_summarize_endpoint_large_text_handling():
96
  mock_client.assert_called_once()
97
  call_args = mock_client.call_args
98
  expected_timeout = 60 + (5000 - 1000) // 1000 * 5 # 80 seconds
99
- assert call_args[1]['timeout'] == expected_timeout
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  Integration tests for API endpoints.
3
  """
4
+ import json
5
  import pytest
6
+ from unittest.mock import patch, MagicMock
7
  from starlette.testclient import TestClient
8
  from app.main import app
9
 
 
97
  mock_client.assert_called_once()
98
  call_args = mock_client.call_args
99
  expected_timeout = 60 + (5000 - 1000) // 1000 * 5 # 80 seconds
100
+ assert call_args[1]['timeout'] == expected_timeout
101
+
102
+
103
+ # Tests for Streaming Endpoint
104
@pytest.mark.integration
def test_summarize_stream_endpoint_success(sample_text):
    """Test successful streaming summarization via API endpoint."""
    # Mock streaming response data: NDJSON lines as Ollama emits in stream mode.
    mock_stream_data = [
        '{"response": "This", "done": false, "eval_count": 1}\n',
        '{"response": " is", "done": false, "eval_count": 2}\n',
        '{"response": " a", "done": false, "eval_count": 3}\n',
        '{"response": " test", "done": true, "eval_count": 4}\n'
    ]

    class MockStreamResponse:
        # Mimics the response object entered via `async with client.stream(...)`.
        def __init__(self, data):
            self.data = data

        async def aiter_lines(self):
            for line in self.data:
                yield line

        def raise_for_status(self):
            pass

    class MockStreamContextManager:
        # client.stream(...) returns an async context manager, not a response.
        def __init__(self, response):
            self.response = response

        async def __aenter__(self):
            return self.response

        async def __aexit__(self, exc_type, exc, tb):
            return False

    class MockStreamClient:
        # Stand-in for httpx.AsyncClient; itself used as `async with`.
        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            return False

        def stream(self, method, url, **kwargs):
            return MockStreamContextManager(MockStreamResponse(mock_stream_data))

    with patch('httpx.AsyncClient', return_value=MockStreamClient()):
        resp = client.post(
            "/api/v1/summarize/stream",
            json={"text": sample_text, "max_tokens": 128}
        )
        assert resp.status_code == 200
        assert resp.headers["content-type"] == "text/event-stream; charset=utf-8"

        # Parse SSE response
        lines = resp.text.strip().split('\n')
        data_lines = [line for line in lines if line.startswith('data: ')]

        # One SSE event per mocked Ollama NDJSON line.
        assert len(data_lines) == 4

        # Parse first chunk
        first_chunk = json.loads(data_lines[0][6:])  # Remove 'data: ' prefix
        assert first_chunk["content"] == "This"
        assert first_chunk["done"] is False
        assert first_chunk["tokens_used"] == 1

        # Parse last chunk
        last_chunk = json.loads(data_lines[-1][6:])  # Remove 'data: ' prefix
        assert last_chunk["content"] == " test"
        assert last_chunk["done"] is True
        assert last_chunk["tokens_used"] == 4
171
+
172
+
173
@pytest.mark.integration
def test_summarize_stream_endpoint_validation_error():
    """Empty input text must be rejected with 422 before any streaming starts."""
    response = client.post("/api/v1/summarize/stream", json={"text": ""})
    assert response.status_code == 422
181
+
182
+
183
@pytest.mark.integration
def test_summarize_stream_endpoint_timeout_error():
    """Test that timeout errors in streaming return proper error."""
    import httpx

    class MockStreamClient:
        # Minimal async-context stand-in for httpx.AsyncClient whose
        # stream() call fails immediately with a timeout.
        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            return False

        def stream(self, method, url, **kwargs):
            raise httpx.TimeoutException("Timeout")

    with patch('httpx.AsyncClient', return_value=MockStreamClient()):
        resp = client.post(
            "/api/v1/summarize/stream",
            json={"text": "Test text that will timeout"}
        )
        # Status is committed before the failure; the error travels in-band.
        assert resp.status_code == 200  # SSE returns 200 even with errors
        assert resp.headers["content-type"] == "text/event-stream; charset=utf-8"

        # Parse SSE response
        lines = resp.text.strip().split('\n')
        data_lines = [line for line in lines if line.startswith('data: ')]

        # Exactly one terminal error event is emitted.
        assert len(data_lines) == 1
        error_chunk = json.loads(data_lines[0][6:])  # Remove 'data: ' prefix
        assert error_chunk["done"] is True
        assert "timeout" in error_chunk["error"].lower()
214
+
215
+
216
@pytest.mark.integration
def test_summarize_stream_endpoint_http_error():
    """Test that HTTP errors in streaming return proper error."""
    import httpx

    # Pre-built error raised from inside the mocked client.
    http_error = httpx.HTTPStatusError("Bad Request", request=MagicMock(), response=MagicMock())

    class MockStreamClient:
        # Async-context stand-in for httpx.AsyncClient that fails on stream().
        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            return False

        def stream(self, method, url, **kwargs):
            raise http_error

    with patch('httpx.AsyncClient', return_value=MockStreamClient()):
        resp = client.post(
            "/api/v1/summarize/stream",
            json={"text": "Test text"}
        )
        # Status is committed before the failure; the error travels in-band.
        assert resp.status_code == 200  # SSE returns 200 even with errors
        assert resp.headers["content-type"] == "text/event-stream; charset=utf-8"

        # Parse SSE response
        lines = resp.text.strip().split('\n')
        data_lines = [line for line in lines if line.startswith('data: ')]

        assert len(data_lines) == 1
        error_chunk = json.loads(data_lines[0][6:])  # Remove 'data: ' prefix
        assert error_chunk["done"] is True
        assert "Summarization failed" in error_chunk["error"]
249
+
250
+
251
@pytest.mark.integration
def test_summarize_stream_endpoint_sse_format():
    """Test that streaming endpoint returns proper SSE format."""
    mock_stream_data = ['{"response": "Summary", "done": true, "eval_count": 1}\n']

    class MockStreamResponse:
        # Mimics the response object entered via `async with client.stream(...)`.
        def __init__(self, data):
            self.data = data

        async def aiter_lines(self):
            for line in self.data:
                yield line

        def raise_for_status(self):
            pass

    class MockStreamContextManager:
        # client.stream(...) returns an async context manager, not a response.
        def __init__(self, response):
            self.response = response

        async def __aenter__(self):
            return self.response

        async def __aexit__(self, exc_type, exc, tb):
            return False

    class MockStreamClient:
        # Stand-in for httpx.AsyncClient; itself used as `async with`.
        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            return False

        def stream(self, method, url, **kwargs):
            return MockStreamContextManager(MockStreamResponse(mock_stream_data))

    with patch('httpx.AsyncClient', return_value=MockStreamClient()):
        resp = client.post(
            "/api/v1/summarize/stream",
            json={"text": "Test text"}
        )
        assert resp.status_code == 200
        assert resp.headers["content-type"] == "text/event-stream; charset=utf-8"
        # Headers the endpoint sets explicitly for SSE clients/proxies.
        assert resp.headers["cache-control"] == "no-cache"
        assert resp.headers["connection"] == "keep-alive"

        # Check SSE format
        lines = resp.text.strip().split('\n')
        assert any(line.startswith('data: ') for line in lines)
tests/test_services.py CHANGED
@@ -224,3 +224,258 @@ class TestOllamaService:
224
  error_message = str(exc_info.value)
225
  assert f"timeout after {expected_timeout}s" in error_message
226
  assert "Text may be too long or complex" in error_message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  error_message = str(exc_info.value)
225
  assert f"timeout after {expected_timeout}s" in error_message
226
  assert "Text may be too long or complex" in error_message
227
+
228
+ # Tests for Streaming Functionality
229
    @pytest.mark.asyncio
    async def test_summarize_text_stream_success(self, ollama_service):
        """Test successful text streaming."""
        # Mock streaming response data: NDJSON lines as Ollama emits in stream mode.
        mock_stream_data = [
            '{"response": "This", "done": false, "eval_count": 1}\n',
            '{"response": " is", "done": false, "eval_count": 2}\n',
            '{"response": " a", "done": false, "eval_count": 3}\n',
            '{"response": " test", "done": true, "eval_count": 4}\n'
        ]

        class MockStreamResponse:
            # Mimics the response object entered via `async with client.stream(...)`.
            def __init__(self, data):
                self.data = data
                self._index = 0  # not read by aiter_lines; retained scaffolding

            async def aiter_lines(self):
                for line in self.data:
                    yield line

            def raise_for_status(self):
                # Mock successful response
                pass

        mock_response = MockStreamResponse(mock_stream_data)

        class MockStreamContextManager:
            # client.stream(...) returns an async context manager, not a response.
            def __init__(self, response):
                self.response = response

            async def __aenter__(self):
                return self.response

            async def __aexit__(self, exc_type, exc, tb):
                return False

        class MockStreamClient:
            # Stand-in for httpx.AsyncClient; itself used as `async with`.
            async def __aenter__(self):
                return self

            async def __aexit__(self, exc_type, exc, tb):
                return False

            def stream(self, method, url, **kwargs):
                # Return an async context manager
                return MockStreamContextManager(mock_response)

        with patch('httpx.AsyncClient', return_value=MockStreamClient()):
            chunks = []
            async for chunk in ollama_service.summarize_text_stream("Test text"):
                chunks.append(chunk)

        # One translated chunk per mocked NDJSON line, in order.
        assert len(chunks) == 4
        assert chunks[0]["content"] == "This"
        assert chunks[0]["done"] is False
        assert chunks[0]["tokens_used"] == 1
        assert chunks[-1]["content"] == " test"
        assert chunks[-1]["done"] is True
        assert chunks[-1]["tokens_used"] == 4
288
+
289
    @pytest.mark.asyncio
    async def test_summarize_text_stream_with_custom_params(self, ollama_service):
        """Test streaming with custom parameters."""
        mock_stream_data = ['{"response": "Summary", "done": true, "eval_count": 1}\n']

        class MockStreamResponse:
            # Mimics the response object entered via `async with client.stream(...)`.
            def __init__(self, data):
                self.data = data

            async def aiter_lines(self):
                for line in self.data:
                    yield line

            def raise_for_status(self):
                # Mock successful response
                pass

        mock_response = MockStreamResponse(mock_stream_data)
        # Filled in by MockStreamClient.stream() with the JSON body it receives.
        captured_payload = {}

        class MockStreamContextManager:
            # client.stream(...) returns an async context manager, not a response.
            def __init__(self, response):
                self.response = response

            async def __aenter__(self):
                return self.response

            async def __aexit__(self, exc_type, exc, tb):
                return False

        class MockStreamClient:
            async def __aenter__(self):
                return self

            async def __aexit__(self, exc_type, exc, tb):
                return False

            def stream(self, method, url, **kwargs):
                # Record the request payload so assertions can inspect it.
                captured_payload.update(kwargs.get('json', {}))
                return MockStreamContextManager(mock_response)

        with patch('httpx.AsyncClient', return_value=MockStreamClient()):
            chunks = []
            async for chunk in ollama_service.summarize_text_stream(
                "Test text",
                max_tokens=512,
                prompt="Custom prompt"
            ):
                chunks.append(chunk)

        # Verify captured payload
        assert captured_payload["stream"] is True
        assert captured_payload["options"]["num_predict"] == 512
        assert "Custom prompt" in captured_payload["prompt"]
343
+
344
+ @pytest.mark.asyncio
345
+ async def test_summarize_text_stream_timeout(self, ollama_service):
346
+ """Test streaming timeout handling."""
347
+ class MockStreamClient:
348
+ async def __aenter__(self):
349
+ return self
350
+
351
+ async def __aexit__(self, exc_type, exc, tb):
352
+ return False
353
+
354
+ def stream(self, method, url, **kwargs):
355
+ raise httpx.TimeoutException("Timeout")
356
+
357
+ with patch('httpx.AsyncClient', return_value=MockStreamClient()):
358
+ with pytest.raises(httpx.TimeoutException):
359
+ chunks = []
360
+ async for chunk in ollama_service.summarize_text_stream("Test text"):
361
+ chunks.append(chunk)
362
+
363
+ @pytest.mark.asyncio
364
+ async def test_summarize_text_stream_http_error(self, ollama_service):
365
+ """Test streaming HTTP error handling."""
366
+ http_error = httpx.HTTPStatusError("Bad Request", request=MagicMock(), response=MagicMock())
367
+
368
+ class MockStreamClient:
369
+ async def __aenter__(self):
370
+ return self
371
+
372
+ async def __aexit__(self, exc_type, exc, tb):
373
+ return False
374
+
375
+ def stream(self, method, url, **kwargs):
376
+ raise http_error
377
+
378
+ with patch('httpx.AsyncClient', return_value=MockStreamClient()):
379
+ with pytest.raises(httpx.HTTPStatusError):
380
+ chunks = []
381
+ async for chunk in ollama_service.summarize_text_stream("Test text"):
382
+ chunks.append(chunk)
383
+
384
    @pytest.mark.asyncio
    async def test_summarize_text_stream_empty_response(self, ollama_service):
        """Test streaming with empty response."""
        # No NDJSON lines at all — the stream ends immediately.
        mock_stream_data = []

        class MockStreamResponse:
            # Mimics the response object entered via `async with client.stream(...)`.
            def __init__(self, data):
                self.data = data

            async def aiter_lines(self):
                for line in self.data:
                    yield line

            def raise_for_status(self):
                # Mock successful response
                pass

        mock_response = MockStreamResponse(mock_stream_data)

        class MockStreamContextManager:
            # client.stream(...) returns an async context manager, not a response.
            def __init__(self, response):
                self.response = response

            async def __aenter__(self):
                return self.response

            async def __aexit__(self, exc_type, exc, tb):
                return False

        class MockStreamClient:
            async def __aenter__(self):
                return self

            async def __aexit__(self, exc_type, exc, tb):
                return False

            def stream(self, method, url, **kwargs):
                return MockStreamContextManager(mock_response)

        with patch('httpx.AsyncClient', return_value=MockStreamClient()):
            chunks = []
            async for chunk in ollama_service.summarize_text_stream("Test text"):
                chunks.append(chunk)

        # An empty upstream stream yields no chunks and raises nothing.
        assert len(chunks) == 0
429
+
430
    @pytest.mark.asyncio
    async def test_summarize_text_stream_malformed_json(self, ollama_service):
        """Test streaming with malformed JSON response."""
        # Middle line is invalid JSON; the service should skip it, not abort.
        mock_stream_data = [
            '{"response": "Valid", "done": false, "eval_count": 1}\n',
            'invalid json line\n',
            '{"response": "End", "done": true, "eval_count": 2}\n'
        ]

        class MockStreamResponse:
            # Mimics the response object entered via `async with client.stream(...)`.
            def __init__(self, data):
                self.data = data

            async def aiter_lines(self):
                for line in self.data:
                    yield line

            def raise_for_status(self):
                # Mock successful response
                pass

        mock_response = MockStreamResponse(mock_stream_data)

        class MockStreamContextManager:
            # client.stream(...) returns an async context manager, not a response.
            def __init__(self, response):
                self.response = response

            async def __aenter__(self):
                return self.response

            async def __aexit__(self, exc_type, exc, tb):
                return False

        class MockStreamClient:
            async def __aenter__(self):
                return self

            async def __aexit__(self, exc_type, exc, tb):
                return False

            def stream(self, method, url, **kwargs):
                return MockStreamContextManager(mock_response)

        with patch('httpx.AsyncClient', return_value=MockStreamClient()):
            chunks = []
            async for chunk in ollama_service.summarize_text_stream("Test text"):
                chunks.append(chunk)

        # Should skip malformed JSON and continue with valid chunks
        assert len(chunks) == 2
        assert chunks[0]["content"] == "Valid"
        assert chunks[1]["content"] == "End"