| """ | |
| Summarization endpoints. | |
| """ | |
| import json | |
| from fastapi import APIRouter, HTTPException | |
| from fastapi.responses import StreamingResponse | |
| import httpx | |
| from app.api.v1.schemas import SummarizeRequest, SummarizeResponse | |
| from app.services.summarizer import ollama_service | |
| from app.services.transformers_summarizer import transformers_service | |
| router = APIRouter() | |

# NOTE: the route path below is an assumption; the original registration may differ.
@router.post("/summarize", response_model=SummarizeResponse)
async def summarize(payload: SummarizeRequest) -> SummarizeResponse:
    """Summarize input text using the Ollama service."""
    try:
        result = await ollama_service.summarize_text(
            text=payload.text,
            max_tokens=payload.max_tokens or 256,
            prompt=payload.prompt or "Summarize the following text concisely:",
        )
        return SummarizeResponse(**result)
    except httpx.TimeoutException:
        # Timeout error - provide a helpful message
        raise HTTPException(
            status_code=504,
            detail="Request timeout. The text may be too long or complex. Try reducing the text length or max_tokens.",
        )
    except httpx.HTTPError as e:
        # Upstream (Ollama) error
        raise HTTPException(status_code=502, detail=f"Summarization failed: {str(e)}")
    except Exception as e:
        # Unexpected error
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
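
# Example usage (a sketch, not part of this module): assuming the router is mounted under
# "/api/v1" and SummarizeRequest exposes the fields used above, a client call could look like:
#
#     import httpx
#
#     resp = httpx.post(
#         "http://localhost:8000/api/v1/summarize",
#         json={"text": "Long article text ...", "max_tokens": 256},
#         timeout=120,
#     )
#     resp.raise_for_status()
#     print(resp.json())  # shape defined by SummarizeResponse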

async def _stream_generator(payload: SummarizeRequest):
    """Generator function for streaming SSE responses."""
    try:
        async for chunk in ollama_service.summarize_text_stream(
            text=payload.text,
            max_tokens=payload.max_tokens or 256,
            prompt=payload.prompt or "Summarize the following text concisely:",
        ):
            # Format each chunk as an SSE event
            sse_data = json.dumps(chunk)
            yield f"data: {sse_data}\n\n"
    except httpx.TimeoutException:
        # Send an error event in SSE format
        error_chunk = {
            "content": "",
            "done": True,
            "error": "Request timeout. The text may be too long or complex. Try reducing the text length or max_tokens.",
        }
        sse_data = json.dumps(error_chunk)
        yield f"data: {sse_data}\n\n"
        return  # Don't raise exceptions in a streaming context
    except httpx.HTTPError as e:
        # Send an error event in SSE format
        error_chunk = {
            "content": "",
            "done": True,
            "error": f"Summarization failed: {str(e)}",
        }
        sse_data = json.dumps(error_chunk)
        yield f"data: {sse_data}\n\n"
        return  # Don't raise exceptions in a streaming context
    except Exception as e:
        # Send an error event in SSE format
        error_chunk = {
            "content": "",
            "done": True,
            "error": f"Internal server error: {str(e)}",
        }
        sse_data = json.dumps(error_chunk)
        yield f"data: {sse_data}\n\n"
        return  # Don't raise exceptions in a streaming context

# NOTE: the route path below is an assumption; the original registration may differ.
@router.post("/summarize/stream")
async def summarize_stream(payload: SummarizeRequest):
    """Stream text summarization using Server-Sent Events (SSE)."""
    return StreamingResponse(
        _stream_generator(payload),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
        },
    )
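
# Consuming the SSE stream (a sketch, not part of this module): each event is one
# "data: <json>\n\n" frame carrying the chunk dict yielded by the generator; on failure the
# final chunk has "done": true and an "error" field. The base URL, mount path, and the
# assumption that success chunks share the {"content": ..., "done": ...} shape are not
# confirmed by this file. For example, with httpx:
#
#     import json
#     import httpx
#
#     with httpx.stream(
#         "POST",
#         "http://localhost:8000/api/v1/summarize/stream",
#         json={"text": "Long article text ...", "max_tokens": 256},
#         timeout=None,
#     ) as resp:
#         for line in resp.iter_lines():
#             if line.startswith("data: "):
#                 chunk = json.loads(line[len("data: "):])
#                 print(chunk.get("content", ""), end="", flush=True)
#                 if chunk.get("done"):
#                     break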

async def _pipeline_stream_generator(payload: SummarizeRequest):
    """Generator function for Transformers pipeline streaming SSE responses."""
    try:
        async for chunk in transformers_service.summarize_text_stream(
            text=payload.text,
            max_length=payload.max_tokens or 130,
        ):
            # Format each chunk as an SSE event
            sse_data = json.dumps(chunk)
            yield f"data: {sse_data}\n\n"
    except Exception as e:
        # Send an error event in SSE format
        error_chunk = {
            "content": "",
            "done": True,
            "error": f"Pipeline summarization failed: {str(e)}",
        }
        sse_data = json.dumps(error_chunk)
        yield f"data: {sse_data}\n\n"
        return  # Don't raise exceptions in a streaming context

# NOTE: the route path below is an assumption; the original registration may differ.
@router.post("/summarize/pipeline/stream")
async def summarize_pipeline_stream(payload: SummarizeRequest):
    """Fast streaming summarization using the Transformers pipeline (8-12s response time)."""
    return StreamingResponse(
        _pipeline_stream_generator(payload),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
        },
    )
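
# Wiring sketch (an assumption; the actual application setup lives elsewhere in the project,
# and the module path below is hypothetical):
#
#     from fastapi import FastAPI
#     from app.api.v1.summarize import router as summarize_router
#
#     app = FastAPI()
#     app.include_router(summarize_router, prefix="/api/v1")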