"""
Summarization endpoints.
"""
import json
from fastapi import APIRouter, HTTPException
from fastapi.responses import StreamingResponse
import httpx
from app.api.v1.schemas import SummarizeRequest, SummarizeResponse
from app.services.summarizer import ollama_service
from app.services.transformers_summarizer import transformers_service
router = APIRouter()
@router.post("/", response_model=SummarizeResponse)
async def summarize(payload: SummarizeRequest) -> SummarizeResponse:
    """Summarize input text using the Ollama service.

    Args:
        payload: Request body carrying ``text`` plus optional
            ``max_tokens`` and ``prompt`` overrides.

    Returns:
        A ``SummarizeResponse`` built from the service's result dict.

    Raises:
        HTTPException: 504 on upstream timeout, 502 on other
            httpx/Ollama errors, 500 on anything unexpected.
    """
    try:
        result = await ollama_service.summarize_text(
            text=payload.text,
            max_tokens=payload.max_tokens or 256,
            prompt=payload.prompt or "Summarize the following text concisely:",
        )
        return SummarizeResponse(**result)
    except httpx.TimeoutException as e:
        # Timeout: give the caller an actionable hint; chain the cause
        # so the original traceback survives in server logs.
        raise HTTPException(
            status_code=504,
            detail="Request timeout. The text may be too long or complex. Try reducing the text length or max_tokens."
        ) from e
    except httpx.HTTPError as e:
        # Upstream (Ollama) failure maps to 502 Bad Gateway.
        raise HTTPException(status_code=502, detail=f"Summarization failed: {str(e)}") from e
    except Exception as e:
        # Last-resort boundary: anything unexpected becomes a 500.
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") from e
def _sse_format(event: dict) -> str:
    """Serialize an event dict as a single SSE ``data:`` frame."""
    return f"data: {json.dumps(event)}\n\n"


def _sse_error(message: str) -> str:
    """Build a terminal SSE error frame (empty content, done=True)."""
    return _sse_format({
        "content": "",
        "done": True,
        "error": message,
    })


async def _stream_generator(payload: SummarizeRequest):
    """Yield SSE-formatted summary chunks from the Ollama service.

    Failures are reported as a final SSE event carrying ``done=True``
    and an ``error`` field rather than raised, because the HTTP status
    line has already been sent once streaming starts.
    """
    try:
        async for chunk in ollama_service.summarize_text_stream(
            text=payload.text,
            max_tokens=payload.max_tokens or 256,
            prompt=payload.prompt or "Summarize the following text concisely:",
        ):
            yield _sse_format(chunk)
    except httpx.TimeoutException:
        # Timeout: surface an actionable hint to the client in-band.
        yield _sse_error(
            "Request timeout. The text may be too long or complex. "
            "Try reducing the text length or max_tokens."
        )
    except httpx.HTTPError as e:
        # Upstream (Ollama) failure.
        yield _sse_error(f"Summarization failed: {str(e)}")
    except Exception as e:
        # Last-resort boundary for anything unexpected.
        yield _sse_error(f"Internal server error: {str(e)}")
@router.post("/stream")
async def summarize_stream(payload: SummarizeRequest):
    """Stream a summary of the request text as Server-Sent Events."""
    # Disable caching and keep the connection open for the SSE stream.
    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
    }
    return StreamingResponse(
        _stream_generator(payload),
        media_type="text/event-stream",
        headers=sse_headers,
    )
async def _pipeline_stream_generator(payload: SummarizeRequest):
    """Yield Transformers-pipeline summary chunks as SSE ``data:`` frames.

    On any failure a single terminal event (empty content, ``done=True``,
    an ``error`` message) is emitted instead of raising, since response
    headers have already been sent to the client.
    """
    try:
        stream = transformers_service.summarize_text_stream(
            text=payload.text,
            max_length=payload.max_tokens or 130,
        )
        async for event in stream:
            # One SSE frame per chunk.
            yield "data: " + json.dumps(event) + "\n\n"
    except Exception as exc:
        # Report the failure in-band as a final SSE event.
        failure = {
            "content": "",
            "done": True,
            "error": f"Pipeline summarization failed: {str(exc)}"
        }
        yield "data: " + json.dumps(failure) + "\n\n"
        return  # Don't raise exception in streaming context
@router.post("/pipeline/stream")
async def summarize_pipeline_stream(payload: SummarizeRequest):
    """Stream a fast Transformers-pipeline summarization (8-12s response time) over SSE."""
    # SSE responses must not be cached and need a persistent connection.
    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
    }
    return StreamingResponse(
        _pipeline_stream_generator(payload),
        media_type="text/event-stream",
        headers=sse_headers,
    )