File size: 4,623 Bytes
0d683e2
 
 
6e01ea3
0d683e2
6e01ea3
fa85955
0d683e2
 
02a56a9
0d683e2
 
 
 
 
 
 
 
 
 
 
 
 
 
2431837
 
 
 
 
 
fa85955
 
0d683e2
2431837
 
 
0d683e2
 
6e01ea3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02a56a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""
Summarization endpoints.
"""
import json
from fastapi import APIRouter, HTTPException
from fastapi.responses import StreamingResponse
import httpx
from app.api.v1.schemas import SummarizeRequest, SummarizeResponse
from app.services.summarizer import ollama_service
from app.services.transformers_summarizer import transformers_service

router = APIRouter()


@router.post("/", response_model=SummarizeResponse)
async def summarize(payload: SummarizeRequest) -> SummarizeResponse:
    """Summarize input text using the Ollama service.

    Args:
        payload: Request carrying the text plus optional ``max_tokens``
            (defaults to 256) and ``prompt`` (defaults to a generic
            summarization instruction).

    Returns:
        SummarizeResponse built from the service's result dict.

    Raises:
        HTTPException: 504 on upstream timeout, 502 on any other httpx
            transport/HTTP error from Ollama, 500 on unexpected failures.
    """
    try:
        result = await ollama_service.summarize_text(
            text=payload.text,
            max_tokens=payload.max_tokens or 256,
            prompt=payload.prompt or "Summarize the following text concisely:",
        )
        return SummarizeResponse(**result)
    except httpx.TimeoutException as e:
        # Timeout -> 504 with a hint; chain the cause so logs keep the origin.
        raise HTTPException(
            status_code=504,
            detail="Request timeout. The text may be too long or complex. Try reducing the text length or max_tokens.",
        ) from e
    except httpx.HTTPError as e:
        # Upstream (Ollama) error -> 502 Bad Gateway.
        raise HTTPException(status_code=502, detail=f"Summarization failed: {str(e)}") from e
    except Exception as e:
        # Unexpected error -> 500; chained for a complete traceback.
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") from e


async def _stream_generator(payload: SummarizeRequest):
    """Yield SSE-formatted chunks for a streaming Ollama summarization.

    Each chunk from the service is JSON-encoded and framed as a
    ``data: ...\\n\\n`` Server-Sent Event. On failure a single terminal
    error event (``done: True`` plus an ``error`` message) is emitted
    instead of raising, since the HTTP status line has already been sent
    by the time a streaming response fails.

    Args:
        payload: Request carrying the text plus optional ``max_tokens``
            and ``prompt`` overrides.
    """
    # Collapse the three former copy-pasted except blocks: each handler
    # only picks the message; one shared path builds the error event.
    error_message = None
    try:
        async for chunk in ollama_service.summarize_text_stream(
            text=payload.text,
            max_tokens=payload.max_tokens or 256,
            prompt=payload.prompt or "Summarize the following text concisely:",
        ):
            # Format as SSE event
            yield f"data: {json.dumps(chunk)}\n\n"
    except httpx.TimeoutException:
        error_message = "Request timeout. The text may be too long or complex. Try reducing the text length or max_tokens."
    except httpx.HTTPError as e:
        error_message = f"Summarization failed: {str(e)}"
    except Exception as e:
        error_message = f"Internal server error: {str(e)}"

    if error_message is not None:
        # Send error event in SSE format; don't raise in streaming context.
        error_chunk = {
            "content": "",
            "done": True,
            "error": error_message,
        }
        yield f"data: {json.dumps(error_chunk)}\n\n"


@router.post("/stream")
async def summarize_stream(payload: SummarizeRequest):
    """Stream a summarization of ``payload.text`` as Server-Sent Events.

    Delegates chunk production to ``_stream_generator`` and wraps it in a
    ``StreamingResponse`` with the standard SSE media type and headers.
    """
    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
    }
    return StreamingResponse(
        _stream_generator(payload),
        media_type="text/event-stream",
        headers=sse_headers,
    )


async def _pipeline_stream_generator(payload: SummarizeRequest):
    """Yield SSE-framed chunks from the Transformers pipeline summarizer.

    Each service chunk is serialized to JSON and emitted as a
    ``data: ...\\n\\n`` event. Any failure produces one terminal error
    event instead of propagating, because headers are already sent once
    streaming has begun.
    """
    stream = transformers_service.summarize_text_stream(
        text=payload.text,
        max_length=payload.max_tokens or 130,
    )
    try:
        async for piece in stream:
            # Frame each chunk as a Server-Sent Event.
            yield f"data: {json.dumps(piece)}\n\n"
    except Exception as exc:
        # Emit a final error event in SSE form rather than raising.
        failure = {
            "content": "",
            "done": True,
            "error": f"Pipeline summarization failed: {str(exc)}",
        }
        yield f"data: {json.dumps(failure)}\n\n"


@router.post("/pipeline/stream")
async def summarize_pipeline_stream(payload: SummarizeRequest):
    """Fast streaming summarization using Transformers pipeline (8-12s response time)."""
    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
    }
    return StreamingResponse(
        _pipeline_stream_generator(payload),
        media_type="text/event-stream",
        headers=sse_headers,
    )