File size: 1,689 Bytes
0b6e76d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
"""
V2 Summarization endpoints using HuggingFace streaming.
"""
import json
from fastapi import APIRouter, HTTPException
from fastapi.responses import StreamingResponse

from app.api.v2.schemas import SummarizeRequest
from app.services.hf_streaming_summarizer import hf_streaming_service

# Router on which this module's endpoints (e.g. POST "/stream") are registered.
router = APIRouter()


@router.post("/stream")
async def summarize_stream(payload: SummarizeRequest):
    """Return a Server-Sent Events response that streams the summary.

    The response body is produced lazily by ``_stream_generator`` and is
    served with the ``text/event-stream`` media type.
    """
    event_source = _stream_generator(payload)
    # Ask clients/proxies not to cache the stream and to hold the connection open.
    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
    }
    return StreamingResponse(
        event_source,
        media_type="text/event-stream",
        headers=sse_headers,
    )


async def _stream_generator(payload: SummarizeRequest):
    """Yield SSE ``data:`` frames for one HuggingFace summarization request.

    Every chunk produced by the streaming service is JSON-encoded and emitted
    as its own event. If anything raises while streaming, the exception is not
    propagated; instead a single final event is emitted with empty
    ``content``, ``done`` set to ``True`` and an ``error`` message.
    """

    def _as_event(body):
        # One SSE event: "data: <json>" terminated by a blank line.
        return f"data: {json.dumps(body)}\n\n"

    try:
        # Sampling settings are fixed here; the token budget and the prompt
        # come from the request, falling back to defaults when falsy.
        token_stream = hf_streaming_service.summarize_text_stream(
            text=payload.text,
            max_new_tokens=payload.max_tokens or 128,
            temperature=0.7,
            top_p=0.95,
            prompt=payload.prompt or "Summarize the following text concisely:",
        )
        async for piece in token_stream:
            yield _as_event(piece)
    except Exception as exc:
        # Report the failure in-band as a terminal event so the client sees
        # why the stream ended.
        failure = {
            "content": "",
            "done": True,
            "error": f"HuggingFace summarization failed: {exc!s}",
        }
        yield _as_event(failure)