File size: 1,486 Bytes
903b444
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Summarization helper — DistilBART model with punctuation pre-processing

from transformers import pipeline
import torch
import logging
from punctuation import punctuate_text  

# Build the shared summarization pipeline once, at import time.
# Device index 0 selects the first CUDA GPU; -1 keeps the pipeline on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = -1

summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)

# Summarize text 
def summarize_text(content: str, query: str = "") -> str:
    """Summarize ``content``, optionally steering the summary with ``query``.

    The text is first passed through ``punctuate_text`` and the result is fed
    to the module-level ``summarizer`` pipeline.

    Args:
        content: Raw text to summarize. ``None``, empty, or whitespace-only
            input yields an empty string without touching the model.
        query: Optional focus topic. When non-empty, an instruction
            mentioning it is prepended to the text handed to the model.

    Returns:
        The stripped summary text. If the summarizer raises, the error is
        logged and the punctuated content is returned instead, cut to 200
        characters followed by ``" [...]"`` when it is longer than that.
    """
    if not content or not content.strip():
        return ""

    # Ensure content is punctuated before summarizing
    content = punctuate_text(content)

    # Build summarization input
    if query:
        input_text = f"Summarize the following text focusing on '{query}': {content}"
    else:
        input_text = content

    # Cheap character cap so very large inputs are not tokenized in full.
    # No ellipsis marker is appended here: it would become part of the text
    # the model is asked to summarize.
    max_input_chars = 3000
    input_text = input_text[:max_input_chars]

    try:
        # truncation=True makes the tokenizer enforce the model's real input
        # limit; a character count alone cannot guarantee that.
        # NOTE: do_sample=True means repeated calls on the same input may
        # return different summaries.
        summary = summarizer(
            input_text,
            max_length=150,
            min_length=30,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.9,
            truncation=True,
        )[0]["summary_text"]
    except Exception as e:
        # Best-effort fallback: log with traceback and return a short excerpt.
        logging.exception("⚠️ Summarization failed: %s", e)
        return content[:200] + " [...]" if len(content) > 200 else content

    return summary.strip()