Spaces:
Sleeping
Sleeping
File size: 1,486 Bytes
903b444 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# Summarization helper — DistilBART model with punctuation pre-processing
from transformers import pipeline
import torch
import logging
from punctuation import punctuate_text
# Choose the device index handed to the pipeline: 0 when CUDA is available,
# otherwise -1 (the pipeline's CPU setting).
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Build the shared DistilBART summarization pipeline once, at import time.
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=device,
)
# Summarize text
def summarize_text(content: str, query: str = "") -> str:
    """
    Punctuate ``content`` and summarize it, optionally focusing on a query.

    The text is first passed through ``punctuate_text``. When ``query`` is
    non-empty, the model input is prefixed with an instruction that names the
    query. The model input is capped at 3000 characters and then handed to the
    module-level ``summarizer`` pipeline.

    Args:
        content: Text to summarize. ``None``, empty, or whitespace-only input
            yields an empty string without calling the model.
        query: Optional topic the summary should focus on.

    Returns:
        The summary with surrounding whitespace stripped. If summarization
        raises, the error is logged and the punctuated content is returned
        instead, cut to 200 characters and followed by " [...]" when longer.
    """
    # Treat None like any other empty input instead of failing on .strip().
    if not content or not content.strip():
        return ""

    # Ensure content is punctuated before summarizing
    content = punctuate_text(content)

    # Build summarization input
    if query:
        input_text = f"Summarize the following text focusing on '{query}': {content}"
    else:
        input_text = content

    # Cheap character cap before tokenization. No " [...]" marker is appended
    # here: anything added to the input would be summarized as if it were
    # part of the source text.
    max_input_chars = 3000
    input_text = input_text[:max_input_chars]

    try:
        # do_sample=True means repeated calls may yield different summaries.
        # truncation=True asks the tokenizer to clip inputs that still exceed
        # the model's maximum length after the character cap above.
        summary = summarizer(
            input_text,
            max_length=150,
            min_length=30,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.9,
            truncation=True,
        )[0]["summary_text"]
        return summary.strip()
    except Exception as e:
        # Best-effort fallback: log the failure and return a short excerpt of
        # the punctuated text rather than propagating the error.
        logging.error("⚠️ Summarization failed: %s", e)
        if len(content) > 200:
            return content[:200] + " [...]"
        return content
|