# ks-version-1-1 / backend / nlp_summary.py
# NIKKI77's picture
# Deploy: GPU-ready HF Space (Docker)
# 903b444
# Summarization helper — DistilBART model with punctuation pre-processing
from transformers import pipeline
import torch
import logging
from punctuation import punctuate_text
# Load summarization model
# Use the first CUDA device when torch reports one; otherwise fall back to -1 (CPU).
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Build the shared summarization pipeline once, at import time, so every call
# to the helper below reuses the same loaded DistilBART checkpoint.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)
# Summarize text
def summarize_text(content: str, query: str = "") -> str:
    """
    Summarize content with the module-level summarizer, optionally focusing on a query.

    The content is first run through ``punctuate_text``. The punctuated text,
    prefixed with a focus instruction when ``query`` is non-empty, is capped at
    3000 characters and passed to the summarization pipeline.

    Args:
        content: Text to summarize. ``None``, empty, or whitespace-only input
            yields an empty string without touching the model.
        query: Optional focus phrase embedded in the prompt; ignored when empty.

    Returns:
        The stripped summary text. If summarization raises, the failure is
        logged and the first 200 characters of the punctuated content
        (followed by " [...]" when it is longer) are returned instead.
    """
    # Guard against None as well as blank strings: None has no .strip().
    if not content or not content.strip():
        return ""

    # Ensure content is punctuated before summarizing.
    # Kept outside the try on purpose: errors from punctuate_text propagate
    # to the caller exactly as before.
    content = punctuate_text(content)

    # Build summarization input
    if query:
        input_text = f"Summarize the following text focusing on '{query}': {content}"
    else:
        input_text = content

    # Cheap character-level cap. This is NOT a token-length check; the real
    # limit is enforced by truncation=True in the pipeline call below.
    max_input_chars = 3000
    if len(input_text) > max_input_chars:
        input_text = input_text[:max_input_chars] + " [...]"

    try:
        summary = summarizer(
            input_text,
            max_length=150,
            min_length=30,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.9,
            # 3000 characters can still exceed the model's maximum input
            # length in tokens; have the tokenizer truncate instead of
            # letting the forward pass fail on over-long input.
            truncation=True,
        )[0]["summary_text"]
        return summary.strip()
    except Exception as e:
        # Best-effort fallback: record the failure with its traceback and
        # return a short excerpt of the punctuated content.
        logging.exception("⚠️ Summarization failed: %s", e)
        return content[:200] + " [...]" if len(content) > 200 else content