# Summarization helper — DistilBART model with punctuation pre-processing
from transformers import pipeline
import torch
import logging
from punctuation import punctuate_text

# Load summarization model once at import time; use the first GPU when
# available, otherwise fall back to CPU (device=-1).
device = 0 if torch.cuda.is_available() else -1
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=device
)

# Cheap character-level cap applied before the text reaches the tokenizer.
_MAX_INPUT_CHARS = 3000
# Length of the raw-text excerpt returned when summarization fails.
_FALLBACK_CHARS = 200
# Marker appended to text that was cut short for the caller.
_ELLIPSIS = " [...]"


# Summarize text
def summarize_text(content: str, query: str = "") -> str:
    """
    Punctuate ``content`` and summarize it, optionally focusing on a query.

    Args:
        content: Raw text to summarize. It is passed through
            ``punctuate_text`` before being handed to the model.
        query: Optional focus topic. When non-blank, the model input is
            prefixed with an instruction mentioning the query.

    Returns:
        The stripped summary text. Returns ``""`` for empty or
        whitespace-only ``content``. If the summarizer raises, the error is
        logged and the first 200 characters of the punctuated content are
        returned instead (followed by ``" [...]"`` when it was cut).
    """
    if not content or not content.strip():
        return ""

    # Ensure content is punctuated before summarizing
    content = punctuate_text(content)

    # Build summarization input; a blank query means "no focus".
    # NOTE(review): the checkpoint is a summarization model; whether this
    # instruction-style prefix actually steers the output should be confirmed.
    focus = query.strip() if query else ""
    if focus:
        input_text = f"Summarize the following text focusing on '{focus}': {content}"
    else:
        input_text = content

    # Character cap keeps tokenization cheap for very long inputs. No
    # ellipsis marker is appended here: it would only add noise to the
    # text the model is asked to summarize.
    input_text = input_text[:_MAX_INPUT_CHARS]

    try:
        result = summarizer(
            input_text,
            max_length=150,
            min_length=30,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.9,
            # Characters are not tokens: let the tokenizer trim to the
            # model's maximum input length instead of failing on long input.
            truncation=True,
        )
    except Exception as e:
        # Best-effort helper: log and degrade to a plain excerpt.
        logging.error("⚠️ Summarization failed: %s", e)
        if len(content) > _FALLBACK_CHARS:
            return content[:_FALLBACK_CHARS] + _ELLIPSIS
        return content

    return result[0]["summary_text"].strip()