Spaces:
Sleeping
Sleeping
| # Summarization helper — DistilBART model with punctuation pre-processing | |
| from transformers import pipeline | |
| import torch | |
| import logging | |
| from punctuation import punctuate_text | |
| # Load summarization model | |
# Choose the inference device: index 0 (the first CUDA GPU) when torch reports
# CUDA support, otherwise -1, which the transformers pipeline maps to the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Module-level summarization pipeline backed by the DistilBART CNN checkpoint.
# It is built once when this module is imported and reused by every call.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    device=device,
)
| # Summarize text | |
def summarize_text(content: str, query: str = "") -> str:
    """Summarize ``content``, optionally steering the summary toward ``query``.

    The text is first run through ``punctuate_text`` and the result is handed
    to the module-level ``summarizer`` pipeline.

    Args:
        content: Text to summarize. ``None``, empty, or whitespace-only input
            yields an empty string without touching the model.
        query: Optional focus topic. When non-empty it is embedded in an
            instruction-style prefix placed in front of the content.

    Returns:
        The whitespace-stripped summary. If summarization raises, the error
        is logged and the first 200 characters of the punctuated content are
        returned instead (followed by " [...]" when the content was longer).
    """
    # Guard against None as well as blank strings so callers that pass a
    # missing value get "" back instead of an AttributeError.
    if not content or not content.strip():
        return ""

    # Ensure content is punctuated before summarizing.
    content = punctuate_text(content)

    if query:
        input_text = f"Summarize the following text focusing on '{query}': {content}"
    else:
        input_text = content

    # Cheap character-level cap on the model input. It cannot raise, so it
    # lives outside the try block below.
    max_input_chars = 3000
    if len(input_text) > max_input_chars:
        input_text = input_text[:max_input_chars] + " [...]"

    try:
        # Sampling is enabled, so repeated calls on the same input may return
        # different summaries. truncation=True lets the tokenizer enforce the
        # model's real token limit, which the character cap above can only
        # approximate.
        summary = summarizer(
            input_text,
            max_length=150,
            min_length=30,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.9,
            truncation=True,
        )[0]["summary_text"]
        return summary.strip()
    except Exception as e:
        # Best-effort boundary: never propagate model failures to the caller;
        # log them and fall back to a short excerpt of the punctuated text.
        logging.error("⚠️ Summarization failed: %s", e)
        return content[:200] + " [...]" if len(content) > 200 else content