Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from PyPDF2 import PdfReader | |
| from transformers import AutoModelForSeq2SeqLM, AutoTokenizer | |
| from gtts import gTTS | |
| from io import BytesIO | |
| import re | |
| import os | |
# Identifier of the pretrained seq2seq checkpoint used for summarization.
model_name = "ArtifactAI/led_large_16384_arxiv_summarization"

# Both objects are created once at import time and shared by every request;
# the model is loaded first, then its matching tokenizer.
model, tokenizer = (
    AutoModelForSeq2SeqLM.from_pretrained(model_name),
    AutoTokenizer.from_pretrained(model_name),
)
# A sentence boundary is a whitespace character that directly follows "." or
# "?", unless the preceding characters look like an abbreviation: the
# "x.y." shape (e.g. "e.g.") or a capitalised two-letter title (e.g. "Dr.").
_SENTENCE_BOUNDARY = re.compile(
    r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s'
)


def extract_first_sentence(text):
    """Return the first sentence of *text*.

    The text is cut at the first sentence boundary (see
    ``_SENTENCE_BOUNDARY``). The terminating "." or "?" is kept; the
    whitespace that follows it is dropped.

    Args:
        text: The string to take the first sentence from.

    Returns:
        Everything before the first sentence boundary, or *text* unchanged
        when it contains no boundary (this includes the empty string).
    """
    # split() always yields at least one element, so indexing [0] is safe and
    # no fallback branch is needed. maxsplit=1 stops scanning after the first
    # boundary because only the leading piece is used.
    return _SENTENCE_BOUNDARY.split(text, maxsplit=1)[0]
def summarize_pdf_abstract(pdf_file):
    """Summarize a PDF's abstract page in one sentence and synthesize speech.

    The text of the first page containing "Abstract" or "Introduction" is fed
    to the module-level ``model``/``tokenizer``; the first sentence of the
    generated summary is then converted to audio with gTTS.

    Args:
        pdf_file: The uploaded PDF, in whatever form ``PdfReader`` accepts.

    Returns:
        A ``(summary_sentence, audio_bytes)`` tuple: the one-sentence summary
        as a string and the raw bytes gTTS wrote for it.

    Raises:
        gr.Error: If any step fails, including when no page contains
            "Abstract" or "Introduction". ``gr.Error`` is an ``Exception``
            subclass whose message Gradio shows to the user; the original
            exception is chained as ``__cause__``.
    """
    try:
        reader = PdfReader(pdf_file)

        abstract_text = ""
        for page in reader.pages:
            # Extract each page once: text extraction is the expensive step.
            page_text = page.extract_text() or ""
            if "Abstract" in page_text or "Introduction" in page_text:
                abstract_text = page_text
                break

        if not abstract_text:
            # Fail clearly instead of asking the model to summarize nothing.
            raise ValueError(
                "No page containing 'Abstract' or 'Introduction' was found "
                "in the PDF."
            )

        # truncation=True clips over-long pages to the tokenizer's maximum
        # length instead of failing inside the model.
        inputs = tokenizer(abstract_text, return_tensors="pt", truncation=True)
        outputs = model.generate(**inputs)
        # Drop special tokens so sequence markers are neither displayed in
        # the summary nor read aloud.
        summary = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract only the first sentence
        summary_sentence = extract_first_sentence(summary)

        # Generate audio into an in-memory buffer
        speech = gTTS(text=summary_sentence, lang="en")
        speech_bytes = BytesIO()
        speech.write_to_fp(speech_bytes)

        # Return individual output values
        return summary_sentence, speech_bytes.getvalue()
    except Exception as e:
        raise gr.Error(str(e)) from e
# Sample PDF offered as a clickable example; expected to sit next to this file.
_example_pdf = os.path.join(
    os.path.dirname(__file__),
    "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf",
)

# Text shown under the title in the UI.
_description = """PDF Summarization App. This app summarizes the abstract of a PDF in one sentence and generates an audio of it. Only upload PDF's with Abstracts
Please read the README.MD for information about the app and sample PDFs."""

# One file input, two outputs (summary text and audio), wired to
# summarize_pdf_abstract. Example outputs are cached.
interface = gr.Interface(
    fn=summarize_pdf_abstract,
    inputs=[gr.File(label="Upload PDF")],
    outputs=[gr.Textbox(label="Summary"), gr.Audio()],
    title="PDF Summarization & Audio Tool",
    description=_description,
    examples=[_example_pdf],
    cache_examples=True,
)

# Start the web server as soon as the module runs, requesting a share link.
interface.launch(share=True)