Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from transformers import T5Tokenizer, T5ForConditionalGeneration | |
| from transformers import pipeline | |
| import torch | |
| import base64 | |
| import tempfile | |
| import os | |
checkpoint = "MBZUAI/LaMini-Flan-T5-248M"


@st.cache_resource
def _load_model_and_tokenizer(checkpoint_name):
    """Load the tokenizer and seq2seq model for *checkpoint_name* once.

    Streamlit re-executes this script on every widget interaction, so the
    load is wrapped in ``st.cache_resource`` to keep a single model instance
    per server process instead of reloading it on each rerun.

    Args:
        checkpoint_name: Hugging Face model id or local path.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    import atexit
    import shutil

    loaded_tokenizer = T5Tokenizer.from_pretrained(checkpoint_name)

    # The offload folder must outlive the model: with device_map='auto' any
    # weights that do not fit in memory are kept on disk in this folder, so a
    # self-deleting TemporaryDirectory context would remove them right after
    # loading. Create a persistent directory and clean it up at exit instead.
    offload_folder = tempfile.mkdtemp(prefix="model_offload_")
    atexit.register(shutil.rmtree, offload_folder, ignore_errors=True)

    loaded_model = T5ForConditionalGeneration.from_pretrained(
        checkpoint_name,
        device_map='auto',
        torch_dtype=torch.float32,
        offload_folder=offload_folder,
    )
    return loaded_tokenizer, loaded_model


# Module-level names used by the summarization pipeline below.
tokenizer, base_model = _load_model_and_tokenizer(checkpoint)
| #file loader and preprocessing | |
def file_preprocessing(file):
    """Extract the text of a PDF as one string.

    Args:
        file: Path to the PDF file; passed straight to ``PyPDFLoader``.

    Returns:
        The text content of every loaded page, in order, joined by newlines.
        An empty string if the loader yields no text.
    """
    # Load one document per page and join them directly. Splitting into
    # overlapping chunks and then concatenating the chunks would repeat the
    # overlapped text and glue neighbouring chunks together without any
    # separator, corrupting the input handed to the summarizer.
    pages = PyPDFLoader(file).load()
    return "\n".join(page.page_content for page in pages)
| #LLM pipeline | |
def llm_pipeline(filepath):
    """Summarize the PDF stored at *filepath*.

    The text is extracted with ``file_preprocessing`` and passed to a
    Hugging Face ``summarization`` pipeline built from the module-level
    ``base_model`` and ``tokenizer``.

    Args:
        filepath: Path to the PDF file to summarize.

    Returns:
        The generated summary text (``summary_text`` of the first result).

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    input_text = file_preprocessing(filepath)
    # Fail early with a clear message (e.g. scanned/image-only PDFs) instead
    # of asking the model to generate min_length tokens from nothing.
    if not input_text.strip():
        raise ValueError("No extractable text was found in the PDF.")

    pipe_sum = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        max_length=500,
        min_length=50,
    )
    # NOTE(review): the whole document is sent in a single call; very long
    # PDFs may exceed what the model handles well - confirm whether
    # truncation or chunked summarization is wanted here.
    result = pipe_sum(input_text)
    return result[0]['summary_text']
def main():
    """Render the Streamlit UI: upload a PDF and display its summary.

    The uploaded bytes are written to a temporary ``.pdf`` file because the
    summarization step works on a file path. The temporary file is always
    removed afterwards, whether summarization succeeded or failed.
    """
    st.title("Document Summarization App")
    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
    if uploaded_file is None:
        return
    if not st.button("Summarize"):
        return

    # delete=False so the file survives the close below and can be re-opened
    # by path; it is removed explicitly in the finally clause.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        # getvalue() returns the full upload regardless of the current
        # stream position, unlike read().
        temp_file.write(uploaded_file.getvalue())
        filepath = temp_file.name

    # The file is closed (and therefore fully flushed) before it is read.
    try:
        summary = llm_pipeline(filepath)
    except Exception as e:
        # Top-level UI boundary: report any failure to the user.
        st.error(f"An error occurred during summarization: {e}")
    else:
        st.success(summary)  # Display only the summary
    finally:
        # Clean up the temporary file
        os.remove(filepath)


if __name__ == "__main__":
    main()