sathvikk committed on
Commit
6cb7262
·
verified ·
1 Parent(s): 5f2937a

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +106 -90
src/streamlit_app.py CHANGED
@@ -1,122 +1,138 @@
1
  import os
2
- os.environ["TRANSFORMERS_CACHE"] = "/cache" # Hugging Face Spaces cache directory
 
3
 
4
  import streamlit as st
5
  import fitz # PyMuPDF
6
  from transformers import pipeline
7
 
8
- # Set page config
9
  st.set_page_config(
10
  page_title="PrepPal",
11
  page_icon="📘",
12
  layout="wide",
13
- menu_items={
14
- 'About': "PrepPal - AI-powered study assistant"
15
- }
16
  )
17
 
18
- # Load summarizer model with error handling
 
 
 
19
  @st.cache_resource
20
  def load_summarizer():
21
  try:
22
  return pipeline(
23
  "summarization",
24
- model="sshleifer/distilbart-cnn-12-6",
25
- device=-1 # Use CPU (more reliable in Spaces)
26
  )
27
  except Exception as e:
28
- st.error(f"❌ Failed to load model: {str(e)}")
29
  return None
30
 
31
- # Extract text from PDF with size limit
32
- def extract_text_from_pdf(uploaded_file):
33
  text = ""
34
  try:
35
- # Check file size (max 5MB)
36
- if uploaded_file.size > 5_000_000:
37
- st.error("File too large (max 5MB)")
38
- return ""
39
-
40
- with fitz.open(stream=uploaded_file.read(), filetype="pdf") as doc:
41
- for page in doc:
42
- text += page.get_text()
 
43
  return text.strip()
44
  except Exception as e:
45
- st.error(f"❌ Error extracting text: {str(e)}")
46
  return ""
47
 
48
- # Summarize text in chunks
49
- def summarize_text(text, summarizer, max_chunk_length=2000):
50
- if not text or not summarizer:
51
- return ""
52
-
53
- try:
54
- chunks = [text[i:i+max_chunk_length] for i in range(0, len(text), max_chunk_length)]
55
- summary = ""
56
- for chunk in chunks:
57
- result = summarizer(
58
- chunk,
59
- max_length=130,
60
- min_length=30,
61
- do_sample=False
62
- )
63
- summary += result[0]['summary_text'] + "\n"
64
- return summary.strip()
65
- except Exception as e:
66
- st.error(f"❌ Summarization failed: {str(e)}")
67
  return ""
68
-
69
- # Load model
70
- summarizer = load_summarizer()
71
-
72
- # UI Layout
73
- st.title("📘 PrepPal - Study Assistant")
74
- tab1, tab2, tab3 = st.tabs(["📄 Summarize Notes", "❓ Ask a Doubt", "💬 Feedback"])
75
-
76
- with tab1:
77
- st.header("PDF Summarizer")
78
- st.write("Upload your class notes in PDF format to receive a summarized version.")
79
 
80
- uploaded_pdf = st.file_uploader(
81
- "Choose a PDF file (max 5MB)",
82
- type=["pdf"],
83
- accept_multiple_files=False
84
- )
85
-
86
- if uploaded_pdf and summarizer:
87
- with st.spinner("Extracting text..."):
88
- pdf_text = extract_text_from_pdf(uploaded_pdf)
89
-
90
- if pdf_text:
91
- st.subheader("Extracted Text Preview")
92
- st.text_area("", pdf_text[:1000] + "...", height=200, disabled=True)
93
 
94
- if st.button("✂️ Summarize"):
95
- with st.spinner("Summarizing... Please wait."):
96
- summary = summarize_text(pdf_text, summarizer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
- if summary:
99
- st.subheader("✅ Summary")
100
- st.text_area("Summary Output", summary, height=300)
101
- st.download_button(
102
- "⬇️ Download Summary",
103
- data=summary,
104
- file_name="summary.txt",
105
- mime="text/plain"
106
- )
107
- else:
108
- st.warning("No summary generated")
109
-
110
- with tab2:
111
- st.header("Ask Questions About Your Notes")
112
- st.info("🔧 This feature is coming soon! You'll be able to ask questions about your uploaded notes.")
113
-
114
- with tab3:
115
- st.header("Help Improve PrepPal")
116
- feedback = st.text_area("Your feedback or suggestions")
117
- if st.button("Submit Feedback"):
118
- st.success("Thank you for your feedback! We'll use it to improve PrepPal.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
- # Footer
121
- st.markdown("---")
122
- st.caption("PrepPal v1.0 | AI-powered study assistant")
 
1
  import os
2
+ import tempfile
3
+ os.environ["TRANSFORMERS_CACHE"] = "/cache"
4
 
5
  import streamlit as st
6
  import fitz # PyMuPDF
7
  from transformers import pipeline
8
 
9
+ # Security headers and config
10
  st.set_page_config(
11
  page_title="PrepPal",
12
  page_icon="📘",
13
  layout="wide",
14
+ menu_items={'About': "PrepPal - AI-powered PDF summarizer"}
 
 
15
  )
16
 
17
+ st.markdown("""
18
+ <meta http-equiv="Content-Security-Policy" content="default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:;">
19
+ """, unsafe_allow_html=True)
20
+
21
  @st.cache_resource
22
  def load_summarizer():
23
  try:
24
  return pipeline(
25
  "summarization",
26
+ model="facebook/bart-large-cnn", # Reliable medium-size model
27
+ device=-1 # Force CPU
28
  )
29
  except Exception as e:
30
+ st.error(f"Model loading failed: {str(e)}")
31
  return None
32
 
33
+ def extract_text(uploaded_file):
 
34
  text = ""
35
  try:
36
+ # Save to temp file first (fixes 403 issues)
37
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
38
+ tmp.write(uploaded_file.getbuffer())
39
+ tmp_path = tmp.name
40
+
41
+ with fitz.open(tmp_path) as doc:
42
+ text = "\n".join([page.get_text() for page in doc])
43
+
44
+ os.unlink(tmp_path)
45
  return text.strip()
46
  except Exception as e:
47
+ st.error(f"PDF processing error: {str(e)}")
48
  return ""
49
 
50
+ def summarize(text, model, max_chunk=1500):
51
+ if not text or not model:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  return ""
 
 
 
 
 
 
 
 
 
 
 
53
 
54
+ chunks = [text[i:i+max_chunk] for i in range(0, len(text), max_chunk)]
55
+ summary = []
56
+
57
+ for chunk in chunks:
58
+ result = model(
59
+ chunk,
60
+ max_length=150,
61
+ min_length=50,
62
+ do_sample=False
63
+ )
64
+ summary.append(result[0]['summary_text'])
65
+
66
+ return "\n".join(summary)
67
 
68
+ # Main App with all 3 tabs
69
+ def main():
70
+ st.title("📘 PrepPal - Study Assistant")
71
+
72
+ # Create all three tabs
73
+ tab1, tab2, tab3 = st.tabs(["📄 Summarize Notes", "❓ Ask a Doubt", "💬 Feedback"])
74
+
75
+ with tab1:
76
+ st.header("PDF Summarizer")
77
+ st.write("Upload your PDF (max 10MB) for an AI-generated summary")
78
+
79
+ uploaded_file = st.file_uploader(
80
+ "Choose PDF file",
81
+ type=["pdf"],
82
+ accept_multiple_files=False,
83
+ key="pdf_uploader"
84
+ )
85
+
86
+ if uploaded_file:
87
+ if uploaded_file.size > 10_000_000: # 10MB limit
88
+ st.error("File too large (max 10MB)")
89
+ else:
90
+ with st.spinner("Extracting text..."):
91
+ text = extract_text(uploaded_file)
92
 
93
+ if text:
94
+ with st.expander("View extracted text"):
95
+ st.text(text[:1000] + "...")
96
+
97
+ if st.button("Generate Summary", key="summarize_btn"):
98
+ with st.spinner("Summarizing..."):
99
+ model = load_summarizer()
100
+ if model:
101
+ summary = summarize(text, model)
102
+
103
+ st.subheader("AI Summary")
104
+ st.write(summary)
105
+
106
+ st.download_button(
107
+ "Download Summary",
108
+ data=summary,
109
+ file_name="summary.txt",
110
+ mime="text/plain",
111
+ key="download_btn"
112
+ )
113
+
114
+ with tab2:
115
+ st.header("Ask a Question")
116
+ st.write("Coming Soon: Ask questions about your uploaded documents")
117
+ st.image("https://via.placeholder.com/600x200?text=Question+Answering+Feature+Coming+Soon",
118
+ caption="AI question answering will be available in the next update")
119
+
120
+ # Placeholder for future functionality
121
+ question = st.text_input("What would you like to ask about your document?")
122
+ if question:
123
+ st.info("This feature is currently in development. Please check back soon!")
124
+
125
+ with tab3:
126
+ st.header("Your Feedback")
127
+ st.write("Help us improve PrepPal")
128
+
129
+ feedback = st.text_area("What do you think about PrepPal?")
130
+ if st.button("Submit Feedback", key="feedback_btn"):
131
+ if feedback:
132
+ st.success("Thank you for your feedback! We'll use this to improve the app.")
133
+ # In a real app, you would store this feedback somewhere
134
+ else:
135
+ st.warning("Please enter your feedback before submitting")
136
 
137
+ if __name__ == "__main__":
138
+ main()