ikraamkb committed on
Commit 1be9899 · verified · 1 Parent(s): fbf2ce7

Update app.py

Files changed (1)
  1. app.py +34 -42
app.py CHANGED
@@ -3,63 +3,64 @@ import fitz # PyMuPDF for PDF parsing
 from tika import parser # Apache Tika for document parsing
 import openpyxl
 from pptx import Presentation
+import torch
 from PIL import Image
 from transformers import pipeline
 import gradio as gr
-from fastapi.responses import RedirectResponse
 import numpy as np
 import easyocr

-# Initialize FastAPI
+# Initialize FastAPI (not needed for HF Spaces, but kept for flexibility)
 app = FastAPI()

 print(f"🔄 Loading models")

-doc_qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+doc_qa_pipeline = pipeline("text2text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
 image_captioning_pipeline = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
-print("Models loaded")
+print("✅ Models loaded")

-# Initialize OCR Model (Lazy Load)
-reader = easyocr.Reader(["en"], gpu=True)
+# Initialize OCR Model (CPU Mode)
+reader = easyocr.Reader(["en"], gpu=False)

 # Allowed File Extensions
 ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}

-def validate_file_type(file: UploadFile):
+def validate_file_type(file):
     ext = file.filename.split(".")[-1].lower()
     print(f"🔍 Validating file type: {ext}")
     if ext not in ALLOWED_EXTENSIONS:
         return f"❌ Unsupported file format: {ext}"
     return None

+# Function to truncate text to 450 tokens
 def truncate_text(text, max_tokens=450):
     words = text.split()
     truncated = " ".join(words[:max_tokens])
     print(f"✂️ Truncated text to {max_tokens} tokens.")
     return truncated

-def extract_text_from_pdf(pdf_file: UploadFile):
+# Document Text Extraction Functions
+def extract_text_from_pdf(pdf_bytes):
     try:
-        print("📝 Extracting text from PDF...")
-        pdf_bytes = pdf_file.file.read()
+        print("📄 Extracting text from PDF...")
         doc = fitz.open(stream=pdf_bytes, filetype="pdf")
         text = "\n".join([page.get_text("text") for page in doc])
         return text if text else "⚠️ No text found."
     except Exception as e:
         return f"❌ Error reading PDF: {str(e)}"

-def extract_text_with_tika(file: UploadFile):
+def extract_text_with_tika(file_bytes):
     try:
         print("📝 Extracting text with Tika...")
-        parsed = parser.from_buffer(file.file.read())
+        parsed = parser.from_buffer(file_bytes)
         return parsed.get("content", "⚠️ No text found.").strip()
     except Exception as e:
         return f"❌ Error reading document: {str(e)}"

-def extract_text_from_excel(excel_file: UploadFile):
+def extract_text_from_excel(excel_bytes):
     try:
-        print("📝 Extracting text from Excel...")
-        wb = openpyxl.load_workbook(excel_file.file, read_only=True)
+        print("📊 Extracting text from Excel...")
+        wb = openpyxl.load_workbook(excel_bytes, read_only=True)
         text = []
         for sheet in wb.worksheets:
             for row in sheet.iter_rows(values_only=True):
@@ -75,13 +76,14 @@ def answer_question_from_document(file: UploadFile, question: str):
         return validation_error

     file_ext = file.filename.split(".")[-1].lower()
-
+    file_bytes = file.file.read()
+
     if file_ext == "pdf":
-        text = extract_text_from_pdf(file)
+        text = extract_text_from_pdf(file_bytes)
     elif file_ext in ["docx", "pptx"]:
-        text = extract_text_with_tika(file)
+        text = extract_text_with_tika(file_bytes)
     elif file_ext == "xlsx":
-        text = extract_text_from_excel(file)
+        text = extract_text_from_excel(file_bytes)
     else:
         return "❌ Unsupported file format!"

@@ -94,13 +96,13 @@ def answer_question_from_document(file: UploadFile, question: str):

     return response[0]["generated_text"]

-def answer_question_from_image(image, question: str):
+def answer_question_from_image(image, question):
     try:
-        print("🎨 Converting image for processing...")
-        if isinstance(image, np.ndarray):
-            image = Image.fromarray(image) # Convert NumPy array to PIL Image
+        print("🖼️ Processing image for QA...")
+        if isinstance(image, np.ndarray): # If it's a NumPy array from Gradio
+            image = Image.fromarray(image) # Convert to PIL Image

-        print("🎨 Generating caption for image...")
+        print("🖼️ Generating caption for image...")
         caption = image_captioning_pipeline(image)[0]['generated_text']

         print("🤖 Answering question based on caption...")
@@ -110,6 +112,7 @@ def answer_question_from_image(image, question: str):
     except Exception as e:
         return f"❌ Error processing image: {str(e)}"

+# Gradio UI for Document & Image QA
 doc_interface = gr.Interface(
     fn=answer_question_from_document,
     inputs=[gr.File(label="📂 Upload Document"), gr.Textbox(label="💬 Ask a Question")],
@@ -119,24 +122,13 @@ doc_interface = gr.Interface(

 img_interface = gr.Interface(
     fn=answer_question_from_image,
-    inputs=[gr.Image(label="🎨 Upload Image"), gr.Textbox(label="💬 Ask a Question")],
+    inputs=[gr.Image(label="🖼️ Upload Image"), gr.Textbox(label="💬 Ask a Question")],
     outputs="text",
-    title="🎨 AI Image Question Answering"
+    title="🖼️ AI Image Question Answering"
 )

-# Use Gradio Blocks (instead of TabbedInterface)
-with gr.Blocks() as demo:
-    gr.TabbedLayout(
-        [doc_interface, img_interface],
-        ["📄 Document QA", "🎨 Image QA"]
-    )
-
-# Mount Gradio to FastAPI properly
-from gradio.routes import App as GradioApp
-gradio_app = GradioApp.create_app(demo)
-app.mount("/", gradio_app)
-
-# Redirect FastAPI root to Gradio UI
-@app.get("/")
-def home():
-    return RedirectResponse(url="/")
+# Launch Gradio
+app = gr.TabbedInterface([doc_interface, img_interface], ["📄 Document QA", "🖼️ Image QA"])
+
+if __name__ == "__main__":
+    app.launch(share=True) # For Hugging Face Spaces
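
For callers, the most visible change in this commit is that the extraction helpers now take raw bytes, with the upload read exactly once in answer_question_from_document. The sketch below exercises that path for the PDF case outside Gradio/FastAPI; it assumes PyMuPDF is installed, and "sample.pdf" is only a hypothetical local file used for illustration.

import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_bytes):
    # Same approach as the updated helper: open the PDF from an in-memory byte string.
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    return "\n".join(page.get_text("text") for page in doc)

if __name__ == "__main__":
    with open("sample.pdf", "rb") as f:  # hypothetical input file
        pdf_bytes = f.read()             # read the file once, then pass bytes around
    print(extract_text_from_pdf(pdf_bytes)[:300])

Passing bytes rather than the UploadFile also makes the helpers straightforward to test without spinning up the web app.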
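Not part of the commit, but relevant to the removed mounting block: if the FastAPI instance were ever needed again, Gradio provides gr.mount_gradio_app for attaching an interface to an existing FastAPI app, rather than the hand-rolled gradio.routes.App.create_app call that was deleted. A minimal sketch, assuming a recent Gradio release; the echo Interface is a stand-in for the real TabbedInterface built in app.py.

from fastapi import FastAPI
import gradio as gr

# Stand-in UI so the sketch is self-contained; in app.py this would be the
# TabbedInterface combining doc_interface and img_interface.
demo = gr.Interface(fn=lambda text: text, inputs="text", outputs="text")

api = FastAPI()
api = gr.mount_gradio_app(api, demo, path="/")  # serve the Gradio UI at the root path
# Run with, e.g.: uvicorn app:api --host 0.0.0.0 --port 7860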