ikraamkb committed on
Commit
254a090
·
verified ·
1 Parent(s): 84d9e96

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -14
app.py CHANGED
@@ -4,23 +4,21 @@ from tika import parser # Apache Tika for document parsing
4
  import openpyxl
5
  from pptx import Presentation
6
  import torch
7
- from PIL import Image
8
  from transformers import pipeline
9
  import gradio as gr
 
10
  import numpy as np
11
- import easyocr
12
 
13
- # Initialize FastAPI (not needed for HF Spaces, but kept for flexibility)
14
  app = FastAPI()
15
 
16
  print(f"πŸ”„ Loading models")
17
 
 
18
  doc_qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
19
- image_captioning_pipeline = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
20
- print("βœ… Models loaded")
21
 
22
- # Initialize OCR Model (CPU Mode)
23
- reader = easyocr.Reader(["en"], gpu=False)
24
 
25
  # Allowed File Extensions
26
  ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
@@ -69,6 +67,7 @@ def extract_text_from_excel(excel_bytes):
69
  except Exception as e:
70
  return f"❌ Error reading Excel: {str(e)}"
71
 
 
72
  def answer_question_from_document(file: UploadFile, question: str):
73
  print("πŸ“‚ Processing document for QA...")
74
  validation_error = validate_file_type(file)
@@ -96,19 +95,17 @@ def answer_question_from_document(file: UploadFile, question: str):
96
 
97
  return response[0]["generated_text"]
98
 
 
99
  def answer_question_from_image(image, question):
100
  try:
101
  print("πŸ–ΌοΈ Processing image for QA...")
102
  if isinstance(image, np.ndarray): # If it's a NumPy array from Gradio
103
  image = Image.fromarray(image) # Convert to PIL Image
104
 
105
- print("πŸ–ΌοΈ Generating caption for image...")
106
- caption = image_captioning_pipeline(image)[0]['generated_text']
107
-
108
- print("πŸ€– Answering question based on caption...")
109
- response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}")
110
 
111
- return response[0]["generated_text"]
112
  except Exception as e:
113
  return f"❌ Error processing image: {str(e)}"
114
 
@@ -124,7 +121,7 @@ img_interface = gr.Interface(
124
  fn=answer_question_from_image,
125
  inputs=[gr.Image(label="πŸ–ΌοΈ Upload Image"), gr.Textbox(label="πŸ’¬ Ask a Question")],
126
  outputs="text",
127
- title="πŸ–ΌοΈ AI Image Question Answering"
128
  )
129
 
130
  # Launch Gradio
 
4
  import openpyxl
5
  from pptx import Presentation
6
  import torch
 
7
  from transformers import pipeline
8
  import gradio as gr
9
+ from PIL import Image
10
  import numpy as np
 
11
 
12
+ # Initialize FastAPI (not needed for HF Spaces but kept for flexibility)
13
  app = FastAPI()
14
 
15
  print(f"πŸ”„ Loading models")
16
 
17
+ # Load Hugging Face Models
18
  doc_qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
19
+ vqa_pipeline = pipeline("vqa", model="Salesforce/blip-vqa-base") # VQA model for images
 
20
 
21
+ print("βœ… Models loaded")
 
22
 
23
  # Allowed File Extensions
24
  ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
 
67
  except Exception as e:
68
  return f"❌ Error reading Excel: {str(e)}"
69
 
70
+ # Function to process documents and answer questions
71
  def answer_question_from_document(file: UploadFile, question: str):
72
  print("πŸ“‚ Processing document for QA...")
73
  validation_error = validate_file_type(file)
 
95
 
96
  return response[0]["generated_text"]
97
 
98
+ # Function to process images and answer questions (NO OCR)
99
  def answer_question_from_image(image, question):
100
  try:
101
  print("πŸ–ΌοΈ Processing image for QA...")
102
  if isinstance(image, np.ndarray): # If it's a NumPy array from Gradio
103
  image = Image.fromarray(image) # Convert to PIL Image
104
 
105
+ print("πŸ€– Answering question based on image content...")
106
+ response = vqa_pipeline(image=image, question=question)
 
 
 
107
 
108
+ return response[0]["answer"]
109
  except Exception as e:
110
  return f"❌ Error processing image: {str(e)}"
111
 
 
121
  fn=answer_question_from_image,
122
  inputs=[gr.Image(label="πŸ–ΌοΈ Upload Image"), gr.Textbox(label="πŸ’¬ Ask a Question")],
123
  outputs="text",
124
+ title="πŸ–ΌοΈ AI Image Question Answering (NO OCR)"
125
  )
126
 
127
  # Launch Gradio