ikraamkb committed on
Commit
d74850e
·
verified ·
1 Parent(s): 753db53

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -29
app.py CHANGED
@@ -1,26 +1,30 @@
1
- import gradio as gr
 
2
  import fitz # PyMuPDF for PDF parsing
3
  from tika import parser # Apache Tika for document parsing
4
  import openpyxl
5
  from pptx import Presentation
6
  from PIL import Image
7
  from transformers import pipeline
8
- import torch
9
  import numpy as np
10
 
11
- # Load Optimized Hugging Face Models
 
 
12
  print("πŸ”„ Loading models...")
13
 
 
14
  qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
15
  image_captioning_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1, use_fast=True)
16
 
17
  print("βœ… Models loaded (Optimized for Speed)")
18
 
19
  # Allowed File Extensions
20
- ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
21
 
22
  def validate_file_type(file):
23
- ext = file.name.split(".")[-1].lower()
24
  if ext not in ALLOWED_EXTENSIONS:
25
  return f"❌ Unsupported file format: {ext}"
26
  return None
@@ -48,8 +52,16 @@ def extract_text_from_excel(excel_bytes):
48
  text.append(" ".join(map(str, row)))
49
  return "\n".join(text) if text else "⚠️ No text found."
50
 
51
- # Function to process document and answer question
52
- def answer_question_from_document(file, question):
 
 
 
 
 
 
 
 
53
  validation_error = validate_file_type(file)
54
  if validation_error:
55
  return validation_error
@@ -74,29 +86,23 @@ def answer_question_from_document(file, question):
74
 
75
  return response[0]["generated_text"]
76
 
77
- # Function to process image and answer question
78
- def answer_question_from_image(image, question):
79
- if isinstance(image, np.ndarray):
80
- image = Image.fromarray(image)
81
-
82
- caption = image_captioning_pipeline(image)[0]['generated_text']
83
- response = qa_pipeline(f"Question: {question}\nContext: {caption}")
84
-
85
- return response[0]["generated_text"]
86
-
87
- # Gradio Interface
88
  interface = gr.Interface(
89
- fn=lambda file, image, question: (
90
- answer_question_from_document(file, question) if file else answer_question_from_image(image, question)
91
- ),
92
- inputs=[
93
- gr.File(label="πŸ“‚ Upload Document (PDF, DOCX, PPTX, XLSX)", optional=True),
94
- gr.Image(label="πŸ–ΌοΈ Upload Image", optional=True),
95
- gr.Textbox(label="πŸ’¬ Ask a Question")
96
- ],
97
  outputs="text",
98
- title="πŸ“„ AI Document & Image Question Answering",
99
- description="Upload a **document** (PDF, DOCX, PPTX, XLSX) or an **image**, then ask a question about its content."
100
  )
101
 
102
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile
2
+ from fastapi.responses import RedirectResponse
3
  import fitz # PyMuPDF for PDF parsing
4
  from tika import parser # Apache Tika for document parsing
5
  import openpyxl
6
  from pptx import Presentation
7
  from PIL import Image
8
  from transformers import pipeline
9
+ import gradio as gr
10
  import numpy as np
11
 
12
# Initialize FastAPI (the Gradio UI is mounted onto this app further down)
app = FastAPI()

print("πŸ”„ Loading models...")

# Load Hugging Face Models
# NOTE(review): both pipelines run on CPU (device=-1) and are created once at
# import time, so every request reuses the same loaded models.
# qa_pipeline: text-generation model used to answer questions from extracted text.
qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
# image_captioning_pipeline: image-to-text model that captions uploaded images.
image_captioning_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1, use_fast=True)

print("βœ… Models loaded (Optimized for Speed)")
22
 
23
# Allowed File Extensions (documents plus common image formats)
ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx", "jpg", "jpeg", "png"}


def validate_file_type(file):
    """Return an error string if *file* has an unsupported extension, else None.

    Accepts both FastAPI's ``UploadFile`` (which exposes ``.filename``) and
    Gradio's tempfile-like upload object (which exposes ``.name``), so the
    validator works no matter which entry point delivered the file.
    The original accessed ``file.filename`` unconditionally, which raises
    AttributeError for Gradio uploads.
    """
    filename = getattr(file, "filename", None) or getattr(file, "name", "")
    # rsplit keeps only the last dot-separated segment; a name with no dot
    # falls through to the membership check and is reported as unsupported.
    ext = filename.rsplit(".", 1)[-1].lower()
    if ext not in ALLOWED_EXTENSIONS:
        return f"❌ Unsupported file format: {ext}"
    return None
 
52
  text.append(" ".join(map(str, row)))
53
  return "\n".join(text) if text else "⚠️ No text found."
54
 
55
+ # Function to process file (document or image) and answer question
56
+ def answer_question(file, question: str):
57
+ if isinstance(file, np.ndarray):
58
+ # Image processing
59
+ image = Image.fromarray(file)
60
+ caption = image_captioning_pipeline(image)[0]['generated_text']
61
+ response = qa_pipeline(f"Question: {question}\nContext: {caption}")
62
+ return response[0]["generated_text"]
63
+
64
+ # Document processing
65
  validation_error = validate_file_type(file)
66
  if validation_error:
67
  return validation_error
 
86
 
87
  return response[0]["generated_text"]
88
 
89
+ # Gradio Interface for both images & documents
 
 
 
 
 
 
 
 
 
 
90
# Gradio UI: one file input (document or image) plus a free-text question.
_interface_inputs = [
    gr.File(label="πŸ“‚ Upload Document or Image"),
    gr.Textbox(label="πŸ’¬ Ask a Question"),
]

interface = gr.Interface(
    fn=answer_question,
    inputs=_interface_inputs,
    outputs="text",
    title="πŸ“„πŸ–ΌοΈ AI Document & Image Question Answering",
)
96
 
97
# Mount Gradio with FastAPI: the Gradio UI serves the site root.
# `demo` is kept as a module-level alias (Hugging Face Spaces and other
# launchers conventionally look for this name).
demo = interface
app = gr.mount_gradio_app(app, demo, path="/")

# NOTE(review): the previous `@app.get("/")` handler returned
# RedirectResponse(url="/") — a redirect from "/" back to "/" is either an
# infinite redirect loop or dead code shadowed by the Gradio mount above.
# The Gradio app already answers at the root, so no extra route is defined.

# Run FastAPI + Gradio together when executed as a script.
if __name__ == "__main__":
    import uvicorn  # local import: only needed for direct script execution

    uvicorn.run(app, host="0.0.0.0", port=7860)