Spaces:

ikraamkb
/

qtAnswering

Sleeping

App Files Community

ikraamkb committed on Mar 26

Commit

0540355

verified ·

1 Parent(s): d74850e

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -57

app.py CHANGED Viewed

@@ -1,74 +1,83 @@
-from fastapi import FastAPI, File, UploadFile
-from fastapi.responses import RedirectResponse
-import fitz  # PyMuPDF for PDF parsing
-from tika import parser  # Apache Tika for document parsing
-import openpyxl
-from pptx import Presentation
-from PIL import Image
-from transformers import pipeline
 import gradio as gr
 import numpy as np
 # Initialize FastAPI
 app = FastAPI()
-print("🔄 Loading models...")
-# Load Hugging Face Models
-qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
-image_captioning_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1, use_fast=True)
-print("✅ Models loaded (Optimized for Speed)")
-# Allowed File Extensions
-ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx", "jpg", "jpeg", "png"}
 def validate_file_type(file):
-    ext = file.filename.split(".")[-1].lower()
-    if ext not in ALLOWED_EXTENSIONS:
-        return f"❌ Unsupported file format: {ext}"
-    return None
-# Function to truncate text to 450 tokens
-def truncate_text(text, max_tokens=450):
-    words = text.split()
-    return " ".join(words[:max_tokens])
-# Document Text Extraction Functions
 def extract_text_from_pdf(pdf_bytes):
     doc = fitz.open(stream=pdf_bytes, filetype="pdf")
-    text = "\n".join([page.get_text("text") for page in doc])
-    return text if text else "⚠️ No text found."
 def extract_text_with_tika(file_bytes):
-    parsed = parser.from_buffer(file_bytes)
-    return parsed.get("content", "⚠️ No text found.").strip()
-def extract_text_from_excel(excel_bytes):
-    wb = openpyxl.load_workbook(excel_bytes, read_only=True)
     text = []
     for sheet in wb.worksheets:
         for row in sheet.iter_rows(values_only=True):
-            text.append(" ".join(map(str, row)))
-    return "\n".join(text) if text else "⚠️ No text found."
-# Function to process file (document or image) and answer question
 def answer_question(file, question: str):
     if isinstance(file, np.ndarray):
-        # Image processing
-        image = Image.fromarray(file)
         caption = image_captioning_pipeline(image)[0]['generated_text']
         response = qa_pipeline(f"Question: {question}\nContext: {caption}")
         return response[0]["generated_text"]
-    # Document processing
     validation_error = validate_file_type(file)
     if validation_error:
         return validation_error
-    file_ext = file.name.split(".")[-1].lower()
-    file_bytes = file.read()
     if file_ext == "pdf":
         text = extract_text_from_pdf(file_bytes)
     elif file_ext in ["docx", "pptx"]:
@@ -77,32 +86,35 @@ def answer_question(file, question: str):
         text = extract_text_from_excel(file_bytes)
     else:
         return "❌ Unsupported file format!"
     if not text:
         return "⚠️ No text extracted from the document."
     truncated_text = truncate_text(text)
     response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
     return response[0]["generated_text"]
-# Gradio Interface for both images & documents
-interface = gr.Interface(
-    fn=answer_question,
-    inputs=[gr.File(label="📂 Upload Document or Image"), gr.Textbox(label="💬 Ask a Question")],
-    outputs="text",
-    title="📄🖼️ AI Document & Image Question Answering"
-)
-# Mount Gradio with FastAPI
-demo = interface
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
 def home():
     return RedirectResponse(url="/")
-# Run FastAPI + Gradio together
 if __name__ == "__main__":
-    import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)

 import gradio as gr
+import uvicorn
 import numpy as np
+import fitz  # PyMuPDF
+import tika
+import torch
+from fastapi import FastAPI
+from transformers import pipeline
+from PIL import Image
+from io import BytesIO
+from starlette.responses import RedirectResponse
+from tika import parser
+from openpyxl import load_workbook
+# Initialize Tika for DOCX & PPTX parsing
+tika.initVM()
 # Initialize FastAPI
 app = FastAPI()
+# Load models
+device = "cuda" if torch.cuda.is_available() else "cpu"
+qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=device)
+image_captioning_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
+# ✅ Function to Validate File Type
 def validate_file_type(file):
+    if isinstance(file, str):  # Text-based input (NamedString)
+        return None
+    if hasattr(file, "name"):
+        ext = file.name.split(".")[-1].lower()
+        if ext not in ALLOWED_EXTENSIONS:
+            return f"❌ Unsupported file format: {ext}"
+        return None
+    return "❌ Invalid file format!"
+# ✅ Extract Text from PDF
 def extract_text_from_pdf(pdf_bytes):
     doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+    return "\n".join([page.get_text() for page in doc])
+# ✅ Extract Text from DOCX & PPTX using Tika
 def extract_text_with_tika(file_bytes):
+    return parser.from_buffer(file_bytes)["content"]
+# ✅ Extract Text from Excel
+def extract_text_from_excel(file_bytes):
+    wb = load_workbook(BytesIO(file_bytes), data_only=True)
     text = []
     for sheet in wb.worksheets:
         for row in sheet.iter_rows(values_only=True):
+            text.append(" ".join(str(cell) for cell in row if cell))
+    return "\n".join(text)
+# ✅ Truncate Long Text for Model
+def truncate_text(text, max_length=2048):
+    return text[:max_length] if len(text) > max_length else text
+# ✅ Answer Questions from Image or Document
 def answer_question(file, question: str):
+    # Image Processing (Gradio sends images as NumPy arrays)
     if isinstance(file, np.ndarray):
+        image = Image.fromarray(file)
         caption = image_captioning_pipeline(image)[0]['generated_text']
         response = qa_pipeline(f"Question: {question}\nContext: {caption}")
         return response[0]["generated_text"]
+    # Validate File
     validation_error = validate_file_type(file)
     if validation_error:
         return validation_error
+    file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
+    file_bytes = file.read() if hasattr(file, "read") else None
+    if not file_bytes:
+        return "❌ Could not read file content!"
+    # Extract Text from Supported Documents
     if file_ext == "pdf":
         text = extract_text_from_pdf(file_bytes)
     elif file_ext in ["docx", "pptx"]:
         text = extract_text_from_excel(file_bytes)
     else:
         return "❌ Unsupported file format!"
     if not text:
         return "⚠️ No text extracted from the document."
     truncated_text = truncate_text(text)
     response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
     return response[0]["generated_text"]
+# ✅ Gradio Interface (Unified for Images & Documents)
+with gr.Blocks() as demo:
+    gr.Markdown("## 📄 AI-Powered Document & Image QA")
+    with gr.Row():
+        file_input = gr.File(label="Upload Document / Image")
+        question_input = gr.Textbox(label="Ask a Question", placeholder="What is this document about?")
+    answer_output = gr.Textbox(label="Answer")
+    submit_btn = gr.Button("Get Answer")
+    submit_btn.click(answer_question, inputs=[file_input, question_input], outputs=answer_output)
+# ✅ Mount Gradio with FastAPI
 app = gr.mount_gradio_app(app, demo, path="/")
 @app.get("/")
 def home():
     return RedirectResponse(url="/")
+# ✅ Run FastAPI + Gradio
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)