Spaces:

ikraamkb
/

qtAnswering

Sleeping

App Files Community

ikraamkb committed on Mar 26

Commit

753db53

verified ·

1 Parent(s): 0b363e7

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -97

app.py CHANGED Viewed

@@ -1,43 +1,26 @@
-from fastapi import FastAPI, File, UploadFile
-from fastapi.responses import RedirectResponse
 import fitz  # PyMuPDF for PDF parsing
 from tika import parser  # Apache Tika for document parsing
 import openpyxl
 from pptx import Presentation
 from PIL import Image
-import torch
 from transformers import pipeline
-import gradio as gr
 import numpy as np
-# Initialize FastAPI
-app = FastAPI()
-print(f"🔄 Loading models (Running on GPU: {torch.cuda.is_available()})")
-# Load Hugging Face Models
-doc_qa_pipeline = pipeline(
-    "text-generation",
-    model="Qwen/Qwen2.5-VL-7B-Instruct",
-    device=0 if torch.cuda.is_available() else -1
-)
-image_captioning_pipeline = pipeline(
-    "image-to-text",
-    model="Salesforce/blip-image-captioning-base",
-    device=0 if torch.cuda.is_available() else -1,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    use_fast=True
-)
-print("✅ Models loaded successfully")
 # Allowed File Extensions
 ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
 def validate_file_type(file):
-    ext = file.filename.split(".")[-1].lower()
-    print(f"🔍 Validating file type: {ext}")
     if ext not in ALLOWED_EXTENSIONS:
         return f"❌ Unsupported file format: {ext}"
     return None
@@ -45,48 +28,34 @@ def validate_file_type(file):
 # Function to truncate text to 450 tokens
 def truncate_text(text, max_tokens=450):
     words = text.split()
-    truncated = " ".join(words[:max_tokens])
-    print(f"✂️ Truncated text to {max_tokens} tokens.")
-    return truncated
 # Document Text Extraction Functions
 def extract_text_from_pdf(pdf_bytes):
-    try:
-        print("📄 Extracting text from PDF...")
-        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
-        text = "\n".join([page.get_text("text") for page in doc])
-        return text if text else "⚠️ No text found."
-    except Exception as e:
-        return f"❌ Error reading PDF: {str(e)}"
 def extract_text_with_tika(file_bytes):
-    try:
-        print("📝 Extracting text with Tika...")
-        parsed = parser.from_buffer(file_bytes)
-        return parsed.get("content", "⚠️ No text found.").strip()
-    except Exception as e:
-        return f"❌ Error reading document: {str(e)}"
 def extract_text_from_excel(excel_bytes):
-    try:
-        print("📊 Extracting text from Excel...")
-        wb = openpyxl.load_workbook(excel_bytes, read_only=True)
-        text = []
-        for sheet in wb.worksheets:
-            for row in sheet.iter_rows(values_only=True):
-                text.append(" ".join(map(str, row)))
-        return "\n".join(text) if text else "⚠️ No text found."
-    except Exception as e:
-        return f"❌ Error reading Excel: {str(e)}"
-def answer_question_from_document(file: UploadFile, question: str):
-    print("📂 Processing document for QA...")
     validation_error = validate_file_type(file)
     if validation_error:
         return validation_error
-    file_ext = file.filename.split(".")[-1].lower()
-    file_bytes = file.file.read()
     if file_ext == "pdf":
         text = extract_text_from_pdf(file_bytes)
@@ -101,51 +70,33 @@ def answer_question_from_document(file: UploadFile, question: str):
         return "⚠️ No text extracted from the document."
     truncated_text = truncate_text(text)
-    print("🤖 Generating response with Qwen2.5-VL-7B...")
-    response = doc_qa_pipeline(f"Question: {question}\nContext: {truncated_text}", max_length=100)
     return response[0]["generated_text"]
 def answer_question_from_image(image, question):
-    try:
-        print("🖼️ Processing image for QA...")
-        if isinstance(image, np.ndarray):  # If it's a NumPy array from Gradio
-            image = Image.fromarray(image)  # Convert to PIL Image
-        print("🖼️ Generating caption for image...")
-        caption = image_captioning_pipeline(image)[0]['generated_text']
-        print("🤖 Answering question based on caption with Qwen2.5-VL-7B...")
-        response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}", max_length=100)
-        return response[0]["generated_text"]
-    except Exception as e:
-        return f"❌ Error processing image: {str(e)}"
-# Gradio UI for Document & Image QA
-doc_interface = gr.Interface(
-    fn=answer_question_from_document,
-    inputs=[gr.File(label="📂 Upload Document"), gr.Textbox(label="💬 Ask a Question")],
-    outputs="text",
-    title="📄 AI Document Question Answering"
-)
-img_interface = gr.Interface(
-    fn=answer_question_from_image,
-    inputs=[gr.Image(label="🖼️ Upload Image"), gr.Textbox(label="💬 Ask a Question")],
     outputs="text",
-    title="🖼️ AI Image Question Answering"
 )
-# Mount Gradio Interfaces
-demo = gr.TabbedInterface([doc_interface, img_interface], ["📄 Document QA", "🖼️ Image QA"])
-app = gr.mount_gradio_app(app, demo, path="/")
-@app.get("/")
-def home():
-    return RedirectResponse(url="/")
-# Run FastAPI + Gradio together
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)

+import gradio as gr
 import fitz  # PyMuPDF for PDF parsing
 from tika import parser  # Apache Tika for document parsing
 import openpyxl
 from pptx import Presentation
 from PIL import Image
 from transformers import pipeline
+import torch
 import numpy as np
+# Load Optimized Hugging Face Models
+print("🔄 Loading models...")
+qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
+image_captioning_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1, use_fast=True)
+print("✅ Models loaded (Optimized for Speed)")
 # Allowed File Extensions
 ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}


 def validate_file_type(file):
     """Check that an uploaded file has a supported extension.

     Args:
         file: Upload object exposing its path in a ``name`` attribute, or a
             plain path string.

     Returns:
         ``None`` when the extension is in ``ALLOWED_EXTENSIONS``; otherwise
         an error message naming the rejected extension.
     """
     import os

     path = getattr(file, "name", file)
     # splitext only inspects the final path component, so dots in directory
     # names are ignored and a name without a dot yields an empty extension
     # rather than the whole file name.
     ext = os.path.splitext(str(path))[1].lstrip(".").lower()
     if ext not in ALLOWED_EXTENSIONS:
         return f"❌ Unsupported file format: {ext}"
     return None
 # Function to truncate text to 450 tokens
 def truncate_text(text, max_tokens=450):
     """Keep at most ``max_tokens`` whitespace-separated words of ``text``.

     "Tokens" here are the pieces produced by ``str.split()``, not model
     tokenizer tokens. Any run of whitespace in the input becomes a single
     space in the result.
     """
     kept = text.split()[:max_tokens]
     return " ".join(kept)
 # Document Text Extraction Functions
 def extract_text_from_pdf(pdf_bytes):
     """Extract the plain text of every page of an in-memory PDF.

     Args:
         pdf_bytes: Raw bytes of the PDF file.

     Returns:
         The page texts joined by newlines, or "⚠️ No text found." when the
         pages yield nothing but whitespace.
     """
     # The context manager closes the document even if a page fails to
     # parse; previously the document was never closed.
     with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
         text = "\n".join(page.get_text("text") for page in doc)
     # Text-less pages still contribute the joining newlines, so test the
     # stripped text rather than the raw string.
     return text if text.strip() else "⚠️ No text found."
 def extract_text_with_tika(file_bytes):
     """Extract text from a document buffer with Apache Tika.

     Args:
         file_bytes: Raw bytes of the document.

     Returns:
         The extracted text with surrounding whitespace removed, or
         "⚠️ No text found." when Tika reports no content.
     """
     parsed = parser.from_buffer(file_bytes)
     # The "content" key can be present with the value None, in which case
     # dict.get's default is not used; "or" covers both the missing key and
     # the None value so .strip() never runs on None.
     content = (parsed.get("content") or "").strip()
     return content if content else "⚠️ No text found."
 def extract_text_from_excel(excel_bytes):
     """Extract the cell values of every worksheet as plain text.

     Args:
         excel_bytes: Raw bytes of an .xlsx file. A path or file-like object
             is passed through to openpyxl unchanged.

     Returns:
         One line per non-empty row, cells separated by spaces, or
         "⚠️ No text found." when the workbook has no values.
     """
     import io

     # load_workbook expects a path or a file-like object, so raw bytes are
     # wrapped in an in-memory stream first.
     source = excel_bytes
     if isinstance(excel_bytes, (bytes, bytearray, memoryview)):
         source = io.BytesIO(excel_bytes)

     wb = openpyxl.load_workbook(source, read_only=True)
     try:
         lines = []
         for sheet in wb.worksheets:
             for row in sheet.iter_rows(values_only=True):
                 # Empty cells come back as None; skip them so the text does
                 # not fill up with the literal word "None".
                 cells = [str(value) for value in row if value is not None]
                 if cells:
                     lines.append(" ".join(cells))
     finally:
         # Read-only workbooks keep their source open until closed.
         wb.close()
     return "\n".join(lines) if lines else "⚠️ No text found."
+# Function to process document and answer question
+def answer_question_from_document(file, question):
     validation_error = validate_file_type(file)
     if validation_error:
         return validation_error
+    file_ext = file.name.split(".")[-1].lower()
+    file_bytes = file.read()
     if file_ext == "pdf":
         text = extract_text_from_pdf(file_bytes)
         return "⚠️ No text extracted from the document."
     truncated_text = truncate_text(text)
+    response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
     return response[0]["generated_text"]
+# Function to process image and answer question
 def answer_question_from_image(image, question):
+    if isinstance(image, np.ndarray):
+        image = Image.fromarray(image)
+    caption = image_captioning_pipeline(image)[0]['generated_text']
+    response = qa_pipeline(f"Question: {question}\nContext: {caption}")
+    return response[0]["generated_text"]
+# Gradio Interface
+interface = gr.Interface(
+    fn=lambda file, image, question: (
+        answer_question_from_document(file, question) if file else answer_question_from_image(image, question)
+    ),
+    inputs=[
+        gr.File(label="📂 Upload Document (PDF, DOCX, PPTX, XLSX)", optional=True),
+        gr.Image(label="🖼️ Upload Image", optional=True),
+        gr.Textbox(label="💬 Ask a Question")
+    ],
     outputs="text",
+    title="📄 AI Document & Image Question Answering",
+    description="Upload a **document** (PDF, DOCX, PPTX, XLSX) or an **image**, then ask a question about its content."
 )
+interface.launch()