Spaces:

ikraamkb
/

qtAnswering

Sleeping

App Files Community

ikraamkb committed on Apr 5

Commit

811b0b3

verified ·

1 Parent(s): f23d324

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -45

app.py CHANGED Viewed

@@ -1,27 +1,17 @@
-import gradio as gr
 import fitz  # PyMuPDF for PDFs
 import easyocr  # OCR for images
 import openpyxl  # XLSX processing
 import pptx  # PPTX processing
 import docx  # DOCX processing
-import json  # Exporting results
-from deep_translator import GoogleTranslator
 from transformers import pipeline
-from fastapi import FastAPI
-from starlette.responses import RedirectResponse
-# Initialize FastAPI app
-app = FastAPI()
 # Initialize AI Models
 qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
-image_captioning = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
 reader = easyocr.Reader(['en', 'fr'])  # OCR for English & French
 # ---- TEXT EXTRACTION FUNCTIONS ----
 def extract_text_from_pdf(pdf_file):
-    """Extract text from a PDF file."""
     text = []
     try:
         with fitz.open(pdf_file) as doc:
@@ -32,12 +22,10 @@ def extract_text_from_pdf(pdf_file):
     return "\n".join(text)
 def extract_text_from_docx(docx_file):
-    """Extract text from a DOCX file."""
     doc = docx.Document(docx_file)
     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
 def extract_text_from_pptx(pptx_file):
-    """Extract text from a PPTX file."""
     text = []
     try:
         presentation = pptx.Presentation(pptx_file)
@@ -50,7 +38,6 @@ def extract_text_from_pptx(pptx_file):
     return "\n".join(text)
 def extract_text_from_xlsx(xlsx_file):
-    """Extract text from an XLSX file."""
     text = []
     try:
         wb = openpyxl.load_workbook(xlsx_file)
@@ -62,12 +49,10 @@ def extract_text_from_xlsx(xlsx_file):
         return f"Error reading XLSX: {e}"
     return "\n".join(text)
-# ---- MAIN PROCESSING FUNCTIONS ----
 def answer_question_from_doc(file, question):
-    """Process document and answer a question based on its content."""
     ext = file.name.split(".")[-1].lower()
     if ext == "pdf":
         context = extract_text_from_pdf(file.name)
     elif ext == "docx":
@@ -77,38 +62,13 @@ def answer_question_from_doc(file, question):
     elif ext == "xlsx":
         context = extract_text_from_xlsx(file.name)
     else:
-        return """Unsupported file format."""
     if not context.strip():
-        return """No text found in the document."""
-    # Generate answer using QA pipeline correctly
     try:
         result = qa_model({"question": question, "context": context})
         return result["answer"]
     except Exception as e:
         return f"Error generating answer: {e}"
-    try:
-        result = qa_model({"question": question, "context": img_text})
-        return result["answer"]
-    except Exception as e:
-        return f"Error generating answer: {e}"
-with gr.Blocks() as img_interface:
-    gr.Markdown("## 🖼️ Image Question Answering")
-    image_input = gr.Image(label="Upload an Image")
-    img_question_input = gr.Textbox(label="Ask a question")
-    img_answer_output = gr.Textbox(label="Answer")
-    image_submit = gr.Button("Get Answer")
-    image_submit.click(answer_question_from_image, inputs=[image_input, img_question_input], outputs=img_answer_output)
-# ---- MOUNT GRADIO APP ----
-demo = gr.TabbedInterface(img_interface, "Image QA")
-app = gr.mount_gradio_app(app, demo, path="/")
-@app.get("/")
-def home():
-    return RedirectResponse(url="/")

+# app.py
 import fitz  # PyMuPDF for PDFs
 import easyocr  # OCR for images
 import openpyxl  # XLSX processing
 import pptx  # PPTX processing
 import docx  # DOCX processing
 from transformers import pipeline
 # Initialize AI Models
 qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
 reader = easyocr.Reader(['en', 'fr'])  # OCR for English & French
 # ---- TEXT EXTRACTION FUNCTIONS ----
 def extract_text_from_pdf(pdf_file):
     text = []
     try:
         with fitz.open(pdf_file) as doc:
     return "\n".join(text)
 def extract_text_from_docx(docx_file):
     doc = docx.Document(docx_file)
     return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
 def extract_text_from_pptx(pptx_file):
     text = []
     try:
         presentation = pptx.Presentation(pptx_file)
     return "\n".join(text)
 def extract_text_from_xlsx(xlsx_file):
     text = []
     try:
         wb = openpyxl.load_workbook(xlsx_file)
         return f"Error reading XLSX: {e}"
     return "\n".join(text)
+# ---- MAIN QA FUNCTION ----
 def answer_question_from_doc(file, question):
     ext = file.name.split(".")[-1].lower()
     if ext == "pdf":
         context = extract_text_from_pdf(file.name)
     elif ext == "docx":
     elif ext == "xlsx":
         context = extract_text_from_xlsx(file.name)
     else:
+        return "Unsupported file format."
     if not context.strip():
+        return "No text found in the document."
     try:
         result = qa_model({"question": question, "context": context})
         return result["answer"]
     except Exception as e:
         return f"Error generating answer: {e}"