Spaces:

ikraamkb
/

qtAnswering

Sleeping

App Files Files Community

ikraamkb committed on Mar 22

Commit

85ffcb4

verified ·

1 Parent(s): 85e13cf

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -8

app.py CHANGED Viewed

@@ -1,7 +1,5 @@
-from fastapi import FastAPI, File, UploadFile
 import pdfplumber
-import pytesseract
-from PIL import Image
 import easyocr
 import docx
 import openpyxl
@@ -21,14 +19,16 @@ app = FastAPI()
 vqa_pipeline = pipeline("image-to-text", model="Salesforce/blip-vqa-base")
 code_generator = pipeline("text-generation", model="openai-community/gpt2-medium")
 table_analyzer = pipeline("table-question-answering", model="google/tapas-large-finetuned-wtq")
-qa_pipeline = pipeline("text-generation", model="facebook/bart-large-cnn")
 # ✅ Functions for Document & Image QA
 def extract_text_from_pdf(pdf_file):
     text = ""
     with pdfplumber.open(pdf_file) as pdf:
         for page in pdf.pages:
-            text += page.extract_text() + "\n"
     return text.strip()
 def extract_text_from_docx(docx_file):
@@ -49,7 +49,7 @@ def extract_text_from_excel(excel_file):
     text = []
     for sheet in wb.worksheets:
         for row in sheet.iter_rows(values_only=True):
-            text.append(" ".join(map(str, row)))
     return "\n".join(text)
 def extract_text_from_image(image_file):
@@ -74,7 +74,7 @@ def answer_question_from_document(file, question):
     if not text:
         return "No text extracted from the document."
-    response = qa_pipeline(question=question, context=text)
     return response["answer"]
 def answer_question_from_image(image, question):
@@ -82,7 +82,7 @@ def answer_question_from_image(image, question):
     if not image_text:
         return "No text detected in the image."
-    response = qa_pipeline(question=question, context=image_text)
     return response["answer"]
 # ✅ Gradio UI for Document & Image QA
@@ -124,6 +124,9 @@ def generate_visualization(excel_file, viz_type, user_request):
         else:
             generated_code = "Error: Model did not return valid code."
         try:
             exec_globals = {"plt": plt, "sns": sns, "pd": pd, "df": df, "io": io}
             exec(generated_code, exec_globals)

+from fastapi import FastAPI
 import pdfplumber
 import easyocr
 import docx
 import openpyxl
 vqa_pipeline = pipeline("image-to-text", model="Salesforce/blip-vqa-base")
 code_generator = pipeline("text-generation", model="openai-community/gpt2-medium")
 table_analyzer = pipeline("table-question-answering", model="google/tapas-large-finetuned-wtq")
+# ✅ Corrected Question-Answering Model
+qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
 # ✅ Functions for Document & Image QA
 def extract_text_from_pdf(pdf_file):
     text = ""
     with pdfplumber.open(pdf_file) as pdf:
         for page in pdf.pages:
+            text += page.extract_text() + "\n" if page.extract_text() else ""
     return text.strip()
 def extract_text_from_docx(docx_file):
     text = []
     for sheet in wb.worksheets:
         for row in sheet.iter_rows(values_only=True):
+            text.append(" ".join([str(cell) if cell is not None else "" for cell in row]))
     return "\n".join(text)
 def extract_text_from_image(image_file):
     if not text:
         return "No text extracted from the document."
+    response = qa_pipeline({"question": question, "context": text})
     return response["answer"]
 def answer_question_from_image(image, question):
     if not image_text:
         return "No text detected in the image."
+    response = qa_pipeline({"question": question, "context": image_text})
     return response["answer"]
 # ✅ Gradio UI for Document & Image QA
         else:
             generated_code = "Error: Model did not return valid code."
+        if "plt" not in generated_code or "sns" not in generated_code:
+            return generated_code, "Generated code seems incorrect."
         try:
             exec_globals = {"plt": plt, "sns": sns, "pd": pd, "df": df, "io": io}
             exec(generated_code, exec_globals)