Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
-
from fastapi import FastAPI
|
| 2 |
import pdfplumber
|
|
|
|
|
|
|
| 3 |
import easyocr
|
| 4 |
import docx
|
| 5 |
import openpyxl
|
|
@@ -19,16 +21,14 @@ app = FastAPI()
|
|
| 19 |
vqa_pipeline = pipeline("image-to-text", model="Salesforce/blip-vqa-base")
|
| 20 |
code_generator = pipeline("text-generation", model="openai-community/gpt2-medium")
|
| 21 |
table_analyzer = pipeline("table-question-answering", model="google/tapas-large-finetuned-wtq")
|
| 22 |
-
|
| 23 |
-
# ✅ Corrected Question-Answering Model
|
| 24 |
-
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
|
| 25 |
|
| 26 |
# ✅ Functions for Document & Image QA
|
| 27 |
def extract_text_from_pdf(pdf_file):
|
| 28 |
text = ""
|
| 29 |
with pdfplumber.open(pdf_file) as pdf:
|
| 30 |
for page in pdf.pages:
|
| 31 |
-
text += page.extract_text() + "\n"
|
| 32 |
return text.strip()
|
| 33 |
|
| 34 |
def extract_text_from_docx(docx_file):
|
|
@@ -49,7 +49,7 @@ def extract_text_from_excel(excel_file):
|
|
| 49 |
text = []
|
| 50 |
for sheet in wb.worksheets:
|
| 51 |
for row in sheet.iter_rows(values_only=True):
|
| 52 |
-
text.append(" ".join(
|
| 53 |
return "\n".join(text)
|
| 54 |
|
| 55 |
def extract_text_from_image(image_file):
|
|
@@ -74,7 +74,7 @@ def answer_question_from_document(file, question):
|
|
| 74 |
if not text:
|
| 75 |
return "No text extracted from the document."
|
| 76 |
|
| 77 |
-
response = qa_pipeline(
|
| 78 |
return response["answer"]
|
| 79 |
|
| 80 |
def answer_question_from_image(image, question):
|
|
@@ -82,7 +82,7 @@ def answer_question_from_image(image, question):
|
|
| 82 |
if not image_text:
|
| 83 |
return "No text detected in the image."
|
| 84 |
|
| 85 |
-
response = qa_pipeline(
|
| 86 |
return response["answer"]
|
| 87 |
|
| 88 |
# ✅ Gradio UI for Document & Image QA
|
|
@@ -124,9 +124,6 @@ def generate_visualization(excel_file, viz_type, user_request):
|
|
| 124 |
else:
|
| 125 |
generated_code = "Error: Model did not return valid code."
|
| 126 |
|
| 127 |
-
if "plt" not in generated_code or "sns" not in generated_code:
|
| 128 |
-
return generated_code, "Generated code seems incorrect."
|
| 129 |
-
|
| 130 |
try:
|
| 131 |
exec_globals = {"plt": plt, "sns": sns, "pd": pd, "df": df, "io": io}
|
| 132 |
exec(generated_code, exec_globals)
|
|
|
|
| 1 |
+
from fastapi import FastAPI, File, UploadFile
|
| 2 |
import pdfplumber
|
| 3 |
+
import pytesseract
|
| 4 |
+
from PIL import Image
|
| 5 |
import easyocr
|
| 6 |
import docx
|
| 7 |
import openpyxl
|
|
|
|
| 21 |
vqa_pipeline = pipeline("image-to-text", model="Salesforce/blip-vqa-base")
|
| 22 |
code_generator = pipeline("text-generation", model="openai-community/gpt2-medium")
|
| 23 |
table_analyzer = pipeline("table-question-answering", model="google/tapas-large-finetuned-wtq")
|
| 24 |
+
qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2") # ✅ FIXED MODEL
|
|
|
|
|
|
|
| 25 |
|
| 26 |
# ✅ Functions for Document & Image QA
|
| 27 |
def extract_text_from_pdf(pdf_file):
|
| 28 |
text = ""
|
| 29 |
with pdfplumber.open(pdf_file) as pdf:
|
| 30 |
for page in pdf.pages:
|
| 31 |
+
text += page.extract_text() + "\n"
|
| 32 |
return text.strip()
|
| 33 |
|
| 34 |
def extract_text_from_docx(docx_file):
|
|
|
|
| 49 |
text = []
|
| 50 |
for sheet in wb.worksheets:
|
| 51 |
for row in sheet.iter_rows(values_only=True):
|
| 52 |
+
text.append(" ".join(map(str, row)))
|
| 53 |
return "\n".join(text)
|
| 54 |
|
| 55 |
def extract_text_from_image(image_file):
|
|
|
|
| 74 |
if not text:
|
| 75 |
return "No text extracted from the document."
|
| 76 |
|
| 77 |
+
response = qa_pipeline(question=question, context=text) # ✅ FIXED
|
| 78 |
return response["answer"]
|
| 79 |
|
| 80 |
def answer_question_from_image(image, question):
|
|
|
|
| 82 |
if not image_text:
|
| 83 |
return "No text detected in the image."
|
| 84 |
|
| 85 |
+
response = qa_pipeline(question=question, context=image_text) # ✅ FIXED
|
| 86 |
return response["answer"]
|
| 87 |
|
| 88 |
# ✅ Gradio UI for Document & Image QA
|
|
|
|
| 124 |
else:
|
| 125 |
generated_code = "Error: Model did not return valid code."
|
| 126 |
|
|
|
|
|
|
|
|
|
|
| 127 |
try:
|
| 128 |
exec_globals = {"plt": plt, "sns": sns, "pd": pd, "df": df, "io": io}
|
| 129 |
exec(generated_code, exec_globals)
|