Spaces:
Running
Running
working doc
Browse files
app.py
CHANGED
|
@@ -72,7 +72,7 @@ app = gr.mount_gradio_app(app, demo, path="/")
|
|
| 72 |
@app.get("/")
|
| 73 |
def root():
|
| 74 |
return RedirectResponse(url="/")
|
| 75 |
-
|
| 76 |
import gradio as gr
|
| 77 |
import fitz # PyMuPDF for PDFs
|
| 78 |
import easyocr # OCR for images
|
|
@@ -95,7 +95,7 @@ reader = easyocr.Reader(['en', 'fr']) # OCR for English & French
|
|
| 95 |
|
| 96 |
# ---- TEXT EXTRACTION FUNCTIONS ----
|
| 97 |
def extract_text_from_pdf(pdf_file):
|
| 98 |
-
"Extract text from a PDF file.""
|
| 99 |
text = []
|
| 100 |
try:
|
| 101 |
with fitz.open(pdf_file) as doc:
|
|
@@ -106,12 +106,12 @@ def extract_text_from_pdf(pdf_file):
|
|
| 106 |
return "\n".join(text)
|
| 107 |
|
| 108 |
def extract_text_from_docx(docx_file):
|
| 109 |
-
"Extract text from a DOCX file."
|
| 110 |
doc = docx.Document(docx_file)
|
| 111 |
return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
|
| 112 |
|
| 113 |
def extract_text_from_pptx(pptx_file):
|
| 114 |
-
"Extract text from a PPTX file."
|
| 115 |
text = []
|
| 116 |
try:
|
| 117 |
presentation = pptx.Presentation(pptx_file)
|
|
@@ -124,7 +124,7 @@ def extract_text_from_pptx(pptx_file):
|
|
| 124 |
return "\n".join(text)
|
| 125 |
|
| 126 |
def extract_text_from_xlsx(xlsx_file):
|
| 127 |
-
"Extract text from an XLSX file."
|
| 128 |
text = []
|
| 129 |
try:
|
| 130 |
wb = openpyxl.load_workbook(xlsx_file)
|
|
@@ -137,13 +137,13 @@ def extract_text_from_xlsx(xlsx_file):
|
|
| 137 |
return "\n".join(text)
|
| 138 |
|
| 139 |
def extract_text_from_image(image_path):
|
| 140 |
-
"Extract text from an image using EasyOCR.""
|
| 141 |
result = reader.readtext(image_path, detail=0)
|
| 142 |
return " ".join(result) # Return text as a single string
|
| 143 |
|
| 144 |
# ---- MAIN PROCESSING FUNCTIONS ----
|
| 145 |
def answer_question_from_doc(file, question):
|
| 146 |
-
"Process document and answer a question based on its content."
|
| 147 |
ext = file.name.split(".")[-1].lower()
|
| 148 |
|
| 149 |
if ext == "pdf":
|
|
@@ -155,10 +155,10 @@ def answer_question_from_doc(file, question):
|
|
| 155 |
elif ext == "xlsx":
|
| 156 |
context = extract_text_from_xlsx(file.name)
|
| 157 |
else:
|
| 158 |
-
return "Unsupported file format."
|
| 159 |
|
| 160 |
if not context.strip():
|
| 161 |
-
return "No text found in the document."
|
| 162 |
|
| 163 |
# Generate answer using QA pipeline correctly
|
| 164 |
try:
|
|
@@ -168,10 +168,10 @@ def answer_question_from_doc(file, question):
|
|
| 168 |
return f"Error generating answer: {e}"
|
| 169 |
|
| 170 |
def answer_question_from_image(image, question):
|
| 171 |
-
"Process an image, extract text, and answer a question.""
|
| 172 |
img_text = extract_text_from_image(image)
|
| 173 |
if not img_text.strip():
|
| 174 |
-
return "No readable text found in the image."
|
| 175 |
|
| 176 |
try:
|
| 177 |
result = qa_model({"question": question, "context": img_text})
|
|
@@ -203,146 +203,4 @@ app = gr.mount_gradio_app(app, demo, path="/")
|
|
| 203 |
@app.get("/")
|
| 204 |
def home():
|
| 205 |
return RedirectResponse(url="/")
|
| 206 |
-
"""
|
| 207 |
-
from fastapi import FastAPI
|
| 208 |
-
from fastapi.responses import RedirectResponse
|
| 209 |
-
import gradio as gr
|
| 210 |
-
|
| 211 |
-
import fitz # PyMuPDF for PDFs
|
| 212 |
-
import easyocr # OCR for images
|
| 213 |
-
import openpyxl # XLSX processing
|
| 214 |
-
import pptx # PPTX processing
|
| 215 |
-
import docx # DOCX processing
|
| 216 |
-
|
| 217 |
-
from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering
|
| 218 |
-
from PIL import Image
|
| 219 |
-
import torch
|
| 220 |
-
|
| 221 |
-
# === Initialize FastAPI App ===
|
| 222 |
-
app = FastAPI()
|
| 223 |
-
|
| 224 |
-
# === Initialize QA Model for Documents and OCR ===
|
| 225 |
-
qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
|
| 226 |
-
|
| 227 |
-
# === Initialize Image QA Model (VQA) ===
|
| 228 |
-
vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
|
| 229 |
-
vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
|
| 230 |
-
|
| 231 |
-
# === OCR Reader ===
|
| 232 |
-
reader = easyocr.Reader(['en', 'fr'])
|
| 233 |
-
|
| 234 |
-
# === Document Text Extraction Functions ===
|
| 235 |
-
def extract_text_from_pdf(file_obj):
|
| 236 |
-
doc = fitz.open(stream=file_obj.read(), filetype="pdf")
|
| 237 |
-
return "\n".join([page.get_text() for page in doc])
|
| 238 |
-
|
| 239 |
-
def extract_text_from_docx(docx_file):
|
| 240 |
-
doc = docx.Document(docx_file)
|
| 241 |
-
return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
|
| 242 |
-
|
| 243 |
-
def extract_text_from_pptx(pptx_file):
|
| 244 |
-
text = []
|
| 245 |
-
try:
|
| 246 |
-
presentation = pptx.Presentation(pptx_file)
|
| 247 |
-
for slide in presentation.slides:
|
| 248 |
-
for shape in slide.shapes:
|
| 249 |
-
if hasattr(shape, "text"):
|
| 250 |
-
text.append(shape.text)
|
| 251 |
-
except Exception as e:
|
| 252 |
-
return f"Error reading PPTX: {e}"
|
| 253 |
-
return "\n".join(text)
|
| 254 |
-
|
| 255 |
-
def extract_text_from_xlsx(xlsx_file):
|
| 256 |
-
text = []
|
| 257 |
-
try:
|
| 258 |
-
wb = openpyxl.load_workbook(xlsx_file)
|
| 259 |
-
for sheet in wb.sheetnames:
|
| 260 |
-
ws = wb[sheet]
|
| 261 |
-
for row in ws.iter_rows(values_only=True):
|
| 262 |
-
text.append(" ".join(str(cell) for cell in row if cell))
|
| 263 |
-
except Exception as e:
|
| 264 |
-
return f"Error reading XLSX: {e}"
|
| 265 |
-
return "\n".join(text)
|
| 266 |
-
|
| 267 |
-
# === Image OCR ===
|
| 268 |
-
def extract_text_from_image(image_path):
|
| 269 |
-
result = reader.readtext(image_path, detail=0)
|
| 270 |
-
return " ".join(result)
|
| 271 |
-
|
| 272 |
-
# === QA for Document Files ===
|
| 273 |
-
def answer_question_from_doc(file, question):
|
| 274 |
-
if file is None or not question.strip():
|
| 275 |
-
return "Please upload a document and ask a question."
|
| 276 |
-
|
| 277 |
-
ext = file.name.split(".")[-1].lower()
|
| 278 |
-
if ext == "pdf":
|
| 279 |
-
context = extract_text_from_pdf(file)
|
| 280 |
-
elif ext == "docx":
|
| 281 |
-
context = extract_text_from_docx(file)
|
| 282 |
-
elif ext == "pptx":
|
| 283 |
-
context = extract_text_from_pptx(file)
|
| 284 |
-
elif ext == "xlsx":
|
| 285 |
-
context = extract_text_from_xlsx(file)
|
| 286 |
-
else:
|
| 287 |
-
return "Unsupported file format."
|
| 288 |
-
|
| 289 |
-
if not context.strip():
|
| 290 |
-
return "No text found in the document."
|
| 291 |
-
|
| 292 |
-
try:
|
| 293 |
-
result = qa_model({"question": question, "context": context})
|
| 294 |
-
return result["answer"]
|
| 295 |
-
except Exception as e:
|
| 296 |
-
return f"Error generating answer: {e}"
|
| 297 |
-
|
| 298 |
-
# === QA for Images using EasyOCR and QA model ===
|
| 299 |
-
def answer_question_from_image_text(image, question):
|
| 300 |
-
img_text = extract_text_from_image(image)
|
| 301 |
-
if not img_text.strip():
|
| 302 |
-
return "No readable text found in the image."
|
| 303 |
-
try:
|
| 304 |
-
result = qa_model({"question": question, "context": img_text})
|
| 305 |
-
return result["answer"]
|
| 306 |
-
except Exception as e:
|
| 307 |
-
return f"Error generating answer: {e}"
|
| 308 |
|
| 309 |
-
# === QA for Images using ViLT (Visual QA Model) ===
|
| 310 |
-
def answer_question_from_image_visual(image, question):
|
| 311 |
-
if image is None or not question.strip():
|
| 312 |
-
return "Please upload an image and ask a question."
|
| 313 |
-
inputs = vqa_processor(image, question, return_tensors="pt")
|
| 314 |
-
with torch.no_grad():
|
| 315 |
-
outputs = vqa_model(**inputs)
|
| 316 |
-
predicted_id = outputs.logits.argmax(-1).item()
|
| 317 |
-
return vqa_model.config.id2label[predicted_id]
|
| 318 |
-
|
| 319 |
-
# === Gradio Interfaces ===
|
| 320 |
-
with gr.Blocks() as doc_interface:
|
| 321 |
-
gr.Markdown("## 📄 Document Question Answering")
|
| 322 |
-
file_input = gr.File(label="Upload DOCX, PPTX, XLSX, or PDF")
|
| 323 |
-
question_input = gr.Textbox(label="Ask a Question")
|
| 324 |
-
answer_output = gr.Textbox(label="Answer")
|
| 325 |
-
file_submit = gr.Button("Get Answer")
|
| 326 |
-
file_submit.click(fn=answer_question_from_doc, inputs=[file_input, question_input], outputs=answer_output)
|
| 327 |
-
|
| 328 |
-
with gr.Blocks() as image_interface:
|
| 329 |
-
gr.Markdown("## 🖼️ Image Question Answering (OCR + VQA)")
|
| 330 |
-
with gr.Tabs():
|
| 331 |
-
with gr.TabItem("OCR-based Image QA"):
|
| 332 |
-
image_input = gr.Image(label="Upload Image")
|
| 333 |
-
img_question_input = gr.Textbox(label="Ask a Question")
|
| 334 |
-
img_answer_output = gr.Textbox(label="Answer")
|
| 335 |
-
gr.Button("Get Answer").click(fn=answer_question_from_image_text, inputs=[image_input, img_question_input], outputs=img_answer_output)
|
| 336 |
-
with gr.TabItem("Visual QA (ViLT)"):
|
| 337 |
-
image_input_vqa = gr.Image(label="Upload Image")
|
| 338 |
-
vqa_question_input = gr.Textbox(label="Ask a Question")
|
| 339 |
-
vqa_answer_output = gr.Textbox(label="Answer")
|
| 340 |
-
gr.Button("Get Answer").click(fn=answer_question_from_image_visual, inputs=[image_input_vqa, vqa_question_input], outputs=vqa_answer_output)
|
| 341 |
-
|
| 342 |
-
# === Mount Gradio on FastAPI ===
|
| 343 |
-
demo = gr.TabbedInterface([doc_interface, image_interface], ["Document QA", "Image QA"])
|
| 344 |
-
app = gr.mount_gradio_app(app, demo, path="/")
|
| 345 |
-
|
| 346 |
-
@app.get("/")
|
| 347 |
-
def root():
|
| 348 |
-
return RedirectResponse(url="/")
|
|
|
|
| 72 |
@app.get("/")
|
| 73 |
def root():
|
| 74 |
return RedirectResponse(url="/")
|
| 75 |
+
"""
|
| 76 |
import gradio as gr
|
| 77 |
import fitz # PyMuPDF for PDFs
|
| 78 |
import easyocr # OCR for images
|
|
|
|
| 95 |
|
| 96 |
# ---- TEXT EXTRACTION FUNCTIONS ----
|
| 97 |
def extract_text_from_pdf(pdf_file):
|
| 98 |
+
"""Extract text from a PDF file."""
|
| 99 |
text = []
|
| 100 |
try:
|
| 101 |
with fitz.open(pdf_file) as doc:
|
|
|
|
| 106 |
return "\n".join(text)
|
| 107 |
|
| 108 |
def extract_text_from_docx(docx_file):
|
| 109 |
+
"""Extract text from a DOCX file."""
|
| 110 |
doc = docx.Document(docx_file)
|
| 111 |
return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
|
| 112 |
|
| 113 |
def extract_text_from_pptx(pptx_file):
|
| 114 |
+
"""Extract text from a PPTX file."""
|
| 115 |
text = []
|
| 116 |
try:
|
| 117 |
presentation = pptx.Presentation(pptx_file)
|
|
|
|
| 124 |
return "\n".join(text)
|
| 125 |
|
| 126 |
def extract_text_from_xlsx(xlsx_file):
|
| 127 |
+
"""Extract text from an XLSX file."""
|
| 128 |
text = []
|
| 129 |
try:
|
| 130 |
wb = openpyxl.load_workbook(xlsx_file)
|
|
|
|
| 137 |
return "\n".join(text)
|
| 138 |
|
| 139 |
def extract_text_from_image(image_path):
|
| 140 |
+
"""Extract text from an image using EasyOCR."""
|
| 141 |
result = reader.readtext(image_path, detail=0)
|
| 142 |
return " ".join(result) # Return text as a single string
|
| 143 |
|
| 144 |
# ---- MAIN PROCESSING FUNCTIONS ----
|
| 145 |
def answer_question_from_doc(file, question):
|
| 146 |
+
"""Process document and answer a question based on its content."""
|
| 147 |
ext = file.name.split(".")[-1].lower()
|
| 148 |
|
| 149 |
if ext == "pdf":
|
|
|
|
| 155 |
elif ext == "xlsx":
|
| 156 |
context = extract_text_from_xlsx(file.name)
|
| 157 |
else:
|
| 158 |
+
return """Unsupported file format."""
|
| 159 |
|
| 160 |
if not context.strip():
|
| 161 |
+
return """No text found in the document."""
|
| 162 |
|
| 163 |
# Generate answer using QA pipeline correctly
|
| 164 |
try:
|
|
|
|
| 168 |
return f"Error generating answer: {e}"
|
| 169 |
|
| 170 |
def answer_question_from_image(image, question):
|
| 171 |
+
"""Process an image, extract text, and answer a question."""
|
| 172 |
img_text = extract_text_from_image(image)
|
| 173 |
if not img_text.strip():
|
| 174 |
+
return """No readable text found in the image."""
|
| 175 |
|
| 176 |
try:
|
| 177 |
result = qa_model({"question": question, "context": img_text})
|
|
|
|
| 203 |
@app.get("/")
|
| 204 |
def home():
|
| 205 |
return RedirectResponse(url="/")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|