Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
|
| 2 |
-
|
| 3 |
from fastapi import FastAPI
|
| 4 |
from fastapi.responses import RedirectResponse
|
| 5 |
import gradio as gr
|
|
@@ -93,7 +93,7 @@ reader = easyocr.Reader(['en', 'fr']) # OCR for English & French
|
|
| 93 |
|
| 94 |
# ---- TEXT EXTRACTION FUNCTIONS ----
|
| 95 |
def extract_text_from_pdf(pdf_file):
|
| 96 |
-
"
|
| 97 |
text = []
|
| 98 |
try:
|
| 99 |
with fitz.open(pdf_file) as doc:
|
|
@@ -104,12 +104,12 @@ def extract_text_from_pdf(pdf_file):
|
|
| 104 |
return "\n".join(text)
|
| 105 |
|
| 106 |
def extract_text_from_docx(docx_file):
|
| 107 |
-
"
|
| 108 |
doc = docx.Document(docx_file)
|
| 109 |
return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
|
| 110 |
|
| 111 |
def extract_text_from_pptx(pptx_file):
|
| 112 |
-
"
|
| 113 |
text = []
|
| 114 |
try:
|
| 115 |
presentation = pptx.Presentation(pptx_file)
|
|
@@ -122,7 +122,7 @@ def extract_text_from_pptx(pptx_file):
|
|
| 122 |
return "\n".join(text)
|
| 123 |
|
| 124 |
def extract_text_from_xlsx(xlsx_file):
|
| 125 |
-
"
|
| 126 |
text = []
|
| 127 |
try:
|
| 128 |
wb = openpyxl.load_workbook(xlsx_file)
|
|
@@ -135,13 +135,13 @@ def extract_text_from_xlsx(xlsx_file):
|
|
| 135 |
return "\n".join(text)
|
| 136 |
|
| 137 |
def extract_text_from_image(image_path):
|
| 138 |
-
"
|
| 139 |
result = reader.readtext(image_path, detail=0)
|
| 140 |
return " ".join(result) # Return text as a single string
|
| 141 |
|
| 142 |
# ---- MAIN PROCESSING FUNCTIONS ----
|
| 143 |
def answer_question_from_doc(file, question):
|
| 144 |
-
"
|
| 145 |
ext = file.name.split(".")[-1].lower()
|
| 146 |
|
| 147 |
if ext == "pdf":
|
|
@@ -166,7 +166,7 @@ def answer_question_from_doc(file, question):
|
|
| 166 |
return f"Error generating answer: {e}"
|
| 167 |
|
| 168 |
def answer_question_from_image(image, question):
|
| 169 |
-
"
|
| 170 |
img_text = extract_text_from_image(image)
|
| 171 |
if not img_text.strip():
|
| 172 |
return "No readable text found in the image."
|
|
@@ -201,3 +201,4 @@ app = gr.mount_gradio_app(app, demo, path="/")
|
|
| 201 |
@app.get("/")
|
| 202 |
def home():
|
| 203 |
return RedirectResponse(url="/")
|
|
|
|
|
|
| 1 |
|
| 2 |
+
|
| 3 |
from fastapi import FastAPI
|
| 4 |
from fastapi.responses import RedirectResponse
|
| 5 |
import gradio as gr
|
|
|
|
| 93 |
|
| 94 |
# ---- TEXT EXTRACTION FUNCTIONS ----
|
| 95 |
def extract_text_from_pdf(pdf_file):
|
| 96 |
+
"Extract text from a PDF file.""
|
| 97 |
text = []
|
| 98 |
try:
|
| 99 |
with fitz.open(pdf_file) as doc:
|
|
|
|
| 104 |
return "\n".join(text)
|
| 105 |
|
| 106 |
def extract_text_from_docx(docx_file):
|
| 107 |
+
"Extract text from a DOCX file."
|
| 108 |
doc = docx.Document(docx_file)
|
| 109 |
return "\n".join([p.text for p in doc.paragraphs if p.text.strip()])
|
| 110 |
|
| 111 |
def extract_text_from_pptx(pptx_file):
|
| 112 |
+
"Extract text from a PPTX file."
|
| 113 |
text = []
|
| 114 |
try:
|
| 115 |
presentation = pptx.Presentation(pptx_file)
|
|
|
|
| 122 |
return "\n".join(text)
|
| 123 |
|
| 124 |
def extract_text_from_xlsx(xlsx_file):
|
| 125 |
+
"Extract text from an XLSX file."
|
| 126 |
text = []
|
| 127 |
try:
|
| 128 |
wb = openpyxl.load_workbook(xlsx_file)
|
|
|
|
| 135 |
return "\n".join(text)
|
| 136 |
|
| 137 |
def extract_text_from_image(image_path):
|
| 138 |
+
"Extract text from an image using EasyOCR.""
|
| 139 |
result = reader.readtext(image_path, detail=0)
|
| 140 |
return " ".join(result) # Return text as a single string
|
| 141 |
|
| 142 |
# ---- MAIN PROCESSING FUNCTIONS ----
|
| 143 |
def answer_question_from_doc(file, question):
|
| 144 |
+
"Process document and answer a question based on its content."
|
| 145 |
ext = file.name.split(".")[-1].lower()
|
| 146 |
|
| 147 |
if ext == "pdf":
|
|
|
|
| 166 |
return f"Error generating answer: {e}"
|
| 167 |
|
| 168 |
def answer_question_from_image(image, question):
|
| 169 |
+
"Process an image, extract text, and answer a question.""
|
| 170 |
img_text = extract_text_from_image(image)
|
| 171 |
if not img_text.strip():
|
| 172 |
return "No readable text found in the image."
|
|
|
|
| 201 |
@app.get("/")
|
| 202 |
def home():
|
| 203 |
return RedirectResponse(url="/")
|
| 204 |
+
"""
|