Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -197,7 +197,7 @@ def truncate_text(text, max_length=2048):
|
|
| 197 |
# ✅ Answer Questions from Image or Document
|
| 198 |
def answer_question(file, question: str):
|
| 199 |
try:
|
| 200 |
-
# ✅ Image
|
| 201 |
if isinstance(file, np.ndarray):
|
| 202 |
image = Image.fromarray(file)
|
| 203 |
caption = image_captioning_pipeline(image)[0]['generated_text']
|
|
@@ -209,26 +209,31 @@ def answer_question(file, question: str):
|
|
| 209 |
if validation_error:
|
| 210 |
return validation_error
|
| 211 |
|
| 212 |
-
# ✅
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
file_bytes = file
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
else:
|
| 218 |
-
|
| 219 |
|
| 220 |
-
# ✅
|
| 221 |
file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
|
| 222 |
|
| 223 |
-
# ✅ Extract Text from Supported Documents
|
| 224 |
-
text = None
|
| 225 |
if file_ext == "pdf":
|
| 226 |
text = extract_text_from_pdf(file_bytes)
|
| 227 |
elif file_ext in ["docx", "pptx"]:
|
| 228 |
text = extract_text_with_tika(file_bytes)
|
| 229 |
elif file_ext == "xlsx":
|
| 230 |
text = extract_text_from_excel(file_bytes)
|
|
|
|
|
|
|
| 231 |
|
|
|
|
| 232 |
if not text or "❌" in text:
|
| 233 |
return f"⚠️ No text extracted. Error: {text}"
|
| 234 |
|
|
@@ -238,7 +243,7 @@ def answer_question(file, question: str):
|
|
| 238 |
return response[0]["generated_text"]
|
| 239 |
|
| 240 |
except Exception as e:
|
| 241 |
-
print(f"❌ General Processing Error: {e}") # Log error
|
| 242 |
return f"❌ Processing Error: {str(e)}"
|
| 243 |
|
| 244 |
# ✅ Gradio Interface (Unified for Images & Documents)
|
|
|
|
| 197 |
# ✅ Answer Questions from Image or Document
|
| 198 |
def answer_question(file, question: str):
|
| 199 |
try:
|
| 200 |
+
# ✅ Handle Image Files
|
| 201 |
if isinstance(file, np.ndarray):
|
| 202 |
image = Image.fromarray(file)
|
| 203 |
caption = image_captioning_pipeline(image)[0]['generated_text']
|
|
|
|
| 209 |
if validation_error:
|
| 210 |
return validation_error
|
| 211 |
|
| 212 |
+
# ✅ Extract File Bytes Correctly
|
| 213 |
+
file_bytes = None
|
| 214 |
+
|
| 215 |
+
if isinstance(file, bytes):
|
| 216 |
+
file_bytes = file # Directly received bytes
|
| 217 |
+
elif hasattr(file, "read"):
|
| 218 |
+
file_bytes = file.read() # Read bytes from file object
|
| 219 |
+
elif isinstance(file, str):
|
| 220 |
+
return "❌ Error: File received as a string, expected binary data!"
|
| 221 |
else:
|
| 222 |
+
return f"❌ Unexpected file type received! Type: {type(file)}"
|
| 223 |
|
| 224 |
+
# ✅ Extract Text Based on File Extension
|
| 225 |
file_ext = file.name.split(".")[-1].lower() if hasattr(file, "name") else None
|
| 226 |
|
|
|
|
|
|
|
| 227 |
if file_ext == "pdf":
|
| 228 |
text = extract_text_from_pdf(file_bytes)
|
| 229 |
elif file_ext in ["docx", "pptx"]:
|
| 230 |
text = extract_text_with_tika(file_bytes)
|
| 231 |
elif file_ext == "xlsx":
|
| 232 |
text = extract_text_from_excel(file_bytes)
|
| 233 |
+
else:
|
| 234 |
+
return f"❌ Unsupported file format: {file_ext}"
|
| 235 |
|
| 236 |
+
# ✅ Validate Extraction
|
| 237 |
if not text or "❌" in text:
|
| 238 |
return f"⚠️ No text extracted. Error: {text}"
|
| 239 |
|
|
|
|
| 243 |
return response[0]["generated_text"]
|
| 244 |
|
| 245 |
except Exception as e:
|
| 246 |
+
print(f"❌ General Processing Error: {e}") # Log error to console
|
| 247 |
return f"❌ Processing Error: {str(e)}"
|
| 248 |
|
| 249 |
# ✅ Gradio Interface (Unified for Images & Documents)
|