Spaces:
Sleeping
Sleeping
File size: 3,567 Bytes
753db53 2852c90 2be14bd 6e8ae10 0b363e7 753db53 0c9548a b1622cb 753db53 2be14bd 753db53 239c804 753db53 8e24199 1be9899 753db53 8e24199 d2931fe 8e24199 1be9899 c724805 753db53 2be14bd 1be9899 4c11732 753db53 2be14bd 4c11732 753db53 2be14bd 4c11732 753db53 0b363e7 4c11732 753db53 4c11732 0b363e7 4c11732 0b363e7 4c11732 0b363e7 4c11732 2be14bd d2931fe 4c11732 2be14bd d2931fe 4c11732 7e5ddc3 753db53 2852c90 2be14bd 753db53 4c11732 753db53 01cb6f1 753db53 4c11732 753db53 01cb6f1 753db53 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import io

import fitz  # PyMuPDF for PDF parsing
import gradio as gr
import numpy as np
import openpyxl
import torch
from PIL import Image
from pptx import Presentation
from tika import parser  # Apache Tika for document parsing
from transformers import pipeline
# Load Optimized Hugging Face Models.
# Both pipelines run on CPU (device=-1); the models are intentionally small
# (TinyLlama 1.1B, BLIP base) to keep the Space responsive.
print("🚀 Loading models...")
qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
image_captioning_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1, use_fast=True)
# NOTE: the original had this string literal broken across two lines (syntax
# error) and mojibake emoji; reconstructed as a single print.
print("✅ Models loaded (Optimized for Speed)")
# Allowed File Extensions
ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}


def validate_file_type(file):
    """Check the uploaded file's extension against ALLOWED_EXTENSIONS.

    Args:
        file: An uploaded-file object exposing a ``.name`` attribute.

    Returns:
        An error message string for unsupported extensions, or ``None``
        when the file type is acceptable.
    """
    ext = file.name.split(".")[-1].lower()
    if ext not in ALLOWED_EXTENSIONS:
        # Mojibake marker in the original message replaced with a real emoji.
        return f"❌ Unsupported file format: {ext}"
    return None
def truncate_text(text, max_tokens=450):
    """Cap *text* at its first *max_tokens* whitespace-separated words.

    Note: "tokens" here means words from ``str.split()``, not model tokens;
    450 words keeps the prompt safely under the model's context window.
    """
    return " ".join(text.split()[:max_tokens])
# Document Text Extraction Functions
def extract_text_from_pdf(pdf_bytes):
    """Extract plain text from a PDF given its raw bytes.

    Args:
        pdf_bytes: The PDF file content as ``bytes``.

    Returns:
        The concatenated text of all pages, or a placeholder message
        when the document yields no text.
    """
    # Use the context manager so the fitz Document is always closed
    # (the original leaked the handle).
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        text = "\n".join(page.get_text("text") for page in doc)
    return text if text else "⚠️ No text found."
def extract_text_with_tika(file_bytes):
    """Extract text from DOCX/PPTX bytes via Apache Tika.

    Args:
        file_bytes: Raw document content as ``bytes``.

    Returns:
        The stripped extracted text, or a placeholder message when
        Tika produces no content.
    """
    parsed = parser.from_buffer(file_bytes)
    # Tika sets "content" to None when nothing could be extracted; the
    # original called .strip() on it and raised AttributeError.
    content = parsed.get("content") if parsed else None
    if content and content.strip():
        return content.strip()
    return "⚠️ No text found."
def extract_text_from_excel(excel_bytes):
    """Extract cell text from an XLSX workbook given its raw bytes.

    Args:
        excel_bytes: The workbook content as ``bytes``.

    Returns:
        One line per row with cells joined by spaces, or a placeholder
        message when the workbook contains no data.
    """
    # openpyxl needs a path or file-like object, not raw bytes — wrap in
    # BytesIO (the original passed bytes directly, which fails).
    wb = openpyxl.load_workbook(io.BytesIO(excel_bytes), read_only=True)
    try:
        lines = []
        for sheet in wb.worksheets:
            for row in sheet.iter_rows(values_only=True):
                # Skip empty cells; map(str, row) used to emit the literal
                # string "None" for every blank cell.
                lines.append(" ".join(str(cell) for cell in row if cell is not None))
    finally:
        # read_only workbooks hold the file open until explicitly closed.
        wb.close()
    return "\n".join(lines) if lines else "⚠️ No text found."
# Function to process document and answer question
def answer_question_from_document(file, question):
    """Answer *question* from the text of an uploaded document.

    Args:
        file: Uploaded-file object with ``.name`` and ``.read()``.
        question: The user's question as a string.

    Returns:
        The model's generated answer, or a user-facing error message.
    """
    validation_error = validate_file_type(file)
    if validation_error:
        return validation_error
    file_ext = file.name.split(".")[-1].lower()
    file_bytes = file.read()
    if file_ext == "pdf":
        text = extract_text_from_pdf(file_bytes)
    elif file_ext in ("docx", "pptx"):
        text = extract_text_with_tika(file_bytes)
    elif file_ext == "xlsx":
        text = extract_text_from_excel(file_bytes)
    else:
        # Unreachable after validate_file_type; kept as a defensive fallback.
        return "❌ Unsupported file format!"
    if not text:
        return "⚠️ No text extracted from the document."
    # Trim the context so the prompt fits the model's context window.
    truncated_text = truncate_text(text)
    response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
    return response[0]["generated_text"]
# Function to process image and answer question
def answer_question_from_image(image, question):
    """Caption the uploaded image, then answer *question* about it.

    Args:
        image: A PIL image or a numpy array (as delivered by Gradio).
        question: The user's question as a string.

    Returns:
        The model's generated answer string.
    """
    # Gradio hands images over as numpy arrays; BLIP expects a PIL image.
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    captions = image_captioning_pipeline(image)
    caption = captions[0]["generated_text"]
    prompt = f"Question: {question}\nContext: {caption}"
    answer = qa_pipeline(prompt)
    return answer[0]["generated_text"]
# Gradio Interface
def _route_question(file, image, question):
    """Dispatch to document QA or image QA depending on which input is set."""
    if file:
        return answer_question_from_document(file, question)
    # Compare against None explicitly: truth-testing a numpy array raises.
    if image is not None:
        return answer_question_from_image(image, question)
    # The original lambda crashed with image=None here.
    return "⚠️ Please upload a document or an image."


interface = gr.Interface(
    fn=_route_question,
    inputs=[
        gr.File(label="📂 Upload Document (PDF, DOCX, PPTX, XLSX)", optional=True),
        gr.Image(label="🖼️ Upload Image", optional=True),
        gr.Textbox(label="💬 Ask a Question"),
    ],
    outputs="text",
    title="📘 AI Document & Image Question Answering",
    description="Upload a **document** (PDF, DOCX, PPTX, XLSX) or an **image**, then ask a question about its content.",
)
interface.launch()
|