# Hugging Face Space: AI document & image question answering demo.
| import gradio as gr | |
| import fitz # PyMuPDF for PDF parsing | |
| from tika import parser # Apache Tika for document parsing | |
| import openpyxl | |
| from pptx import Presentation | |
| from PIL import Image | |
| from transformers import pipeline | |
| import torch | |
| import numpy as np | |
# Load Optimized Hugging Face Models
# Both pipelines run on CPU (device=-1). Model weights are downloaded and
# loaded here at import time, so a cold start can take a while.
print("π Loading models...")
# Small chat LLM used to answer questions over extracted text / captions.
qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
# BLIP captioner: turns an uploaded image into text the QA model can consume.
image_captioning_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base", device=-1, use_fast=True)
print("β Models loaded (Optimized for Speed)")
# File extensions the app accepts for document uploads.
ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}


def validate_file_type(file):
    """Return a user-facing error string when *file* has an unsupported
    extension, or None when the file type is allowed."""
    extension = file.name.rsplit(".", 1)[-1].lower()
    if extension in ALLOWED_EXTENSIONS:
        return None
    return f"β Unsupported file format: {extension}"
def truncate_text(text, max_tokens=450):
    """Keep only the first *max_tokens* whitespace-separated words of *text*.

    NOTE: this counts words, not model tokens — it is a cheap approximation
    used to keep the prompt within the LLM's context window.
    """
    return " ".join(text.split()[:max_tokens])
# Document Text Extraction Functions
def extract_text_from_pdf(pdf_bytes):
    """Extract plain text from a PDF supplied as raw bytes.

    Returns the concatenated text of all pages, or a warning string when the
    document contains no extractable text.
    """
    # Context manager releases the MuPDF document handle even if extraction
    # raises — the original version never closed the document (resource leak).
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        text = "\n".join(page.get_text("text") for page in doc)
    return text if text else "β οΈ No text found."
def extract_text_with_tika(file_bytes):
    """Extract text from DOCX/PPTX bytes via Apache Tika.

    Tika's result dict contains "content" with a value of None when nothing
    could be extracted; `parsed.get("content", default)` therefore still
    returned None and the original code crashed calling `.strip()` on it.
    """
    parsed = parser.from_buffer(file_bytes)
    content = parsed.get("content") or "β οΈ No text found."
    return content.strip()
def extract_text_from_excel(excel_bytes):
    """Extract cell text from an XLSX workbook supplied as raw bytes.

    Returns one line per row with cells space-joined, or a warning string
    when the workbook is empty.
    """
    from io import BytesIO  # local import keeps this fix self-contained

    # openpyxl needs a filename or file-like object — passing raw bytes
    # (as the original did) raises; wrap them in BytesIO.
    wb = openpyxl.load_workbook(BytesIO(excel_bytes), read_only=True)
    try:
        rows = []
        for sheet in wb.worksheets:
            for row in sheet.iter_rows(values_only=True):
                # Render empty cells as blanks, not the literal string "None".
                rows.append(" ".join("" if cell is None else str(cell) for cell in row))
    finally:
        wb.close()  # read-only workbooks hold the file handle open
    return "\n".join(rows) if rows else "β οΈ No text found."
# Function to process document and answer question
def answer_question_from_document(file, question):
    """Answer *question* using the text of an uploaded document.

    Dispatches on the file extension to the matching extractor, truncates the
    extracted text to fit the model context, and generates an answer with the
    QA pipeline. Returns either the generated text or a user-facing error
    string.
    """
    validation_error = validate_file_type(file)
    if validation_error:
        return validation_error

    file_ext = file.name.split(".")[-1].lower()
    file_bytes = file.read()

    # Extension -> extractor dispatch; validate_file_type has already
    # rejected anything outside this set.
    if file_ext == "pdf":
        text = extract_text_from_pdf(file_bytes)
    elif file_ext in ("docx", "pptx"):
        text = extract_text_with_tika(file_bytes)
    elif file_ext == "xlsx":
        text = extract_text_from_excel(file_bytes)
    else:  # unreachable in practice; kept as a defensive guard
        return "β Unsupported file format!"

    # Also reject whitespace-only extractions, which the original passed
    # straight to the model as an empty context.
    if not text or not text.strip():
        return "β οΈ No text extracted from the document."

    truncated_text = truncate_text(text)
    response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
    return response[0]["generated_text"]
# Function to process image and answer question
def answer_question_from_image(image, question):
    """Answer *question* about an uploaded image.

    The image is captioned with BLIP and the caption is fed to the text QA
    model as context. Accepts a numpy array (as delivered by gr.Image) or a
    PIL image.
    """
    # Neither input provided: the original crashed with AttributeError here.
    if image is None:
        return "β οΈ No text found."
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    # BLIP expects 3-channel RGB; RGBA or greyscale uploads would otherwise
    # fail inside the captioning pipeline.
    image = image.convert("RGB")
    caption = image_captioning_pipeline(image)[0]['generated_text']
    response = qa_pipeline(f"Question: {question}\nContext: {caption}")
    return response[0]["generated_text"]
# Gradio Interface: a single endpoint that routes to the document pipeline
# when a file was uploaded, otherwise to the image pipeline.
def _route_question(file, image, question):
    """Prefer the uploaded document; fall back to the uploaded image."""
    if file:
        return answer_question_from_document(file, question)
    return answer_question_from_image(image, question)


interface = gr.Interface(
    fn=_route_question,
    inputs=[
        gr.File(label="π Upload Document (PDF, DOCX, PPTX, XLSX)", optional=True),
        gr.Image(label="πΌοΈ Upload Image", optional=True),
        gr.Textbox(label="π¬ Ask a Question"),
    ],
    outputs="text",
    title="π AI Document & Image Question Answering",
    description="Upload a **document** (PDF, DOCX, PPTX, XLSX) or an **image**, then ask a question about its content.",
)
interface.launch()