Spaces:
Running
Running
| """ | |
| from fastapi import FastAPI | |
| from fastapi.responses import RedirectResponse | |
| import gradio as gr | |
| from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering, AutoTokenizer, AutoModelForCausalLM | |
| from PIL import Image | |
| import torch | |
| import fitz # PyMuPDF for PDF | |
| app = FastAPI() | |
| # ========== Document QA Setup ========== | |
| doc_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0") | |
| doc_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0") | |
| def read_pdf(file): | |
| doc = fitz.open(stream=file.read(), filetype="pdf") | |
| text = "" | |
| for page in doc: | |
| text += page.get_text() | |
| return text | |
| def answer_question_from_doc(file, question): | |
| if file is None or not question.strip(): | |
| return "Please upload a document and ask a question." | |
| text = read_pdf(file) | |
| prompt = f"Context: {text}\nQuestion: {question}\nAnswer:" | |
| inputs = doc_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048) | |
| with torch.no_grad(): | |
| outputs = doc_model.generate(**inputs, max_new_tokens=100) | |
| answer = doc_tokenizer.decode(outputs[0], skip_special_tokens=True) | |
| return answer.split("Answer:")[-1].strip() | |
| # ========== Image QA Setup ========== | |
| vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa") | |
| vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa") | |
| def answer_question_from_image(image, question): | |
| if image is None or not question.strip(): | |
| return "Please upload an image and ask a question." | |
| inputs = vqa_processor(image, question, return_tensors="pt") | |
| with torch.no_grad(): | |
| outputs = vqa_model(**inputs) | |
| predicted_id = outputs.logits.argmax(-1).item() | |
| return vqa_model.config.id2label[predicted_id] | |
| # ========== Gradio Interfaces ========== | |
| doc_interface = gr.Interface( | |
| fn=answer_question_from_doc, | |
| inputs=[gr.File(label="Upload Document (PDF)"), gr.Textbox(label="Ask a Question")], | |
| outputs="text", | |
| title="Document Question Answering" | |
| ) | |
| img_interface = gr.Interface( | |
| fn=answer_question_from_image, | |
| inputs=[gr.Image(label="Upload Image"), gr.Textbox(label="Ask a Question")], | |
| outputs="text", | |
| title="Image Question Answering" | |
| ) | |
| # ========== Combine and Mount ========== | |
| demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"]) | |
| app = gr.mount_gradio_app(app, demo, path="/") | |
| @app.get("/") | |
| def root(): | |
| return RedirectResponse(url="/") | |
| """ | |
| from fastapi import FastAPI | |
| from fastapi.responses import RedirectResponse | |
| import gradio as gr | |
| import pytesseract | |
| from PIL import Image | |
| import fitz # PyMuPDF | |
| import pdfplumber | |
| import easyocr | |
| import docx | |
| import openpyxl | |
| from pptx import Presentation | |
| from transformers import pipeline | |
| from deep_translator import GoogleTranslator | |
| import json | |
| import os | |
# FastAPI application object; the Gradio UI is attached to it via
# gr.mount_gradio_app later in this module.
app = FastAPI()

# Extractive question-answering pipeline. Both the document and the image
# handlers run their extracted text through this single instance.
qa_pipeline = pipeline(
    "question-answering",
    model="distilbert-base-uncased-distilled-squad",
)

# EasyOCR reader configured for English only; the image handler uses it as
# a fallback when pytesseract returns no text.
reader = easyocr.Reader(["en"])
| # Utility functions | |
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, joined with newlines.

    Args:
        pdf_file: The PDF to read; handed unchanged to ``pdfplumber.open``.

    Returns:
        str: The text of each page separated by a newline. Pages for which
        ``extract_text()`` returns a falsy value (``None`` or ``""``) are
        skipped, so the result is ``""`` when no page yields text.
    """
    page_texts = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            # Extract once per page and reuse the value: calling
            # extract_text() in both the filter and the output expression
            # repeats the whole extraction for every page.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
    return "\n".join(page_texts)
def extract_text_from_docx(docx_file):
    """Return the paragraph text of a Word document, one paragraph per line.

    Args:
        docx_file: The .docx to read; handed unchanged to ``docx.Document``.

    Returns:
        str: The ``text`` of every entry in ``Document.paragraphs``, joined
        with newlines. Only what ``paragraphs`` exposes is included.
    """
    document = docx.Document(docx_file)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts)
def extract_text_from_pptx(pptx_file):
    """Return the text of every text-bearing shape in a presentation.

    Args:
        pptx_file: The .pptx to read; handed unchanged to ``Presentation``.

    Returns:
        str: The ``text`` of each shape that has a ``text`` attribute, in
        slide order then shape order, joined with newlines.
    """
    presentation = Presentation(pptx_file)
    shape_texts = [
        shape.text
        for slide in presentation.slides
        for shape in slide.shapes
        # Shapes without a ``text`` attribute are skipped.
        if hasattr(shape, "text")
    ]
    return "\n".join(shape_texts)
def extract_text_from_xlsx(xlsx_file):
    """Return every non-empty cell value of a workbook, one value per line.

    Args:
        xlsx_file: The .xlsx to read; handed unchanged to
            ``openpyxl.load_workbook``.

    Returns:
        str: ``str()`` of each cell value that is not ``None``, walking the
        worksheets in order and each sheet row by row, joined with newlines.
    """
    workbook = openpyxl.load_workbook(xlsx_file)
    cell_texts = []
    for worksheet in workbook.worksheets:
        for cells in worksheet.iter_rows():
            for cell in cells:
                # Empty cells carry None and contribute nothing.
                if cell.value is None:
                    continue
                cell_texts.append(str(cell.value))
    return "\n".join(cell_texts)
def extract_text(file):
    """Extract plain text from an uploaded document, chosen by extension.

    Args:
        file: The uploaded document. Either an object exposing its path as
            ``.name`` or a plain path (``str`` / ``os.PathLike``). It is
            forwarded unchanged to the format-specific extractor.

    Returns:
        str: The extracted text for ``.pdf``, ``.docx``, ``.pptx`` and
        ``.xlsx`` files (extension matched case-insensitively), or the
        literal string ``"Unsupported file type"`` for anything else.
    """
    # Fall back to the argument itself when it has no ``.name`` so that a
    # bare path string works as well as an upload object.
    path = getattr(file, "name", file)
    ext = os.path.splitext(path)[1].lower()

    if ext == ".pdf":
        return extract_text_from_pdf(file)
    if ext == ".docx":
        return extract_text_from_docx(file)
    if ext == ".pptx":
        return extract_text_from_pptx(file)
    if ext == ".xlsx":
        return extract_text_from_xlsx(file)
    # Kept as a plain string (not an exception) because callers receive it
    # as the function's return value.
    return "Unsupported file type"
def _doc_qa_notice(message):
    """Build a result dict that carries a user-facing notice, not an answer."""
    return {"answer": message, "score": 0.0, "original": ""}


def answer_question_from_doc(file, question, translate_to="en"):
    """Answer a question about an uploaded document and translate the answer.

    Args:
        file: The uploaded document, as accepted by ``extract_text``.
        question: The question to ask about the document's text.
        translate_to: Target language code for the translation. A blank or
            missing value falls back to ``"en"``.

    Returns:
        dict: ``{"answer": <translated answer>, "score": <model score>,
        "original": <untranslated answer>}``. When the request cannot be
        answered (no file, blank question, unsupported file type, or no
        extractable text) ``"answer"`` holds an explanatory message,
        ``"score"`` is ``0.0`` and ``"original"`` is ``""``.
    """
    if file is None or not question or not question.strip():
        return _doc_qa_notice("Please upload a document and ask a question.")

    context = extract_text(file)
    # extract_text signals an unknown extension with this literal string;
    # it must not be handed to the model as if it were document text.
    if context == "Unsupported file type":
        return _doc_qa_notice(
            "Unsupported file type. Please upload a PDF, DOCX, PPTX, or XLSX file."
        )
    # The QA pipeline has nothing to search in an empty context.
    if not context.strip():
        return _doc_qa_notice("No readable text was found in the document.")

    result = qa_pipeline(question=question, context=context)
    target_language = (translate_to or "").strip() or "en"
    translated = GoogleTranslator(source="auto", target=target_language).translate(
        result["answer"]
    )
    return {
        "answer": translated,
        "score": result["score"],
        "original": result["answer"],
    }
def _image_qa_notice(message):
    """Build a result dict that carries a user-facing notice, not an answer."""
    return {"answer": message, "score": 0.0, "original": ""}


def answer_question_from_image(image, question, translate_to="en"):
    """Answer a question about the text found in an image via OCR.

    The image is read with pytesseract first; if that yields only
    whitespace, the module-level EasyOCR ``reader`` is tried instead.

    Args:
        image: The uploaded image; passed unchanged to
            ``pytesseract.image_to_string`` and, as a fallback, to
            ``reader.readtext``.
        question: The question to ask about the recognised text.
        translate_to: Target language code for the translation. A blank or
            missing value falls back to ``"en"``.

    Returns:
        dict: ``{"answer": <translated answer>, "score": <model score>,
        "original": <untranslated answer>}``. When the request cannot be
        answered (no image, blank question, or no text recognised)
        ``"answer"`` holds an explanatory message, ``"score"`` is ``0.0``
        and ``"original"`` is ``""``.
    """
    if image is None or not question or not question.strip():
        return _image_qa_notice("Please upload an image and ask a question.")

    img_text = pytesseract.image_to_string(image)
    if not img_text.strip():
        # Element 1 of each EasyOCR result entry is used as the text.
        img_text = "\n".join(line[1] for line in reader.readtext(image))
    # Neither OCR engine found text, so the QA pipeline has no context.
    if not img_text.strip():
        return _image_qa_notice("No readable text was found in the image.")

    result = qa_pipeline(question=question, context=img_text)
    target_language = (translate_to or "").strip() or "en"
    translated = GoogleTranslator(source="auto", target=target_language).translate(
        result["answer"]
    )
    return {
        "answer": translated,
        "score": result["score"],
        "original": result["answer"],
    }
| # Gradio Interfaces | |
def _doc_interface_outputs(file, question, translate_to="en"):
    """Adapt ``answer_question_from_doc`` to the three output components.

    The QA function returns a dict keyed by "answer", "score" and
    "original", while the interface below declares three output components
    that are filled positionally. This unpacks the dict into that order.
    """
    result = answer_question_from_doc(file, question, translate_to)
    if isinstance(result, dict):
        return result["answer"], result["score"], result["original"]
    # Already a positional sequence: pass it through untouched.
    return result


# Document tab: upload a file, ask a question, choose the answer language.
doc_interface = gr.Interface(
    fn=_doc_interface_outputs,
    inputs=[
        gr.File(label="Upload Document (PDF, DOCX, PPTX, XLSX)"),
        gr.Textbox(label="Ask a Question"),
        gr.Textbox(label="Translate Answer To (e.g., en, fr, ar)", value="en"),
    ],
    # Order must match the tuple returned by _doc_interface_outputs.
    outputs=[
        gr.Textbox(label="Translated Answer"),
        gr.Number(label="Confidence Score"),
        gr.Textbox(label="Original Answer"),
    ],
    # NOTE(review): the leading emoji was mis-encoded in the source and only
    # its first byte survived; the page emoji is a best-guess restoration.
    title="📄 Document QA + Translation + Export",
)
def _img_interface_outputs(image, question, translate_to="en"):
    """Adapt ``answer_question_from_image`` to the three output components.

    The QA function returns a dict keyed by "answer", "score" and
    "original", while the interface below declares three output components
    that are filled positionally. This unpacks the dict into that order.
    """
    result = answer_question_from_image(image, question, translate_to)
    if isinstance(result, dict):
        return result["answer"], result["score"], result["original"]
    # Already a positional sequence: pass it through untouched.
    return result


# Image tab: upload an image, ask a question, choose the answer language.
img_interface = gr.Interface(
    fn=_img_interface_outputs,
    inputs=[
        gr.Image(label="Upload Image"),
        gr.Textbox(label="Ask a Question"),
        gr.Textbox(label="Translate Answer To (e.g., en, fr, ar)", value="en"),
    ],
    # Order must match the tuple returned by _img_interface_outputs.
    outputs=[
        gr.Textbox(label="Translated Answer"),
        gr.Number(label="Confidence Score"),
        gr.Textbox(label="Original Answer"),
    ],
    # The title's leading emoji was mis-encoded in the source (its UTF-8
    # bytes had been read through a Thai code page); restored here.
    title="🖼️ Image QA + OCR + Translation + Export",
)
| # Combine interfaces | |
# One tab per interface; the labels are matched to the interfaces by position.
demo = gr.TabbedInterface(
    [doc_interface, img_interface],
    ["Document QA", "Image QA"],
)

# Serve the tabbed UI from the site root of the FastAPI app. The name ``app``
# is rebound to whatever mount_gradio_app returns.
app = gr.mount_gradio_app(app, demo, path="/")
def root():
    """Return a redirect response pointing at ``/``.

    NOTE(review): no route decorator is attached to this function in this
    file, so nothing here registers it with ``app``. Its target, ``/``, is
    also the path the Gradio UI is mounted on; confirm whether this handler
    is still needed.
    """
    redirect = RedirectResponse(url="/")
    return redirect