""" from fastapi import FastAPI from fastapi.responses import RedirectResponse import gradio as gr from transformers import pipeline, ViltProcessor, ViltForQuestionAnswering, AutoTokenizer, AutoModelForCausalLM from PIL import Image import torch import fitz # PyMuPDF for PDF app = FastAPI() # ========== Document QA Setup ========== doc_tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0") doc_model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0") def read_pdf(file): doc = fitz.open(stream=file.read(), filetype="pdf") text = "" for page in doc: text += page.get_text() return text def answer_question_from_doc(file, question): if file is None or not question.strip(): return "Please upload a document and ask a question." text = read_pdf(file) prompt = f"Context: {text}\nQuestion: {question}\nAnswer:" inputs = doc_tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048) with torch.no_grad(): outputs = doc_model.generate(**inputs, max_new_tokens=100) answer = doc_tokenizer.decode(outputs[0], skip_special_tokens=True) return answer.split("Answer:")[-1].strip() # ========== Image QA Setup ========== vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa") vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa") def answer_question_from_image(image, question): if image is None or not question.strip(): return "Please upload an image and ask a question." inputs = vqa_processor(image, question, return_tensors="pt") with torch.no_grad(): outputs = vqa_model(**inputs) predicted_id = outputs.logits.argmax(-1).item() return vqa_model.config.id2label[predicted_id] # ========== Gradio Interfaces ========== doc_interface = gr.Interface( fn=answer_question_from_doc, inputs=[gr.File(label="Upload Document (PDF)"), gr.Textbox(label="Ask a Question")], outputs="text", title="Document Question Answering" ) img_interface = gr.Interface( fn=answer_question_from_image, inputs=[gr.Image(label="Upload Image"), gr.Textbox(label="Ask a Question")], outputs="text", title="Image Question Answering" ) # ========== Combine and Mount ========== demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"]) app = gr.mount_gradio_app(app, demo, path="/") @app.get("/") def root(): return RedirectResponse(url="/") """ from fastapi import FastAPI from fastapi.responses import RedirectResponse import gradio as gr import pytesseract from PIL import Image import fitz # PyMuPDF import pdfplumber import easyocr import docx import openpyxl from pptx import Presentation from transformers import pipeline from deep_translator import GoogleTranslator import json import os app = FastAPI() qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad") reader = easyocr.Reader(['en']) # Utility functions def extract_text_from_pdf(pdf_file): with pdfplumber.open(pdf_file) as pdf: return "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()]) def extract_text_from_docx(docx_file): doc = docx.Document(docx_file) return "\n".join([para.text for para in doc.paragraphs]) def extract_text_from_pptx(pptx_file): prs = Presentation(pptx_file) text = [] for slide in prs.slides: for shape in slide.shapes: if hasattr(shape, "text"): text.append(shape.text) return "\n".join(text) def extract_text_from_xlsx(xlsx_file): wb = openpyxl.load_workbook(xlsx_file) text = [] for sheet in wb.worksheets: for row in sheet.iter_rows(): text.extend([str(cell.value) for cell in row if cell.value is not None]) return "\n".join(text) def extract_text(file): ext = os.path.splitext(file.name)[1].lower() if ext == ".pdf": return extract_text_from_pdf(file) elif ext == ".docx": return extract_text_from_docx(file) elif ext == ".pptx": return extract_text_from_pptx(file) elif ext == ".xlsx": return extract_text_from_xlsx(file) else: return "Unsupported file type" def answer_question_from_doc(file, question, translate_to="en"): context = extract_text(file) result = qa_pipeline(question=question, context=context) translated = GoogleTranslator(source='auto', target=translate_to).translate(result["answer"]) return { "answer": translated, "score": result["score"], "original": result["answer"] } def answer_question_from_image(image, question, translate_to="en"): img_text = pytesseract.image_to_string(image) if not img_text.strip(): img_text = "\n".join([line[1] for line in reader.readtext(image)]) result = qa_pipeline(question=question, context=img_text) translated = GoogleTranslator(source='auto', target=translate_to).translate(result["answer"]) return { "answer": translated, "score": result["score"], "original": result["answer"] } # Gradio Interfaces doc_interface = gr.Interface( fn=answer_question_from_doc, inputs=[ gr.File(label="Upload Document (PDF, DOCX, PPTX, XLSX)"), gr.Textbox(label="Ask a Question"), gr.Textbox(label="Translate Answer To (e.g., en, fr, ar)", value="en") ], outputs=[ gr.Textbox(label="Translated Answer"), gr.Number(label="Confidence Score"), gr.Textbox(label="Original Answer") ], title="📄 Document QA + Translation + Export" ) img_interface = gr.Interface( fn=answer_question_from_image, inputs=[ gr.Image(label="Upload Image"), gr.Textbox(label="Ask a Question"), gr.Textbox(label="Translate Answer To (e.g., en, fr, ar)", value="en") ], outputs=[ gr.Textbox(label="Translated Answer"), gr.Number(label="Confidence Score"), gr.Textbox(label="Original Answer") ], title="🖼️ Image QA + OCR + Translation + Export" ) # Combine interfaces demo = gr.TabbedInterface([doc_interface, img_interface], ["Document QA", "Image QA"]) app = gr.mount_gradio_app(app, demo, path="/") @app.get("/") def root(): return RedirectResponse(url="/")