Spaces:

ikraamkb
/

qtAnswering

Sleeping

App Files Files Community

qtAnswering / app.py

ikraamkb

Update app.py

2be14bd verified 9 months ago

raw

history blame

2.73 kB

	from fastapi import FastAPI, File, UploadFile, Form
	from typing import List
	import pdfplumber
	import pytesseract
	from PIL import Image
	import easyocr
	import docx
	import openpyxl
	from pptx import Presentation
	from transformers import pipeline
	import io

	# FastAPI application instance; the @app.post decorators further down
	# register the two question-answering endpoints on it.
	app = FastAPI()

	# Load Hugging Face models
	# Both pipelines are constructed here at module level, i.e. once when the
	# module is imported rather than once per request.
	# Extractive question-answering model: both endpoints call it with the
	# user's question and the text extracted from the upload as context.
	qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2")
	# NOTE(review): vqa_pipeline is not referenced anywhere in the visible code.
	# It also loads a VQA checkpoint under the "image-to-text" task; presumably
	# "visual-question-answering" was intended - confirm before using it.
	vqa_pipeline = pipeline("image-to-text", model="Salesforce/blip-vqa-base") # For images

	def extract_text_from_pdf(pdf_file):
	    """Return the text of every page of a PDF, one page per line group.

	    Args:
	        pdf_file: Whatever ``pdfplumber.open`` accepts; the document
	            endpoint in this file passes an in-memory ``io.BytesIO``.

	    Returns:
	        The page texts joined with newlines, with leading and trailing
	        whitespace stripped. Empty string when no page yields any text.
	    """
	    with pdfplumber.open(pdf_file) as pdf:
	        # A page without a text layer (e.g. a scanned image) may yield None;
	        # treat it as empty instead of failing on ``None + "\n"``.
	        page_texts = [page.extract_text() or "" for page in pdf.pages]
	    return "\n".join(page_texts).strip()

	def extract_text_from_docx(docx_file):
	    """Return the paragraph text of a Word document, one paragraph per line.

	    Args:
	        docx_file: Whatever ``docx.Document`` accepts; the document endpoint
	            in this file passes an in-memory ``io.BytesIO``.

	    Returns:
	        The ``text`` of each entry in ``Document.paragraphs`` joined with
	        newlines.
	    """
	    document = docx.Document(docx_file)
	    return "\n".join(paragraph.text for paragraph in document.paragraphs)

	def extract_text_from_pptx(pptx_file):
	    """Return the text of every text-bearing shape in a presentation.

	    Args:
	        pptx_file: Whatever ``Presentation`` accepts; the document endpoint
	            in this file passes an in-memory ``io.BytesIO``.

	    Returns:
	        The ``text`` of each shape that exposes a ``text`` attribute, in
	        slide order then shape order, joined with newlines.
	    """
	    presentation = Presentation(pptx_file)
	    shape_texts = [
	        shape.text
	        for slide in presentation.slides
	        for shape in slide.shapes
	        # Shapes without a ``text`` attribute are skipped.
	        if hasattr(shape, "text")
	    ]
	    return "\n".join(shape_texts)

	def extract_text_from_excel(excel_file):
	    """Return the cell values of every worksheet as plain text.

	    Args:
	        excel_file: Whatever ``openpyxl.load_workbook`` accepts; the
	            document endpoint in this file passes an in-memory
	            ``io.BytesIO``.

	    Returns:
	        One line per non-empty row, with the row's non-empty cell values
	        converted with ``str`` and separated by single spaces. Rows are
	        emitted in worksheet order then row order.
	    """
	    workbook = openpyxl.load_workbook(excel_file)
	    lines = []
	    for sheet in workbook.worksheets:
	        for row in sheet.iter_rows(values_only=True):
	            # Empty cells come back as None; stringifying them would inject
	            # the literal word "None" into the question-answering context.
	            cells = [str(value) for value in row if value is not None]
	            if cells:
	                lines.append(" ".join(cells))
	    return "\n".join(lines)

	# Lazily created OCR reader shared by all calls, so the OCR models are
	# loaded once instead of on every request.
	_ocr_reader = None


	def _get_ocr_reader():
	    """Return the shared English EasyOCR reader, creating it on first use."""
	    global _ocr_reader
	    if _ocr_reader is None:
	        _ocr_reader = easyocr.Reader(["en"])
	    return _ocr_reader


	def extract_text_from_image(image_file):
	    """Run OCR on an image and return the recognised text.

	    Args:
	        image_file: A PIL image (as passed by the image endpoint in this
	            file) or any input ``easyocr.Reader.readtext`` takes directly.

	    Returns:
	        The recognised text fragments joined with single spaces. Empty
	        string when nothing is detected.
	    """
	    ocr_input = image_file
	    if isinstance(image_file, Image.Image):
	        # readtext documents path/URL, bytes and numpy array inputs, not
	        # PIL images in general, so hand it the image as encoded bytes.
	        encoded = io.BytesIO()
	        image_file.convert("RGB").save(encoded, format="PNG")
	        ocr_input = encoded.getvalue()
	    detections = _get_ocr_reader().readtext(ocr_input)
	    # Index 1 of each detection is used as the recognised text.
	    return " ".join(detection[1] for detection in detections)

	@app.post("/qa/document/")
	async def qa_document(file: UploadFile = File(...), question: str = Form(...)):
	    """Answer a question from the text of an uploaded document.

	    The extractor is chosen from the file name's extension; pdf, docx,
	    pptx and xlsx are supported.

	    Args:
	        file: The uploaded document.
	        question: The question to answer from the document text.

	    Returns:
	        ``{"question": ..., "answer": ...}`` on success, or
	        ``{"error": ...}`` when the format is unsupported or no text could
	        be extracted.
	    """
	    extractors = {
	        "pdf": extract_text_from_pdf,
	        "docx": extract_text_from_docx,
	        "pptx": extract_text_from_pptx,
	        "xlsx": extract_text_from_excel,
	    }
	    # The filename may be missing; treat that as an unsupported format
	    # rather than failing on ``None.split``.
	    file_ext = (file.filename or "").split(".")[-1].lower()
	    extractor = extractors.get(file_ext)
	    if extractor is None:
	        return {"error": "Unsupported file format!"}
	    # The upload is read only once the format is known to be supported.
	    text = extractor(io.BytesIO(await file.read()))
	    # Whitespace-only text (e.g. a document of blank paragraphs) gives the
	    # model nothing to answer from, so it is rejected like empty text.
	    if not text or not text.strip():
	        return {"error": "No text extracted from the document."}
	    response = qa_pipeline(question=question, context=text)
	    return {"question": question, "answer": response["answer"]}

	@app.post("/qa/image/")
	async def qa_image(file: UploadFile = File(...), question: str = Form(...)):
	    """Answer a question from the text recognised in an uploaded image.

	    Args:
	        file: The uploaded image.
	        question: The question to answer from the OCR text.

	    Returns:
	        ``{"question": ..., "answer": ...}`` on success, or
	        ``{"error": ...}`` when the upload is not a readable image or no
	        text is detected in it.
	    """
	    payload = await file.read()
	    try:
	        image = Image.open(io.BytesIO(payload))
	    except OSError:
	        # Pillow raises an OSError subclass when it cannot identify the
	        # data; report it in the same error-dict style as the other
	        # failures instead of letting the request fail.
	        return {"error": "Uploaded file is not a valid image."}
	    image_text = extract_text_from_image(image)
	    if not image_text or not image_text.strip():
	        return {"error": "No text detected in the image."}
	    response = qa_pipeline(question=question, context=image_text)
	    return {"question": question, "answer": response["answer"]}