Spaces:

ikraamkb
/

qtAnswering

Sleeping

App Files Files Community

ikraamkb committed on Mar 26

Commit

0b363e7

verified ·

1 Parent(s): 0f8e09c

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -37

app.py CHANGED Viewed

@@ -5,26 +5,32 @@ from tika import parser  # Apache Tika for document parsing
 import openpyxl
 from pptx import Presentation
 from PIL import Image
-from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration
-import gradio as gr
 import torch
 import numpy as np
 # Initialize FastAPI
 app = FastAPI()
-print(f"🔄 Loading models")
 # Load Hugging Face Models
-doc_qa_pipeline = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", device=-1)
-# Load Image Captioning Model
-processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-model = model.to(dtype=torch.float16)  # Quantizing to FP16
-print("✅ Models loaded")
 # Allowed File Extensions
 ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
@@ -73,31 +79,20 @@ def extract_text_from_excel(excel_bytes):
     except Exception as e:
         return f"❌ Error reading Excel: {str(e)}"
-def answer_question_from_document(file, question: str):
     print("📂 Processing document for QA...")
-    # Ensure file is not None
-    if not file:
-        return "❌ No file uploaded."
-    ext = file.name.split(".")[-1].lower()
-    print(f"🔍 Validating file type: {ext}")
-    if ext not in ALLOWED_EXTENSIONS:
-        return f"❌ Unsupported file format: {ext}"
-    # Read file contents
-    try:
-        with open(file.name, "rb") as f:
-            file_bytes = f.read()
-    except Exception as e:
-        return f"❌ Error reading file: {str(e)}"
-    # Extract text based on file type
-    if ext == "pdf":
         text = extract_text_from_pdf(file_bytes)
-    elif ext in ["docx", "pptx"]:
         text = extract_text_with_tika(file_bytes)
-    elif ext == "xlsx":
         text = extract_text_from_excel(file_bytes)
     else:
         return "❌ Unsupported file format!"
@@ -106,8 +101,8 @@ def answer_question_from_document(file, question: str):
         return "⚠️ No text extracted from the document."
     truncated_text = truncate_text(text)
-    print("🤖 Generating response...")
-    response = doc_qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
     return response[0]["generated_text"]
@@ -118,12 +113,10 @@ def answer_question_from_image(image, question):
             image = Image.fromarray(image)  # Convert to PIL Image
         print("🖼️ Generating caption for image...")
-        inputs = processor(images=image, return_tensors="pt", use_fast=True).to(dtype=torch.float16)
-        output = model.generate(**inputs)
-        caption = processor.decode(output[0], skip_special_tokens=True)
-        print("🤖 Answering question based on caption...")
-        response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}")
         return response[0]["generated_text"]
     except Exception as e:

 import openpyxl
 from pptx import Presentation
 from PIL import Image
 import torch
+from transformers import pipeline
+import gradio as gr
 import numpy as np
 # Initialize FastAPI
 app = FastAPI()
+print(f"🔄 Loading models (Running on GPU: {torch.cuda.is_available()})")
 # Load Hugging Face Models
+doc_qa_pipeline = pipeline(
+    "text-generation",
+    model="Qwen/Qwen2.5-VL-7B-Instruct",
+    device=0 if torch.cuda.is_available() else -1
+)
+image_captioning_pipeline = pipeline(
+    "image-to-text",
+    model="Salesforce/blip-image-captioning-base",
+    device=0 if torch.cuda.is_available() else -1,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    use_fast=True
+)
+print("✅ Models loaded successfully")
 # Allowed File Extensions
 ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
     except Exception as e:
         return f"❌ Error reading Excel: {str(e)}"
+def answer_question_from_document(file: UploadFile, question: str):
     print("📂 Processing document for QA...")
+    validation_error = validate_file_type(file)
+    if validation_error:
+        return validation_error
+    file_ext = file.filename.split(".")[-1].lower()
+    file_bytes = file.file.read()
+    if file_ext == "pdf":
         text = extract_text_from_pdf(file_bytes)
+    elif file_ext in ["docx", "pptx"]:
         text = extract_text_with_tika(file_bytes)
+    elif file_ext == "xlsx":
         text = extract_text_from_excel(file_bytes)
     else:
         return "❌ Unsupported file format!"
         return "⚠️ No text extracted from the document."
     truncated_text = truncate_text(text)
+    print("🤖 Generating response with Qwen2.5-VL-7B...")
+    response = doc_qa_pipeline(f"Question: {question}\nContext: {truncated_text}", max_length=100)
     return response[0]["generated_text"]
             image = Image.fromarray(image)  # Convert to PIL Image
         print("🖼️ Generating caption for image...")
+        caption = image_captioning_pipeline(image)[0]['generated_text']
+        print("🤖 Answering question based on caption with Qwen2.5-VL-7B...")
+        response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}", max_length=100)
         return response[0]["generated_text"]
     except Exception as e: