ikraamkb commited on
Commit
0878e54
·
verified ·
1 Parent(s): 790835b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -33
app.py CHANGED
@@ -16,28 +16,20 @@ import easyocr
16
  # Initialize FastAPI
17
  app = FastAPI()
18
 
19
- # Load AI Model for Question Answering (Mistral-7B)
20
- model_name = "mistralai/Mistral-7B"
21
  print(f"πŸ”„ Loading model: {model_name}...")
22
  tokenizer = AutoTokenizer.from_pretrained(model_name)
23
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
24
 
25
- qa_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
26
 
27
- # Load Pretrained Object Detection Model (Torchvision)
28
- from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
29
- weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
30
- object_detection_model = fasterrcnn_resnet50_fpn(weights=weights)
31
- object_detection_model.eval()
32
 
33
  # Initialize OCR Model (Lazy Load)
34
  reader = easyocr.Reader(["en"], gpu=True)
35
 
36
- # Image Transformations
37
- transform = transforms.Compose([
38
- transforms.ToTensor()
39
- ])
40
-
41
  # Allowed File Extensions
42
  ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
43
 
@@ -98,16 +90,6 @@ def extract_text_from_excel(excel_file):
98
  except Exception as e:
99
  return f"❌ Error reading Excel: {str(e)}"
100
 
101
- def extract_text_from_image(image_file):
102
- print("πŸ–ΌοΈ Extracting text from image...")
103
- image = Image.open(image_file).convert("RGB")
104
- if np.array(image).std() < 10: # Low contrast = likely empty
105
- return "⚠️ No meaningful content detected in the image."
106
-
107
- result = reader.readtext(np.array(image))
108
- return " ".join([res[1] for res in result]) if result else "⚠️ No text found."
109
-
110
- # Function to answer questions based on document content
111
  def answer_question_from_document(file, question):
112
  print("πŸ“‚ Processing document for QA...")
113
  validation_error = validate_file_type(file)
@@ -129,19 +111,16 @@ def answer_question_from_document(file, question):
129
 
130
  truncated_text = truncate_text(text)
131
  print("πŸ€– Generating response...")
132
- response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
133
 
134
  return response[0]["generated_text"]
135
 
136
  def answer_question_from_image(image, question):
137
- print("πŸ–ΌοΈ Processing image for QA...")
138
- image_text = extract_text_from_image(image)
139
- if not image_text:
140
- return "⚠️ No meaningful content detected in the image."
141
 
142
- truncated_text = truncate_text(image_text)
143
- print("πŸ€– Generating response...")
144
- response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
145
 
146
  return response[0]["generated_text"]
147
 
@@ -166,4 +145,4 @@ app = gr.mount_gradio_app(app, demo, path="/")
166
 
167
  @app.get("/")
168
  def home():
169
- return RedirectResponse(url="/")
 
16
  # Initialize FastAPI
17
  app = FastAPI()
18
 
19
+ # Load AI Model for Question Answering on Documents (Mistral-7B)
20
+ model_name = "mistralai/Mistral-7B-Instruct"
21
  print(f"πŸ”„ Loading model: {model_name}...")
22
  tokenizer = AutoTokenizer.from_pretrained(model_name)
23
+ model = AutoModelForCausalLM.from_pretrained(model_name)
24
 
25
+ doc_qa_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
26
 
27
+ # Load Image Captioning Model (nlpconnect/vit-gpt2-image-captioning)
28
+ image_captioning_pipeline = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
 
 
 
29
 
30
  # Initialize OCR Model (Lazy Load)
31
  reader = easyocr.Reader(["en"], gpu=True)
32
 
 
 
 
 
 
33
  # Allowed File Extensions
34
  ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
35
 
 
90
  except Exception as e:
91
  return f"❌ Error reading Excel: {str(e)}"
92
 
 
 
 
 
 
 
 
 
 
 
93
  def answer_question_from_document(file, question):
94
  print("πŸ“‚ Processing document for QA...")
95
  validation_error = validate_file_type(file)
 
111
 
112
  truncated_text = truncate_text(text)
113
  print("πŸ€– Generating response...")
114
+ response = doc_qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
115
 
116
  return response[0]["generated_text"]
117
 
118
  def answer_question_from_image(image, question):
119
+ print("πŸ–ΌοΈ Generating caption for image...")
120
+ caption = image_captioning_pipeline(image)[0]['generated_text']
 
 
121
 
122
+ print("πŸ€– Answering question based on caption...")
123
+ response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}")
 
124
 
125
  return response[0]["generated_text"]
126
 
 
145
 
146
  @app.get("/")
147
  def home():
148
+ return RedirectResponse(url="/")