Spaces:

ikraamkb
/

qtAnswering

Sleeping

ikraamkb committed on Mar 22

Commit

c724805

verified ·

1 Parent(s): f57a980

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -21,7 +21,12 @@ app = FastAPI()
 vqa_pipeline = pipeline("image-to-text", model="Salesforce/blip-vqa-base")
 code_generator = pipeline("text-generation", model="openai-community/gpt2-medium")
 table_analyzer = pipeline("table-question-answering", model="google/tapas-large-finetuned-wtq")
-qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-large")  # ✅ FIXED
 # ✅ Functions for Document & Image QA
 def extract_text_from_pdf(pdf_file):
@@ -74,18 +79,21 @@ def answer_question_from_document(file, question):
     if not text:
         return "No text extracted from the document."
-    # ✅ FLAN-T5 expects input in a specific format
-    input_text = f"Question: {question} Context: {text}"
     response = qa_pipeline(input_text)
-    return response[0]["generated_text"]  # ✅ FIXED OUTPUT EXTRACTION
 def answer_question_from_image(image, question):
     image_text = extract_text_from_image(image)
     if not image_text:
         return "No text detected in the image."
-    input_text = f"Question: {question} Context: {image_text}"
     response = qa_pipeline(input_text)
     return response[0]["generated_text"]

 vqa_pipeline = pipeline("image-to-text", model="Salesforce/blip-vqa-base")
 code_generator = pipeline("text-generation", model="openai-community/gpt2-medium")
 table_analyzer = pipeline("table-question-answering", model="google/tapas-large-finetuned-wtq")
+qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-large")  # ✅ FLAN-T5 Fixed
+# ✅ Function to truncate text to its first 450 whitespace-separated words (a word count only approximates the model's token count)
+def truncate_text(text, max_tokens=450):
+    words = text.split()
+    return " ".join(words[:max_tokens])  # ✅ Keeps only the first 450 words
 # ✅ Functions for Document & Image QA
 def extract_text_from_pdf(pdf_file):
     if not text:
         return "No text extracted from the document."
+    truncated_text = truncate_text(text)  # ✅ Prevents token limit error
+    input_text = f"Question: {question} Context: {truncated_text}"  # ✅ Proper FLAN-T5 format
     response = qa_pipeline(input_text)
+    return response[0]["generated_text"]  # ✅ Returns the correct output
 def answer_question_from_image(image, question):
     image_text = extract_text_from_image(image)
     if not image_text:
         return "No text detected in the image."
+    truncated_text = truncate_text(image_text)  # ✅ Prevents token limit error
+    input_text = f"Question: {question} Context: {truncated_text}"
     response = qa_pipeline(input_text)
     return response[0]["generated_text"]