ikraamkb commited on
Commit
0878e54
·
verified ·
1 Parent(s): 790835b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -33
app.py CHANGED
@@ -16,28 +16,20 @@ import easyocr
16
  # Initialize FastAPI
17
  app = FastAPI()
18
 
19
- # Load AI Model for Question Answering (Mistral-7B)
20
- model_name = "mistralai/Mistral-7B"
21
  print(f"πŸ”„ Loading model: {model_name}...")
22
  tokenizer = AutoTokenizer.from_pretrained(model_name)
23
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
24
 
25
- qa_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
26
 
27
- # Load Pretrained Object Detection Model (Torchvision)
28
- from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
29
- weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
30
- object_detection_model = fasterrcnn_resnet50_fpn(weights=weights)
31
- object_detection_model.eval()
32
 
33
  # Initialize OCR Model (Lazy Load)
34
  reader = easyocr.Reader(["en"], gpu=True)
35
 
36
- # Image Transformations
37
- transform = transforms.Compose([
38
- transforms.ToTensor()
39
- ])
40
-
41
  # Allowed File Extensions
42
  ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
43
 
@@ -98,16 +90,6 @@ def extract_text_from_excel(excel_file):
98
  except Exception as e:
99
  return f"❌ Error reading Excel: {str(e)}"
100
 
101
- def extract_text_from_image(image_file):
102
- print("πŸ–ΌοΈ Extracting text from image...")
103
- image = Image.open(image_file).convert("RGB")
104
- if np.array(image).std() < 10: # Low contrast = likely empty
105
- return "⚠️ No meaningful content detected in the image."
106
-
107
- result = reader.readtext(np.array(image))
108
- return " ".join([res[1] for res in result]) if result else "⚠️ No text found."
109
-
110
- # Function to answer questions based on document content
111
  def answer_question_from_document(file, question):
112
  print("πŸ“‚ Processing document for QA...")
113
  validation_error = validate_file_type(file)
@@ -129,19 +111,16 @@ def answer_question_from_document(file, question):
129
 
130
  truncated_text = truncate_text(text)
131
  print("πŸ€– Generating response...")
132
- response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
133
 
134
  return response[0]["generated_text"]
135
 
136
  def answer_question_from_image(image, question):
137
- print("πŸ–ΌοΈ Processing image for QA...")
138
- image_text = extract_text_from_image(image)
139
- if not image_text:
140
- return "⚠️ No meaningful content detected in the image."
141
 
142
- truncated_text = truncate_text(image_text)
143
- print("πŸ€– Generating response...")
144
- response = qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
145
 
146
  return response[0]["generated_text"]
147
 
@@ -166,4 +145,4 @@ app = gr.mount_gradio_app(app, demo, path="/")
166
 
167
  @app.get("/")
168
  def home():
169
- return RedirectResponse(url="/")
 
16
  # Initialize FastAPI
17
  app = FastAPI()
18
 
19
+ # Load AI Model for Question Answering on Documents (Mistral-7B)
20
+ model_name = "mistralai/Mistral-7B-Instruct"
21
  print(f"πŸ”„ Loading model: {model_name}...")
22
  tokenizer = AutoTokenizer.from_pretrained(model_name)
23
+ model = AutoModelForCausalLM.from_pretrained(model_name)
24
 
25
+ doc_qa_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=-1)
26
 
27
+ # Load Image Captioning Model (nlpconnect/vit-gpt2-image-captioning)
28
+ image_captioning_pipeline = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
 
 
 
29
 
30
  # Initialize OCR Model (Lazy Load)
31
  reader = easyocr.Reader(["en"], gpu=True)
32
 
 
 
 
 
 
33
  # Allowed File Extensions
34
  ALLOWED_EXTENSIONS = {"pdf", "docx", "pptx", "xlsx"}
35
 
 
90
  except Exception as e:
91
  return f"❌ Error reading Excel: {str(e)}"
92
 
 
 
 
 
 
 
 
 
 
 
93
  def answer_question_from_document(file, question):
94
  print("πŸ“‚ Processing document for QA...")
95
  validation_error = validate_file_type(file)
 
111
 
112
  truncated_text = truncate_text(text)
113
  print("πŸ€– Generating response...")
114
+ response = doc_qa_pipeline(f"Question: {question}\nContext: {truncated_text}")
115
 
116
  return response[0]["generated_text"]
117
 
118
  def answer_question_from_image(image, question):
119
+ print("πŸ–ΌοΈ Generating caption for image...")
120
+ caption = image_captioning_pipeline(image)[0]['generated_text']
 
 
121
 
122
+ print("πŸ€– Answering question based on caption...")
123
+ response = doc_qa_pipeline(f"Question: {question}\nContext: {caption}")
 
124
 
125
  return response[0]["generated_text"]
126
 
 
145
 
146
  @app.get("/")
147
  def home():
148
+ return RedirectResponse(url="/")