submission-template-mobilevit

Sleeping

App Files Files Community

Guill-Bla committed on Jan 30

Commit

de2943e

verified ·

1 Parent(s): bb54cea

Update tasks/image.py

Browse files

Files changed (1) hide show

tasks/image.py +36 -20

tasks/image.py CHANGED Viewed

@@ -36,6 +36,29 @@ model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobile
 model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
 model.eval()
 def preprocess(image):
     # Ensure input image is resized to a fixed size (512, 512)
     image = image.resize((512, 512))
@@ -153,39 +176,32 @@ async def evaluate_image(request: ImageEvaluationRequest):
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline with your model inference
     #--------------------------------------------------------------------------------------------
-    dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)
     predictions = []
     true_labels = []
     pred_boxes = []
     true_boxes_list = []
-    for batch_idx, batch_examples in enumerate(dataloader):
-        # Extract images and preprocess
-        images = [example["image"] for example in batch_examples]
-        annotations = [example.get("annotations", "").strip() for example in batch_examples]
-        has_smoke_list = [len(annotation) > 0 for annotation in annotations]
-        true_labels.extend([1 if has_smoke else 0 for has_smoke in has_smoke_list])
-        # Preprocess images and extract features
-        preprocessed_images = preprocess_batch(images)
-        image_inputs = feature_extractor(images=preprocessed_images, return_tensors="pt", padding=True).pixel_values
         # Perform inference
         with torch.no_grad():
             outputs = model(pixel_values=image_inputs)
             logits = outputs.logits
-        # Threshold and process the segmentation masks
         probabilities = torch.sigmoid(logits)
         batch_predicted_masks = (probabilities[:, 1, :, :] > 0.30).cpu().numpy().astype(np.uint8)
-        for mask in batch_predicted_masks:
-            mask_resized = cv2.resize(mask, (512, 512), interpolation=cv2.INTER_NEAREST)
-            predicted_boxes = get_bounding_boxes_from_mask(mask_resized)
             pred_boxes.append(predicted_boxes)
             # Append smoke detection based on bounding boxes
             predictions.append(1 if len(predicted_boxes) > 0 else 0)

 model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
 model.eval()
+from torch.utils.data import Dataset
+class SmokeDataset(Dataset):
+    def __init__(self, dataset):
+        self.dataset = dataset
+    def __len__(self):
+        return len(self.dataset)
+    def __getitem__(self, idx):
+        example = self.dataset[idx]
+        image = example["image"]
+        annotation = example.get("annotations", "").strip()
+        # Resize and preprocess the image directly here
+        image = image.resize((512, 512))
+        image = np.array(image)[:, :, ::-1]  # Convert RGB to BGR
+        image = np.array(image, dtype=np.float32) / 255.0
+        # Return both the preprocessed image and annotation
+        return torch.tensor(image).permute(2, 0, 1), annotation
 def preprocess(image):
     # Ensure input image is resized to a fixed size (512, 512)
     image = image.resize((512, 512))
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline with your model inference
     #--------------------------------------------------------------------------------------------
+    smoke_dataset = SmokeDataset(test_dataset)
+    dataloader = DataLoader(smoke_dataset, batch_size=16, shuffle=False)
     predictions = []
     true_labels = []
     pred_boxes = []
     true_boxes_list = []
+    for batch_images, batch_annotations in dataloader:
+        image_inputs = feature_extractor(images=batch_images, return_tensors="pt", padding=True).pixel_values
         # Perform inference
         with torch.no_grad():
             outputs = model(pixel_values=image_inputs)
             logits = outputs.logits
         probabilities = torch.sigmoid(logits)
         batch_predicted_masks = (probabilities[:, 1, :, :] > 0.30).cpu().numpy().astype(np.uint8)
+        # Post-process predictions and compute metrics
+        for mask, annotation in zip(batch_predicted_masks, batch_annotations):
+            predicted_mask_resized = cv2.resize(mask, (512, 512), interpolation=cv2.INTER_NEAREST)
+            predicted_boxes = get_bounding_boxes_from_mask(predicted_mask_resized)
             pred_boxes.append(predicted_boxes)
+            predictions.append(1 if len(predicted_boxes) > 0 else 0)
+            true_labels.append(1 if annotation else 0)
             # Append smoke detection based on bounding boxes
             predictions.append(1 if len(predicted_boxes) > 0 else 0)