AndreasLH commited on
Commit
8b8567f
·
1 Parent(s): 163c8c1

make the image rescale

Browse files
Files changed (1) hide show
  1. app.py +14 -17
app.py CHANGED
@@ -19,7 +19,7 @@ from cubercnn.modeling.backbone import build_dla_from_vision_fpn_backbone # this
19
  from cubercnn import util, vis
20
 
21
 
22
- def do_test(im, threshold, model_str):
23
  if im is None:
24
  return None, None
25
  model = load_model_config()
@@ -28,8 +28,8 @@ def do_test(im, threshold, model_str):
28
 
29
  thres = threshold
30
 
31
- min_size = 512
32
- max_size = 4096
33
  augmentations = T.AugmentationList([T.ResizeShortestEdge(min_size, max_size, "choice")])
34
 
35
  category_path = 'configs/category_meta.json'
@@ -40,9 +40,11 @@ def do_test(im, threshold, model_str):
40
 
41
  metadata = util.load_json(category_path)
42
  cats = metadata['thing_classes']
43
-
44
- image_shape = im.shape[:2] # h, w
 
45
 
 
46
  h, w = image_shape
47
 
48
  focal_length_ndc = 4.0
@@ -56,13 +58,9 @@ def do_test(im, threshold, model_str):
56
  [0.0, 0.0, 1.0]
57
  ])
58
 
59
- # dummy
60
- aug_input = T.AugInput(im)
61
- tfms = augmentations(aug_input)
62
- image = aug_input.image
63
  # model.to(device)
64
  batched = [{
65
- 'image': torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))),
66
  'height': image_shape[0], 'width': image_shape[1], 'K': K
67
  }]
68
  with torch.no_grad():
@@ -133,7 +131,7 @@ if __name__ == "__main__":
133
  return model
134
 
135
  title = 'Weak Cube R-CNN'
136
- description = "This showcases our model [`Weak Cube RCNN`](https://arxiv.org/abs/2504.13297). To create Weak Cube RCNN, we modify the framework by replacing its 3D loss functions with ones based solely on 2D annotations. Our methods rely heavily on external, strong generalised deep learning models to infer spatial information in scenes. Experimental results show that all models perform comparably to an annotation time-equalised Cube R-CNN, whereof the pseudo ground truth method achieves the highest accuracy. The results show the methods' ability to understand scenes in 3D, providing satisfactory visual results. Although not precise enough for centimetre-accurate measurements, the methods provide a solid foundation for further research. \n Check out the code on [GitHub](https://github.com/AndreasLH/Weak-Cube-R-CNN)"
137
 
138
 
139
  demo = gr.Interface(
@@ -141,15 +139,14 @@ if __name__ == "__main__":
141
  fn=do_test,
142
  inputs=[
143
  gr.Image(label="Input Image"),
144
- gr.Slider(0, 1, value=0.25, label="Threshold", info="Only show predictions with a confidence above this threshold"),
145
- gr.Textbox(value="Weak Cube R-CNN", visible=False, render=False)
146
  ],
147
  outputs=[gr.Image(label="Predictions"), gr.Image(label="Top view")],
148
  description=description,
149
- allow_flagging='never',
150
- examples=[["examples/ex2.jpg"],[],[],["examples/ex1.jpg"]],
151
  )
152
 
153
 
154
- demo.launch(server_name="0.0.0.0", server_port=7860)
155
- # demo.launch()
 
19
  from cubercnn import util, vis
20
 
21
 
22
+ def do_test(im, threshold):
23
  if im is None:
24
  return None, None
25
  model = load_model_config()
 
28
 
29
  thres = threshold
30
 
31
+ min_size = 500
32
+ max_size = 1000
33
  augmentations = T.AugmentationList([T.ResizeShortestEdge(min_size, max_size, "choice")])
34
 
35
  category_path = 'configs/category_meta.json'
 
40
 
41
  metadata = util.load_json(category_path)
42
  cats = metadata['thing_classes']
43
+ aug_input = T.AugInput(im)
44
+ tfms = augmentations(aug_input)
45
+ im = tfms.apply_image(im)
46
 
47
+ image_shape = im.shape[:2] # h, w
48
  h, w = image_shape
49
 
50
  focal_length_ndc = 4.0
 
58
  [0.0, 0.0, 1.0]
59
  ])
60
 
 
 
 
 
61
  # model.to(device)
62
  batched = [{
63
+ 'image': torch.as_tensor(np.ascontiguousarray(im.transpose(2, 0, 1))),
64
  'height': image_shape[0], 'width': image_shape[1], 'K': K
65
  }]
66
  with torch.no_grad():
 
131
  return model
132
 
133
  title = 'Weak Cube R-CNN'
134
+ description = "This showcases our model [`Weak Cube RCNN`](https://arxiv.org/abs/2504.13297). To create Weak Cube RCNN, we modify the framework by replacing its 3D loss functions with ones based solely on 2D annotations. Our methods rely heavily on external, strong generalised deep learning models to infer spatial information in scenes. Experimental results show that all models perform comparably to an annotation time-equalised Cube R-CNN, whereof the pseudo ground truth method achieves the highest accuracy. The results show the methods' ability to understand scenes in 3D, providing satisfactory visual results. Although not precise enough for centimetre-accurate measurements, the methods provide a solid foundation for further research. \n Check out the code on [`GitHub`](https://github.com/AndreasLH/Weak-Cube-R-CNN)"
135
 
136
 
137
  demo = gr.Interface(
 
139
  fn=do_test,
140
  inputs=[
141
  gr.Image(label="Input Image"),
142
+ gr.Slider(0, 1, value=0.5, label="Threshold", info="Only show predictions with a confidence above this threshold"),
 
143
  ],
144
  outputs=[gr.Image(label="Predictions"), gr.Image(label="Top view")],
145
  description=description,
146
+ flagging_mode="never",
147
+ examples=[["examples/ex2.jpg"],["examples/ex1.jpg"]],
148
  )
149
 
150
 
151
+ # demo.launch(server_name="0.0.0.0", server_port=7860)
152
+ demo.launch()