Spaces:

geyongtao
/

HumanWild

Running on Zero

App Files Files Community

geyongtao commited on Jul 12, 2024

Commit

ec2cf52

verified ·

1 Parent(s): 2c3438e

Create app.py

Browse files

Files changed (1) hide show

app.py +205 -0

app.py ADDED Viewed

	@@ -0,0 +1,205 @@

+import gradio as gr
+import spaces
+import torch
+from gradio_rerun import Rerun
+import rerun as rr
+import rerun.blueprint as rrb
+from pathlib import Path
+import uuid
+from mini_dust3r.api import OptimizedResult, inferece_dust3r, log_optimized_result
+from mini_dust3r.model import AsymmetricCroCo3DStereo
+from mini_dust3r.utils.misc import (
+    fill_default_args,
+    freeze_all_params,
+    is_symmetrized,
+    interleave,
+    transpose_to_landscape,
+)
+from .head import Cat_MLP_LocalFeatures_DPT_Pts3d
+DEVICE = "cuda" if torch.cuda.is_available() else "CPU"
+# model = AsymmetricCroCo3DStereo.from_pretrained(
+#    "naver/DUSt3R_ViTLarge_BaseDecoder_512_dpt"
+# ).to(DEVICE)
+from .linear_head import LinearPts3d
+from .dpt_head import create_dpt_head
+def head_factory(head_type, output_mode, net, has_conf=False):
+    """" build a prediction head for the decoder
+    """
+    if head_type == 'linear' and output_mode == 'pts3d':
+        return LinearPts3d(net, has_conf)
+    elif head_type == 'dpt' and output_mode == 'pts3d':
+        return create_dpt_head(net, has_conf=has_conf)
+    if head_type == 'catmlp+dpt' and output_mode.startswith('pts3d+desc'):
+        local_feat_dim = int(output_mode[10:])
+        assert net.dec_depth > 9
+        l2 = net.dec_depth
+        feature_dim = 256
+        last_dim = feature_dim // 2
+        out_nchan = 3
+        ed = net.enc_embed_dim
+        dd = net.dec_embed_dim
+        return Cat_MLP_LocalFeatures_DPT_Pts3d(net, local_feat_dim=local_feat_dim, has_conf=has_conf,
+                                               num_channels=out_nchan + has_conf,
+                                               feature_dim=feature_dim,
+                                               last_dim=last_dim,
+                                               hooks_idx=[0, l2 * 2 // 4, l2 * 3 // 4, l2],
+                                               dim_tokens=[ed, dd, dd, dd],
+                                               postprocess=postprocess,
+                                               depth_mode=net.depth_mode,
+                                               conf_mode=net.conf_mode,
+                                               head_type='regression')
+    else:
+        raise NotImplementedError(f"unexpected {head_type=} and {output_mode=}")
+class AsymmetricMASt3R(AsymmetricCroCo3DStereo):
+    def __init__(self, desc_mode=('norm'), two_confs=False, desc_conf_mode=None, **kwargs):
+        self.desc_mode = desc_mode
+        self.two_confs = two_confs
+        self.desc_conf_mode = desc_conf_mode
+        super().__init__(**kwargs)
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path, **kw):
+        if os.path.isfile(pretrained_model_name_or_path):
+            return load_model(pretrained_model_name_or_path, device='cpu')
+        else:
+            return super(AsymmetricMASt3R, cls).from_pretrained(pretrained_model_name_or_path, **kw)
+    def set_downstream_head(self, output_mode, head_type, landscape_only, depth_mode, conf_mode, patch_size, img_size, **kw):
+        assert img_size[0] % patch_size == 0 and img_size[
+            1] % patch_size == 0, f'{img_size=} must be multiple of {patch_size=}'
+        self.output_mode = output_mode
+        self.head_type = head_type
+        self.depth_mode = depth_mode
+        self.conf_mode = conf_mode
+        if self.desc_conf_mode is None:
+            self.desc_conf_mode = conf_mode
+        # allocate heads
+        self.downstream_head1 = head_factory(head_type, output_mode, self, has_conf=bool(conf_mode))
+        self.downstream_head2 = head_factory(head_type, output_mode, self, has_conf=bool(conf_mode))
+        # magic wrapper
+        self.head1 = transpose_to_landscape(self.downstream_head1, activate=landscape_only)
+        self.head2 = transpose_to_landscape(self.downstream_head2, activate=landscape_only)
+model = AsymmetricMASt3R.from_pretrained(
+    "naver/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric").to(DEVICE)
+def create_blueprint(image_name_list: list[str], log_path: Path) -> rrb.Blueprint:
+    # dont show 2d views if there are more than 4 images as to not clutter the view
+    if len(image_name_list) > 4:
+        blueprint = rrb.Blueprint(
+            rrb.Horizontal(
+                rrb.Spatial3DView(origin=f"{log_path}"),
+            ),
+            collapse_panels=True,
+        )
+    else:
+        blueprint = rrb.Blueprint(
+            rrb.Horizontal(
+                contents=[
+                    rrb.Spatial3DView(origin=f"{log_path}"),
+                    rrb.Vertical(
+                        contents=[
+                            rrb.Spatial2DView(
+                                origin=f"{log_path}/camera_{i}/pinhole/",
+                                contents=[
+                                    "+ $origin/**",
+                                ],
+                            )
+                            for i in range(len(image_name_list))
+                        ]
+                    ),
+                ],
+                column_shares=[3, 1],
+            ),
+            collapse_panels=True,
+        )
+    return blueprint
+@spaces.GPU
+def predict(image_name_list: list[str] | str):
+    # check if is list or string and if not raise error
+    if not isinstance(image_name_list, list) and not isinstance(image_name_list, str):
+        raise gr.Error(
+            f"Input must be a list of strings or a string, got: {type(image_name_list)}"
+        )
+    uuid_str = str(uuid.uuid4())
+    filename = Path(f"/tmp/gradio/{uuid_str}.rrd")
+    rr.init(f"{uuid_str}")
+    log_path = Path("world")
+    if isinstance(image_name_list, str):
+        image_name_list = [image_name_list]
+    optimized_results: OptimizedResult = inferece_dust3r(
+        image_dir_or_list=image_name_list,
+        model=model,
+        device=DEVICE,
+        batch_size=1,
+    )
+    blueprint: rrb.Blueprint = create_blueprint(image_name_list, log_path)
+    rr.send_blueprint(blueprint)
+    rr.set_time_sequence("sequence", 0)
+    log_optimized_result(optimized_results, log_path)
+    rr.save(filename.as_posix())
+    return filename.as_posix()
+with gr.Blocks(
+    css=""".gradio-container {margin: 0 !important; min-width: 100%};""",
+    title="Mini-DUSt3R Demo",
+) as demo:
+    # scene state is save so that you can change conf_thr, cam_size... without rerunning the inference
+    gr.HTML('<h2 style="text-align: center;">Mini-DUSt3R Demo</h2>')
+    gr.HTML(
+        '<p style="text-align: center;">Unofficial DUSt3R demo using the mini-dust3r pip package</p>'
+    )
+    gr.HTML(
+        '<p style="text-align: center;">More info <a href="https://github.com/pablovela5620/mini-dust3r">here</a></p>'
+    )
+    with gr.Tab(label="Single Image"):
+        with gr.Column():
+            single_image = gr.Image(type="filepath", height=300)
+            run_btn_single = gr.Button("Run")
+            rerun_viewer_single = Rerun(height=900)
+            run_btn_single.click(
+                fn=predict, inputs=[single_image], outputs=[rerun_viewer_single]
+            )
+            example_single_dir = Path("examples/single_image")
+            example_single_files = sorted(example_single_dir.glob("*.png"))
+            examples_single = gr.Examples(
+                examples=example_single_files,
+                inputs=[single_image],
+                outputs=[rerun_viewer_single],
+                fn=predict,
+                cache_examples="lazy",
+            )
+    with gr.Tab(label="Multi Image"):
+        with gr.Column():
+            multi_files = gr.File(file_count="multiple")
+            run_btn_multi = gr.Button("Run")
+            rerun_viewer_multi = Rerun(height=900)
+            run_btn_multi.click(
+                fn=predict, inputs=[multi_files], outputs=[rerun_viewer_multi]
+            )
+demo.launch()