Spaces:

facebook
/

sapiens-depth

Paused

App Files Files Community

rawalkhirodkar committed on Sep 9, 2024

Commit

b869a3a

0 Parent(s):

Add initial commit

Browse files

Files changed (24) hide show

.gitattributes +40 -0
README.md +13 -0
app.py +326 -0
assets/checkpoints/sapiens_0.3b_render_people_epoch_100_torchscript.pt2 +3 -0
assets/checkpoints/sapiens_0.6b_render_people_epoch_70_torchscript.pt2 +3 -0
assets/checkpoints/sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2 +3 -0
assets/checkpoints/sapiens_1b_render_people_epoch_88_torchscript.pt2 +3 -0
assets/checkpoints/sapiens_1b_seg_foreground_epoch_8_torchscript.pt2 +3 -0
assets/checkpoints/sapiens_2b_render_people_epoch_25_torchscript.pt2 +3 -0
assets/images/68204.png +3 -0
assets/images/68210.png +3 -0
assets/images/68658.png +3 -0
assets/images/68666.png +3 -0
assets/images/68691.png +3 -0
assets/images/68956.png +3 -0
assets/images/pexels-amresh444-17315601.png +3 -0
assets/images/pexels-gabby-k-6311686.png +3 -0
assets/images/pexels-julia-m-cameron-4145040.png +3 -0
assets/images/pexels-marcus-aurelius-6787357.png +3 -0
assets/images/pexels-mo-saeed-3616599-5409085.png +3 -0
assets/images/pexels-riedelmax-27355495.png +3 -0
assets/images/pexels-sergeymakashin-5368660.png +3 -0
assets/images/pexels-vinicius-wiesehofer-289347-4219918.png +3 -0
requirements.txt +8 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,40 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.pt2 filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+title: Sapiens Depth
+emoji: 🦀
+colorFrom: red
+colorTo: yellow
+sdk: gradio
+sdk_version: 4.42.0
+app_file: app.py
+pinned: false
+license: cc-by-nc-4.0
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,326 @@

+import os
+import io
+import gradio as gr
+import numpy as np
+import spaces
+import torch
+import torch.nn.functional as F
+from torchvision import transforms
+from PIL import Image
+import matplotlib.pyplot as plt
+import tempfile
+class Config:
+    """Static paths and checkpoint file names used by the demo."""
+
+    # Asset locations are resolved relative to the directory holding this file.
+    ASSETS_DIR = os.path.join(os.path.dirname(__file__), "assets")
+    CHECKPOINTS_DIR = os.path.join(ASSETS_DIR, "checkpoints")
+
+    # Depth checkpoints; the keys are the choices of the "Depth Model Size" dropdown.
+    CHECKPOINTS = {
+        "0.3b": "sapiens_0.3b_render_people_epoch_100_torchscript.pt2",
+        "0.6b": "sapiens_0.6b_render_people_epoch_70_torchscript.pt2",
+        "1b": "sapiens_1b_render_people_epoch_88_torchscript.pt2",
+        "2b": "sapiens_2b_render_people_epoch_25_torchscript.pt2",
+    }
+
+    # Segmentation checkpoints; the keys are the choices of the
+    # "Background Removal Model" dropdown. A value of None means there is no
+    # checkpoint to load for that choice.
+    SEG_CHECKPOINTS = {
+        "fg-bg-1b (recommended)": "sapiens_1b_seg_foreground_epoch_8_torchscript.pt2",
+        "no-bg-removal": None,
+        "part-seg-1b": "sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2",
+    }
+class ModelManager:
+    """Helpers for loading TorchScript checkpoints and running them on a tensor."""
+
+    @staticmethod
+    def load_model(checkpoint_name: str):
+        """Load a checkpoint from ``Config.CHECKPOINTS_DIR`` onto the GPU.
+
+        Args:
+            checkpoint_name: File name of the checkpoint, or ``None``.
+
+        Returns:
+            The loaded module, switched to eval mode and moved to ``"cuda"``,
+            or ``None`` when ``checkpoint_name`` is ``None``.
+        """
+        if checkpoint_name is not None:
+            scripted = torch.jit.load(os.path.join(Config.CHECKPOINTS_DIR, checkpoint_name))
+            scripted.eval()
+            scripted.to("cuda")
+            return scripted
+        return None
+
+    @staticmethod
+    def run_model(model, input_tensor, height, width):
+        """Run ``model`` on ``input_tensor`` and resize the output to ``(height, width)``.
+
+        The forward pass and the bilinear resize both execute under
+        ``torch.inference_mode()``.
+
+        Args:
+            model: Callable applied to ``input_tensor``.
+            input_tensor: Tensor passed to ``model`` unchanged.
+            height: Target output height.
+            width: Target output width.
+
+        Returns:
+            The model output, bilinearly interpolated to ``(height, width)``.
+        """
+        with torch.inference_mode():
+            raw_output = model(input_tensor)
+            return F.interpolate(
+                raw_output, size=(height, width), mode="bilinear", align_corners=False
+            )
+class ImageProcessor:
+    """Runs depth estimation, with optional background removal, on a PIL image."""
+
+    def __init__(self):
+        # Resize to a fixed 1024x768 (height x width), convert to a tensor, then
+        # normalize per channel. The mean/std constants are divided by 255
+        # because ToTensor has already scaled pixel values to the 0-1 range.
+        self.transform_fn = transforms.Compose([
+            transforms.Resize((1024, 768)),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[123.5/255, 116.5/255, 103.5/255], std=[58.5/255, 57.0/255, 57.5/255]),
+        ])
+
+    @spaces.GPU
+    def process_image(self, image: Image.Image, depth_model_name: str, seg_model_name: str):
+        """Estimate depth for ``image`` and return a preview plus the raw map on disk.
+
+        Args:
+            image: Input picture. It is converted to RGB before preprocessing.
+            depth_model_name: Key into ``Config.CHECKPOINTS``.
+            seg_model_name: Key into ``Config.SEG_CHECKPOINTS``. Entries whose
+                checkpoint is ``None`` (``"no-bg-removal"``) skip masking.
+
+        Returns:
+            A ``(preview, npy_path)`` tuple: the colorized depth map as a PIL
+            image, and the path of a temporary ``.npy`` file holding the depth
+            map at the input resolution. Pixels masked out as background are NaN.
+
+        Raises:
+            gr.Error: If ``image`` is ``None``.
+        """
+        if image is None:
+            # Surface a readable message in the UI instead of an opaque
+            # failure inside the preprocessing transforms.
+            raise gr.Error("Please provide an input image first.")
+        # Normalize() is configured for exactly three channels, so drop any
+        # alpha channel and expand grayscale input.
+        image = image.convert("RGB")
+
+        depth_model = ModelManager.load_model(Config.CHECKPOINTS[depth_model_name])
+        input_tensor = self.transform_fn(image).unsqueeze(0).to("cuda")
+        depth_output = ModelManager.run_model(depth_model, input_tensor, image.height, image.width)
+        depth_map = depth_output.squeeze().cpu().numpy()
+
+        # load_model returns None for a None checkpoint, so the "no background
+        # removal" choice is driven by Config rather than a repeated literal.
+        seg_model = ModelManager.load_model(Config.SEG_CHECKPOINTS[seg_model_name])
+        if seg_model is not None:
+            seg_output = ModelManager.run_model(seg_model, input_tensor, image.height, image.width)
+            # Pixels whose arg-max class index is 0 are treated as background.
+            foreground = (seg_output.argmax(dim=1) > 0).cpu().numpy()[0]
+            depth_map[~foreground] = np.nan
+
+        depth_colored = self.colorize_depth_map(depth_map)
+
+        # NamedTemporaryFile creates the file atomically. tempfile.mktemp only
+        # returns a name and is deprecated because another process can claim
+        # that name before it is opened. delete=False keeps the file so it can
+        # be served after this function returns.
+        with tempfile.NamedTemporaryFile(suffix=".npy", delete=False) as npy_file:
+            np.save(npy_file, depth_map)
+        return Image.fromarray(depth_colored), npy_file.name
+
+    @staticmethod
+    def colorize_depth_map(depth_map):
+        """Render a 2-D depth map as an RGB ``uint8`` image with the inferno colormap.
+
+        Non-NaN depths are min-max normalized and inverted, so the smallest
+        depth maps to the top of the colormap. NaN pixels map to the bottom of
+        the colormap. If every pixel is NaN, an all-zero image is returned.
+        If all valid depths are equal, they are drawn at the top of the
+        colormap instead of dividing by a zero range.
+
+        Args:
+            depth_map: 2-D float array; NaN marks pixels to ignore.
+
+        Returns:
+            Array of shape ``(H, W, 3)`` and dtype ``uint8``.
+        """
+        valid = ~np.isnan(depth_map)
+        if not valid.any():
+            return np.zeros((depth_map.shape[0], depth_map.shape[1], 3), dtype=np.uint8)
+
+        depth_foreground = depth_map[valid]
+        min_val, max_val = depth_foreground.min(), depth_foreground.max()
+        span = max_val - min_val
+        if span > 0:
+            depth_normalized = (depth_map - min_val) / span
+        else:
+            # Flat depth: 0/0 would turn every valid pixel into NaN and make
+            # the foreground indistinguishable from the background.
+            depth_normalized = np.where(valid, 0.0, np.nan)
+        depth_normalized = 1 - depth_normalized
+        depth_normalized = np.nan_to_num(depth_normalized, nan=0)
+        cmap = plt.get_cmap('inferno')
+        # The colormap returns RGBA floats in 0-1; keep RGB as 8-bit values.
+        return (cmap(depth_normalized) * 255).astype(np.uint8)[:, :, :3]
+class GradioInterface:
+    """Builds the Gradio Blocks UI around an ``ImageProcessor``."""
+
+    def __init__(self):
+        self.image_processor = ImageProcessor()
+
+    def create_interface(self):
+        """Assemble and return the ``gr.Blocks`` demo.
+
+        The layout is a styled HTML header, an input column (image, model
+        dropdowns, example gallery) and an output column (colorized depth
+        image, downloadable ``.npy`` file, Run button).
+
+        Returns:
+            The constructed ``gr.Blocks`` instance; the caller launches it.
+        """
+        # CSS injected verbatim into the header HTML below.
+        app_styles = """
+        <style>
+            /* Global Styles */
+            body, #root {
+                font-family: Helvetica, Arial, sans-serif;
+                background-color: #1a1a1a;
+                color: #fafafa;
+            }
+            /* Header Styles */
+            .app-header {
+                background: linear-gradient(45deg, #1a1a1a 0%, #333333 100%);
+                padding: 24px;
+                border-radius: 8px;
+                margin-bottom: 24px;
+                text-align: center;
+            }
+            .app-title {
+                font-size: 48px;
+                margin: 0;
+                color: #fafafa;
+            }
+            .app-subtitle {
+                font-size: 24px;
+                margin: 8px 0 16px;
+                color: #fafafa;
+            }
+            .app-description {
+                font-size: 16px;
+                line-height: 1.6;
+                opacity: 0.8;
+                margin-bottom: 24px;
+            }
+            /* Button Styles */
+            .publication-links {
+                display: flex;
+                justify-content: center;
+                flex-wrap: wrap;
+                gap: 8px;
+                margin-bottom: 16px;
+            }
+            .publication-link {
+                display: inline-flex;
+                align-items: center;
+                padding: 8px 16px;
+                background-color: #333;
+                color: #fff !important;
+                text-decoration: none !important;
+                border-radius: 20px;
+                font-size: 14px;
+                transition: background-color 0.3s;
+            }
+            .publication-link:hover {
+                background-color: #555;
+            }
+            .publication-link i {
+                margin-right: 8px;
+            }
+            /* Content Styles */
+            .content-container {
+                background-color: #2a2a2a;
+                border-radius: 8px;
+                padding: 24px;
+                margin-bottom: 24px;
+            }
+            /* Image Styles */
+            .image-preview img {
+                max-width: 512px;
+                max-height: 512px;
+                margin: 0 auto;
+                border-radius: 4px;
+                display: block;
+                object-fit: contain;
+            }
+            /* Control Styles */
+            .control-panel {
+                background-color: #333;
+                padding: 16px;
+                border-radius: 8px;
+                margin-top: 16px;
+            }
+            /* Gradio Component Overrides */
+            .gr-button {
+                background-color: #4a4a4a;
+                color: #fff;
+                border: none;
+                border-radius: 4px;
+                padding: 8px 16px;
+                cursor: pointer;
+                transition: background-color 0.3s;
+            }
+            .gr-button:hover {
+                background-color: #5a5a5a;
+            }
+            .gr-input, .gr-dropdown {
+                background-color: #3a3a3a;
+                color: #fff;
+                border: 1px solid #4a4a4a;
+                border-radius: 4px;
+                padding: 8px;
+            }
+            .gr-form {
+                background-color: transparent;
+            }
+            .gr-panel {
+                border: none;
+                background-color: transparent;
+            }
+            /* Override any conflicting styles from Bulma */
+            .button.is-normal.is-rounded.is-dark {
+                color: #fff !important;
+                text-decoration: none !important;
+            }
+        </style>
+        """
+        # Header: external stylesheets, the CSS above, title text and link buttons.
+        # NOTE(review): the Bulma stylesheet URL below contains "[email protected]",
+        # which looks like an email-obfuscation artifact; it presumably should be
+        # "bulma@<version>" - confirm against the real file before deploying.
+        header_html = f"""
+        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/css/bulma.min.css">
+        <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
+        {app_styles}
+        <div class="app-header">
+            <h1 class="app-title">Sapiens: Depth Estimation</h1>
+            <h2 class="app-subtitle">ECCV 2024 (Oral)</h2>
+            <p class="app-description">
+                Meta presents Sapiens, foundation models for human tasks pretrained on 300 million human images.
+                This demo showcases the finetuned depth model.
+            </p>
+            <div class="publication-links">
+                <a href="https://arxiv.org/abs/2408.12569" class="publication-link">
+                    <i class="fas fa-file-pdf"></i>arXiv
+                </a>
+                <a href="https://github.com/facebookresearch/sapiens" class="publication-link">
+                    <i class="fab fa-github"></i>Code
+                </a>
+                <a href="https://about.meta.com/realitylabs/codecavatars/sapiens/" class="publication-link">
+                    <i class="fas fa-globe"></i>Meta
+                </a>
+                <a href="https://rawalkhirodkar.github.io/sapiens" class="publication-link">
+                    <i class="fas fa-chart-bar"></i>Results
+                </a>
+            </div>
+            <div class="publication-links">
+                <a href="https://huggingface.co/spaces/facebook/sapiens_pose" class="publication-link">
+                    <i class="fas fa-user"></i>Demo-Pose
+                </a>
+                <a href="https://huggingface.co/spaces/facebook/sapiens_seg" class="publication-link">
+                    <i class="fas fa-puzzle-piece"></i>Demo-Seg
+                </a>
+                <a href="https://huggingface.co/spaces/facebook/sapiens_depth" class="publication-link">
+                    <i class="fas fa-cube"></i>Demo-Depth
+                </a>
+                <a href="https://huggingface.co/spaces/facebook/sapiens_normal" class="publication-link">
+                    <i class="fas fa-vector-square"></i>Demo-Normal
+                </a>
+            </div>
+        </div>
+        """
+        # Script handed to gr.Blocks: reloads the page with ?__theme=dark
+        # unless that query parameter is already set.
+        js_func = """
+        function refresh() {
+            const url = new URL(window.location);
+            if (url.searchParams.get('__theme') !== 'dark') {
+                url.searchParams.set('__theme', 'dark');
+                window.location.href = url.href;
+            }
+        }
+        """
+
+        def process_image(image, depth_model_name, seg_model_name):
+            # Thin wrapper so the click handler is a plain function that
+            # forwards to the processor instance.
+            result, npy_path = self.image_processor.process_image(image, depth_model_name, seg_model_name)
+            return result, npy_path
+
+        # os.listdir returns entries in arbitrary order; sort them so the
+        # example gallery is laid out the same way on every start.
+        images_dir = os.path.join(Config.ASSETS_DIR, "images")
+        example_images = [os.path.join(images_dir, img) for img in sorted(os.listdir(images_dir))]
+
+        with gr.Blocks(js=js_func, theme=gr.themes.Default()) as demo:
+            gr.HTML(header_html)
+            with gr.Row(elem_classes="content-container"):
+                with gr.Column():
+                    input_image = gr.Image(label="Input Image", type="pil", format="png", elem_classes="image-preview")
+                    with gr.Row(elem_classes="control-panel"):
+                        depth_model_name = gr.Dropdown(
+                            label="Depth Model Size",
+                            choices=list(Config.CHECKPOINTS.keys()),
+                            value="1b",
+                        )
+                        seg_model_name = gr.Dropdown(
+                            label="Background Removal Model",
+                            choices=list(Config.SEG_CHECKPOINTS.keys()),
+                            value="fg-bg-1b (recommended)",
+                        )
+                    gr.Examples(
+                        inputs=input_image,
+                        examples_per_page=14,
+                        examples=example_images,
+                    )
+                with gr.Column():
+                    result_image = gr.Image(label="Depth Estimation Result", type="pil", elem_classes="image-preview")
+                    npy_output = gr.File(label="Output (.npy). Note: Background depth is NaN.")
+                    run_button = gr.Button("Run", elem_classes="gr-button")
+            run_button.click(
+                fn=process_image,
+                inputs=[input_image, depth_model_name, seg_model_name],
+                outputs=[result_image, npy_output],
+            )
+        return demo
+def main():
+    """Enable TF32 where the GPU supports it, then build and launch the demo."""
+    # TF32 is switched on only when CUDA is available and device 0 reports a
+    # compute-capability major version of 8 or higher.
+    tf32_capable = torch.cuda.is_available() and torch.cuda.get_device_properties(0).major >= 8
+    if tf32_capable:
+        for backend in (torch.backends.cuda.matmul, torch.backends.cudnn):
+            backend.allow_tf32 = True
+
+    demo = GradioInterface().create_interface()
+    demo.launch(share=False)
+
+
+if __name__ == "__main__":
+    main()

assets/checkpoints/sapiens_0.3b_render_people_epoch_100_torchscript.pt2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65054e6b6083171b1edf39a9786e34a47f3bfb28c1e0098f73de2ef823b7286e
+size 1280489853

assets/checkpoints/sapiens_0.6b_render_people_epoch_70_torchscript.pt2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f18bef54e4902810172bec9877d3f4d287d5e087a1704150ac73ed09a6097892
+size 2600455553

assets/checkpoints/sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:33bba30f3de8d9cfd44e4eaa4817b1bfdd98c188edfc87fa7cc031ba0f4edc17
+size 4716314057

assets/checkpoints/sapiens_1b_render_people_epoch_88_torchscript.pt2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ff0c7a8fa48f1d30f97a49aee05abb905f64ee4fe6a35efa805821be5756a8c
+size 4625326609

assets/checkpoints/sapiens_1b_seg_foreground_epoch_8_torchscript.pt2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88d2f1590fe040189ad5e9b689099fe3e7a242b4b14bc4d53cff101c20818946
+size 4716180479

assets/checkpoints/sapiens_2b_render_people_epoch_25_torchscript.pt2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d1878b3f06b867f5d15b7bc7468b0114777cd0c28ceaef90c78ef777e0093b2
+size 8606159777

assets/images/68204.png ADDED Viewed

Git LFS Details

SHA256: 9b0268cb801ed164864a4b5f6d131e0ac5cc2fbd149a6467d5d0c97da47122c2
Pointer size: 132 Bytes
Size of remote file: 4.29 MB

assets/images/68210.png ADDED Viewed

Git LFS Details

SHA256: dbe5f80498af4ebd1ff09ae4184f37c20ba981e53bd554c3cc78d39ae0ee7fd7
Pointer size: 132 Bytes
Size of remote file: 3.93 MB

assets/images/68658.png ADDED Viewed

Git LFS Details

SHA256: 61a68b619bd17235e683324f2826ce0693322e45ab8c86f1c057851ecb333ac7
Pointer size: 132 Bytes
Size of remote file: 5.1 MB

assets/images/68666.png ADDED Viewed

Git LFS Details

SHA256: ea3047e6c2ccb485fdb3966aa2325e803cbf49c27c0bff00287b44bc16f18914
Pointer size: 132 Bytes
Size of remote file: 4.56 MB

assets/images/68691.png ADDED Viewed

Git LFS Details

SHA256: fae39e4055c1b297af7068cdddfeeba8d685363281b839d8c5afac1980204b57
Pointer size: 132 Bytes
Size of remote file: 3.74 MB

assets/images/68956.png ADDED Viewed

Git LFS Details

SHA256: eee1f27082b10999d0fa848121ecb06cda3386b1a864b9aa0f59ae78261f8908
Pointer size: 132 Bytes
Size of remote file: 4.15 MB

assets/images/pexels-amresh444-17315601.png ADDED Viewed

Git LFS Details

SHA256: 4e17ee1b229147e4b52e8348a6ef426bc9e9a2f90738e776e15b26b325abb9b3
Pointer size: 132 Bytes
Size of remote file: 3.5 MB

assets/images/pexels-gabby-k-6311686.png ADDED Viewed

Git LFS Details

SHA256: 3f10eded3fb05ab04b963f7b9fd2e183d8d4e81b20569b1c6b0653549639421f
Pointer size: 132 Bytes
Size of remote file: 3.65 MB

assets/images/pexels-julia-m-cameron-4145040.png ADDED Viewed

Git LFS Details

SHA256: 459cf0280667b028ffbca16aa11188780d7a0205c0defec02916ff3cbaeecb72
Pointer size: 132 Bytes
Size of remote file: 2.92 MB

assets/images/pexels-marcus-aurelius-6787357.png ADDED Viewed

Git LFS Details

SHA256: 7d35452f76492125eaf7d5783aa9fd6b0d5990ebe0579fe9dfd58a9d634f4955
Pointer size: 132 Bytes
Size of remote file: 3.3 MB

assets/images/pexels-mo-saeed-3616599-5409085.png ADDED Viewed

Git LFS Details

SHA256: 7c1ca7afd6c2a654e94ef59d5fb56fca4f3cde5fb5216f6b218c34a7b8c143dc
Pointer size: 132 Bytes
Size of remote file: 3.13 MB

assets/images/pexels-riedelmax-27355495.png ADDED Viewed

Git LFS Details

SHA256: 4141d2f5f718f162ea1f6710c06b28b5cb51fd69598fde35948f8f3491228164
Pointer size: 132 Bytes
Size of remote file: 3.73 MB

assets/images/pexels-sergeymakashin-5368660.png ADDED Viewed

Git LFS Details

SHA256: af8f5a8f26dd102d87d94c1be36ec903791fe8e6d951c68ebb9ebcfc6d7397bb
Pointer size: 132 Bytes
Size of remote file: 4.08 MB

assets/images/pexels-vinicius-wiesehofer-289347-4219918.png ADDED Viewed

Git LFS Details

SHA256: a6eef5eee15b81fe65ea95627e9a46040b9889466689b3c1ca6ed273e02fe84f
Pointer size: 132 Bytes
Size of remote file: 3.63 MB

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+gradio
+numpy
+torch
+torchvision
+matplotlib
+pillow
+spaces
+opencv-python