Spaces:
Runtime error
Runtime error
| # ------------------------------------------------------------------------ | |
| # Semantic SAM | |
| # Copyright (c) Microsoft, Inc. and its affiliates. | |
| # Modified from OpenSeed https://github.com/IDEA-Research/OpenSeed by Feng Li. | |
| # ------------------------------------------------------------------------ | |
| ################## | |
| # Task settings | |
| ################## | |
| WEIGHT: '' | |
| PORT: 53711 | |
| VERBOSE: true | |
| OUTPUT_DIR: '../../data/output/test' | |
| # misc | |
| LOADER: | |
| JOINT: True | |
| KEY_DATASET: 'coco' | |
| # model | |
| MODEL: | |
| NAME: interactive_mask_dino | |
| HEAD: general_head | |
| MASK_ON: false | |
| KEYPOINT_ON: false | |
| LOAD_PROPOSALS: false | |
| DIM_PROJ: 512 | |
| BACKBONE_DIM: 768 | |
| BACKGROUND: False | |
| WEIGHTS: '' | |
| TEXT: | |
| ARCH: noencoder # no language encoder is used when training only on SA-1B data | |
| NAME: transformer | |
| TOKENIZER: clip | |
| CONTEXT_LENGTH: 18 # 77 | |
| WIDTH: 512 | |
| HEADS: 8 | |
| LAYERS: 12 # 6 | |
| AUTOGRESSIVE: True | |
| BACKBONE: | |
| NAME: swin | |
| PRETRAINED: 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth' | |
| LOAD_PRETRAINED: true | |
| SWIN: | |
| PRETRAIN_IMG_SIZE: 384 | |
| PATCH_SIZE: 4 | |
| EMBED_DIM: 192 | |
| DEPTHS: [ 2, 2, 18, 2 ] | |
| NUM_HEADS: [ 6, 12, 24, 48 ] | |
| WINDOW_SIZE: 12 | |
| MLP_RATIO: 4.0 | |
| QKV_BIAS: true | |
| QK_SCALE: ~ | |
| DROP_RATE: 0.0 | |
| ATTN_DROP_RATE: 0.0 | |
| DROP_PATH_RATE: 0.3 | |
| APE: false | |
| PATCH_NORM: true | |
| USE_CHECKPOINT: false | |
| OUT_FEATURES: [ 'res2', 'res3', 'res4', 'res5' ] | |
| ENCODER: | |
| NAME: encoder_deform | |
| IGNORE_VALUE: 255 | |
| NUM_CLASSES: 1 | |
| LOSS_WEIGHT: 1.0 | |
| CONVS_DIM: 256 | |
| MASK_DIM: 256 | |
| NORM: "GN" | |
| IN_FEATURES: [ "res2", "res3", "res4", "res5" ] | |
| DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: [ "res3", "res4", "res5" ] | |
| COMMON_STRIDE: 4 | |
| TRANSFORMER_ENC_LAYERS: 6 | |
| TOTAL_NUM_FEATURE_LEVELS: 4 | |
| NUM_FEATURE_LEVELS: 3 | |
| FEATURE_ORDER: "low2high" | |
| DECODER: | |
| NAME: interactive_mask_dino | |
| TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" | |
| MASK: True | |
| BOX: True | |
| PART: True | |
| GROUNDING: | |
| ENABLED: False | |
| MAX_LEN: 5 | |
| TEXT_WEIGHT: 2.0 | |
| CLASS_WEIGHT: 0.5 | |
| CAPTION: | |
| ENABLED: False | |
| PHRASE_PROB: 0.0 | |
| SIM_THRES: 0.95 | |
| CAPTIONING: | |
| ENABLED: False | |
| STEP: 50 | |
| RETRIEVAL: | |
| ENABLED: False | |
| DIM_IMG: 768 | |
| ENSEMBLE: True | |
| OPENIMAGE: | |
| ENABLED: False | |
| NEGATIVE_SAMPLES: 5 | |
| GROUNDING: | |
| ENABLED: False | |
| MAX_LEN: 5 | |
| DEEP_SUPERVISION: True | |
| NO_OBJECT_WEIGHT: 0.1 | |
| CLASS_WEIGHT: 4.0 | |
| MASK_WEIGHT: 5.0 | |
| DICE_WEIGHT: 5.0 | |
| BOX_WEIGHT: 5.0 | |
| GIOU_WEIGHT: 2.0 | |
| IOU_WEIGHT: 1.0 | |
| COST_CLASS_WEIGHT: 4.0 | |
| COST_DICE_WEIGHT: 5.0 | |
| COST_MASK_WEIGHT: 5.0 | |
| COST_BOX_WEIGHT: 5.0 | |
| COST_GIOU_WEIGHT: 2.0 | |
| HIDDEN_DIM: 256 | |
| NUM_OBJECT_QUERIES: 0 | |
| NHEADS: 8 | |
| DROPOUT: 0.0 | |
| DIM_FEEDFORWARD: 2048 | |
| ENC_LAYERS: 0 | |
| PRE_NORM: False | |
| ENFORCE_INPUT_PROJ: False | |
| SIZE_DIVISIBILITY: 32 | |
| DEC_LAYERS: 9 # 9 decoder layers, add one for the loss on learnable query | |
| TRAIN_NUM_POINTS: 12544 | |
| OVERSAMPLE_RATIO: 3.0 | |
| IMPORTANCE_SAMPLE_RATIO: 0.75 | |
| TWO_STAGE: False | |
| INITIALIZE_BOX_TYPE: 'no' | |
| DN: seg | |
| DN_NOISE_SCALE: 0.4 | |
| DN_NUM: 100 | |
| INITIAL_PRED: False | |
| LEARN_TGT: False | |
| TOTAL_NUM_FEATURE_LEVELS: 4 | |
| SEMANTIC_CE_LOSS: False | |
| PANO_BOX_LOSS: False | |
| COCO: False | |
| O365: False | |
| SAM: True | |
| PASCAL: False | |
| RE_POINT: True | |
| NUM_INTERACTIVE_TOKENS: 6 | |
| MAX_NUM_INSTANCE: 60 | |
| TEST: | |
| SEMANTIC_ON: True | |
| INSTANCE_ON: True | |
| PANOPTIC_ON: True | |
| BOX_INTERACTIVE: False | |
| CLASSIFICATION_ON: False | |
| OVERLAP_THRESHOLD: 0.8 | |
| OBJECT_MASK_THRESHOLD: 0.25 | |
| SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE: false | |
| TEST_FOUCUS_ON_BOX: False | |
| PANO_TRANSFORM_EVAL: True | |
| PANO_TEMPERATURE: 0.06 | |
| TEST: | |
| EVAL_PERIOD: 500000 | |
| PRECISE_BN: | |
| NUM_ITER: 1 | |
| ENABLED: False | |
| AUG: | |
| ENABLED: False | |
| SAM: | |
| INPUT: | |
| MIN_SIZE_TEST: 800 | |
| MAX_SIZE_TEST: 1333 | |
| IMAGE_SIZE: 1024 | |
| MIN_SCALE: 0.99 | |
| MAX_SCALE: 1.01 | |
| DATASET_MAPPER_NAME: "sam" | |
| IGNORE_VALUE: 255 | |
| COLOR_AUG_SSD: False | |
| SIZE_DIVISIBILITY: 32 | |
| RANDOM_FLIP: "horizontal" | |
| MASK_FORMAT: "polygon" | |
| FORMAT: "RGB" | |
| CROP: | |
| ENABLED: True | |
| DATASET: | |
| DATASET: 'sam' | |
| TEST: | |
| DETECTIONS_PER_IMAGE: 100 | |
| NAME: coco_eval | |
| IOU_TYPE: ['bbox', 'segm'] | |
| USE_MULTISCALE: false | |
| BATCH_SIZE_TOTAL: 8 | |
| MODEL_FILE: '' | |
| AUG: | |
| ENABLED: False | |
| TRAIN: | |
| BATCH_SIZE_TOTAL: 1 | |
| BATCH_SIZE_PER_GPU: 1 | |
| SHUFFLE: true | |
| DATALOADER: | |
| FILTER_EMPTY_ANNOTATIONS: False | |
| NUM_WORKERS: 4 | |
| LOAD_PROPOSALS: False | |
| SAMPLER_TRAIN: "TrainingSampler" | |
| ASPECT_RATIO_GROUPING: True | |
| COCO: | |
| INPUT: | |
| MIN_SIZE_TEST: 800 | |
| MAX_SIZE_TEST: 1333 | |
| IMAGE_SIZE: 1024 | |
| MIN_SCALE: 0.1 | |
| MAX_SCALE: 2.0 | |
| DATASET_MAPPER_NAME: "coco_interactive_panoptic_lsj" | |
| IGNORE_VALUE: 255 | |
| COLOR_AUG_SSD: False | |
| SIZE_DIVISIBILITY: 32 | |
| RANDOM_FLIP: "horizontal" | |
| MASK_FORMAT: "polygon" | |
| FORMAT: "RGB" | |
| CROP: | |
| ENABLED: True | |
| DATASET: | |
| DATASET: 'coco' | |
| TEST: | |
| DETECTIONS_PER_IMAGE: 100 | |
| NAME: coco_eval | |
| IOU_TYPE: ['bbox', 'segm'] | |
| USE_MULTISCALE: false | |
| BATCH_SIZE_TOTAL: 1 | |
| MODEL_FILE: '' | |
| AUG: | |
| ENABLED: False | |
| TRAIN: | |
| BATCH_SIZE_TOTAL: 1 | |
| BATCH_SIZE_PER_GPU: 1 | |
| SHUFFLE: true | |
| DATALOADER: | |
| FILTER_EMPTY_ANNOTATIONS: False | |
| NUM_WORKERS: 2 | |
| LOAD_PROPOSALS: False | |
| SAMPLER_TRAIN: "TrainingSampler" | |
| ASPECT_RATIO_GROUPING: True | |
| VLP: | |
| INPUT: | |
| IMAGE_SIZE: 224 | |
| DATASET_MAPPER_NAME: "vlpretrain" | |
| IGNORE_VALUE: 255 | |
| COLOR_AUG_SSD: False | |
| SIZE_DIVISIBILITY: 32 | |
| MASK_FORMAT: "polygon" | |
| FORMAT: "RGB" | |
| CROP: | |
| ENABLED: True | |
| TRAIN: | |
| BATCH_SIZE_TOTAL: 2 | |
| BATCH_SIZE_PER_GPU: 2 | |
| TEST: | |
| BATCH_SIZE_TOTAL: 256 | |
| DATALOADER: | |
| FILTER_EMPTY_ANNOTATIONS: False | |
| NUM_WORKERS: 16 | |
| LOAD_PROPOSALS: False | |
| SAMPLER_TRAIN: "TrainingSampler" | |
| ASPECT_RATIO_GROUPING: True | |
| INPUT: | |
| PIXEL_MEAN: [123.675, 116.280, 103.530] | |
| PIXEL_STD: [58.395, 57.120, 57.375] | |
| DATASETS: | |
| TRAIN: ["sam_train"] | |
| # interactive segmentation evaluation. | |
| TEST: ["coco_2017_val_panoptic_with_sem_seg_interactive_jointboxpoint"] | |
| # TEST: ["sam_minival"] | |
| CLASS_CONCAT: false | |
| SIZE_DIVISIBILITY: 32 | |
| PROPOSAL_FILES_TRAIN: [] | |
| DATALOADER: | |
| FILTER_EMPTY_ANNOTATIONS: False | |
| NUM_WORKERS: 16 | |
| LOAD_PROPOSALS: False | |
| SAMPLER_TRAIN: "TrainingSampler" | |
| ASPECT_RATIO_GROUPING: True | |
| # Detectron2 training config for optimizer and lr scheduler | |
| SOLVER: | |
| BASE_LR_END: 0.0 | |
| MOMENTUM: 0.9 | |
| NESTEROV: False | |
| CHECKPOINT_PERIOD: 5000 | |
| IMS_PER_BATCH: 1 | |
| REFERENCE_WORLD_SIZE: 0 | |
| BIAS_LR_FACTOR: 1.0 | |
| WEIGHT_DECAY_BIAS: None | |
| # original | |
| BASE_LR: 0.0001 | |
| STEPS: [327778, 355092] | |
| MAX_ITER: 368750 | |
| GAMMA: 0.1 | |
| WARMUP_FACTOR: 1.0 | |
| WARMUP_ITERS: 10 | |
| WARMUP_METHOD: "linear" | |
| WEIGHT_DECAY: 0.05 | |
| OPTIMIZER: "ADAMW" | |
| LR_SCHEDULER_NAME: "WarmupMultiStepLR" | |
| LR_MULTIPLIER: | |
| backbone: 0.1 | |
| lang_encoder: 0.1 | |
| WEIGHT_DECAY_NORM: 0.0 | |
| WEIGHT_DECAY_EMBED: 0.0 | |
| CLIP_GRADIENTS: | |
| ENABLED: True | |
| CLIP_TYPE: "full_model" | |
| CLIP_VALUE: 0.01 | |
| NORM_TYPE: 2.0 | |
| AMP: | |
| ENABLED: True | |
| # Evaluation datasets | |
| ADE20K: | |
| INPUT: | |
| MIN_SIZE_TRAIN: [320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 1088, 1152, 1216, 1280] | |
| MIN_SIZE_TRAIN_SAMPLING: "choice" | |
| MIN_SIZE_TEST: 640 | |
| MAX_SIZE_TRAIN: 2560 | |
| MAX_SIZE_TEST: 2560 | |
| MASK_FORMAT: "polygon" | |
| CROP: | |
| ENABLED: True | |
| TYPE: "absolute" | |
| SIZE: [640, 640] | |
| SINGLE_CATEGORY_MAX_AREA: 1.0 | |
| IGNORE_VALUE: 255 | |
| COLOR_AUG_SSD: True | |
| SIZE_DIVISIBILITY: 640 # used in dataset mapper | |
| DATASET_MAPPER_NAME: "mask_former_panoptic" | |
| FORMAT: "RGB" | |
| DATASET: | |
| DATASET: 'ade' | |
| TRAIN: | |
| ASPECT_RATIO_GROUPING: true | |
| BATCH_SIZE_TOTAL: 16 | |
| BATCH_SIZE_PER_GPU: 2 | |
| SHUFFLE: true | |
| TEST: | |
| DETECTIONS_PER_IMAGE: 100 | |
| NAME: coco_eval | |
| IOU_TYPE: ['bbox', 'segm'] | |
| USE_MULTISCALE: false | |
| BATCH_SIZE_TOTAL: 8 | |
| MODEL_FILE: '' | |
| AUG: | |
| ENABLED: False | |
| DATALOADER: | |
| FILTER_EMPTY_ANNOTATIONS: False | |
| NUM_WORKERS: 8 | |
| LOAD_PROPOSALS: False | |
| SAMPLER_TRAIN: "TrainingSampler" | |
| ASPECT_RATIO_GROUPING: True | |
| #ADE20K: | |
| # INPUT: | |
| # MIN_SIZE_TRAIN: 640 | |
| # MIN_SIZE_TRAIN_SAMPLING: "choice" | |
| # MIN_SIZE_TEST: 640 | |
| # MAX_SIZE_TRAIN: 2560 | |
| # MAX_SIZE_TEST: 2560 | |
| # MASK_FORMAT: "polygon" | |
| # CROP: | |
| # ENABLED: True | |
| # TYPE: "absolute" | |
| # SIZE: (640, 640) | |
| # SINGLE_CATEGORY_MAX_AREA: 1.0 | |
| # COLOR_AUG_SSD: True | |
| # SIZE_DIVISIBILITY: 640 # used in dataset mapper | |
| # DATASET_MAPPER_NAME: "mask_former_panoptic" | |
| # FORMAT: "RGB" | |
| # DATASET: | |
| # DATASET: 'ade' | |
| # TEST: | |
| # BATCH_SIZE_TOTAL: 8 | |
| REF: | |
| INPUT: | |
| PIXEL_MEAN: [123.675, 116.280, 103.530] | |
| PIXEL_STD: [58.395, 57.120, 57.375] | |
| MIN_SIZE_TEST: 512 | |
| MAX_SIZE_TEST: 1024 | |
| FORMAT: "RGB" | |
| DATALOADER: | |
| FILTER_EMPTY_ANNOTATIONS: False | |
| NUM_WORKERS: 0 | |
| LOAD_PROPOSALS: False | |
| SAMPLER_TRAIN: "TrainingSampler" | |
| ASPECT_RATIO_GROUPING: False | |
| TEST: | |
| BATCH_SIZE_TOTAL: 8 | |
| SUN: | |
| INPUT: | |
| PIXEL_MEAN: [123.675, 116.280, 103.530] | |
| PIXEL_STD: [58.395, 57.120, 57.375] | |
| MIN_SIZE_TEST: 512 | |
| MAX_SIZE_TEST: 1024 | |
| DATALOADER: | |
| FILTER_EMPTY_ANNOTATIONS: False | |
| NUM_WORKERS: 0 | |
| LOAD_PROPOSALS: False | |
| SAMPLER_TRAIN: "TrainingSampler" | |
| ASPECT_RATIO_GROUPING: False | |
| TEST: | |
| BATCH_SIZE_TOTAL: 8 | |
| SCAN: | |
| INPUT: | |
| PIXEL_MEAN: [123.675, 116.280, 103.530] | |
| PIXEL_STD: [58.395, 57.120, 57.375] | |
| MIN_SIZE_TEST: 512 | |
| MAX_SIZE_TEST: 1024 | |
| DATALOADER: | |
| FILTER_EMPTY_ANNOTATIONS: False | |
| NUM_WORKERS: 0 | |
| LOAD_PROPOSALS: False | |
| SAMPLER_TRAIN: "TrainingSampler" | |
| ASPECT_RATIO_GROUPING: False | |
| TEST: | |
| BATCH_SIZE_TOTAL: 8 | |
| BDD: | |
| INPUT: | |
| PIXEL_MEAN: [123.675, 116.280, 103.530] | |
| PIXEL_STD: [58.395, 57.120, 57.375] | |
| MIN_SIZE_TEST: 800 | |
| MAX_SIZE_TEST: 1333 | |
| DATALOADER: | |
| FILTER_EMPTY_ANNOTATIONS: False | |
| NUM_WORKERS: 0 | |
| LOAD_PROPOSALS: False | |
| SAMPLER_TRAIN: "TrainingSampler" | |
| ASPECT_RATIO_GROUPING: False | |
| TEST: | |
| BATCH_SIZE_TOTAL: 8 | |
| CITY: | |
| INPUT: | |
| MIN_SIZE_TRAIN: [ 512, 614, 716, 819, 921, 1024, 1126, 1228, 1331, 1433, 1536, 1638, 1740, 1843, 1945, 2048 ] | |
| MIN_SIZE_TRAIN_SAMPLING: "choice" | |
| MIN_SIZE_TEST: 1024 | |
| MAX_SIZE_TRAIN: 4096 | |
| MAX_SIZE_TEST: 2048 | |
| CROP: | |
| ENABLED: True | |
| TYPE: "absolute" | |
| SIZE: [ 512, 1024 ] | |
| SINGLE_CATEGORY_MAX_AREA: 1.0 | |
| IGNORE_VALUE: 255 | |
| COLOR_AUG_SSD: True | |
| SIZE_DIVISIBILITY: -1 | |
| FORMAT: "RGB" | |
| DATASET_MAPPER_NAME: "mask_former_panoptic" | |
| MASK_FORMAT: "polygon" | |
| TEST: | |
| EVAL_PERIOD: 5000 | |
| BATCH_SIZE_TOTAL: 1 | |
| AUG: | |
| ENABLED: False | |
| MIN_SIZES: [ 512, 768, 1024, 1280, 1536, 1792 ] | |
| MAX_SIZE: 4096 | |
| FLIP: True | |
| DATALOADER: | |
| FILTER_EMPTY_ANNOTATIONS: True | |
| NUM_WORKERS: 2 | |
| LOAD_PROPOSALS: False | |
| SAMPLER_TRAIN: "TrainingSampler" | |
| ASPECT_RATIO_GROUPING: True | |
| TRAIN: | |
| ASPECT_RATIO_GROUPING: true | |
| BATCH_SIZE_TOTAL: 2 | |
| BATCH_SIZE_PER_GPU: 2 | |
| SHUFFLE: true | |
| PSACAL_PART: | |
| INPUT: | |
| MIN_SIZE_TEST: 800 | |
| MAX_SIZE_TEST: 1333 | |
| IMAGE_SIZE: 1024 | |
| MIN_SCALE: 0.1 | |
| MAX_SCALE: 2.0 | |
| DATASET_MAPPER_NAME: "pascal_part_lsj" | |
| IGNORE_VALUE: 255 | |
| COLOR_AUG_SSD: False | |
| SIZE_DIVISIBILITY: 32 | |
| RANDOM_FLIP: "horizontal" | |
| MASK_FORMAT: "polygon" | |
| FORMAT: "RGB" | |
| CROP: | |
| ENABLED: True | |
| MODEL: | |
| MASK_ON: True | |
| KEYPOINT_ON: False | |
| LOAD_PROPOSALS: False | |
| # DATASET: | |
| # DATASET: 'coco' | |
| TEST: | |
| DETECTIONS_PER_IMAGE: 100 | |
| NAME: coco_eval | |
| IOU_TYPE: ['bbox', 'segm'] | |
| USE_MULTISCALE: false | |
| BATCH_SIZE_TOTAL: 8 | |
| MODEL_FILE: '' | |
| AUG: | |
| ENABLED: False | |
| TRAIN: | |
| BATCH_SIZE_TOTAL: 1 | |
| BATCH_SIZE_PER_GPU: 1 | |
| SHUFFLE: true | |
| DATALOADER: | |
| FILTER_EMPTY_ANNOTATIONS: False | |
| NUM_WORKERS: 2 | |
| LOAD_PROPOSALS: False | |
| SAMPLER_TRAIN: "TrainingSampler" | |
| ASPECT_RATIO_GROUPING: True | |