diff --git a/configs/calibration_benchmark.yaml b/configs/calibration_benchmark.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..830e73f30e4ad2a2fd3e0b62f3dcaa7baebe5471
--- /dev/null
+++ b/configs/calibration_benchmark.yaml
@@ -0,0 +1,23 @@
+defaults:
+  - machine: aws
+  - model: default
+  - dataset: default
+  - _self_
+
+output_dir: ${hydra:run.dir}
+root_data_dir: ${machine.root_data_dir}
+mapanything_dataset_metadata_dir: ${machine.mapanything_dataset_metadata_dir}
+root_pretrained_checkpoints_dir: ${machine.root_pretrained_checkpoints_dir}
+root_experiments_dir: ${machine.root_experiments_dir}
+root_uniception_pretrained_checkpoints_dir: ${machine.root_uniception_pretrained_checkpoints_dir}
+
+### Benchmarking args
+seed: 0
+# Disable CUDNN benchmark (disable when training with variable resolutions & variable numbers of views)
+disable_cudnn_benchmark: true
+# Batch size for inference (Metrics are computed per multi-view set and averaged, not per batch of multi-view sets)
+batch_size: 20
+# Use mixed precision for inference
+amp: 1
+# Floating point type to use for mixed precision
+amp_dtype: "bf16"
diff --git a/configs/dataset/ase_wai/default.yaml b/configs/dataset/ase_wai/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..909f9d015c19d0e7a075102484f5c711aa1f3bf3
--- /dev/null
+++ b/configs/dataset/ase_wai/default.yaml
@@ -0,0 +1,3 @@
+defaults:
+  - train: default
+  - val: default
diff --git a/configs/dataset/ase_wai/train/default.yaml b/configs/dataset/ase_wai/train/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5f900e2b75ee2acb9256253bc753f3c355d61335
--- /dev/null
+++ b/configs/dataset/ase_wai/train/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "ASEWAI(
+  split='${dataset.ase_wai.train.split}',
+  resolution=${dataset.ase_wai.train.dataset_resolution},
+  principal_point_centered=${dataset.ase_wai.train.principal_point_centered},
+  aug_crop=${dataset.ase_wai.train.aug_crop},
+  transform='${dataset.ase_wai.train.transform}',
+  data_norm_type='${dataset.ase_wai.train.data_norm_type}',
+  ROOT='${dataset.ase_wai.train.ROOT}',
+  dataset_metadata_dir='${dataset.ase_wai.train.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.ase_wai.train.overfit_num_sets},
+  variable_num_views=${dataset.ase_wai.train.variable_num_views},
+  num_views=${dataset.ase_wai.train.num_views},
+  covisibility_thres=${dataset.ase_wai.train.covisibility_thres})"
+split: 'train'
+dataset_resolution: ${dataset.resolution_train}
+principal_point_centered: ${dataset.principal_point_centered}
+aug_crop: 16
+transform: 'colorjitter+grayscale+gaublur'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/ase
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.train.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/ase_wai/val/default.yaml b/configs/dataset/ase_wai/val/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55457816cb985e4a1a4d05a3826fffcfe10878dc
--- /dev/null
+++ b/configs/dataset/ase_wai/val/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "ASEWAI(
+  split='${dataset.ase_wai.val.split}',
+  resolution=${dataset.ase_wai.val.dataset_resolution},
+  principal_point_centered=${dataset.ase_wai.val.principal_point_centered},
+  seed=${dataset.ase_wai.val.seed},
+  transform='${dataset.ase_wai.val.transform}',
+  data_norm_type='${dataset.ase_wai.val.data_norm_type}',
+  ROOT='${dataset.ase_wai.val.ROOT}',
+  dataset_metadata_dir='${dataset.ase_wai.val.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.ase_wai.val.overfit_num_sets},
+  variable_num_views=${dataset.ase_wai.val.variable_num_views},
+  num_views=${dataset.ase_wai.val.num_views},
+  covisibility_thres=${dataset.ase_wai.val.covisibility_thres})"
+split: 'val'
+dataset_resolution: ${dataset.resolution_val_ase}
+principal_point_centered: ${dataset.principal_point_centered}
+seed: 777
+transform: 'imgnorm'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/ase
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.val.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/benchmark_512_eth3d_snpp_tav2.yaml b/configs/dataset/benchmark_512_eth3d_snpp_tav2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..99c2db868664c48c66dbe46bc7907f7fbfd00f95
--- /dev/null
+++ b/configs/dataset/benchmark_512_eth3d_snpp_tav2.yaml
@@ -0,0 +1,20 @@
+defaults:
+  - default
+
+# Number of views parameter for the multi-view datasets
+num_views: 2
+
+# Test Resolution
+resolution_test_eth3d: ${dataset.resolution_options.512_1_52_ar}
+resolution_test_scannetpp: ${dataset.resolution_options.512_1_52_ar}
+resolution_test_tav2_wb: ${dataset.resolution_options.512_1_00_ar}
+
+# Test Set
+# Sample 10 multi-view sets from each scene
+# ETH3D: 13 scenes
+# ScanNet++V2: 30 scenes
+# TartanAirV2-WB: 5 scenes
+test_dataset:
+  "+ 130 @ ${dataset.eth3d_wai.test.dataset_str}
+  + 300 @ ${dataset.scannetpp_wai.test.dataset_str}
+  + 50 @ ${dataset.tav2_wb_wai.test.dataset_str}"
diff --git a/configs/dataset/benchmark_518_eth3d_snpp_tav2.yaml b/configs/dataset/benchmark_518_eth3d_snpp_tav2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4af1b75495bca7d575a56d621f8e1dc529d30170
--- /dev/null
+++ b/configs/dataset/benchmark_518_eth3d_snpp_tav2.yaml
@@ -0,0 +1,20 @@
+defaults:
+  - default
+
+# Number of views parameter for the multi-view datasets
+num_views: 2
+
+# Test Resolution
+resolution_test_eth3d: ${dataset.resolution_options.518_1_52_ar}
+resolution_test_scannetpp: ${dataset.resolution_options.518_1_52_ar}
+resolution_test_tav2_wb: ${dataset.resolution_options.518_1_00_ar}
+
+# Test Set
+# Sample 10 multi-view sets from each scene
+# ETH3D: 13 scenes
+# ScanNet++V2: 30 scenes
+# TartanAirV2-WB: 5 scenes
+test_dataset:
+  "+ 130 @ ${dataset.eth3d_wai.test.dataset_str}
+  + 300 @ ${dataset.scannetpp_wai.test.dataset_str}
+  + 50 @ ${dataset.tav2_wb_wai.test.dataset_str}"
diff --git a/configs/dataset/benchmark_sv_calib_518_many_ar_eth3d_snpp_tav2.yaml b/configs/dataset/benchmark_sv_calib_518_many_ar_eth3d_snpp_tav2.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..67da8ece5612aa1cd46dd0ae93aa80cc0d8ff1b7
--- /dev/null
+++ b/configs/dataset/benchmark_sv_calib_518_many_ar_eth3d_snpp_tav2.yaml
@@ -0,0 +1,20 @@
+defaults:
+  - default
+
+# Number of views parameter for the multi-view datasets
+num_views: 1
+
+# Test Resolution
+resolution_test_eth3d: ${dataset.resolution_options.518_many_ar}
+resolution_test_scannetpp: ${dataset.resolution_options.518_many_ar}
+resolution_test_tav2_wb: ${dataset.resolution_options.518_many_ar}
+
+# Test Set
+# Sample 20 frames from each scene
+# ETH3D: 13 scenes
+# ScanNet++V2: 30 scenes
+# TartanAirV2-WB: 5 scenes
+test_dataset:
+  "+ 260 @ ${dataset.eth3d_wai.test.dataset_str}
+  + 600 @ ${dataset.scannetpp_wai.test.dataset_str}
+  + 100 @ ${dataset.tav2_wb_wai.test.dataset_str}"
diff --git a/configs/dataset/blendedmvs_wai/default.yaml b/configs/dataset/blendedmvs_wai/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..909f9d015c19d0e7a075102484f5c711aa1f3bf3
--- /dev/null
+++ b/configs/dataset/blendedmvs_wai/default.yaml
@@ -0,0 +1,3 @@
+defaults:
+  - train: default
+  - val: default
diff --git a/configs/dataset/blendedmvs_wai/train/default.yaml b/configs/dataset/blendedmvs_wai/train/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..63130dc49c81faf80aaf06c8ac545dac818b7746
--- /dev/null
+++ b/configs/dataset/blendedmvs_wai/train/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "BlendedMVSWAI(
+  split='${dataset.blendedmvs_wai.train.split}',
+  resolution=${dataset.blendedmvs_wai.train.dataset_resolution},
+  principal_point_centered=${dataset.blendedmvs_wai.train.principal_point_centered},
+  aug_crop=${dataset.blendedmvs_wai.train.aug_crop},
+  transform='${dataset.blendedmvs_wai.train.transform}',
+  data_norm_type='${dataset.blendedmvs_wai.train.data_norm_type}',
+  ROOT='${dataset.blendedmvs_wai.train.ROOT}',
+  dataset_metadata_dir='${dataset.blendedmvs_wai.train.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.blendedmvs_wai.train.overfit_num_sets},
+  variable_num_views=${dataset.blendedmvs_wai.train.variable_num_views},
+  num_views=${dataset.blendedmvs_wai.train.num_views},
+  covisibility_thres=${dataset.blendedmvs_wai.train.covisibility_thres})"
+split: 'train'
+dataset_resolution: ${dataset.resolution_train}
+principal_point_centered: ${dataset.principal_point_centered}
+aug_crop: 16
+transform: 'colorjitter+grayscale+gaublur'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/blendedmvs
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.train.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/blendedmvs_wai/val/default.yaml b/configs/dataset/blendedmvs_wai/val/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9266a43316b8e278ce805478ea93d9fce585ec0a
--- /dev/null
+++ b/configs/dataset/blendedmvs_wai/val/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "BlendedMVSWAI(
+  split='${dataset.blendedmvs_wai.val.split}',
+  resolution=${dataset.blendedmvs_wai.val.dataset_resolution},
+  principal_point_centered=${dataset.blendedmvs_wai.val.principal_point_centered},
+  seed=${dataset.blendedmvs_wai.val.seed},
+  transform='${dataset.blendedmvs_wai.val.transform}',
+  data_norm_type='${dataset.blendedmvs_wai.val.data_norm_type}',
+  ROOT='${dataset.blendedmvs_wai.val.ROOT}',
+  dataset_metadata_dir='${dataset.blendedmvs_wai.val.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.blendedmvs_wai.val.overfit_num_sets},
+  variable_num_views=${dataset.blendedmvs_wai.val.variable_num_views},
+  num_views=${dataset.blendedmvs_wai.val.num_views},
+  covisibility_thres=${dataset.blendedmvs_wai.val.covisibility_thres})"
+split: 'val'
+dataset_resolution: ${dataset.resolution_val_blendedmvs}
+principal_point_centered: ${dataset.principal_point_centered}
+seed: 777
+transform: 'imgnorm'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/blendedmvs
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.val.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/bmvs_518_many_ar_48ipg_8g.yaml b/configs/dataset/bmvs_518_many_ar_48ipg_8g.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8aae5ec4353558a5c9893f55d3df4f0fc82fd939
--- /dev/null
+++ b/configs/dataset/bmvs_518_many_ar_48ipg_8g.yaml
@@ -0,0 +1,23 @@
+defaults:
+  - default
+
+# Number of views parameter for the multi-view datasets
+num_views: 4
+
+train:
+  # If True, the number of views can vary from batch to batch. The maximum number of views is num_views and minimum is 2. (On by default for N-view training)
+  variable_num_views: true
+
+# Train Resolution
+resolution_train: ${dataset.resolution_options.518_many_ar}
+
+# Validation Resolution
+resolution_val_blendedmvs: ${dataset.resolution_options.518_1_33_ar}
+
+# Training Set
+train_dataset:
+  "+ 140_000 @ ${dataset.blendedmvs_wai.train.dataset_str}"
+
+# Validation Set
+test_dataset:
+  "+ 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}"
diff --git a/configs/dataset/default.yaml b/configs/dataset/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..828c780302408299b435f35fe47478105a4725f5
--- /dev/null
+++ b/configs/dataset/default.yaml
@@ -0,0 +1,39 @@
+defaults:
+  - resolution_options: default
+  - ase_wai: default
+  - blendedmvs_wai: default
+  - dl3dv_wai: default
+  - dynamicreplica_wai: default
+  - eth3d_wai: default
+  - megadepth_wai: default
+  - mpsd_wai: default
+  - mvs_synth_wai: default
+  - paralleldomain4d_wai: default
+  - sailvos3d_wai: default
+  - scannetpp_wai: default
+  - spring_wai: default
+  - tav2_wb_wai: default
+  - unrealstereo4k_wai: default
+
+# Training Set. For example: BlendedMVS(split='train', resolution=(512, 384), transform=...)
+train_dataset: ???
+# Validation Set
+test_dataset: "[null]"
+# Number of workers for dataloader
+num_workers: 12
+# Default resolution for training
+resolution_train: ???
+# Default resolution for validation
+resolution_val: ???
+# Number of views parameter for multi-view datasets
+num_views: 2
+# Use a centered principal point for all images
+principal_point_centered: false
+# Default config for multi-view datasets
+train:
+  # If True, the number of views can vary from batch to batch. The maximum number of views is num_views and minimum is 2. (On by default for N-view training)
+  variable_num_views: true
+val:
+  variable_num_views: false
+test:
+  variable_num_views: false
diff --git a/configs/dataset/dl3dv_wai/default.yaml b/configs/dataset/dl3dv_wai/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..909f9d015c19d0e7a075102484f5c711aa1f3bf3
--- /dev/null
+++ b/configs/dataset/dl3dv_wai/default.yaml
@@ -0,0 +1,3 @@
+defaults:
+  - train: default
+  - val: default
diff --git a/configs/dataset/dl3dv_wai/train/default.yaml b/configs/dataset/dl3dv_wai/train/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1ebbdc50856eaebef61a016b0c37e8301dbc541f
--- /dev/null
+++ b/configs/dataset/dl3dv_wai/train/default.yaml
@@ -0,0 +1,28 @@
+dataset_str:
+  "DL3DVWAI(
+  split='${dataset.dl3dv_wai.train.split}',
+  resolution=${dataset.dl3dv_wai.train.dataset_resolution},
+  principal_point_centered=${dataset.dl3dv_wai.train.principal_point_centered},
+  aug_crop=${dataset.dl3dv_wai.train.aug_crop},
+  transform='${dataset.dl3dv_wai.train.transform}',
+  data_norm_type='${dataset.dl3dv_wai.train.data_norm_type}',
+  ROOT='${dataset.dl3dv_wai.train.ROOT}',
+  dataset_metadata_dir='${dataset.dl3dv_wai.train.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.dl3dv_wai.train.overfit_num_sets},
+  variable_num_views=${dataset.dl3dv_wai.train.variable_num_views},
+  num_views=${dataset.dl3dv_wai.train.num_views},
+  covisibility_thres=${dataset.dl3dv_wai.train.covisibility_thres},
+  mvs_confidence_filter_thres=${dataset.dl3dv_wai.train.mvs_confidence_filter_thres})"
+split: 'train'
+dataset_resolution: ${dataset.resolution_train}
+principal_point_centered: ${dataset.principal_point_centered}
+aug_crop: 16
+transform: 'colorjitter+grayscale+gaublur'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/dl3dv
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.train.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
+mvs_confidence_filter_thres: 0.25
diff --git a/configs/dataset/dl3dv_wai/val/default.yaml b/configs/dataset/dl3dv_wai/val/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..53f1d7d8d2ab1bb7849aae819d8d50d9c462ad2f
--- /dev/null
+++ b/configs/dataset/dl3dv_wai/val/default.yaml
@@ -0,0 +1,28 @@
+dataset_str:
+  "DL3DVWAI(
+  split='${dataset.dl3dv_wai.val.split}',
+  resolution=${dataset.dl3dv_wai.val.dataset_resolution},
+  principal_point_centered=${dataset.dl3dv_wai.val.principal_point_centered},
+  seed=${dataset.dl3dv_wai.val.seed},
+  transform='${dataset.dl3dv_wai.val.transform}',
+  data_norm_type='${dataset.dl3dv_wai.val.data_norm_type}',
+  ROOT='${dataset.dl3dv_wai.val.ROOT}',
+  dataset_metadata_dir='${dataset.dl3dv_wai.val.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.dl3dv_wai.val.overfit_num_sets},
+  variable_num_views=${dataset.dl3dv_wai.val.variable_num_views},
+  num_views=${dataset.dl3dv_wai.val.num_views},
+  covisibility_thres=${dataset.dl3dv_wai.val.covisibility_thres},
+  mvs_confidence_filter_thres=${dataset.dl3dv_wai.val.mvs_confidence_filter_thres})"
+split: 'val'
+dataset_resolution: ${dataset.resolution_val_dl3dv}
+principal_point_centered: ${dataset.principal_point_centered}
+seed: 777
+transform: 'imgnorm'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/dl3dv
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.val.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
+mvs_confidence_filter_thres: 0.25
diff --git a/configs/dataset/dynamicreplica_wai/default.yaml b/configs/dataset/dynamicreplica_wai/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..909f9d015c19d0e7a075102484f5c711aa1f3bf3
--- /dev/null
+++ b/configs/dataset/dynamicreplica_wai/default.yaml
@@ -0,0 +1,3 @@
+defaults:
+  - train: default
+  - val: default
diff --git a/configs/dataset/dynamicreplica_wai/train/default.yaml b/configs/dataset/dynamicreplica_wai/train/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..62efc9b7cc342a46d2e74ae53969daf240caad83
--- /dev/null
+++ b/configs/dataset/dynamicreplica_wai/train/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "DynamicReplicaWAI(
+  split='${dataset.dynamicreplica_wai.train.split}',
+  resolution=${dataset.dynamicreplica_wai.train.dataset_resolution},
+  principal_point_centered=${dataset.dynamicreplica_wai.train.principal_point_centered},
+  aug_crop=${dataset.dynamicreplica_wai.train.aug_crop},
+  transform='${dataset.dynamicreplica_wai.train.transform}',
+  data_norm_type='${dataset.dynamicreplica_wai.train.data_norm_type}',
+  ROOT='${dataset.dynamicreplica_wai.train.ROOT}',
+  dataset_metadata_dir='${dataset.dynamicreplica_wai.train.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.dynamicreplica_wai.train.overfit_num_sets},
+  variable_num_views=${dataset.dynamicreplica_wai.train.variable_num_views},
+  num_views=${dataset.dynamicreplica_wai.train.num_views},
+  covisibility_thres=${dataset.dynamicreplica_wai.train.covisibility_thres})"
+split: 'train'
+dataset_resolution: ${dataset.resolution_train}
+principal_point_centered: ${dataset.principal_point_centered}
+aug_crop: 16
+transform: 'colorjitter+grayscale+gaublur'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/dynamicreplica
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.train.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/dynamicreplica_wai/val/default.yaml b/configs/dataset/dynamicreplica_wai/val/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..64f74ac5c6bf48beeccb656b27a4a4c7b05ce490
--- /dev/null
+++ b/configs/dataset/dynamicreplica_wai/val/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "DynamicReplicaWAI(
+  split='${dataset.dynamicreplica_wai.val.split}',
+  resolution=${dataset.dynamicreplica_wai.val.dataset_resolution},
+  principal_point_centered=${dataset.dynamicreplica_wai.val.principal_point_centered},
+  seed=${dataset.dynamicreplica_wai.val.seed},
+  transform='${dataset.dynamicreplica_wai.val.transform}',
+  data_norm_type='${dataset.dynamicreplica_wai.val.data_norm_type}',
+  ROOT='${dataset.dynamicreplica_wai.val.ROOT}',
+  dataset_metadata_dir='${dataset.dynamicreplica_wai.val.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.dynamicreplica_wai.val.overfit_num_sets},
+  variable_num_views=${dataset.dynamicreplica_wai.val.variable_num_views},
+  num_views=${dataset.dynamicreplica_wai.val.num_views},
+  covisibility_thres=${dataset.dynamicreplica_wai.val.covisibility_thres})"
+split: 'val'
+dataset_resolution: ${dataset.resolution_val_dynamicreplica}
+principal_point_centered: ${dataset.principal_point_centered}
+seed: 777
+transform: 'imgnorm'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/dynamicreplica
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.val.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/eth3d_wai/default.yaml b/configs/dataset/eth3d_wai/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1da6ae479fdbefc1598057293f4d5966e6aefd51
--- /dev/null
+++ b/configs/dataset/eth3d_wai/default.yaml
@@ -0,0 +1,2 @@
+defaults:
+  - test: default
diff --git a/configs/dataset/eth3d_wai/test/default.yaml b/configs/dataset/eth3d_wai/test/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fca5b2b491e50ed84ba56e5e540afc9dd18302bd
--- /dev/null
+++ b/configs/dataset/eth3d_wai/test/default.yaml
@@ -0,0 +1,22 @@
+dataset_str:
+  "ETH3DWAI(
+  resolution=${dataset.eth3d_wai.test.dataset_resolution},
+  principal_point_centered=${dataset.eth3d_wai.test.principal_point_centered},
+  seed=${dataset.eth3d_wai.test.seed},
+  transform='${dataset.eth3d_wai.test.transform}',
+  data_norm_type='${dataset.eth3d_wai.test.data_norm_type}',
+  ROOT='${dataset.eth3d_wai.test.ROOT}',
+  dataset_metadata_dir='${dataset.eth3d_wai.test.dataset_metadata_dir}',
+  variable_num_views=${dataset.eth3d_wai.test.variable_num_views},
+  num_views=${dataset.eth3d_wai.test.num_views},
+  covisibility_thres=${dataset.eth3d_wai.test.covisibility_thres})"
+dataset_resolution: ${dataset.resolution_test_eth3d}
+principal_point_centered: ${dataset.principal_point_centered}
+seed: 777
+transform: 'imgnorm'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/eth3d
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+variable_num_views: ${dataset.test.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.025
diff --git a/configs/dataset/megadepth_wai/default.yaml b/configs/dataset/megadepth_wai/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..909f9d015c19d0e7a075102484f5c711aa1f3bf3
--- /dev/null
+++ b/configs/dataset/megadepth_wai/default.yaml
@@ -0,0 +1,3 @@
+defaults:
+  - train: default
+  - val: default
diff --git a/configs/dataset/megadepth_wai/train/default.yaml b/configs/dataset/megadepth_wai/train/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a635383222cc8f505c7de8d9d8720bb550c7ceba
--- /dev/null
+++ b/configs/dataset/megadepth_wai/train/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "MegaDepthWAI(
+  split='${dataset.megadepth_wai.train.split}',
+  resolution=${dataset.megadepth_wai.train.dataset_resolution},
+  principal_point_centered=${dataset.megadepth_wai.train.principal_point_centered},
+  aug_crop=${dataset.megadepth_wai.train.aug_crop},
+  transform='${dataset.megadepth_wai.train.transform}',
+  data_norm_type='${dataset.megadepth_wai.train.data_norm_type}',
+  ROOT='${dataset.megadepth_wai.train.ROOT}',
+  dataset_metadata_dir='${dataset.megadepth_wai.train.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.megadepth_wai.train.overfit_num_sets},
+  variable_num_views=${dataset.megadepth_wai.train.variable_num_views},
+  num_views=${dataset.megadepth_wai.train.num_views},
+  covisibility_thres=${dataset.megadepth_wai.train.covisibility_thres})"
+split: 'train'
+dataset_resolution: ${dataset.resolution_train}
+principal_point_centered: ${dataset.principal_point_centered}
+aug_crop: 16
+transform: 'colorjitter+grayscale+gaublur'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/megadepth
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.train.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/megadepth_wai/val/default.yaml b/configs/dataset/megadepth_wai/val/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d40ed64bd64ca081192ca8973afb8feba1c65b4
--- /dev/null
+++ b/configs/dataset/megadepth_wai/val/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "MegaDepthWAI(
+  split='${dataset.megadepth_wai.val.split}',
+  resolution=${dataset.megadepth_wai.val.dataset_resolution},
+  principal_point_centered=${dataset.megadepth_wai.val.principal_point_centered},
+  seed=${dataset.megadepth_wai.val.seed},
+  transform='${dataset.megadepth_wai.val.transform}',
+  data_norm_type='${dataset.megadepth_wai.val.data_norm_type}',
+  ROOT='${dataset.megadepth_wai.val.ROOT}',
+  dataset_metadata_dir='${dataset.megadepth_wai.val.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.megadepth_wai.val.overfit_num_sets},
+  variable_num_views=${dataset.megadepth_wai.val.variable_num_views},
+  num_views=${dataset.megadepth_wai.val.num_views},
+  covisibility_thres=${dataset.megadepth_wai.val.covisibility_thres})"
+split: 'val'
+dataset_resolution: ${dataset.resolution_val_megadepth}
+principal_point_centered: ${dataset.principal_point_centered}
+seed: 777
+transform: 'imgnorm'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/megadepth
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.val.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/megatrain_13d_512_many_ar_24ipg_16g.yaml b/configs/dataset/megatrain_13d_512_many_ar_24ipg_16g.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c4cccb89d934132b1790b4f468d6d80c56d6827a
--- /dev/null
+++ b/configs/dataset/megatrain_13d_512_many_ar_24ipg_16g.yaml
@@ -0,0 +1,59 @@
+defaults:
+  - default
+
+# Number of views parameter for the multi-view datasets
+num_views: 4
+
+train:
+  # If True, the number of views can vary from batch to batch. The maximum number of views is num_views and minimum is 2. (On by default for N-view training)
+  variable_num_views: true
+
+# Train Resolution
+resolution_train: ${dataset.resolution_options.512_many_ar}
+
+# Validation Resolution
+resolution_val_ase: ${dataset.resolution_options.512_1_00_ar}
+resolution_val_blendedmvs: ${dataset.resolution_options.512_1_33_ar}
+resolution_val_dl3dv: ${dataset.resolution_options.512_1_77_ar}
+resolution_val_dynamicreplica: ${dataset.resolution_options.512_1_77_ar}
+resolution_val_megadepth: ${dataset.resolution_options.512_1_52_ar}
+resolution_val_mpsd: ${dataset.resolution_options.512_1_77_ar}
+resolution_val_mvs_synth: ${dataset.resolution_options.512_1_77_ar}
+resolution_val_paralleldomain4d: ${dataset.resolution_options.512_1_33_ar}
+resolution_val_sailvos3d: ${dataset.resolution_options.512_1_52_ar}
+resolution_val_scannetpp: ${dataset.resolution_options.512_1_52_ar}
+resolution_val_spring: ${dataset.resolution_options.512_1_77_ar}
+resolution_val_tav2_wb: ${dataset.resolution_options.512_1_00_ar}
+resolution_val_unrealstereo4k: ${dataset.resolution_options.512_1_77_ar}
+
+# Training Set
+train_dataset:
+  "+ 52_500 @ ${dataset.ase_wai.train.dataset_str}
+  + 52_500 @ ${dataset.blendedmvs_wai.train.dataset_str}
+  + 52_500 @ ${dataset.dl3dv_wai.train.dataset_str}
+  + 40_000 @ ${dataset.dynamicreplica_wai.train.dataset_str}
+  + 52_500 @ ${dataset.megadepth_wai.train.dataset_str}
+  + 52_500 @ ${dataset.mpsd_wai.train.dataset_str}
+  + 52_500 @ ${dataset.mvs_synth_wai.train.dataset_str}
+  + 52_500 @ ${dataset.paralleldomain4d_wai.train.dataset_str}
+  + 52_500 @ ${dataset.sailvos3d_wai.train.dataset_str}
+  + 52_500 @ ${dataset.scannetpp_wai.train.dataset_str}
+  + 2_000 @ ${dataset.spring_wai.train.dataset_str}
+  + 52_500 @ ${dataset.tav2_wb_wai.train.dataset_str}
+  + 5_500 @ ${dataset.unrealstereo4k_wai.train.dataset_str}"
+
+# Validation Set
+test_dataset:
+  "+ 4_000 @ ${dataset.ase_wai.val.dataset_str}
+  + 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}
+  + 4_000 @ ${dataset.dl3dv_wai.val.dataset_str}
+  + 4_000 @ ${dataset.dynamicreplica_wai.val.dataset_str}
+  + 4_000 @ ${dataset.megadepth_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mpsd_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mvs_synth_wai.val.dataset_str}
+  + 4_000 @ ${dataset.paralleldomain4d_wai.val.dataset_str}
+  + 4_000 @ ${dataset.sailvos3d_wai.val.dataset_str}
+  + 4_000 @ ${dataset.scannetpp_wai.val.dataset_str}
+  + 500 @ ${dataset.spring_wai.val.dataset_str}
+  + 4_000 @ ${dataset.tav2_wb_wai.val.dataset_str}
+  + 500 @ ${dataset.unrealstereo4k_wai.val.dataset_str}"
diff --git a/configs/dataset/megatrain_13d_518_many_ar_24ipg_16g.yaml b/configs/dataset/megatrain_13d_518_many_ar_24ipg_16g.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1c65bc4ebe5a8b22c79bfdf0e67fdcc95e068d75
--- /dev/null
+++ b/configs/dataset/megatrain_13d_518_many_ar_24ipg_16g.yaml
@@ -0,0 +1,59 @@
+defaults:
+  - default
+
+# Number of views parameter for the multi-view datasets
+num_views: 4
+
+train:
+  # If True, the number of views can vary from batch to batch. The maximum number of views is num_views and minimum is 2. (On by default for N-view training)
+  variable_num_views: true
+
+# Train Resolution
+resolution_train: ${dataset.resolution_options.518_many_ar}
+
+# Validation Resolution
+resolution_val_ase: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_blendedmvs: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_dl3dv: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_dynamicreplica: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_megadepth: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_mpsd: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_mvs_synth: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_paralleldomain4d: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_sailvos3d: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_scannetpp: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_spring: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_tav2_wb: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_unrealstereo4k: ${dataset.resolution_options.518_1_77_ar}
+
+# Training Set
+train_dataset:
+  "+ 52_500 @ ${dataset.ase_wai.train.dataset_str}
+  + 52_500 @ ${dataset.blendedmvs_wai.train.dataset_str}
+  + 52_500 @ ${dataset.dl3dv_wai.train.dataset_str}
+  + 40_000 @ ${dataset.dynamicreplica_wai.train.dataset_str}
+  + 52_500 @ ${dataset.megadepth_wai.train.dataset_str}
+  + 52_500 @ ${dataset.mpsd_wai.train.dataset_str}
+  + 52_500 @ ${dataset.mvs_synth_wai.train.dataset_str}
+  + 52_500 @ ${dataset.paralleldomain4d_wai.train.dataset_str}
+  + 52_500 @ ${dataset.sailvos3d_wai.train.dataset_str}
+  + 52_500 @ ${dataset.scannetpp_wai.train.dataset_str}
+  + 2_000 @ ${dataset.spring_wai.train.dataset_str}
+  + 52_500 @ ${dataset.tav2_wb_wai.train.dataset_str}
+  + 5_500 @ ${dataset.unrealstereo4k_wai.train.dataset_str}"
+
+# Validation Set
+test_dataset:
+  "+ 4_000 @ ${dataset.ase_wai.val.dataset_str}
+  + 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}
+  + 4_000 @ ${dataset.dl3dv_wai.val.dataset_str}
+  + 4_000 @ ${dataset.dynamicreplica_wai.val.dataset_str}
+  + 4_000 @ ${dataset.megadepth_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mpsd_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mvs_synth_wai.val.dataset_str}
+  + 4_000 @ ${dataset.paralleldomain4d_wai.val.dataset_str}
+  + 4_000 @ ${dataset.sailvos3d_wai.val.dataset_str}
+  + 4_000 @ ${dataset.scannetpp_wai.val.dataset_str}
+  + 500 @ ${dataset.spring_wai.val.dataset_str}
+  + 4_000 @ ${dataset.tav2_wb_wai.val.dataset_str}
+  + 500 @ ${dataset.unrealstereo4k_wai.val.dataset_str}"
diff --git a/configs/dataset/megatrain_13d_518_many_ar_24ipg_8g.yaml b/configs/dataset/megatrain_13d_518_many_ar_24ipg_8g.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..10a5c74b6a6c1fd8ccdcdda090f9b4eb74fe6e9f
--- /dev/null
+++ b/configs/dataset/megatrain_13d_518_many_ar_24ipg_8g.yaml
@@ -0,0 +1,59 @@
+defaults:
+  - default
+
+# Number of views parameter for the multi-view datasets
+num_views: 4
+
+train:
+  # If True, the number of views can vary from batch to batch. The maximum number of views is num_views and minimum is 2. (On by default for N-view training)
+  variable_num_views: true
+
+# Train Resolution
+resolution_train: ${dataset.resolution_options.518_many_ar}
+
+# Validation Resolution
+resolution_val_ase: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_blendedmvs: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_dl3dv: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_dynamicreplica: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_megadepth: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_mpsd: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_mvs_synth: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_paralleldomain4d: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_sailvos3d: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_scannetpp: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_spring: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_tav2_wb: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_unrealstereo4k: ${dataset.resolution_options.518_1_77_ar}
+
+# Training Set
+train_dataset:
+  "+ 26_250 @ ${dataset.ase_wai.train.dataset_str}
+  + 26_250 @ ${dataset.blendedmvs_wai.train.dataset_str}
+  + 26_250 @ ${dataset.dl3dv_wai.train.dataset_str}
+  + 20_000 @ ${dataset.dynamicreplica_wai.train.dataset_str}
+  + 26_250 @ ${dataset.megadepth_wai.train.dataset_str}
+  + 26_250 @ ${dataset.mpsd_wai.train.dataset_str}
+  + 26_250 @ ${dataset.mvs_synth_wai.train.dataset_str}
+  + 26_250 @ ${dataset.paralleldomain4d_wai.train.dataset_str}
+  + 26_250 @ ${dataset.sailvos3d_wai.train.dataset_str}
+  + 26_250 @ ${dataset.scannetpp_wai.train.dataset_str}
+  + 1_000 @ ${dataset.spring_wai.train.dataset_str}
+  + 26_250 @ ${dataset.tav2_wb_wai.train.dataset_str}
+  + 2_750 @ ${dataset.unrealstereo4k_wai.train.dataset_str}"
+
+# Validation Set
+test_dataset:
+  "+ 4_000 @ ${dataset.ase_wai.val.dataset_str}
+  + 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}
+  + 4_000 @ ${dataset.dl3dv_wai.val.dataset_str}
+  + 4_000 @ ${dataset.dynamicreplica_wai.val.dataset_str}
+  + 4_000 @ ${dataset.megadepth_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mpsd_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mvs_synth_wai.val.dataset_str}
+  + 4_000 @ ${dataset.paralleldomain4d_wai.val.dataset_str}
+  + 4_000 @ ${dataset.sailvos3d_wai.val.dataset_str}
+  + 4_000 @ ${dataset.scannetpp_wai.val.dataset_str}
+  + 500 @ ${dataset.spring_wai.val.dataset_str}
+  + 4_000 @ ${dataset.tav2_wb_wai.val.dataset_str}
+  + 500 @ ${dataset.unrealstereo4k_wai.val.dataset_str}"
diff --git a/configs/dataset/megatrain_13d_518_many_ar_48ipg_64g.yaml b/configs/dataset/megatrain_13d_518_many_ar_48ipg_64g.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d916f2a738e4cc0a9fc9c2df673509066b89581d
--- /dev/null
+++ b/configs/dataset/megatrain_13d_518_many_ar_48ipg_64g.yaml
@@ -0,0 +1,59 @@
+defaults:
+  - default
+
+# Number of views parameter for the multi-view datasets
+num_views: 4
+
+train:
+  # If True, the number of views can vary from batch to batch. The maximum number of views is num_views and minimum is 2. (On by default for N-view training)
+  variable_num_views: true
+
+# Train Resolution
+resolution_train: ${dataset.resolution_options.518_many_ar}
+
+# Validation Resolution
+resolution_val_ase: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_blendedmvs: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_dl3dv: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_dynamicreplica: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_megadepth: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_mpsd: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_mvs_synth: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_paralleldomain4d: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_sailvos3d: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_scannetpp: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_spring: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_tav2_wb: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_unrealstereo4k: ${dataset.resolution_options.518_1_77_ar}
+
+# Training Set
+train_dataset:
+  "+ 420_000 @ ${dataset.ase_wai.train.dataset_str}
+  + 420_000 @ ${dataset.blendedmvs_wai.train.dataset_str}
+  + 420_000 @ ${dataset.dl3dv_wai.train.dataset_str}
+  + 320_000 @ ${dataset.dynamicreplica_wai.train.dataset_str}
+  + 420_000 @ ${dataset.megadepth_wai.train.dataset_str}
+  + 420_000 @ ${dataset.mpsd_wai.train.dataset_str}
+  + 420_000 @ ${dataset.mvs_synth_wai.train.dataset_str}
+  + 420_000 @ ${dataset.paralleldomain4d_wai.train.dataset_str}
+  + 420_000 @ ${dataset.sailvos3d_wai.train.dataset_str}
+  + 420_000 @ ${dataset.scannetpp_wai.train.dataset_str}
+  + 16_000 @ ${dataset.spring_wai.train.dataset_str}
+  + 420_000 @ ${dataset.tav2_wb_wai.train.dataset_str}
+  + 44_000 @ ${dataset.unrealstereo4k_wai.train.dataset_str}"
+
+# Validation Set
+test_dataset:
+  "+ 4_000 @ ${dataset.ase_wai.val.dataset_str}
+  + 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}
+  + 4_000 @ ${dataset.dl3dv_wai.val.dataset_str}
+  + 4_000 @ ${dataset.dynamicreplica_wai.val.dataset_str}
+  + 4_000 @ ${dataset.megadepth_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mpsd_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mvs_synth_wai.val.dataset_str}
+  + 4_000 @ ${dataset.paralleldomain4d_wai.val.dataset_str}
+  + 4_000 @ ${dataset.sailvos3d_wai.val.dataset_str}
+  + 4_000 @ ${dataset.scannetpp_wai.val.dataset_str}
+  + 500 @ ${dataset.spring_wai.val.dataset_str}
+  + 4_000 @ ${dataset.tav2_wb_wai.val.dataset_str}
+  + 500 @ ${dataset.unrealstereo4k_wai.val.dataset_str}"
diff --git a/configs/dataset/megatrain_13d_518_many_ar_48ipg_8g_mono.yaml b/configs/dataset/megatrain_13d_518_many_ar_48ipg_8g_mono.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b9393fc3a45e1dc8410d6a2e7899c0dd42d27343
--- /dev/null
+++ b/configs/dataset/megatrain_13d_518_many_ar_48ipg_8g_mono.yaml
@@ -0,0 +1,59 @@
+defaults:
+  - default
+
+# Number of views parameter for the multi-view datasets
+num_views: 1
+
+train:
+  # If True, the number of views can vary from batch to batch. The maximum number of views is num_views and minimum is 2. (On by default for N-view training)
+  variable_num_views: true
+
+# Train Resolution
+resolution_train: ${dataset.resolution_options.518_many_ar}
+
+# Validation Resolution
+resolution_val_ase: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_blendedmvs: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_dl3dv: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_dynamicreplica: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_megadepth: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_mpsd: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_mvs_synth: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_paralleldomain4d: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_sailvos3d: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_scannetpp: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_spring: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_tav2_wb: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_unrealstereo4k: ${dataset.resolution_options.518_1_77_ar}
+
+# Training Set
+train_dataset:
+  "+ 105_000 @ ${dataset.ase_wai.train.dataset_str}
+  + 105_000 @ ${dataset.blendedmvs_wai.train.dataset_str}
+  + 105_000 @ ${dataset.dl3dv_wai.train.dataset_str}
+  + 80_000 @ ${dataset.dynamicreplica_wai.train.dataset_str}
+  + 105_000 @ ${dataset.megadepth_wai.train.dataset_str}
+  + 105_000 @ ${dataset.mpsd_wai.train.dataset_str}
+  + 105_000 @ ${dataset.mvs_synth_wai.train.dataset_str}
+  + 105_000 @ ${dataset.paralleldomain4d_wai.train.dataset_str}
+  + 105_000 @ ${dataset.sailvos3d_wai.train.dataset_str}
+  + 105_000 @ ${dataset.scannetpp_wai.train.dataset_str}
+  + 4_000 @ ${dataset.spring_wai.train.dataset_str}
+  + 105_000 @ ${dataset.tav2_wb_wai.train.dataset_str}
+  + 11_000 @ ${dataset.unrealstereo4k_wai.train.dataset_str}"
+
+# Validation Set
+test_dataset:
+  "+ 4_000 @ ${dataset.ase_wai.val.dataset_str}
+  + 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}
+  + 4_000 @ ${dataset.dl3dv_wai.val.dataset_str}
+  + 4_000 @ ${dataset.dynamicreplica_wai.val.dataset_str}
+  + 4_000 @ ${dataset.megadepth_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mpsd_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mvs_synth_wai.val.dataset_str}
+  + 4_000 @ ${dataset.paralleldomain4d_wai.val.dataset_str}
+  + 4_000 @ ${dataset.sailvos3d_wai.val.dataset_str}
+  + 4_000 @ ${dataset.scannetpp_wai.val.dataset_str}
+  + 500 @ ${dataset.spring_wai.val.dataset_str}
+  + 4_000 @ ${dataset.tav2_wb_wai.val.dataset_str}
+  + 500 @ ${dataset.unrealstereo4k_wai.val.dataset_str}"
diff --git a/configs/dataset/megatrain_6d_518_many_ar_48ipg_64g.yaml b/configs/dataset/megatrain_6d_518_many_ar_48ipg_64g.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..199d2637875dec96d629358b4ecbc549a72360f7
--- /dev/null
+++ b/configs/dataset/megatrain_6d_518_many_ar_48ipg_64g.yaml
@@ -0,0 +1,38 @@
+defaults:
+  - default
+
+# Number of views parameter for the multi-view datasets
+num_views: 4
+
+train:
+  # If True, the number of views can vary from batch to batch. The maximum number of views is num_views and minimum is 2. (On by default for N-view training)
+  variable_num_views: true
+
+# Train Resolution
+resolution_train: ${dataset.resolution_options.518_many_ar}
+
+# Validation Resolution
+resolution_val_blendedmvs: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_mpsd: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_scannetpp: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_spring: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_tav2_wb: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_unrealstereo4k: ${dataset.resolution_options.518_1_77_ar}
+
+# Training Set
+train_dataset:
+  "+ 1_120_000 @ ${dataset.blendedmvs_wai.train.dataset_str}
+  + 1_120_000 @ ${dataset.mpsd_wai.train.dataset_str}
+  + 1_120_000 @ ${dataset.scannetpp_wai.train.dataset_str}
+  + 44_000 @ ${dataset.spring_wai.train.dataset_str}
+  + 1_120_000 @ ${dataset.tav2_wb_wai.train.dataset_str}
+  + 116_000 @ ${dataset.unrealstereo4k_wai.train.dataset_str}"
+
+# Validation Set
+test_dataset:
+  "+ 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mpsd_wai.val.dataset_str}
+  + 4_000 @ ${dataset.scannetpp_wai.val.dataset_str}
+  + 500 @ ${dataset.spring_wai.val.dataset_str}
+  + 4_000 @ ${dataset.tav2_wb_wai.val.dataset_str}
+  + 500 @ ${dataset.unrealstereo4k_wai.val.dataset_str}"
diff --git a/configs/dataset/megatrain_6d_518_many_ar_48ipg_8g.yaml b/configs/dataset/megatrain_6d_518_many_ar_48ipg_8g.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e83c5f2faeac7c7c081ee9fea1e6c0867c2d90e2
--- /dev/null
+++ b/configs/dataset/megatrain_6d_518_many_ar_48ipg_8g.yaml
@@ -0,0 +1,38 @@
+defaults:
+  - default
+
+# Number of views parameter for the multi-view datasets
+num_views: 4
+
+train:
+  # If True, the number of views can vary from batch to batch. The maximum number of views is num_views and minimum is 2. (On by default for N-view training)
+  variable_num_views: true
+
+# Train Resolution
+resolution_train: ${dataset.resolution_options.518_many_ar}
+
+# Validation Resolution
+resolution_val_blendedmvs: ${dataset.resolution_options.518_1_33_ar}
+resolution_val_mpsd: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_scannetpp: ${dataset.resolution_options.518_1_52_ar}
+resolution_val_spring: ${dataset.resolution_options.518_1_77_ar}
+resolution_val_tav2_wb: ${dataset.resolution_options.518_1_00_ar}
+resolution_val_unrealstereo4k: ${dataset.resolution_options.518_1_77_ar}
+
+# Training Set
+train_dataset:
+  "+ 140_000 @ ${dataset.blendedmvs_wai.train.dataset_str}
+  + 140_000 @ ${dataset.mpsd_wai.train.dataset_str}
+  + 140_000 @ ${dataset.scannetpp_wai.train.dataset_str}
+  + 5_500 @ ${dataset.spring_wai.train.dataset_str}
+  + 140_000 @ ${dataset.tav2_wb_wai.train.dataset_str}
+  + 14_500 @ ${dataset.unrealstereo4k_wai.train.dataset_str}"
+
+# Validation Set
+test_dataset:
+  "+ 4_000 @ ${dataset.blendedmvs_wai.val.dataset_str}
+  + 4_000 @ ${dataset.mpsd_wai.val.dataset_str}
+  + 4_000 @ ${dataset.scannetpp_wai.val.dataset_str}
+  + 500 @ ${dataset.spring_wai.val.dataset_str}
+  + 4_000 @ ${dataset.tav2_wb_wai.val.dataset_str}
+  + 500 @ ${dataset.unrealstereo4k_wai.val.dataset_str}"
diff --git a/configs/dataset/mpsd_wai/default.yaml b/configs/dataset/mpsd_wai/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..909f9d015c19d0e7a075102484f5c711aa1f3bf3
--- /dev/null
+++ b/configs/dataset/mpsd_wai/default.yaml
@@ -0,0 +1,3 @@
+defaults:
+  - train: default
+  - val: default
diff --git a/configs/dataset/mpsd_wai/train/default.yaml b/configs/dataset/mpsd_wai/train/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f865279aaeb52a9ae41dd1004fbc2d544facade3
--- /dev/null
+++ b/configs/dataset/mpsd_wai/train/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "MPSDWAI(
+  split='${dataset.mpsd_wai.train.split}',
+  resolution=${dataset.mpsd_wai.train.dataset_resolution},
+  principal_point_centered=${dataset.mpsd_wai.train.principal_point_centered},
+  aug_crop=${dataset.mpsd_wai.train.aug_crop},
+  transform='${dataset.mpsd_wai.train.transform}',
+  data_norm_type='${dataset.mpsd_wai.train.data_norm_type}',
+  ROOT='${dataset.mpsd_wai.train.ROOT}',
+  dataset_metadata_dir='${dataset.mpsd_wai.train.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.mpsd_wai.train.overfit_num_sets},
+  variable_num_views=${dataset.mpsd_wai.train.variable_num_views},
+  num_views=${dataset.mpsd_wai.train.num_views},
+  covisibility_thres=${dataset.mpsd_wai.train.covisibility_thres})"
+split: 'train'
+dataset_resolution: ${dataset.resolution_train}
+principal_point_centered: ${dataset.principal_point_centered}
+aug_crop: 16
+transform: 'colorjitter+grayscale+gaublur'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/mpsd
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.train.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.15
diff --git a/configs/dataset/mpsd_wai/val/default.yaml b/configs/dataset/mpsd_wai/val/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6878f29f779791bd59a5d7e6c9733d93c14ca2f1
--- /dev/null
+++ b/configs/dataset/mpsd_wai/val/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "MPSDWAI(
+  split='${dataset.mpsd_wai.val.split}',
+  resolution=${dataset.mpsd_wai.val.dataset_resolution},
+  principal_point_centered=${dataset.mpsd_wai.val.principal_point_centered},
+  seed=${dataset.mpsd_wai.val.seed},
+  transform='${dataset.mpsd_wai.val.transform}',
+  data_norm_type='${dataset.mpsd_wai.val.data_norm_type}',
+  ROOT='${dataset.mpsd_wai.val.ROOT}',
+  dataset_metadata_dir='${dataset.mpsd_wai.val.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.mpsd_wai.val.overfit_num_sets},
+  variable_num_views=${dataset.mpsd_wai.val.variable_num_views},
+  num_views=${dataset.mpsd_wai.val.num_views},
+  covisibility_thres=${dataset.mpsd_wai.val.covisibility_thres})"
+split: 'val'
+dataset_resolution: ${dataset.resolution_val_mpsd}
+principal_point_centered: ${dataset.principal_point_centered}
+seed: 777
+transform: 'imgnorm'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/mpsd
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.val.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.15
diff --git a/configs/dataset/mvs_synth_wai/default.yaml b/configs/dataset/mvs_synth_wai/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..909f9d015c19d0e7a075102484f5c711aa1f3bf3
--- /dev/null
+++ b/configs/dataset/mvs_synth_wai/default.yaml
@@ -0,0 +1,3 @@
+defaults:
+  - train: default
+  - val: default
diff --git a/configs/dataset/mvs_synth_wai/train/default.yaml b/configs/dataset/mvs_synth_wai/train/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c44c9ac70103da4a220c7145611033b369a29ba7
--- /dev/null
+++ b/configs/dataset/mvs_synth_wai/train/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "MVSSynthWAI(
+  split='${dataset.mvs_synth_wai.train.split}',
+  resolution=${dataset.mvs_synth_wai.train.dataset_resolution},
+  principal_point_centered=${dataset.mvs_synth_wai.train.principal_point_centered},
+  aug_crop=${dataset.mvs_synth_wai.train.aug_crop},
+  transform='${dataset.mvs_synth_wai.train.transform}',
+  data_norm_type='${dataset.mvs_synth_wai.train.data_norm_type}',
+  ROOT='${dataset.mvs_synth_wai.train.ROOT}',
+  dataset_metadata_dir='${dataset.mvs_synth_wai.train.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.mvs_synth_wai.train.overfit_num_sets},
+  variable_num_views=${dataset.mvs_synth_wai.train.variable_num_views},
+  num_views=${dataset.mvs_synth_wai.train.num_views},
+  covisibility_thres=${dataset.mvs_synth_wai.train.covisibility_thres})"
+split: 'train'
+dataset_resolution: ${dataset.resolution_train}
+principal_point_centered: ${dataset.principal_point_centered}
+aug_crop: 16
+transform: 'colorjitter+grayscale+gaublur'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/mvs_synth
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.train.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/mvs_synth_wai/val/default.yaml b/configs/dataset/mvs_synth_wai/val/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7cfb7f2629a4a8cab43aa2105f1988b0f87bf421
--- /dev/null
+++ b/configs/dataset/mvs_synth_wai/val/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "MVSSynthWAI(
+  split='${dataset.mvs_synth_wai.val.split}',
+  resolution=${dataset.mvs_synth_wai.val.dataset_resolution},
+  principal_point_centered=${dataset.mvs_synth_wai.val.principal_point_centered},
+  seed=${dataset.mvs_synth_wai.val.seed},
+  transform='${dataset.mvs_synth_wai.val.transform}',
+  data_norm_type='${dataset.mvs_synth_wai.val.data_norm_type}',
+  ROOT='${dataset.mvs_synth_wai.val.ROOT}',
+  dataset_metadata_dir='${dataset.mvs_synth_wai.val.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.mvs_synth_wai.val.overfit_num_sets},
+  variable_num_views=${dataset.mvs_synth_wai.val.variable_num_views},
+  num_views=${dataset.mvs_synth_wai.val.num_views},
+  covisibility_thres=${dataset.mvs_synth_wai.val.covisibility_thres})"
+split: 'val'
+dataset_resolution: ${dataset.resolution_val_mvs_synth}
+principal_point_centered: ${dataset.principal_point_centered}
+seed: 777
+transform: 'imgnorm'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/mvs_synth
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.val.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/paralleldomain4d_wai/default.yaml b/configs/dataset/paralleldomain4d_wai/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..909f9d015c19d0e7a075102484f5c711aa1f3bf3
--- /dev/null
+++ b/configs/dataset/paralleldomain4d_wai/default.yaml
@@ -0,0 +1,3 @@
+defaults:
+  - train: default
+  - val: default
diff --git a/configs/dataset/paralleldomain4d_wai/train/default.yaml b/configs/dataset/paralleldomain4d_wai/train/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..58b07d8d2cf00d71f567c425c7708119aa9fd985
--- /dev/null
+++ b/configs/dataset/paralleldomain4d_wai/train/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "ParallelDomain4DWAI(
+  split='${dataset.paralleldomain4d_wai.train.split}',
+  resolution=${dataset.paralleldomain4d_wai.train.dataset_resolution},
+  principal_point_centered=${dataset.paralleldomain4d_wai.train.principal_point_centered},
+  aug_crop=${dataset.paralleldomain4d_wai.train.aug_crop},
+  transform='${dataset.paralleldomain4d_wai.train.transform}',
+  data_norm_type='${dataset.paralleldomain4d_wai.train.data_norm_type}',
+  ROOT='${dataset.paralleldomain4d_wai.train.ROOT}',
+  dataset_metadata_dir='${dataset.paralleldomain4d_wai.train.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.paralleldomain4d_wai.train.overfit_num_sets},
+  variable_num_views=${dataset.paralleldomain4d_wai.train.variable_num_views},
+  num_views=${dataset.paralleldomain4d_wai.train.num_views},
+  covisibility_thres=${dataset.paralleldomain4d_wai.train.covisibility_thres})"
+split: 'train'
+dataset_resolution: ${dataset.resolution_train}
+principal_point_centered: ${dataset.principal_point_centered}
+aug_crop: 16
+transform: 'colorjitter+grayscale+gaublur'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/paralleldomain4d
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.train.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/paralleldomain4d_wai/val/default.yaml b/configs/dataset/paralleldomain4d_wai/val/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..556d8cc57280fb786c5574cb77b8f46f426bd186
--- /dev/null
+++ b/configs/dataset/paralleldomain4d_wai/val/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "ParallelDomain4DWAI(
+  split='${dataset.paralleldomain4d_wai.val.split}',
+  resolution=${dataset.paralleldomain4d_wai.val.dataset_resolution},
+  principal_point_centered=${dataset.paralleldomain4d_wai.val.principal_point_centered},
+  seed=${dataset.paralleldomain4d_wai.val.seed},
+  transform='${dataset.paralleldomain4d_wai.val.transform}',
+  data_norm_type='${dataset.paralleldomain4d_wai.val.data_norm_type}',
+  ROOT='${dataset.paralleldomain4d_wai.val.ROOT}',
+  dataset_metadata_dir='${dataset.paralleldomain4d_wai.val.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.paralleldomain4d_wai.val.overfit_num_sets},
+  variable_num_views=${dataset.paralleldomain4d_wai.val.variable_num_views},
+  num_views=${dataset.paralleldomain4d_wai.val.num_views},
+  covisibility_thres=${dataset.paralleldomain4d_wai.val.covisibility_thres})"
+split: 'val'
+dataset_resolution: ${dataset.resolution_val_paralleldomain4d}
+principal_point_centered: ${dataset.principal_point_centered}
+seed: 777
+transform: 'imgnorm'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/paralleldomain4d
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.val.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/resolution_options/default.yaml b/configs/dataset/resolution_options/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..942fe9efe3b1bb8eb92f99d3450ebbb25826c99f
--- /dev/null
+++ b/configs/dataset/resolution_options/default.yaml
@@ -0,0 +1,77 @@
+518_many_ar: '[(518, 518), (518, 392), (518, 336), (518, 294), (518, 252), (518, 168), (392, 518), (336, 518), (294, 518), (252, 518)]'
+518_many_landscape_ar: '[(518, 518), (518, 392), (518, 336), (518, 294), (518, 252), (518, 168)]'
+518_many_non_square_landscape_ar: '[(518, 392), (518, 336), (518, 294), (518, 252), (518, 168)]'
+518_0_50_ar: (252, 518) # 1:2
+518_0_56_ar: (294, 518) # 9:16
+518_0_66_ar: (336, 518) # 2:3
+518_0_75_ar: (392, 518) # 3:4
+518_1_00_ar: (518, 518) # 1:1
+518_1_33_ar: (518, 392) # 4:3
+518_1_52_ar: (518, 336) # 3:2
+518_1_77_ar: (518, 294) # 16:9
+518_2_00_ar: (518, 252) # 2:1
+518_3_20_ar: (518, 168) # 3.2:1
+512_many_ar: '[(512, 512), (512, 384), (512, 336), (512, 288), (512, 256), (512, 160), (384, 512), (336, 512), (288, 512), (256, 512)]'
+512_many_landscape_ar: '[(512, 512), (512, 384), (512, 336), (512, 288), (512, 256), (512, 160)]'
+512_many_non_square_landscape_ar: '[(512, 384), (512, 336), (512, 288), (512, 256), (512, 160)]'
+512_0_50_ar: (256, 512)
+512_0_56_ar: (288, 512)
+512_0_66_ar: (336, 512)
+512_0_75_ar: (384, 512)
+512_1_00_ar: (512, 512)
+512_1_33_ar: (512, 384)
+512_1_52_ar: (512, 336)
+512_1_77_ar: (512, 288)
+512_2_00_ar: (512, 256)
+512_3_20_ar: (512, 160)
+504_many_ar: '[(504, 504), (504, 378), (504, 322), (504, 280), (504, 238), (504, 154), (378, 504), (322, 504), (280, 504), (238, 504)]'
+504_many_landscape_ar: '[(504, 504), (504, 378), (504, 322), (504, 280), (504, 238), (504, 154)]'
+504_many_non_square_landscape_ar: '[(504, 378), (504, 322), (504, 280), (504, 238), (504, 154)]'
+504_0_50_ar: (238, 504)
+504_0_56_ar: (280, 504)
+504_0_66_ar: (322, 504)
+504_0_75_ar: (378, 504)
+504_1_00_ar: (504, 504)
+504_1_33_ar: (504, 378)
+504_1_52_ar: (504, 322)
+504_1_77_ar: (504, 280)
+504_2_00_ar: (504, 238)
+504_3_20_ar: (504, 154)
+448_many_ar: '[(448, 448), (448, 336), (448, 294), (448, 252), (448, 224), (448, 140), (336, 448), (294, 448), (252, 448), (224, 448)]'
+448_many_landscape_ar: '[(448, 448), (448, 336), (448, 294), (448, 252), (448, 224), (448, 140)]'
+448_many_non_square_landscape_ar: '[(448, 336), (448, 294), (448, 252), (448, 224), (448, 140)]'
+448_0_50_ar: (224, 448)
+448_0_56_ar: (252, 448)
+448_0_66_ar: (294, 448)
+448_0_75_ar: (336, 448)
+448_1_00_ar: (448, 448)
+448_1_33_ar: (448, 336)
+448_1_52_ar: (448, 294)
+448_1_77_ar: (448, 252)
+448_2_00_ar: (448, 224)
+448_3_20_ar: (448, 140)
+224_many_ar_14ps: '[(224, 224), (224, 168), (224, 154), (224, 126), (224, 112), (224, 70), (168, 224), (154, 224), (126, 224), (112, 224)]'
+224_many_landscape_ar_14ps: '[(224, 224), (224, 168), (224, 154), (224, 126), (224, 112), (224, 70)]'
+224_many_non_square_landscape_ar_14ps: '[(224, 168), (224, 154), (224, 126), (224, 112), (224, 70)]'
+224_0_50_ar_14ps: (112, 224)
+224_0_56_ar_14ps: (126, 224)
+224_0_66_ar_14ps: (154, 224)
+224_0_75_ar_14ps: (168, 224)
+224_1_00_ar: (224, 224)
+224_1_33_ar_14ps: (224, 168)
+224_1_52_ar_14ps: (224, 154)
+224_1_77_ar_14ps: (224, 126)
+224_2_00_ar_14ps: (224, 112)
+224_3_20_ar_14ps: (224, 70)
+224_many_ar_16ps: '[(224, 224), (224, 176), (224, 160), (224, 128), (224, 112), (224, 80), (176, 224), (160, 224), (128, 224), (112, 224)]'
+224_many_landscape_ar_16ps: '[(224, 224), (224, 176), (224, 160), (224, 128), (224, 112), (224, 80)]'
+224_many_non_square_landscape_ar_16ps: '[(224, 176), (224, 160), (224, 128), (224, 112), (224, 80)]'
+224_0_50_ar_16ps: (112, 224)
+224_0_56_ar_16ps: (128, 224)
+224_0_66_ar_16ps: (160, 224)
+224_0_75_ar_16ps: (176, 224)
+224_1_33_ar_16ps: (224, 176)
+224_1_52_ar_16ps: (224, 160)
+224_1_77_ar_16ps: (224, 128)
+224_2_00_ar_16ps: (224, 112)
+224_3_20_ar_16ps: (224, 80)
diff --git a/configs/dataset/sailvos3d_wai/default.yaml b/configs/dataset/sailvos3d_wai/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..909f9d015c19d0e7a075102484f5c711aa1f3bf3
--- /dev/null
+++ b/configs/dataset/sailvos3d_wai/default.yaml
@@ -0,0 +1,3 @@
+defaults:
+  - train: default
+  - val: default
diff --git a/configs/dataset/sailvos3d_wai/train/default.yaml b/configs/dataset/sailvos3d_wai/train/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..cbd1aec783eea443e35b7beb011fbce3006680cd
--- /dev/null
+++ b/configs/dataset/sailvos3d_wai/train/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "SAILVOS3DWAI(
+  split='${dataset.sailvos3d_wai.train.split}',
+  resolution=${dataset.sailvos3d_wai.train.dataset_resolution},
+  principal_point_centered=${dataset.sailvos3d_wai.train.principal_point_centered},
+  aug_crop=${dataset.sailvos3d_wai.train.aug_crop},
+  transform='${dataset.sailvos3d_wai.train.transform}',
+  data_norm_type='${dataset.sailvos3d_wai.train.data_norm_type}',
+  ROOT='${dataset.sailvos3d_wai.train.ROOT}',
+  dataset_metadata_dir='${dataset.sailvos3d_wai.train.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.sailvos3d_wai.train.overfit_num_sets},
+  variable_num_views=${dataset.sailvos3d_wai.train.variable_num_views},
+  num_views=${dataset.sailvos3d_wai.train.num_views},
+  covisibility_thres=${dataset.sailvos3d_wai.train.covisibility_thres})"
+split: 'train'
+dataset_resolution: ${dataset.resolution_train}
+principal_point_centered: ${dataset.principal_point_centered}
+aug_crop: 16
+transform: 'colorjitter+grayscale+gaublur'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/sailvos3d
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.train.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/sailvos3d_wai/val/default.yaml b/configs/dataset/sailvos3d_wai/val/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c1e24eca478a7722970d8bed9480dc3a8a009318
--- /dev/null
+++ b/configs/dataset/sailvos3d_wai/val/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "SAILVOS3DWAI(
+  split='${dataset.sailvos3d_wai.val.split}',
+  resolution=${dataset.sailvos3d_wai.val.dataset_resolution},
+  principal_point_centered=${dataset.sailvos3d_wai.val.principal_point_centered},
+  seed=${dataset.sailvos3d_wai.val.seed},
+  transform='${dataset.sailvos3d_wai.val.transform}',
+  data_norm_type='${dataset.sailvos3d_wai.val.data_norm_type}',
+  ROOT='${dataset.sailvos3d_wai.val.ROOT}',
+  dataset_metadata_dir='${dataset.sailvos3d_wai.val.dataset_metadata_dir}',
+  overfit_num_sets=${dataset.sailvos3d_wai.val.overfit_num_sets},
+  variable_num_views=${dataset.sailvos3d_wai.val.variable_num_views},
+  num_views=${dataset.sailvos3d_wai.val.num_views},
+  covisibility_thres=${dataset.sailvos3d_wai.val.covisibility_thres})"
+split: 'val'
+dataset_resolution: ${dataset.resolution_val_sailvos3d}
+principal_point_centered: ${dataset.principal_point_centered}
+seed: 777
+transform: 'imgnorm'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/sailvos3d
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+overfit_num_sets: null
+variable_num_views: ${dataset.val.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/scannetpp_wai/default.yaml b/configs/dataset/scannetpp_wai/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4b0f64e569c04c066a5c120fba8b37d09472f1a0
--- /dev/null
+++ b/configs/dataset/scannetpp_wai/default.yaml
@@ -0,0 +1,4 @@
+defaults:
+  - train: default
+  - val: default
+  - test: default
diff --git a/configs/dataset/scannetpp_wai/test/default.yaml b/configs/dataset/scannetpp_wai/test/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9aa9cb3aa115d018be59b287e24f2687d12918b2
--- /dev/null
+++ b/configs/dataset/scannetpp_wai/test/default.yaml
@@ -0,0 +1,24 @@
+dataset_str:
+  "ScanNetPPWAI(
+  split='${dataset.scannetpp_wai.test.split}',
+  resolution=${dataset.scannetpp_wai.test.dataset_resolution},
+  principal_point_centered=${dataset.scannetpp_wai.test.principal_point_centered},
+  seed=${dataset.scannetpp_wai.test.seed},
+  transform='${dataset.scannetpp_wai.test.transform}',
+  data_norm_type='${dataset.scannetpp_wai.test.data_norm_type}',
+  ROOT='${dataset.scannetpp_wai.test.ROOT}',
+  dataset_metadata_dir='${dataset.scannetpp_wai.test.dataset_metadata_dir}',
+  variable_num_views=${dataset.scannetpp_wai.test.variable_num_views},
+  num_views=${dataset.scannetpp_wai.test.num_views},
+  covisibility_thres=${dataset.scannetpp_wai.test.covisibility_thres})"
+split: 'test'
+dataset_resolution: ${dataset.resolution_test_scannetpp}
+principal_point_centered: ${dataset.principal_point_centered}
+seed: 777
+transform: 'imgnorm'
+data_norm_type: ${model.data_norm_type}
+ROOT: ${root_data_dir}/scannetppv2
+dataset_metadata_dir: ${mapanything_dataset_metadata_dir}
+variable_num_views: ${dataset.test.variable_num_views}
+num_views: ${dataset.num_views}
+covisibility_thres: 0.25
diff --git a/configs/dataset/scannetpp_wai/train/default.yaml b/configs/dataset/scannetpp_wai/train/default.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1b3bc3df4b8d545face2117ef0f38b7ee4201ffc
--- /dev/null
+++ b/configs/dataset/scannetpp_wai/train/default.yaml
@@ -0,0 +1,26 @@
+dataset_str:
+  "ScanNetPPWAI(
+  split='${dataset.scannetpp_wai.train.split}',
+  resolution=${dataset.scannetpp_wai.train.dataset_resolution},
principal_point_centered=${dataset.scannetpp_wai.train.principal_point_centered}, + aug_crop=${dataset.scannetpp_wai.train.aug_crop}, + transform='${dataset.scannetpp_wai.train.transform}', + data_norm_type='${dataset.scannetpp_wai.train.data_norm_type}', + ROOT='${dataset.scannetpp_wai.train.ROOT}', + dataset_metadata_dir='${dataset.scannetpp_wai.train.dataset_metadata_dir}', + overfit_num_sets=${dataset.scannetpp_wai.train.overfit_num_sets}, + variable_num_views=${dataset.scannetpp_wai.train.variable_num_views}, + num_views=${dataset.scannetpp_wai.train.num_views}, + covisibility_thres=${dataset.scannetpp_wai.train.covisibility_thres})" +split: 'train' +dataset_resolution: ${dataset.resolution_train} +principal_point_centered: ${dataset.principal_point_centered} +aug_crop: 16 +transform: 'colorjitter+grayscale+gaublur' +data_norm_type: ${model.data_norm_type} +ROOT: ${root_data_dir}/scannetppv2 +dataset_metadata_dir: ${mapanything_dataset_metadata_dir} +overfit_num_sets: null +variable_num_views: ${dataset.train.variable_num_views} +num_views: ${dataset.num_views} +covisibility_thres: 0.25 diff --git a/configs/dataset/scannetpp_wai/val/default.yaml b/configs/dataset/scannetpp_wai/val/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8ffd3399537819c5fa924c459faf9dcd9afefd3d --- /dev/null +++ b/configs/dataset/scannetpp_wai/val/default.yaml @@ -0,0 +1,26 @@ +dataset_str: + "ScanNetPPWAI( + split='${dataset.scannetpp_wai.val.split}', + resolution=${dataset.scannetpp_wai.val.dataset_resolution}, + principal_point_centered=${dataset.scannetpp_wai.val.principal_point_centered}, + seed=${dataset.scannetpp_wai.val.seed}, + transform='${dataset.scannetpp_wai.val.transform}', + data_norm_type='${dataset.scannetpp_wai.val.data_norm_type}', + ROOT='${dataset.scannetpp_wai.val.ROOT}', + dataset_metadata_dir='${dataset.scannetpp_wai.val.dataset_metadata_dir}', + overfit_num_sets=${dataset.scannetpp_wai.val.overfit_num_sets}, + variable_num_views=${dataset.scannetpp_wai.val.variable_num_views}, + num_views=${dataset.scannetpp_wai.val.num_views}, + covisibility_thres=${dataset.scannetpp_wai.val.covisibility_thres})" +split: 'val' +dataset_resolution: ${dataset.resolution_val_scannetpp} +principal_point_centered: ${dataset.principal_point_centered} +seed: 777 +transform: 'imgnorm' +data_norm_type: ${model.data_norm_type} +ROOT: ${root_data_dir}/scannetppv2 +dataset_metadata_dir: ${mapanything_dataset_metadata_dir} +overfit_num_sets: null +variable_num_views: ${dataset.val.variable_num_views} +num_views: ${dataset.num_views} +covisibility_thres: 0.25 diff --git a/configs/dataset/spring_wai/default.yaml b/configs/dataset/spring_wai/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..909f9d015c19d0e7a075102484f5c711aa1f3bf3 --- /dev/null +++ b/configs/dataset/spring_wai/default.yaml @@ -0,0 +1,3 @@ +defaults: + - train: default + - val: default diff --git a/configs/dataset/spring_wai/train/default.yaml b/configs/dataset/spring_wai/train/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a43893440933d40448af33bcdce02d321495261 --- /dev/null +++ b/configs/dataset/spring_wai/train/default.yaml @@ -0,0 +1,26 @@ +dataset_str: + "SpringWAI( + split='${dataset.spring_wai.train.split}', + resolution=${dataset.spring_wai.train.dataset_resolution}, + principal_point_centered=${dataset.spring_wai.train.principal_point_centered}, + aug_crop=${dataset.spring_wai.train.aug_crop}, + 
transform='${dataset.spring_wai.train.transform}', + data_norm_type='${dataset.spring_wai.train.data_norm_type}', + ROOT='${dataset.spring_wai.train.ROOT}', + dataset_metadata_dir='${dataset.spring_wai.train.dataset_metadata_dir}', + overfit_num_sets=${dataset.spring_wai.train.overfit_num_sets}, + variable_num_views=${dataset.spring_wai.train.variable_num_views}, + num_views=${dataset.spring_wai.train.num_views}, + covisibility_thres=${dataset.spring_wai.train.covisibility_thres})" +split: 'train' +dataset_resolution: ${dataset.resolution_train} +principal_point_centered: ${dataset.principal_point_centered} +aug_crop: 16 +transform: 'colorjitter+grayscale+gaublur' +data_norm_type: ${model.data_norm_type} +ROOT: ${root_data_dir}/spring +dataset_metadata_dir: ${mapanything_dataset_metadata_dir} +overfit_num_sets: null +variable_num_views: ${dataset.train.variable_num_views} +num_views: ${dataset.num_views} +covisibility_thres: 0.25 diff --git a/configs/dataset/spring_wai/val/default.yaml b/configs/dataset/spring_wai/val/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c285b319ae4eccba88e6abea7af721367fa03846 --- /dev/null +++ b/configs/dataset/spring_wai/val/default.yaml @@ -0,0 +1,26 @@ +dataset_str: + "SpringWAI( + split='${dataset.spring_wai.val.split}', + resolution=${dataset.spring_wai.val.dataset_resolution}, + principal_point_centered=${dataset.spring_wai.val.principal_point_centered}, + seed=${dataset.spring_wai.val.seed}, + transform='${dataset.spring_wai.val.transform}', + data_norm_type='${dataset.spring_wai.val.data_norm_type}', + ROOT='${dataset.spring_wai.val.ROOT}', + dataset_metadata_dir='${dataset.spring_wai.val.dataset_metadata_dir}', + overfit_num_sets=${dataset.spring_wai.val.overfit_num_sets}, + variable_num_views=${dataset.spring_wai.val.variable_num_views}, + num_views=${dataset.spring_wai.val.num_views}, + covisibility_thres=${dataset.spring_wai.val.covisibility_thres})" +split: 'val' +dataset_resolution: ${dataset.resolution_val_spring} +principal_point_centered: ${dataset.principal_point_centered} +seed: 777 +transform: 'imgnorm' +data_norm_type: ${model.data_norm_type} +ROOT: ${root_data_dir}/spring +dataset_metadata_dir: ${mapanything_dataset_metadata_dir} +overfit_num_sets: null +variable_num_views: ${dataset.val.variable_num_views} +num_views: ${dataset.num_views} +covisibility_thres: 0.25 diff --git a/configs/dataset/tav2_wb_wai/default.yaml b/configs/dataset/tav2_wb_wai/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4b0f64e569c04c066a5c120fba8b37d09472f1a0 --- /dev/null +++ b/configs/dataset/tav2_wb_wai/default.yaml @@ -0,0 +1,4 @@ +defaults: + - train: default + - val: default + - test: default diff --git a/configs/dataset/tav2_wb_wai/test/default.yaml b/configs/dataset/tav2_wb_wai/test/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab5dcfb90bd43c0e4a48b9c0af4390afb820c115 --- /dev/null +++ b/configs/dataset/tav2_wb_wai/test/default.yaml @@ -0,0 +1,24 @@ +dataset_str: + "TartanAirV2WBWAI( + split='${dataset.tav2_wb_wai.test.split}', + resolution=${dataset.tav2_wb_wai.test.dataset_resolution}, + principal_point_centered=${dataset.tav2_wb_wai.test.principal_point_centered}, + seed=${dataset.tav2_wb_wai.test.seed}, + transform='${dataset.tav2_wb_wai.test.transform}', + data_norm_type='${dataset.tav2_wb_wai.test.data_norm_type}', + ROOT='${dataset.tav2_wb_wai.test.ROOT}', + dataset_metadata_dir='${dataset.tav2_wb_wai.test.dataset_metadata_dir}', + 
variable_num_views=${dataset.tav2_wb_wai.test.variable_num_views}, + num_views=${dataset.tav2_wb_wai.test.num_views}, + covisibility_thres=${dataset.tav2_wb_wai.test.covisibility_thres})" +split: 'test' +dataset_resolution: ${dataset.resolution_test_tav2_wb} +principal_point_centered: ${dataset.principal_point_centered} +seed: 777 +transform: 'imgnorm' +data_norm_type: ${model.data_norm_type} +ROOT: ${root_data_dir}/tav2_wb +dataset_metadata_dir: ${mapanything_dataset_metadata_dir} +variable_num_views: ${dataset.test.variable_num_views} +num_views: ${dataset.num_views} +covisibility_thres: 0.25 diff --git a/configs/dataset/tav2_wb_wai/train/default.yaml b/configs/dataset/tav2_wb_wai/train/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..adff13bba769d76aba5ae5550887896312405f29 --- /dev/null +++ b/configs/dataset/tav2_wb_wai/train/default.yaml @@ -0,0 +1,26 @@ +dataset_str: + "TartanAirV2WBWAI( + split='${dataset.tav2_wb_wai.train.split}', + resolution=${dataset.tav2_wb_wai.train.dataset_resolution}, + principal_point_centered=${dataset.tav2_wb_wai.train.principal_point_centered}, + aug_crop=${dataset.tav2_wb_wai.train.aug_crop}, + transform='${dataset.tav2_wb_wai.train.transform}', + data_norm_type='${dataset.tav2_wb_wai.train.data_norm_type}', + ROOT='${dataset.tav2_wb_wai.train.ROOT}', + dataset_metadata_dir='${dataset.tav2_wb_wai.train.dataset_metadata_dir}', + overfit_num_sets=${dataset.tav2_wb_wai.train.overfit_num_sets}, + variable_num_views=${dataset.tav2_wb_wai.train.variable_num_views}, + num_views=${dataset.tav2_wb_wai.train.num_views}, + covisibility_thres=${dataset.tav2_wb_wai.train.covisibility_thres})" +split: 'train' +dataset_resolution: ${dataset.resolution_train} +principal_point_centered: ${dataset.principal_point_centered} +aug_crop: 16 +transform: 'colorjitter+grayscale+gaublur' +data_norm_type: ${model.data_norm_type} +ROOT: ${root_data_dir}/tav2_wb +dataset_metadata_dir: ${mapanything_dataset_metadata_dir} +overfit_num_sets: null +variable_num_views: ${dataset.train.variable_num_views} +num_views: ${dataset.num_views} +covisibility_thres: 0.25 diff --git a/configs/dataset/tav2_wb_wai/val/default.yaml b/configs/dataset/tav2_wb_wai/val/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6b2cb5925ea9f911ddecbb391a24a241636da66 --- /dev/null +++ b/configs/dataset/tav2_wb_wai/val/default.yaml @@ -0,0 +1,26 @@ +dataset_str: + "TartanAirV2WBWAI( + split='${dataset.tav2_wb_wai.val.split}', + resolution=${dataset.tav2_wb_wai.val.dataset_resolution}, + principal_point_centered=${dataset.tav2_wb_wai.val.principal_point_centered}, + seed=${dataset.tav2_wb_wai.val.seed}, + transform='${dataset.tav2_wb_wai.val.transform}', + data_norm_type='${dataset.tav2_wb_wai.val.data_norm_type}', + ROOT='${dataset.tav2_wb_wai.val.ROOT}', + dataset_metadata_dir='${dataset.tav2_wb_wai.val.dataset_metadata_dir}', + overfit_num_sets=${dataset.tav2_wb_wai.val.overfit_num_sets}, + variable_num_views=${dataset.tav2_wb_wai.val.variable_num_views}, + num_views=${dataset.tav2_wb_wai.val.num_views}, + covisibility_thres=${dataset.tav2_wb_wai.val.covisibility_thres})" +split: 'val' +dataset_resolution: ${dataset.resolution_val_tav2_wb} +principal_point_centered: ${dataset.principal_point_centered} +seed: 777 +transform: 'imgnorm' +data_norm_type: ${model.data_norm_type} +ROOT: ${root_data_dir}/tav2_wb +dataset_metadata_dir: ${mapanything_dataset_metadata_dir} +overfit_num_sets: null +variable_num_views: ${dataset.val.variable_num_views} 
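+# Note (illustrative): the sibling keys in this file exist only to feed the
+# dataset_str interpolation at the top of the file. Once Hydra resolves it, the
+# string is a constructor call roughly of the form (resolution and ROOT depend
+# on the selected dataset/machine configs):
+#   TartanAirV2WBWAI(split='val', resolution=<resolution_val_tav2_wb>, seed=777,
+#                    transform='imgnorm', ROOT='<root_data_dir>/tav2_wb', ...)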
+num_views: ${dataset.num_views} +covisibility_thres: 0.25 diff --git a/configs/dataset/unrealstereo4k_wai/default.yaml b/configs/dataset/unrealstereo4k_wai/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..909f9d015c19d0e7a075102484f5c711aa1f3bf3 --- /dev/null +++ b/configs/dataset/unrealstereo4k_wai/default.yaml @@ -0,0 +1,3 @@ +defaults: + - train: default + - val: default diff --git a/configs/dataset/unrealstereo4k_wai/train/default.yaml b/configs/dataset/unrealstereo4k_wai/train/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..947161e6ea2b766bddd2083417a55647df0a45ed --- /dev/null +++ b/configs/dataset/unrealstereo4k_wai/train/default.yaml @@ -0,0 +1,26 @@ +dataset_str: + "UnrealStereo4KWAI( + split='${dataset.unrealstereo4k_wai.train.split}', + resolution=${dataset.unrealstereo4k_wai.train.dataset_resolution}, + principal_point_centered=${dataset.unrealstereo4k_wai.train.principal_point_centered}, + aug_crop=${dataset.unrealstereo4k_wai.train.aug_crop}, + transform='${dataset.unrealstereo4k_wai.train.transform}', + data_norm_type='${dataset.unrealstereo4k_wai.train.data_norm_type}', + ROOT='${dataset.unrealstereo4k_wai.train.ROOT}', + dataset_metadata_dir='${dataset.unrealstereo4k_wai.train.dataset_metadata_dir}', + overfit_num_sets=${dataset.unrealstereo4k_wai.train.overfit_num_sets}, + variable_num_views=${dataset.unrealstereo4k_wai.train.variable_num_views}, + num_views=${dataset.unrealstereo4k_wai.train.num_views}, + covisibility_thres=${dataset.unrealstereo4k_wai.train.covisibility_thres})" +split: 'train' +dataset_resolution: ${dataset.resolution_train} +principal_point_centered: ${dataset.principal_point_centered} +aug_crop: 16 +transform: 'colorjitter+grayscale+gaublur' +data_norm_type: ${model.data_norm_type} +ROOT: ${root_data_dir}/unrealstereo4k +dataset_metadata_dir: ${mapanything_dataset_metadata_dir} +overfit_num_sets: null +variable_num_views: ${dataset.train.variable_num_views} +num_views: ${dataset.num_views} +covisibility_thres: 0.25 diff --git a/configs/dataset/unrealstereo4k_wai/val/default.yaml b/configs/dataset/unrealstereo4k_wai/val/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..acf919e5745e604943a0ab41e6d8438108c1b25a --- /dev/null +++ b/configs/dataset/unrealstereo4k_wai/val/default.yaml @@ -0,0 +1,26 @@ +dataset_str: + "UnrealStereo4KWAI( + split='${dataset.unrealstereo4k_wai.val.split}', + resolution=${dataset.unrealstereo4k_wai.val.dataset_resolution}, + principal_point_centered=${dataset.unrealstereo4k_wai.val.principal_point_centered}, + seed=${dataset.unrealstereo4k_wai.val.seed}, + transform='${dataset.unrealstereo4k_wai.val.transform}', + data_norm_type='${dataset.unrealstereo4k_wai.val.data_norm_type}', + ROOT='${dataset.unrealstereo4k_wai.val.ROOT}', + dataset_metadata_dir='${dataset.unrealstereo4k_wai.val.dataset_metadata_dir}', + overfit_num_sets=${dataset.unrealstereo4k_wai.val.overfit_num_sets}, + variable_num_views=${dataset.unrealstereo4k_wai.val.variable_num_views}, + num_views=${dataset.unrealstereo4k_wai.val.num_views}, + covisibility_thres=${dataset.unrealstereo4k_wai.val.covisibility_thres})" +split: 'val' +dataset_resolution: ${dataset.resolution_val_unrealstereo4k} +principal_point_centered: ${dataset.principal_point_centered} +seed: 777 +transform: 'imgnorm' +data_norm_type: ${model.data_norm_type} +ROOT: ${root_data_dir}/unrealstereo4k +dataset_metadata_dir: ${mapanything_dataset_metadata_dir} +overfit_num_sets: null 
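+# Hydra aside (a sketch; the entry-point script name here is hypothetical): any
+# leaf value in these per-dataset configs can also be overridden per run from
+# the command line instead of editing the file, e.g.
+#   python benchmark.py dataset.unrealstereo4k_wai.val.seed=123 \
+#     dataset.unrealstereo4k_wai.val.covisibility_thres=0.5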
+variable_num_views: ${dataset.val.variable_num_views} +num_views: ${dataset.num_views} +covisibility_thres: 0.25
diff --git a/configs/dense_n_view_benchmark.yaml b/configs/dense_n_view_benchmark.yaml new file mode 100644 index 0000000000000000000000000000000000000000..90a60868f0e6ba698f02c711d0b1e0a57b2175f9 --- /dev/null +++ b/configs/dense_n_view_benchmark.yaml @@ -0,0 +1,23 @@ +defaults: + - machine: aws + - model: default + - dataset: default + - _self_ + +output_dir: ${hydra:run.dir} +root_data_dir: ${machine.root_data_dir} +mapanything_dataset_metadata_dir: ${machine.mapanything_dataset_metadata_dir} +root_pretrained_checkpoints_dir: ${machine.root_pretrained_checkpoints_dir} +root_experiments_dir: ${machine.root_experiments_dir} +root_uniception_pretrained_checkpoints_dir: ${machine.root_uniception_pretrained_checkpoints_dir} + +### Benchmarking args +seed: 0 +# Disable CUDNN Benchmark (Disable for variable resolution & number of view training) +disable_cudnn_benchmark: true +# Batch size for inference (Metrics are computed per multi-view set and averaged, not per batch of multi-view sets) +batch_size: 10 +# Use mixed precision for inference +amp: 1 +# Floating point type to use for mixed precision +amp_dtype: "bf16"
diff --git a/configs/distributed/default.yaml b/configs/distributed/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e54fe64b7bf8879fed89eac6d21e08a96769c64 --- /dev/null +++ b/configs/distributed/default.yaml @@ -0,0 +1,6 @@ +# Distributed Training Params +# Number of distributed processes +world_size: 1 +local_rank: -1 +# URL used to set up distributed training +dist_url: 'env://'
diff --git a/configs/loss/conf_pm_mask_loss.yaml b/configs/loss/conf_pm_mask_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6375cf1dcaf054f14a2f248ccfb38801a93391be --- /dev/null +++ b/configs/loss/conf_pm_mask_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ConfLoss(Regr3D(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='?avg_dis', loss_in_log=True), alpha=0.2) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(Regr3D(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='?avg_dis', flatten_across_image_only=True, loss_in_log=True), top_n_percent=5, apply_to_real_data_only=True) + 0.3 * NonAmbiguousMaskLoss(BCELoss())"
diff --git a/configs/loss/conf_pm_mask_scale_loss.yaml b/configs/loss/conf_pm_mask_scale_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..46f754258cabcf7f9c61c69b8e541196aca2aa2d --- /dev/null +++ b/configs/loss/conf_pm_mask_scale_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ConfLoss(PointsPlusScaleRegr3D(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', flatten_across_image_only=True, loss_in_log=True), alpha=0.2) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(PointsPlusScaleRegr3D(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', flatten_across_image_only=True, loss_in_log=True), top_n_percent=5, apply_to_real_data_only=True) + 0.3 * NonAmbiguousMaskLoss(BCELoss())"
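The loss configs in this diff share a small expression convention: train_criterion and test_criterion hold constructor-style strings that the training code evaluates into loss objects, and criteria compose with + and scalar * weights. A minimal sketch of the pattern, using only class names that appear in this diff (the builder that evaluates the string is not shown here):

    train_criterion: "ConfLoss(Regr3D(...), alpha=0.2) + 0.3 * NonAmbiguousMaskLoss(BCELoss())"

That is, a confidence-weighted pointmap regression term plus a BCE term on the non-ambiguous mask, down-weighted by 0.3.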
alpha=0.2)" +train_criterion: "" +# Validation Loss, For example: +# "Regr3D_ScaleShiftInv(L21, norm_mode='?avg_dis', ambiguous_loss_value=0)" (DUSt3R) +# "Regr3D(L21, norm_mode='?avg_dis', ambiguous_loss_value=2)" (MASt3R) +test_criterion: "" diff --git a/configs/loss/entangled_metric_loss.yaml b/configs/loss/entangled_metric_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..39178aaf8c27ee42cd690c0047b38bfe46476c07 --- /dev/null +++ b/configs/loss/entangled_metric_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ConfAndExcludeTopNPercentPixelLoss(FactoredGeometryRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='?avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), conf_alpha=0.2, top_n_percent=5, apply_to_real_data_only=True, conf_loss_set_indices=[0], exclude_loss_set_indices=[1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='?avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" diff --git a/configs/loss/moge2_loss.yaml b/configs/loss/moge2_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..31bcd338f4ba79e2daac4422a1ca3ddcdb0a6b4f --- /dev/null +++ b/configs/loss/moge2_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ExcludeTopNPercentPixelLoss(Regr3D(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='?avg_dis', loss_in_log=True, flatten_across_image_only=True), top_n_percent=5, apply_to_real_data_only=True) + 3.0 * NormalGMLoss(norm_mode='avg_dis', apply_normal_and_gm_loss_to_synthetic_data_only=True)" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(Regr3D(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='?avg_dis', loss_in_log=True, flatten_across_image_only=True), top_n_percent=5, apply_to_real_data_only=True) + 3.0 * NormalGMLoss(norm_mode='avg_dis', apply_normal_and_gm_loss_to_synthetic_data_only=True)" diff --git a/configs/loss/no_depth_loss.yaml b/configs/loss/no_depth_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce1deee438d4c84fa6112b316c1f663680d6fd2c --- /dev/null +++ b/configs/loss/no_depth_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ConfAndExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0, depth_loss_weight=0.0), conf_alpha=0.2, top_n_percent=5, apply_to_real_data_only=True, conf_loss_set_indices=[0], exclude_loss_set_indices=[1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" +# Validation Loss 
+test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0, depth_loss_weight=0.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" diff --git a/configs/loss/no_log_scaling.yaml b/configs/loss/no_log_scaling.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f30447ceb31c783a3c62ecb2666c1e7572a85e03 --- /dev/null +++ b/configs/loss/no_log_scaling.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ConfAndExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=False, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), conf_alpha=0.2, top_n_percent=5, apply_to_real_data_only=True, conf_loss_set_indices=[0], exclude_loss_set_indices=[1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=False, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" diff --git a/configs/loss/no_points_loss.yaml b/configs/loss/no_points_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6680702c3870e5b137d6042b636197907adb94c4 --- /dev/null +++ b/configs/loss/no_points_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ConfAndExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=False, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0, cam_frame_points_loss_weight=0.0), conf_alpha=0.2, top_n_percent=5, apply_to_real_data_only=True, conf_loss_set_indices=[1]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=False, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0, cam_frame_points_loss_weight=0.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[1]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" diff --git a/configs/loss/no_pose_loss.yaml 
b/configs/loss/no_pose_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b804c79999266f734b61602f8e8785c46c11d266 --- /dev/null +++ b/configs/loss/no_pose_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ConfAndExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0, pose_quats_loss_weight=0.0, pose_trans_loss_weight=0.0), conf_alpha=0.2, top_n_percent=5, apply_to_real_data_only=True, conf_loss_set_indices=[0], exclude_loss_set_indices=[1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0, pose_quats_loss_weight=0.0, pose_trans_loss_weight=0.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" diff --git a/configs/loss/no_ray_dirs_loss.yaml b/configs/loss/no_ray_dirs_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a584054cf610cbb9ab2dbcd98878ed57b87b43b6 --- /dev/null +++ b/configs/loss/no_ray_dirs_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ConfAndExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0, ray_directions_loss_weight=0.0), conf_alpha=0.2, top_n_percent=5, apply_to_real_data_only=True, conf_loss_set_indices=[0], exclude_loss_set_indices=[1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0, ray_directions_loss_weight=0.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" diff --git a/configs/loss/no_robust_loss.yaml b/configs/loss/no_robust_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..efbcfd30f90f7d4568a3d28b9d7e99a93e1be069 --- /dev/null +++ b/configs/loss/no_robust_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ConfAndExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(L2Loss(), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, 
compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=1.0, gm_loss_weight=1.0), conf_alpha=0.2, top_n_percent=5, apply_to_real_data_only=True, conf_loss_set_indices=[0], exclude_loss_set_indices=[1, 2]) + 0.1 * NonAmbiguousMaskLoss(BCELoss())" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(L2Loss(), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=1.0, gm_loss_weight=1.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2]) + 0.1 * NonAmbiguousMaskLoss(BCELoss())" diff --git a/configs/loss/overall_disentangled_loss.yaml b/configs/loss/overall_disentangled_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f18662efe888e8f66a56c3297270fa9ee8cc2691 --- /dev/null +++ b/configs/loss/overall_disentangled_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ConfLoss(DisentangledFactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), alpha=0.2, loss_set_indices=[0]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(DisentangledFactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" diff --git a/configs/loss/overall_loss.yaml b/configs/loss/overall_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c25db5cbb2b23667b1302d0b14ae497f684a5b13 --- /dev/null +++ b/configs/loss/overall_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ConfAndExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), conf_alpha=0.2, top_n_percent=5, apply_to_real_data_only=True, conf_loss_set_indices=[0], exclude_loss_set_indices=[1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" diff --git a/configs/loss/overall_loss_weigh_pm_higher.yaml 
b/configs/loss/overall_loss_weigh_pm_higher.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c256b23d4f148b12d8e920fcde40cfa404754b5c --- /dev/null +++ b/configs/loss/overall_loss_weigh_pm_higher.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ConfAndExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, cam_frame_points_loss_weight=0.1, depth_loss_weight=0.1, ray_directions_loss_weight=0.1, pose_quats_loss_weight=0.1, pose_trans_loss_weight=0.1, scale_loss_weight=0.1, world_frame_points_loss_weight=1, normal_loss_weight=0.3, gm_loss_weight=0.3), conf_alpha=0.2, top_n_percent=5, apply_to_real_data_only=True, conf_loss_set_indices=[0], exclude_loss_set_indices=[1, 2]) + 0.03 * NonAmbiguousMaskLoss(BCELoss())" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryScaleRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, cam_frame_points_loss_weight=0.1, depth_loss_weight=0.1, ray_directions_loss_weight=0.1, pose_quats_loss_weight=0.1, pose_trans_loss_weight=0.1, scale_loss_weight=0.1, world_frame_points_loss_weight=1, normal_loss_weight=0.3, gm_loss_weight=0.3), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2]) + 0.03 * NonAmbiguousMaskLoss(BCELoss())" diff --git a/configs/loss/pi3_loss.yaml b/configs/loss/pi3_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..09f2345df7d1b2584b7497fb0d6ce9b31e03d3f2 --- /dev/null +++ b/configs/loss/pi3_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_z', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=True, convert_predictions_to_view0_frame=True, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2])" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_z', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=True, convert_predictions_to_view0_frame=True, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2])" diff --git a/configs/loss/up_to_scale_loss.yaml b/configs/loss/up_to_scale_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..50503a96b4275a3ca2d301da0c37aab161a94f7e --- /dev/null +++ b/configs/loss/up_to_scale_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: 
"ConfAndExcludeTopNPercentPixelLoss(FactoredGeometryRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), conf_alpha=0.2, top_n_percent=5, apply_to_real_data_only=True, conf_loss_set_indices=[0], exclude_loss_set_indices=[1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_along_ray', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2]) + 0.3 * NonAmbiguousMaskLoss(BCELoss())" diff --git a/configs/loss/vggt_loss.yaml b/configs/loss/vggt_loss.yaml new file mode 100644 index 0000000000000000000000000000000000000000..213571b18798ed7bc0256db1916a8c3058e12009 --- /dev/null +++ b/configs/loss/vggt_loss.yaml @@ -0,0 +1,4 @@ +# Training Loss +train_criterion: "ConfAndExcludeTopNPercentPixelLoss(FactoredGeometryRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_z', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), conf_alpha=0.2, top_n_percent=5, apply_to_real_data_only=True, conf_loss_set_indices=[0], exclude_loss_set_indices=[1, 2])" +# Validation Loss +test_criterion: "ExcludeTopNPercentPixelLoss(FactoredGeometryRegr3DPlusNormalGMLoss(RobustRegressionLoss(alpha=0.5, scaling_c=0.05), norm_mode='avg_dis', depth_type_for_loss='depth_z', loss_in_log=True, flatten_across_image_only=True, compute_pairwise_relative_pose_loss=False, compute_world_frame_points_loss=True, apply_normal_and_gm_loss_to_synthetic_data_only=True, normal_loss_weight=3.0, gm_loss_weight=3.0), top_n_percent=5, apply_to_real_data_only=True, loss_set_indices=[0, 1, 2])" diff --git a/configs/machine/aws.yaml b/configs/machine/aws.yaml new file mode 100644 index 0000000000000000000000000000000000000000..71f0d31bc676008dfdc2fde4231af93aa00d0869 --- /dev/null +++ b/configs/machine/aws.yaml @@ -0,0 +1,15 @@ +defaults: + - default + +# Root directory containing all datasets +root_data_dir: "/fsx/xrtech/data" +# Dataset metadata directory +mapanything_dataset_metadata_dir: "/fsx/nkeetha/mapanything_dataset_metadata" +# Root directory containing pretrained checkpoints for custom models +root_pretrained_checkpoints_dir: "/fsx/nkeetha/mapanything_checkpoints" +# Root directory to log experiments +root_experiments_dir: "/fsx/nkeetha/experiments" +# Root directory containing UniCeption pretrained checkpoints +root_uniception_pretrained_checkpoints_dir: "/fsx/nkeetha/uniception_checkpoints" +# Root directory containing external benchmark data +external_benchmark_data_root_data_dir: "/fsx/xrtech/external_benchmark_data/rmvd_mvs_benchmark/rmvd_test_data" diff --git a/configs/machine/default.yaml b/configs/machine/default.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..3c9852a356536a333046429588266258f9328bbe --- /dev/null +++ b/configs/machine/default.yaml @@ -0,0 +1,12 @@ +# Root directory containing all datasets +root_data_dir: ??? +# Dataset metadata directory +mapanything_dataset_metadata_dir: ??? +# Root directory containing pretrained checkpoints for custom models +root_pretrained_checkpoints_dir: ??? +# Root directory to log experiments +root_experiments_dir: ??? +# Root directory containing UniCeption pretrained checkpoints +root_uniception_pretrained_checkpoints_dir: ??? +# Root directory containing external benchmark data +external_benchmark_data_root_data_dir: ??? diff --git a/configs/machine/psc.yaml b/configs/machine/psc.yaml new file mode 100644 index 0000000000000000000000000000000000000000..342448e85bf7ddb0ee6bd97e9ef39c067d15cc34 --- /dev/null +++ b/configs/machine/psc.yaml @@ -0,0 +1,15 @@ +defaults: + - default + +# Root directory containing all datasets +root_data_dir: "/ocean/projects/cis220039p/shared/datasets" +# Dataset metadata directory +mapanything_dataset_metadata_dir: "/ocean/projects/cis220039p/shared/mapanything_dataset_metadata" +# Root directory containing pretrained checkpoints for custom models +root_pretrained_checkpoints_dir: "/jet/home/yzhang25/mapanything/checkpoints" +# Root directory to log experiments +root_experiments_dir: "/jet/home/yzhang25/mapanything/outputs" +# Root directory containing UniCeption pretrained checkpoints +root_uniception_pretrained_checkpoints_dir: "/ocean/projects/cis220039p/shared/uniception/checkpoints/" +# Root directory containing external benchmark data +external_benchmark_data_root_data_dir: "/jet/home/yzhang25/mapanything/benchmarking/rmvd_mvs_benchmark/rmvd_test_data" diff --git a/configs/machine/xri_dgx.yaml b/configs/machine/xri_dgx.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb4ac9a8523e9a8de994d71f5071e19641b739f7 --- /dev/null +++ b/configs/machine/xri_dgx.yaml @@ -0,0 +1,15 @@ +defaults: + - default + +# Root directory containing all datasets +root_data_dir: "/mnt/xri_mapsresearch/data/nkeetha" +# Dataset metadata directory +mapanything_dataset_metadata_dir: "/mnt/xri_mapsresearch/data/nkeetha/mapanything_dataset_metadata" +# Root directory containing pretrained checkpoints for custom models +root_pretrained_checkpoints_dir: "/mnt/xri_mapsresearch/code/nkeetha/mapanything/checkpoints" +# Root directory to log experiments +root_experiments_dir: "/mnt/xri_mapsresearch/experiments/nkeetha" +# Root directory containing UniCeption pretrained checkpoints +root_uniception_pretrained_checkpoints_dir: "/mnt/xri_mapsresearch/code/nkeetha/mapanything/UniCeption/checkpoints" +# Root directory containing external benchmark data +external_benchmark_data_root_data_dir: "/mnt/xri_mapsresearch/data/nkeetha/rmvd_mvs_benchmark/rmvd_test_data" diff --git a/configs/model/anycalib.yaml b/configs/model/anycalib.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e73f8a68fdc58d98d72af2121ebc21c52788a26 --- /dev/null +++ b/configs/model/anycalib.yaml @@ -0,0 +1,11 @@ +# String for model factory +model_str: "anycalib" +# Model config +model_config: + name: "anycalib" +# Image Normalization Type +data_norm_type: "identity" +# AnyCalib checkpoint is already loaded in the inference wrapper +pretrained: null +# Torch hub force reload +torch_hub_force_reload: false diff --git a/configs/model/default.yaml b/configs/model/default.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..3e3864d11fbfd9c575df77598ba42a218df8857c --- /dev/null +++ b/configs/model/default.yaml @@ -0,0 +1,16 @@ +# String for model factory (Options: "mapanything", "mapanything_ablations", "modular_dust3r", "vggt", "pi3") +model_str: ??? +# Model config +model_config: + # Path to pretrained model checkpoint + pretrained_checkpoint_path: null + # Load specific submodules from the checkpoint + load_specific_pretrained_submodules: False + # List of submodules to load from the checkpoint (if load_specific_pretrained_submodules is True) + specific_pretrained_submodules: [] +# Path of a starting checkpoint (to enable backward compatibility with original DUSt3R class) +pretrained: null +# Image normalization type +data_norm_type: ??? +# Torch hub force reload +torch_hub_force_reload: False diff --git a/configs/model/dust3r.yaml b/configs/model/dust3r.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e9eb502d305d96cad2620f947e31e15f5a292d5c --- /dev/null +++ b/configs/model/dust3r.yaml @@ -0,0 +1,23 @@ +# String for model factory +model_str: "dust3r" +# Model config +model_config: + name: "dust3r" + # Checkpoint path + ckpt_path: "${root_pretrained_checkpoints_dir}/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth" + # Scene graph for BA + scene_graph: "complete" + # Pairwise inference batch size + inference_batch_size: 32 + # Global optim schedule + global_optim_schedule: "cosine" + # Global optim lr + global_optim_lr: 0.01 + # Number of iterations for global optimization + global_optim_niter: 300 +# Image Normalization Type +data_norm_type: "dust3r" +# DUSt3R checkpoint is already loaded in the inference wrapper +pretrained: null +# Torch hub force reload +torch_hub_force_reload: false diff --git a/configs/model/encoder/croco_512.yaml b/configs/model/encoder/croco_512.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af463f7c116f799d90234a3315d69145723fdf02 --- /dev/null +++ b/configs/model/encoder/croco_512.yaml @@ -0,0 +1,16 @@ +# UniCeption encoder string used for selecting encoder class (python3 -m uniception.models.encoders.list) +encoder_str: "croco" +# Name of the encoder +name: "croco_512" +# Data normalization type +data_norm_type: "croco" +# Patch embedding class +patch_embed_cls: "PatchEmbedDust3R" +# Image size +img_size: [512, 512] # This parameter has no influence for PatchEmbedDust3R +# Path to the pretrained encoder checkpoint +pretrained_checkpoint_path: '${machine.root_uniception_pretrained_checkpoints_dir}/encoders/CroCo_Encoder_224.pth' +# Override attributes in the pretrained checkpoint +override_checkpoint_attributes: True +# Flag to indicate whether model class uses torch hub +uses_torch_hub: False diff --git a/configs/model/encoder/croco_512_data_norm_dust3r.yaml b/configs/model/encoder/croco_512_data_norm_dust3r.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6158ba02e357994a2599addee8f61efddf0c17d --- /dev/null +++ b/configs/model/encoder/croco_512_data_norm_dust3r.yaml @@ -0,0 +1,16 @@ +# UniCeption encoder string used for selecting encoder class (python3 -m uniception.models.encoders.list) +encoder_str: "croco" +# Name of the encoder +name: "croco_512_img_norm_dust3r" +# Data normalization type +data_norm_type: "dust3r" +# Patch embedding class +patch_embed_cls: "PatchEmbedDust3R" +# Image size +img_size: [512, 512] # This parameter has no influence for PatchEmbedDust3R +# Path to the pretrained encoder checkpoint +pretrained_checkpoint_path: 
'${machine.root_uniception_pretrained_checkpoints_dir}/encoders/CroCo_Encoder_224.pth' +# Override attributes in the pretrained checkpoint +override_checkpoint_attributes: True +# Flag to indicate whether model class uses torch hub +uses_torch_hub: False diff --git a/configs/model/encoder/dinov2_large.yaml b/configs/model/encoder/dinov2_large.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e1aa0ee137a4dfcc529ec633989e892b344b675 --- /dev/null +++ b/configs/model/encoder/dinov2_large.yaml @@ -0,0 +1,14 @@ +# UniCeption encoder string used for selecting encoder class (python3 -m uniception.models.encoders.list) +encoder_str: "dinov2" +# Name of the encoder +name: "dinov2_large" +# Data normalization type +data_norm_type: "dinov2" +# ViT size +size: "large" +# Registers +with_registers: False +# Flag to indicate whether model class uses torch hub +uses_torch_hub: True +# Flag to indicate whether to use gradient checkpointing for encoder +gradient_checkpointing: False diff --git a/configs/model/encoder/radio_v2_5_large.yaml b/configs/model/encoder/radio_v2_5_large.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6236761e280b9193001e035bc703efd9869bf9a8 --- /dev/null +++ b/configs/model/encoder/radio_v2_5_large.yaml @@ -0,0 +1,10 @@ +# UniCeption encoder string used for selecting encoder class (python3 -m uniception.models.encoders.list) +encoder_str: "radio" +# Name of the encoder +name: "radio_v2.5-large" +# Data normalization type +data_norm_type: "radio" +# Model version +model_version: "radio_v2.5-l" +# Flag to indicate whether model class uses torch hub +uses_torch_hub: True diff --git a/configs/model/info_sharing/aat_ifr_24_layers.yaml b/configs/model/info_sharing/aat_ifr_24_layers.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4aeabafc404a09a64769064a874408e98818fa2e --- /dev/null +++ b/configs/model/info_sharing/aat_ifr_24_layers.yaml @@ -0,0 +1,22 @@ +# Model type (Options: ["cross_attention", "global_attention", "alternating_attention"]) +model_type: "alternating_attention" +# Model class type (Options: ["no_intermediate_features", "intermediate_features"]) +model_return_type: "intermediate_features" +# Custom positional encoding (Options: ["RoPEfreq"], Callable Function, null) +custom_positional_encoding: null +# Module arguments +module_args: + # Name of the info sharing module + name: "aat_24_layers_ifr" + # Indices of the intermediate features to be shared (indices start from 0) + indices: [11, 17] + # Normalize intermediate features + norm_intermediate: True + # Size string + size: "24_layers" + # Depth + depth: 24 + # Distinguish Reference and Non-Reference Views + distinguish_ref_and_non_ref_views: True + # Flag to indicate whether to use gradient checkpointing + gradient_checkpointing: False diff --git a/configs/model/info_sharing/aat_ifr_24_layers_escaling.yaml b/configs/model/info_sharing/aat_ifr_24_layers_escaling.yaml new file mode 100644 index 0000000000000000000000000000000000000000..60f461af9f439dfd86cf3d81f565fcd65612f2cc --- /dev/null +++ b/configs/model/info_sharing/aat_ifr_24_layers_escaling.yaml @@ -0,0 +1,24 @@ +# Model type (Options: ["cross_attention", "global_attention", "alternating_attention"]) +model_type: "alternating_attention" +# Model class type (Options: ["no_intermediate_features", "intermediate_features"]) +model_return_type: "intermediate_features" +# Custom positional encoding (Options: ["RoPEfreq"], Callable Function, null) +custom_positional_encoding: null +# Module 
arguments +module_args: + # Name of the info sharing module + name: "aat_24_layers_ifr" + # Indices of the intermediate features to be shared (indices start from 0) + indices: [11, 17] + # Normalize intermediate features + norm_intermediate: True + # Size string + size: "24_layers" + # Depth + depth: 24 + # Distinguish Reference and Non-Reference Views + distinguish_ref_and_non_ref_views: True + # Scale Entropy in Attention + use_entropy_scaling: True + # Flag to indicate whether to use gradient checkpointing + gradient_checkpointing: False diff --git a/configs/model/info_sharing/aat_ifr_24_layers_no_ref_view.yaml b/configs/model/info_sharing/aat_ifr_24_layers_no_ref_view.yaml new file mode 100644 index 0000000000000000000000000000000000000000..42edf9dfcd2c1d5986147f4db2444e4845bcd4f9 --- /dev/null +++ b/configs/model/info_sharing/aat_ifr_24_layers_no_ref_view.yaml @@ -0,0 +1,22 @@ +# Model type (Options: ["cross_attention", "global_attention", "alternating_attention"]) +model_type: "alternating_attention" +# Model class type (Options: ["no_intermediate_features", "intermediate_features"]) +model_return_type: "intermediate_features" +# Custom positional encoding (Options: ["RoPEfreq"], Callable Function, null) +custom_positional_encoding: null +# Module arguments +module_args: + # Name of the info sharing module + name: "aat_24_layers_ifr_no_ref_view" + # Indices of the intermediate features to be shared (indices start from 0) + indices: [11, 17] + # Normalize intermediate features + norm_intermediate: True + # Size string + size: "24_layers" + # Depth + depth: 24 + # Distinguish Reference and Non-Reference Views + distinguish_ref_and_non_ref_views: False + # Flag to indicate whether to use gradient checkpointing + gradient_checkpointing: False diff --git a/configs/model/info_sharing/aat_ifr_24_layers_w_view_pe.yaml b/configs/model/info_sharing/aat_ifr_24_layers_w_view_pe.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2b40e8bc35055616f06de08151165f9675d5f690 --- /dev/null +++ b/configs/model/info_sharing/aat_ifr_24_layers_w_view_pe.yaml @@ -0,0 +1,26 @@ +# Model type (Options: ["cross_attention", "global_attention", "alternating_attention"]) +model_type: "alternating_attention" +# Model class type (Options: ["no_intermediate_features", "intermediate_features"]) +model_return_type: "intermediate_features" +# Custom positional encoding (Options: ["RoPEfreq"], Callable Function, null) +custom_positional_encoding: null +# Module arguments +module_args: + # Name of the info sharing module + name: "aat_24_layers_ifr_w_view_pe" + # Indices of the intermediate features to be shared (indices start from 0) + indices: [11, 17] + # Normalize intermediate features + norm_intermediate: True + # Size string + size: "24_layers" + # Depth + depth: 24 + # Distinguish Reference and Non-Reference Views + distinguish_ref_and_non_ref_views: True + # Flag to indicate whether to use gradient checkpointing + gradient_checkpointing: False + # Maximum number of views for positional encoding + max_num_views_for_pe: 1000 + # Use random indices within range (1, max_num_views_for_pe) for positional encoding of non reference views + use_rand_idx_pe_for_non_reference_views: True diff --git a/configs/model/info_sharing/aat_ifr_48_layers.yaml b/configs/model/info_sharing/aat_ifr_48_layers.yaml new file mode 100644 index 0000000000000000000000000000000000000000..20425dcf47583cb287f273278168f15e23e92f24 --- /dev/null +++ b/configs/model/info_sharing/aat_ifr_48_layers.yaml @@ -0,0 +1,26 @@ +# Model 
type (Options: ["cross_attention", "global_attention", "alternating_attention"]) +model_type: "alternating_attention" +# Model class type (Options: ["no_intermediate_features", "intermediate_features"]) +model_return_type: "intermediate_features" +# Custom positional encoding (Options: ["RoPEfreq"], Callable Function, null) +custom_positional_encoding: null +# Module arguments +module_args: + # Name of the info sharing module + name: "aat_48_layers_ifr" + # Indices of the intermediate features to be shared (indices start from 0) + indices: [11, 23, 35] + # Normalize intermediate features + norm_intermediate: True + # Size string + size: "48_layers" + # Depth (this includes both frame-wise and gloabl attention layers) + depth: 48 + # Feature dim (similar to ViT-Large) + dim: 1024 + # Number of heads (similar to ViT-Large) + num_heads: 16 + # Distinguish Reference and Non-Reference Views + distinguish_ref_and_non_ref_views: True + # Flag to indicate whether to use gradient checkpointing + gradient_checkpointing: False diff --git a/configs/model/info_sharing/aat_ifr_48_layers_escaling.yaml b/configs/model/info_sharing/aat_ifr_48_layers_escaling.yaml new file mode 100644 index 0000000000000000000000000000000000000000..838837bc11c10823dfc39163ae4efb0ffe5ac2bb --- /dev/null +++ b/configs/model/info_sharing/aat_ifr_48_layers_escaling.yaml @@ -0,0 +1,28 @@ +# Model type (Options: ["cross_attention", "global_attention", "alternating_attention"]) +model_type: "alternating_attention" +# Model class type (Options: ["no_intermediate_features", "intermediate_features"]) +model_return_type: "intermediate_features" +# Custom positional encoding (Options: ["RoPEfreq"], Callable Function, null) +custom_positional_encoding: null +# Module arguments +module_args: + # Name of the info sharing module + name: "aat_48_layers_ifr" + # Indices of the intermediate features to be shared (indices start from 0) + indices: [11, 23, 35] + # Normalize intermediate features + norm_intermediate: True + # Size string + size: "48_layers" + # Depth (this includes both frame-wise and gloabl attention layers) + depth: 48 + # Feature dim (similar to ViT-Large) + dim: 1024 + # Number of heads (similar to ViT-Large) + num_heads: 16 + # Distinguish Reference and Non-Reference Views + distinguish_ref_and_non_ref_views: True + # Scale Entropy in Attention + use_entropy_scaling: True + # Flag to indicate whether to use gradient checkpointing + gradient_checkpointing: False diff --git a/configs/model/info_sharing/aat_ifr_48_layers_no_ref_view.yaml b/configs/model/info_sharing/aat_ifr_48_layers_no_ref_view.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b7dfea0c27591afe6d13f477b110a3ffdf7444d6 --- /dev/null +++ b/configs/model/info_sharing/aat_ifr_48_layers_no_ref_view.yaml @@ -0,0 +1,26 @@ +# Model type (Options: ["cross_attention", "global_attention", "alternating_attention"]) +model_type: "alternating_attention" +# Model class type (Options: ["no_intermediate_features", "intermediate_features"]) +model_return_type: "intermediate_features" +# Custom positional encoding (Options: ["RoPEfreq"], Callable Function, null) +custom_positional_encoding: null +# Module arguments +module_args: + # Name of the info sharing module + name: "aat_48_layers_ifr_no_ref_view" + # Indices of the intermediate features to be shared (indices start from 0) + indices: [11, 23, 35] + # Normalize intermediate features + norm_intermediate: True + # Size string + size: "48_layers" + # Depth (this includes both frame-wise and gloabl 
attention layers) + depth: 48 + # Feature dim (similar to ViT-Large) + dim: 1024 + # Number of heads (similar to ViT-Large) + num_heads: 16 + # Distinguish Reference and Non-Reference Views + distinguish_ref_and_non_ref_views: False + # Flag to indicate whether to use gradient checkpointing + gradient_checkpointing: False diff --git a/configs/model/info_sharing/cat_ifr_dust3r.yaml b/configs/model/info_sharing/cat_ifr_dust3r.yaml new file mode 100644 index 0000000000000000000000000000000000000000..34d4a175450864a2fc9dea44460e4c3aea13bbcf --- /dev/null +++ b/configs/model/info_sharing/cat_ifr_dust3r.yaml @@ -0,0 +1,18 @@ +# Model type (Options: ["cross_attention", "global_attention", "alternating_attention"]) +model_type: "cross_attention" +# Model class type (Options: ["no_intermediate_features", "intermediate_features"]) +model_return_type: "intermediate_features" +# Custom positional encoding (Options: ["RoPEfreq"], Callable Function, null) +custom_positional_encoding: "RoPE100" +# Module arguments +module_args: + # Name of the info sharing module + name: "base_cat_ifr_dust3r" + # Number of views + num_views: 2 + # Indices of the intermediate features to be shared (indices start from 0) + indices: [5, 8] + # Normalize intermediate features + norm_intermediate: False + # Load CroCo cross-attention transformer for DUSt3R Init + pretrained_checkpoint_path: '${machine.root_uniception_pretrained_checkpoints_dir}/info_sharing/cross_attn_transformer/Two_View_Cross_Attention_Transformer_CroCo.pth' diff --git a/configs/model/info_sharing/gat_ifr_24_layers.yaml b/configs/model/info_sharing/gat_ifr_24_layers.yaml new file mode 100644 index 0000000000000000000000000000000000000000..66055d0aadcd66290128fa90aed2545c3182698c --- /dev/null +++ b/configs/model/info_sharing/gat_ifr_24_layers.yaml @@ -0,0 +1,24 @@ +# Model type (Options: ["cross_attention", "global_attention", "alternating_attention"]) +model_type: "global_attention" +# Model class type (Options: ["no_intermediate_features", "intermediate_features"]) +model_return_type: "intermediate_features" +# Custom positional encoding (Options: ["RoPEfreq"], Callable Function, null) +custom_positional_encoding: null +# Module arguments +module_args: + # Name of the info sharing module + name: "gat_24_layers_ifr" + # Maximum number of views for positional encoding + max_num_views: 1000 + # Use random indices within range (1, max_num_views) for positional encoding of non reference views + use_rand_idx_pe_for_non_reference_views: True + # Indices of the intermediate features to be shared (indices start from 0) + indices: [11, 17] + # Normalize intermediate features + norm_intermediate: True + # Size string + size: "24_layers" + # Depth + depth: 24 + # Flag to indicate whether to use gradient checkpointing + gradient_checkpointing: False diff --git a/configs/model/info_sharing/gat_ifr_24_layers_escaling.yaml b/configs/model/info_sharing/gat_ifr_24_layers_escaling.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1032a73835cc44adc9f6f86801abede704c74418 --- /dev/null +++ b/configs/model/info_sharing/gat_ifr_24_layers_escaling.yaml @@ -0,0 +1,26 @@ +# Model type (Options: ["cross_attention", "global_attention", "alternating_attention"]) +model_type: "global_attention" +# Model class type (Options: ["no_intermediate_features", "intermediate_features"]) +model_return_type: "intermediate_features" +# Custom positional encoding (Options: ["RoPEfreq"], Callable Function, null) +custom_positional_encoding: null +# Module arguments 
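In the info-sharing configs above, `model_return_type: "intermediate_features"` together with `indices: [11, 17]` (0-based, out of `depth: 24`) means the trunk returns selected intermediate activations alongside its final features, which the downstream DPT heads can hook into. A minimal sketch of that return contract, where the block structure itself is a placeholder assumption:

```python
import torch.nn as nn

class InfoSharingTrunk(nn.Module):
    """Sketch of a depth-N trunk that also returns intermediate features.

    Only the `indices` / `depth` / `norm_intermediate` semantics mirror the
    configs above; the actual attention blocks are stand-ins.
    """

    def __init__(self, depth=24, dim=768, indices=(11, 17), norm_intermediate=True):
        super().__init__()
        self.blocks = nn.ModuleList(
            nn.TransformerEncoderLayer(dim, nhead=12, batch_first=True)
            for _ in range(depth)
        )
        self.indices = set(indices)  # 0-based layer indices to tap
        self.norm = nn.LayerNorm(dim) if norm_intermediate else nn.Identity()

    def forward(self, x):
        intermediates = []
        for i, blk in enumerate(self.blocks):
            x = blk(x)
            if i in self.indices:
                intermediates.append(self.norm(x))
        return x, intermediates  # "intermediate_features" return type
```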
+module_args: + # Name of the info sharing module + name: "gat_24_layers_ifr" + # Maximum number of views for positional encoding + max_num_views: 1000 + # Use random indices within range (1, max_num_views) for positional encoding of non reference views + use_rand_idx_pe_for_non_reference_views: True + # Indices of the intermediate features to be shared (indices start from 0) + indices: [11, 17] + # Normalize intermediate features + norm_intermediate: True + # Size string + size: "24_layers" + # Depth + depth: 24 + # Scale Entropy in Attention + use_entropy_scaling: True + # Flag to indicate whether to use gradient checkpointing + gradient_checkpointing: False diff --git a/configs/model/mapanything.yaml b/configs/model/mapanything.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d0ffa3d1b9b6aea267d91567092da5ccd28bcf91 --- /dev/null +++ b/configs/model/mapanything.yaml @@ -0,0 +1,18 @@ +defaults: + - default + - encoder: dinov2_large + - info_sharing: aat_ifr_24_layers + - pred_head: dpt_pose_scale + - task: images_only + +# String for model factory +model_str: "mapanything" +# Model config +model_config: + name: "mapanything" + encoder_config: ${model.encoder} + info_sharing_config: ${model.info_sharing} + pred_head_config: ${model.pred_head} + geometric_input_config: ${model.task} +# Image Normalization Type +data_norm_type: ${model.encoder.data_norm_type} diff --git a/configs/model/mapanything_ablations.yaml b/configs/model/mapanything_ablations.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97b5bc5065b179863672a96514e05ca6510363ac --- /dev/null +++ b/configs/model/mapanything_ablations.yaml @@ -0,0 +1,18 @@ +defaults: + - default + - encoder: dinov2_large + - info_sharing: aat_ifr_24_layers + - pred_head: dpt_pose + - task: images_only + +# String for model factory +model_str: "mapanything_ablations" +# Model config +model_config: + name: "mapanything_ablations" + encoder_config: ${model.encoder} + info_sharing_config: ${model.info_sharing} + pred_head_config: ${model.pred_head} + geometric_input_config: ${model.task} +# Image Normalization Type +data_norm_type: ${model.encoder.data_norm_type} diff --git a/configs/model/mast3r.yaml b/configs/model/mast3r.yaml new file mode 100644 index 0000000000000000000000000000000000000000..142e2171363396a8f14a7dc6e2867fd51a13b999 --- /dev/null +++ b/configs/model/mast3r.yaml @@ -0,0 +1,15 @@ +# String for model factory +model_str: "mast3r" +# Model config +model_config: + name: "mast3r" + # Checkpoint path + ckpt_path: "${root_pretrained_checkpoints_dir}/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth" + # Cache dir + cache_dir: "${root_pretrained_checkpoints_dir}/mast3r_cache" +# Image Normalization Type +data_norm_type: "dust3r" +# MASt3R checkpoint is already loaded in the inference wrapper +pretrained: null +# Torch hub force reload +torch_hub_force_reload: false diff --git a/configs/model/metric_dust3r.yaml b/configs/model/metric_dust3r.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a0d4760172caab0475c345a1a2cd4821978bb9fc --- /dev/null +++ b/configs/model/metric_dust3r.yaml @@ -0,0 +1,23 @@ +# String for model factory +model_str: "dust3r" +# Model config +model_config: + name: "metric_dust3r" + # Checkpoint path + ckpt_path: "${root_pretrained_checkpoints_dir}/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth" + # Scene graph for BA + scene_graph: "complete" + # Pairwise inference batch size + inference_batch_size: 32 + # Global optim schedule + 
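Model configs like `mapanything.yaml` above are Hydra group composites: the `defaults` list pulls in the `encoder`, `info_sharing`, `pred_head`, and `task` groups, and `model_config` then references them through `${model.encoder}`-style interpolations. A minimal sketch of composing and resolving one of them, assuming it is run from the repo root with the full config tree present:

```python
from hydra import compose, initialize

# Compose configs/train.yaml, select the mapanything model, and swap its
# encoder group. The overrides are illustrative.
with initialize(version_base=None, config_path="configs"):
    cfg = compose(
        config_name="train",
        overrides=["model=mapanything", "model/encoder=radio_v2_5_large"],
    )

# `${model.encoder}` interpolations resolve lazily on access:
print(cfg.model.model_config.encoder_config.name)  # radio_v2.5-large
```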
global_optim_schedule: "cosine" + # Global optim lr + global_optim_lr: 0.01 + # Number of iterations for global optimization + global_optim_niter: 300 +# Image Normalization Type +data_norm_type: "dust3r" +# DUSt3R checkpoint is already loaded in the inference wrapper +pretrained: null +# Torch hub force reload +torch_hub_force_reload: false diff --git a/configs/model/modular_dust3r_512_dpt.yaml b/configs/model/modular_dust3r_512_dpt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5e5107b2b146749e1682607abfebf40cdc115c6e --- /dev/null +++ b/configs/model/modular_dust3r_512_dpt.yaml @@ -0,0 +1,16 @@ +defaults: + - default + - encoder: croco_512_data_norm_dust3r + - info_sharing: cat_ifr_dust3r + - pred_head: dpt + +# String for model factory +model_str: "modular_dust3r" +# Model config +model_config: + name: "dust3r_512_dpt" + encoder_config: ${model.encoder} + info_sharing_config: ${model.info_sharing} + pred_head_config: ${model.pred_head} +# Image Normalization Type +data_norm_type: "dust3r" diff --git a/configs/model/moge_1.yaml b/configs/model/moge_1.yaml new file mode 100644 index 0000000000000000000000000000000000000000..22dfc1ddc6ac22182be28a580dd3a98004efbd66 --- /dev/null +++ b/configs/model/moge_1.yaml @@ -0,0 +1,17 @@ +# String for model factory +model_str: "moge" +# Model config +model_config: + name: "moge-1" + # MoGe pre-trained model checkpoint string + model_string: "Ruicheng/moge-vitl" + # Load custom checkpoint + load_custom_ckpt: false + # Custom checkpoint path + custom_ckpt_path: null +# Image Normalization Type +data_norm_type: "identity" +# MoGe checkpoint is already loaded in the inference wrapper +pretrained: null +# Torch hub force reload +torch_hub_force_reload: false diff --git a/configs/model/moge_2.yaml b/configs/model/moge_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..41950288350393d4166a3066e6c6fb9e7fcd00cb --- /dev/null +++ b/configs/model/moge_2.yaml @@ -0,0 +1,17 @@ +# String for model factory +model_str: "moge" +# Model config +model_config: + name: "moge-2" + # MoGe pre-trained model checkpoint string + model_string: "Ruicheng/moge-2-vitl" + # Load custom checkpoint + load_custom_ckpt: false + # Custom checkpoint path + custom_ckpt_path: null +# Image Normalization Type +data_norm_type: "identity" +# MoGe checkpoint is already loaded in the inference wrapper +pretrained: null +# Torch hub force reload +torch_hub_force_reload: false diff --git a/configs/model/must3r.yaml b/configs/model/must3r.yaml new file mode 100644 index 0000000000000000000000000000000000000000..edbb51708162a556c2ae2ac819fd084e6d1ec897 --- /dev/null +++ b/configs/model/must3r.yaml @@ -0,0 +1,15 @@ +# String for model factory +model_str: "must3r" +# Model config +model_config: + name: "must3r" + # Checkpoint path + ckpt_path: "${root_pretrained_checkpoints_dir}/MUSt3R_512.pth" + # Retrieval Checkpoint path + retrieval_ckpt_path: "${root_pretrained_checkpoints_dir}/MUSt3R_512_retrieval_trainingfree.pth" +# Image Normalization Type +data_norm_type: "dust3r" +# MUSt3R checkpoint is already loaded in the inference wrapper +pretrained: null +# Torch hub force reload +torch_hub_force_reload: false diff --git a/configs/model/pi3.yaml b/configs/model/pi3.yaml new file mode 100644 index 0000000000000000000000000000000000000000..19afea09be091071f095825dddd3d2d5cc91a518 --- /dev/null +++ b/configs/model/pi3.yaml @@ -0,0 +1,13 @@ +# String for model factory +model_str: "pi3" +# Model config +model_config: + name: "pi3" + # Load pre-trained
weights + load_pretrained_weights: true +# Image Normalization Type +data_norm_type: "identity" +# Pi3 checkpoint is already loaded in the inference wrapper +pretrained: null +# Torch hub force reload +torch_hub_force_reload: False diff --git a/configs/model/pow3r.yaml b/configs/model/pow3r.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c13ea3cead807f93ea7e6b809c4017d6b336293 --- /dev/null +++ b/configs/model/pow3r.yaml @@ -0,0 +1,19 @@ +defaults: + - default + - task: images_only + +# String for model factory +model_str: "pow3r" +# Model config +model_config: + name: "pow3r" + # Checkpoint path + ckpt_path: "${root_pretrained_checkpoints_dir}/Pow3R_ViTLarge_BaseDecoder_512_linear.pth" + # Geometric input config + geometric_input_config: ${model.task} +# Image Normalization Type +data_norm_type: "dust3r" +# Pow3R checkpoint is already loaded in the inference wrapper +pretrained: null +# Torch hub force reload +torch_hub_force_reload: false diff --git a/configs/model/pow3r_ba.yaml b/configs/model/pow3r_ba.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b232df603420878fa290998feff97c457cd59d5 --- /dev/null +++ b/configs/model/pow3r_ba.yaml @@ -0,0 +1,29 @@ +defaults: + - default + - task: images_only + +# String for model factory +model_str: "pow3r_ba" +# Model config +model_config: + name: "pow3r_ba" + # Checkpoint path + ckpt_path: "${root_pretrained_checkpoints_dir}/Pow3R_ViTLarge_BaseDecoder_512_linear.pth" + # Geometric input config + geometric_input_config: ${model.task} + # Scene graph for BA + scene_graph: "complete" + # Pairwise inference batch size + inference_batch_size: 32 + # Global optim schedule + global_optim_schedule: "cosine" + # Global optim lr + global_optim_lr: 0.01 + # Number of iterations for global optimization + global_optim_niter: 300 +# Image Normalization Type +data_norm_type: "dust3r" +# Pow3R checkpoint is already loaded in the inference wrapper +pretrained: null +# Torch hub force reload +torch_hub_force_reload: false diff --git a/configs/model/pred_head/adaptor_config/campointmap_pose_confidence_mask.yaml b/configs/model/pred_head/adaptor_config/campointmap_pose_confidence_mask.yaml new file mode 100644 index 0000000000000000000000000000000000000000..61c1f1912bdeac50f23bd83b4678a7c3d551d28d --- /dev/null +++ b/configs/model/pred_head/adaptor_config/campointmap_pose_confidence_mask.yaml @@ -0,0 +1,22 @@ +# Camera Frame Pointmap + Global Camera Pose (Trans + Quats) + Confidence + Mask +input_dim: 5 +scene_rep_dim: 3 +type: "campointmap+pose+confidence+mask" +scene_rep_type: "campointmap+pose" +dense_pred_init_dict: + name: "campointmap+pose+confidence+mask+scale" + pointmap_mode: "z_exp" + pointmap_vmin: ${special_float:"-inf"} + pointmap_vmax: ${special_float:"inf"} + confidence_type: "exp" + confidence_vmin: 1 + confidence_vmax: ${special_float:"inf"} +pose_pred_init_dict: + name: "campointmap+pose+confidence+mask+scale" + cam_trans_mode: "linear" + cam_trans_vmin: ${special_float:"-inf"} + cam_trans_vmax: ${special_float:"inf"} + quaternions_mode: "linear" + quaternions_normalize: true + quaternions_vmin: ${special_float:"-inf"} + quaternions_vmax: ${special_float:"inf"} diff --git a/configs/model/pred_head/adaptor_config/campointmap_pose_confidence_mask_scale.yaml b/configs/model/pred_head/adaptor_config/campointmap_pose_confidence_mask_scale.yaml new file mode 100644 index 0000000000000000000000000000000000000000..798b7f6dd2f43806dd0edceac168892977499052 --- /dev/null +++ 
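The adaptor configs that follow bound their outputs with `${special_float:"-inf"}` / `${special_float:"inf"}`, i.e. a custom OmegaConf resolver that maps the strings "inf" and "-inf" to real Python floats. A registration consistent with these usages (the repo's actual resolver may differ) could be:

```python
from omegaconf import OmegaConf

# Minimal resolver consistent with the `${special_float:...}` usages below;
# float() already understands "inf" and "-inf". Assumption: the repo's own
# resolver may add extra validation.
OmegaConf.register_new_resolver("special_float", lambda s: float(s))

cfg = OmegaConf.create({"vmin": '${special_float:"-inf"}'})
print(cfg.vmin)  # -inf
```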
b/configs/model/pred_head/adaptor_config/campointmap_pose_confidence_mask_scale.yaml @@ -0,0 +1,27 @@ +# Camera Frame Pointmap + Global Camera Pose (Trans + Quats) + Confidence + Mask + Scene-wide Metric Scaling Factor +input_dim: 5 +scene_rep_dim: 3 +type: "campointmap+pose+confidence+mask" +scene_rep_type: "campointmap+pose" +dense_pred_init_dict: + name: "campointmap+pose+confidence+mask+scale" + pointmap_mode: "z_exp" + pointmap_vmin: ${special_float:"-inf"} + pointmap_vmax: ${special_float:"inf"} + confidence_type: "exp" + confidence_vmin: 1 + confidence_vmax: ${special_float:"inf"} +pose_pred_init_dict: + name: "campointmap+pose+confidence+mask+scale" + cam_trans_mode: "linear" + cam_trans_vmin: ${special_float:"-inf"} + cam_trans_vmax: ${special_float:"inf"} + quaternions_mode: "linear" + quaternions_normalize: true + quaternions_vmin: ${special_float:"-inf"} + quaternions_vmax: ${special_float:"inf"} +scale_pred_init_dict: + name: "campointmap+pose+confidence+mask+scale" + mode: "exp" + vmin: 1e-08 + vmax: ${special_float:"inf"} diff --git a/configs/model/pred_head/adaptor_config/pointmap_confidence.yaml b/configs/model/pred_head/adaptor_config/pointmap_confidence.yaml new file mode 100644 index 0000000000000000000000000000000000000000..50b89d8d2f9995143a8821198d2131015a413b9d --- /dev/null +++ b/configs/model/pred_head/adaptor_config/pointmap_confidence.yaml @@ -0,0 +1,13 @@ +# Pointmap + Confidence +input_dim: 4 +scene_rep_dim: 3 +type: "pointmap+confidence" +scene_rep_type: "pointmap" +init_dict: + name: "pointmap+confidence" + pointmap_mode: "exp" + pointmap_vmin: ${special_float:"-inf"} + pointmap_vmax: ${special_float:"inf"} + confidence_type: "exp" + confidence_vmin: 1 + confidence_vmax: ${special_float:"inf"} diff --git a/configs/model/pred_head/adaptor_config/pointmap_confidence_mask.yaml b/configs/model/pred_head/adaptor_config/pointmap_confidence_mask.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7d4acb17e456ab5883950458749c6f1b2773cb42 --- /dev/null +++ b/configs/model/pred_head/adaptor_config/pointmap_confidence_mask.yaml @@ -0,0 +1,13 @@ +# Pointmap + Confidence + Mask +input_dim: 5 +scene_rep_dim: 3 +type: "pointmap+confidence+mask" +scene_rep_type: "pointmap" +init_dict: + name: "pointmap+confidence+mask" + pointmap_mode: "exp" + pointmap_vmin: ${special_float:"-inf"} + pointmap_vmax: ${special_float:"inf"} + confidence_type: "exp" + confidence_vmin: 1 + confidence_vmax: ${special_float:"inf"} diff --git a/configs/model/pred_head/adaptor_config/pointmap_confidence_mask_scale.yaml b/configs/model/pred_head/adaptor_config/pointmap_confidence_mask_scale.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3788cad2fd5d59e775f6c9a7143a3d7ea1ff7a86 --- /dev/null +++ b/configs/model/pred_head/adaptor_config/pointmap_confidence_mask_scale.yaml @@ -0,0 +1,18 @@ +# Pointmap + Confidence + Mask + Scene-wide Metric Scaling Factor +input_dim: 5 +scene_rep_dim: 3 +type: "pointmap+confidence+mask" +scene_rep_type: "pointmap" +init_dict: + name: "pointmap+confidence+mask" + pointmap_mode: "exp" + pointmap_vmin: ${special_float:"-inf"} + pointmap_vmax: ${special_float:"inf"} + confidence_type: "exp" + confidence_vmin: 1 + confidence_vmax: ${special_float:"inf"} +scale_pred_init_dict: + name: "campointmap+pose+confidence+mask+scale" + mode: "exp" + vmin: 1e-08 + vmax: ${special_float:"inf"} diff --git a/configs/model/pred_head/adaptor_config/pointmap_factored_raydirs_depth_pose_confidence_mask_scale.yaml 
b/configs/model/pred_head/adaptor_config/pointmap_factored_raydirs_depth_pose_confidence_mask_scale.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c0597b92436a4dbef96399cc919f46369d76c596 --- /dev/null +++ b/configs/model/pred_head/adaptor_config/pointmap_factored_raydirs_depth_pose_confidence_mask_scale.yaml @@ -0,0 +1,39 @@ +# Global Pointmaps + Ray Directions on Unit Sphere + Depth along Ray + Global Camera Pose (Trans + Quats) + Confidence + Mask + Global Metric Scaling Factor +input_dim: 9 +scene_rep_dim: 7 +type: "pointmap+raydirs+depth+pose+confidence+mask" +scene_rep_type: "pointmap+raydirs+depth+pose" +dense_pred_init_dict: + name: "pointmap+raydirs+depth+pose+confidence+mask+scale" + pointmap_mode: "exp" + pointmap_vmin: ${special_float:"-inf"} + pointmap_vmax: ${special_float:"inf"} + ray_directions_mode: "linear" + ray_directions_normalize_to_unit_sphere: true + ray_directions_normalize_to_unit_image_plane: false + ray_directions_vmin: ${special_float:"-inf"} + ray_directions_vmax: ${special_float:"inf"} + ray_directions_clamp_min_of_z_dir: false + ray_directions_z_dir_min: ${special_float:"-inf"} + depth_mode: "exp" + depth_vmin: 0 + depth_vmax: ${special_float:"inf"} + confidence_type: "exp" + confidence_vmin: 1 + confidence_vmax: ${special_float:"inf"} +pose_pred_init_dict: + name: "raydirs+depth+pose+confidence+mask+scale" + cam_trans_mode: "linear" + cam_trans_vmin: ${special_float:"-inf"} + cam_trans_vmax: ${special_float:"inf"} + quaternions_mode: "linear" + quaternions_normalize: true + quaternions_vmin: ${special_float:"-inf"} + quaternions_vmax: ${special_float:"inf"} +scale_pred_init_dict: + name: "raydirs+depth+pose+confidence+mask+scale" + mode: "exp" + vmin: 1e-08 + vmax: ${special_float:"inf"} +# Flag to decide what representation to use for global pointmaps +use_factored_predictions_for_global_pointmaps: true diff --git a/configs/model/pred_head/adaptor_config/pointmap_raydirs_depth_pose_confidence_mask_scale.yaml b/configs/model/pred_head/adaptor_config/pointmap_raydirs_depth_pose_confidence_mask_scale.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c64d7e630c4d6f78f63413cb0d60397eded249aa --- /dev/null +++ b/configs/model/pred_head/adaptor_config/pointmap_raydirs_depth_pose_confidence_mask_scale.yaml @@ -0,0 +1,39 @@ +# Global Pointmaps + Ray Directions on Unit Sphere + Depth along Ray + Global Camera Pose (Trans + Quats) + Confidence + Mask + Global Metric Scaling Factor +input_dim: 9 +scene_rep_dim: 7 +type: "pointmap+raydirs+depth+pose+confidence+mask" +scene_rep_type: "pointmap+raydirs+depth+pose" +dense_pred_init_dict: + name: "pointmap+raydirs+depth+pose+confidence+mask+scale" + pointmap_mode: "exp" + pointmap_vmin: ${special_float:"-inf"} + pointmap_vmax: ${special_float:"inf"} + ray_directions_mode: "linear" + ray_directions_normalize_to_unit_sphere: true + ray_directions_normalize_to_unit_image_plane: false + ray_directions_vmin: ${special_float:"-inf"} + ray_directions_vmax: ${special_float:"inf"} + ray_directions_clamp_min_of_z_dir: false + ray_directions_z_dir_min: ${special_float:"-inf"} + depth_mode: "exp" + depth_vmin: 0 + depth_vmax: ${special_float:"inf"} + confidence_type: "exp" + confidence_vmin: 1 + confidence_vmax: ${special_float:"inf"} +pose_pred_init_dict: + name: "raydirs+depth+pose+confidence+mask+scale" + cam_trans_mode: "linear" + cam_trans_vmin: ${special_float:"-inf"} + cam_trans_vmax: ${special_float:"inf"} + quaternions_mode: "linear" + quaternions_normalize: true +
quaternions_vmin: ${special_float:"-inf"} + quaternions_vmax: ${special_float:"inf"} +scale_pred_init_dict: + name: "raydirs+depth+pose+confidence+mask+scale" + mode: "exp" + vmin: 1e-08 + vmax: ${special_float:"inf"} +# Flag to decide what representation to use for global pointmaps +use_factored_predictions_for_global_pointmaps: false diff --git a/configs/model/pred_head/adaptor_config/raydirs_depth_pose_confidence_mask.yaml b/configs/model/pred_head/adaptor_config/raydirs_depth_pose_confidence_mask.yaml new file mode 100644 index 0000000000000000000000000000000000000000..14cc198b36bec63341a121ab48e4db848f92f0cf --- /dev/null +++ b/configs/model/pred_head/adaptor_config/raydirs_depth_pose_confidence_mask.yaml @@ -0,0 +1,29 @@ +# Ray Directions on Unit Sphere + Depth along Ray + Global Camera Pose (Trans + Quats) + Confidence + Mask +input_dim: 6 +scene_rep_dim: 4 +type: "raydirs+depth+pose+confidence+mask" +scene_rep_type: "raydirs+depth+pose" +dense_pred_init_dict: + name: "raydirs+depth+pose+confidence+mask" + ray_directions_mode: "linear" + ray_directions_normalize_to_unit_sphere: true + ray_directions_normalize_to_unit_image_plane: false + ray_directions_vmin: ${special_float:"-inf"} + ray_directions_vmax: ${special_float:"inf"} + ray_directions_clamp_min_of_z_dir: false + ray_directions_z_dir_min: ${special_float:"-inf"} + depth_mode: "exp" + depth_vmin: 0 + depth_vmax: ${special_float:"inf"} + confidence_type: "exp" + confidence_vmin: 1 + confidence_vmax: ${special_float:"inf"} +pose_pred_init_dict: + name: "raydirs+depth+pose+confidence+mask" + cam_trans_mode: "linear" + cam_trans_vmin: ${special_float:"-inf"} + cam_trans_vmax: ${special_float:"inf"} + quaternions_mode: "linear" + quaternions_normalize: true + quaternions_vmin: ${special_float:"-inf"} + quaternions_vmax: ${special_float:"inf"} diff --git a/configs/model/pred_head/adaptor_config/raydirs_depth_pose_confidence_mask_scale.yaml b/configs/model/pred_head/adaptor_config/raydirs_depth_pose_confidence_mask_scale.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3b96c477e3c3dc56f34a06deb4b3fd758a0f7d24 --- /dev/null +++ b/configs/model/pred_head/adaptor_config/raydirs_depth_pose_confidence_mask_scale.yaml @@ -0,0 +1,34 @@ +# Ray Directions on Unit Sphere + Depth along Ray + Global Camera Pose (Trans + Quats) + Confidence + Mask + Global Metric Scaling Factor +input_dim: 6 +scene_rep_dim: 4 +type: "raydirs+depth+pose+confidence+mask" +scene_rep_type: "raydirs+depth+pose" +dense_pred_init_dict: + name: "raydirs+depth+pose+confidence+mask+scale" + ray_directions_mode: "linear" + ray_directions_normalize_to_unit_sphere: true + ray_directions_normalize_to_unit_image_plane: false + ray_directions_vmin: ${special_float:"-inf"} + ray_directions_vmax: ${special_float:"inf"} + ray_directions_clamp_min_of_z_dir: false + ray_directions_z_dir_min: ${special_float:"-inf"} + depth_mode: "exp" + depth_vmin: 0 + depth_vmax: ${special_float:"inf"} + confidence_type: "exp" + confidence_vmin: 1 + confidence_vmax: ${special_float:"inf"} +pose_pred_init_dict: + name: "raydirs+depth+pose+confidence+mask+scale" + cam_trans_mode: "linear" + cam_trans_vmin: ${special_float:"-inf"} + cam_trans_vmax: ${special_float:"inf"} + quaternions_mode: "linear" + quaternions_normalize: true + quaternions_vmin: ${special_float:"-inf"} + quaternions_vmax: ${special_float:"inf"} +scale_pred_init_dict: + name: "raydirs+depth+pose+confidence+mask+scale" + mode: "exp" + vmin: 1e-08 + vmax: ${special_float:"inf"}
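In the two factored adaptor configs above, `input_dim: 9` reads as 3 pointmap + 3 ray-direction + 1 depth + 1 confidence + 1 mask channels (the raydirs+depth variants drop the pointmap, giving 6), and `use_factored_predictions_for_global_pointmaps` picks between using the pointmap channels directly or recomposing global points from unit rays, along-ray depth, and the predicted pose. A sketch of that recomposition, assuming unit-norm camera-frame rays and a cam-to-world pose given as translation plus wxyz unit quaternion (the helper is illustrative, not the repo's code):

```python
import torch
import torch.nn.functional as F

def factored_to_global_pointmap(ray_dirs, depth, cam_trans, quat_wxyz):
    """ray_dirs: (H, W, 3) unit vectors; depth: (H, W, 1) depth along each ray;
    cam_trans: (3,); quat_wxyz: (4,) cam-to-world rotation. Illustrative only."""
    pts_cam = ray_dirs * depth                    # back-project along each ray
    w, x, y, z = F.normalize(quat_wxyz, dim=0)    # quaternions_normalize: true
    R = torch.stack([                             # standard quat -> rotation matrix
        torch.stack([1 - 2 * (y * y + z * z), 2 * (x * y - w * z), 2 * (x * z + w * y)]),
        torch.stack([2 * (x * y + w * z), 1 - 2 * (x * x + z * z), 2 * (y * z - w * x)]),
        torch.stack([2 * (x * z - w * y), 2 * (y * z + w * x), 1 - 2 * (x * x + y * y)]),
    ])
    return pts_cam @ R.T + cam_trans              # (H, W, 3) world-frame pointmap
```

The non-factored variant (`use_factored_predictions_for_global_pointmaps: false`) would instead take the three pointmap channels as-is.

diff --git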
a/configs/model/pred_head/dpt.yaml b/configs/model/pred_head/dpt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d15c69b9551dccba3837634b3c37ce73231c98b9 --- /dev/null +++ b/configs/model/pred_head/dpt.yaml @@ -0,0 +1,15 @@ +defaults: + - adaptor_config: pointmap_confidence + +type: "dpt" +feature_head: + feature_dim: 256 + hooks: [0, 1, 2, 3] + checkpoint_gradient: ${model.pred_head.gradient_checkpointing} +regressor_head: + output_dim: ${model.pred_head.adaptor_config.input_dim} + checkpoint_gradient: ${model.pred_head.gradient_checkpointing} +adaptor_type: ${model.pred_head.adaptor_config.type} +adaptor: ${model.pred_head.adaptor_config.init_dict} +# Flag to indicate whether to use gradient checkpointing +gradient_checkpointing: False diff --git a/configs/model/pred_head/dpt_pose.yaml b/configs/model/pred_head/dpt_pose.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e4ae286cd5c39e08081c8c0118bbeececcf9aaaa --- /dev/null +++ b/configs/model/pred_head/dpt_pose.yaml @@ -0,0 +1,19 @@ +defaults: + - adaptor_config: raydirs_depth_pose_confidence_mask + +type: "dpt+pose" +feature_head: + feature_dim: 256 + hooks: [0, 1, 2, 3] + checkpoint_gradient: ${model.pred_head.gradient_checkpointing} +regressor_head: + output_dim: ${model.pred_head.adaptor_config.input_dim} + checkpoint_gradient: ${model.pred_head.gradient_checkpointing} +pose_head: + num_resconv_block: 2 + rot_representation_dim: 4 +adaptor_type: ${model.pred_head.adaptor_config.type} +dpt_adaptor: ${model.pred_head.adaptor_config.dense_pred_init_dict} +pose_adaptor: ${model.pred_head.adaptor_config.pose_pred_init_dict} +# Flag to indicate whether to use gradient checkpointing +gradient_checkpointing: False diff --git a/configs/model/pred_head/dpt_pose_scale.yaml b/configs/model/pred_head/dpt_pose_scale.yaml new file mode 100644 index 0000000000000000000000000000000000000000..223f7291702cf5380d8e86f3a79f3fce90314534 --- /dev/null +++ b/configs/model/pred_head/dpt_pose_scale.yaml @@ -0,0 +1,22 @@ +defaults: + - adaptor_config: raydirs_depth_pose_confidence_mask_scale + +type: "dpt+pose" +feature_head: + feature_dim: 256 + hooks: [0, 1, 2, 3] + checkpoint_gradient: ${model.pred_head.gradient_checkpointing} +regressor_head: + output_dim: ${model.pred_head.adaptor_config.input_dim} + checkpoint_gradient: ${model.pred_head.gradient_checkpointing} +pose_head: + num_resconv_block: 2 + rot_representation_dim: 4 +scale_head: + output_dim: 1 +adaptor_type: ${model.pred_head.adaptor_config.type} +dpt_adaptor: ${model.pred_head.adaptor_config.dense_pred_init_dict} +pose_adaptor: ${model.pred_head.adaptor_config.pose_pred_init_dict} +scale_adaptor: ${model.pred_head.adaptor_config.scale_pred_init_dict} +# Flag to indicate whether to use gradient checkpointing +gradient_checkpointing: False diff --git a/configs/model/pred_head/dpt_scale.yaml b/configs/model/pred_head/dpt_scale.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e3f3b66ef17d0c39eab7a0a670e9b9696e49cce --- /dev/null +++ b/configs/model/pred_head/dpt_scale.yaml @@ -0,0 +1,18 @@ +defaults: + - adaptor_config: pointmap_confidence_mask_scale + +type: "dpt" +feature_head: + feature_dim: 256 + hooks: [0, 1, 2, 3] + checkpoint_gradient: ${model.pred_head.gradient_checkpointing} +regressor_head: + output_dim: ${model.pred_head.adaptor_config.input_dim} + checkpoint_gradient: ${model.pred_head.gradient_checkpointing} +scale_head: + output_dim: 1 +adaptor_type: ${model.pred_head.adaptor_config.type} +adaptor: 
${model.pred_head.adaptor_config.init_dict} +scale_adaptor: ${model.pred_head.adaptor_config.scale_pred_init_dict} +# Flag to indicate whether to use gradient checkpointing +gradient_checkpointing: False diff --git a/configs/model/task/aug_training.yaml b/configs/model/task/aug_training.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ab5bcda9ca27f48d0bde5eabd7bc65ec9942023b --- /dev/null +++ b/configs/model/task/aug_training.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 0.9 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0.05 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 0.5 +# Probability of Geometric Inputs with Depths +depth_prob: 0.5 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 0.5 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 0.5 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0.9 +# Probability for skipping input of the metric scale quantities for the input metric high quality gt depth +# If 0, the metric scale quantities will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0.05 +# Probability for skipping input of the metric scale quantities for the input metric pose +# If 0, the metric scale quantities will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 0.05 diff --git a/configs/model/task/calibrated_sfm.yaml b/configs/model/task/calibrated_sfm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..36a032bd80f6f85d382e7228e7b41fbbcc215210 --- /dev/null +++ b/configs/model/task/calibrated_sfm.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 1 +# Probability of Geometric Inputs with Depths +depth_prob: 0 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 0 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 0 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 0 diff --git a/configs/model/task/default.yaml b/configs/model/task/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..59b8f6906605b537a52a920f0410f1bb3f24679d --- /dev/null +++ b/configs/model/task/default.yaml @@ -0,0 +1,27 @@ +# Ray Directions Encoder Config +ray_dirs_encoder_config: + name: "ray_dirs_encoder" + in_chans: 3 + encoder_str: "dense_rep_encoder" + apply_pe: false +# Depth Encoder Config +depth_encoder_config: + name: "depth_encoder" + in_chans: 1 + encoder_str: 
"dense_rep_encoder" + apply_pe: false +# Cam Rotation (Quats) Encoder Config +cam_rot_encoder_config: + name: "cam_rot_quats_encoder" + in_chans: 4 + encoder_str: "global_rep_encoder" +# Cam Translation Encoder Config +cam_trans_encoder_config: + name: "cam_trans_encoder" + in_chans: 3 + encoder_str: "global_rep_encoder" +# Scale Encoder Config +scale_encoder_config: + name: "scale_encoder" + in_chans: 1 + encoder_str: "global_rep_encoder" diff --git a/configs/model/task/depth_completion.yaml b/configs/model/task/depth_completion.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4a1f69ac3c9f0e9e3dd5d85643fdb9384231184c --- /dev/null +++ b/configs/model/task/depth_completion.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 1 +# Probability of Geometric Inputs with Depths +depth_prob: 1 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 1 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 1 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0.9 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 0 diff --git a/configs/model/task/images_only.yaml b/configs/model/task/images_only.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a964709223cbc31147f9424c6b4d0ae790c5aeaa --- /dev/null +++ b/configs/model/task/images_only.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 0 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 1 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 0 +# Probability of Geometric Inputs with Depths +depth_prob: 0 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 0 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 0 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 0 diff --git a/configs/model/task/mvs.yaml b/configs/model/task/mvs.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55a9c6c1111ddc5f29b8f2d602ba6262de93a1c7 --- /dev/null +++ b/configs/model/task/mvs.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall 
Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 1 +# Probability of Geometric Inputs with Depths +depth_prob: 0 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 1 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 0 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 0 diff --git a/configs/model/task/mvs_non_metric.yaml b/configs/model/task/mvs_non_metric.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3e82b6170d9f72d0aaa0f3088fb215176ece9919 --- /dev/null +++ b/configs/model/task/mvs_non_metric.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 1 +# Probability of Geometric Inputs with Depths +depth_prob: 0 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 1 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 0 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 1 diff --git a/configs/model/task/mvs_training.yaml b/configs/model/task/mvs_training.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2d6b151eba1a4b713087a07e5a18ff1cda4aee9 --- /dev/null +++ b/configs/model/task/mvs_training.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 1 +# Probability of Geometric Inputs with Depths +depth_prob: 0 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 1 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 0 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0 +# Probability for skipping input of the metric scale quantities for the input metric high quality gt depth +# If 0, the metric scale 
quantities will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0 +# Probability for skipping input of the metric scale quantities for the input metric pose +# If 0, the metric scale quantities will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 0.05 diff --git a/configs/model/task/non_metric_poses_metric_depth.yaml b/configs/model/task/non_metric_poses_metric_depth.yaml new file mode 100644 index 0000000000000000000000000000000000000000..118a92a0109d96aff58f54d5a9d5a4ce9ae4f820 --- /dev/null +++ b/configs/model/task/non_metric_poses_metric_depth.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 1 +# Probability of Geometric Inputs with Depths +depth_prob: 1 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 1 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 0 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 1 diff --git a/configs/model/task/non_metric_poses_metric_depth_sparse.yaml b/configs/model/task/non_metric_poses_metric_depth_sparse.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8dd18678da962f48547359aebc1de46192cefdb7 --- /dev/null +++ b/configs/model/task/non_metric_poses_metric_depth_sparse.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 1 +# Probability of Geometric Inputs with Depths +depth_prob: 1 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 1 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 1 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0.9 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 1 diff --git a/configs/model/task/non_metric_poses_non_metric_depth.yaml b/configs/model/task/non_metric_poses_non_metric_depth.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b40c966ed86274d75d6386bfb73a7f50459ce80e --- /dev/null +++ 
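The task configs above and below are essentially sampling tables for conditioning dropout during training. A hedged sketch of how such a table could drive per-sample input selection (the control flow and names are assumptions, not the repo's sampler):

```python
import random

def sample_geometric_inputs(task, rng=random):
    """Decide which geometric inputs accompany the images for one sample.
    `task` mirrors the YAML fields above; the exact precedence is assumed."""
    if rng.random() >= task["overall_prob"] or rng.random() < task["dropout_prob"]:
        return {}  # images only
    inputs = {
        "ray_dirs": rng.random() < task["ray_dirs_prob"],
        "depth": rng.random() < task["depth_prob"],
        "cam_pose": rng.random() < task["cam_prob"],
    }
    if inputs["depth"] and rng.random() < task["sparse_depth_prob"]:
        # keep only (1 - sparsification_removal_percent) of the valid depth pixels
        inputs["depth_keep_fraction"] = 1.0 - task["sparsification_removal_percent"]
    return inputs

# e.g. images_only.yaml sets overall_prob to 0, so nothing is ever provided:
task = dict(overall_prob=0, dropout_prob=1, ray_dirs_prob=0, depth_prob=0,
            cam_prob=0, sparse_depth_prob=0, sparsification_removal_percent=0)
print(sample_geometric_inputs(task))  # {}
```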
b/configs/model/task/non_metric_poses_non_metric_depth.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 1 +# Probability of Geometric Inputs with Depths +depth_prob: 1 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 1 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 0 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 1 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 1 diff --git a/configs/model/task/non_metric_poses_non_metric_depth_sparse.yaml b/configs/model/task/non_metric_poses_non_metric_depth_sparse.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07368b82120063ff57e567da7f4f2d51ba7facae --- /dev/null +++ b/configs/model/task/non_metric_poses_non_metric_depth_sparse.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 1 +# Probability of Geometric Inputs with Depths +depth_prob: 1 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 1 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 1 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0.9 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 1 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 1 diff --git a/configs/model/task/pass_through.yaml b/configs/model/task/pass_through.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd117090ef8144a218390d3e53adbffaa481c0eb --- /dev/null +++ b/configs/model/task/pass_through.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 1 +# Probability of Geometric Inputs with Depths +depth_prob: 1 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 1 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 0 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater 
than 0 (Range: [0, 1]) +sparsification_removal_percent: 0 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 0 diff --git a/configs/model/task/posed_sfm.yaml b/configs/model/task/posed_sfm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2de61efcd26b6210b297c4197fcff625021c7087 --- /dev/null +++ b/configs/model/task/posed_sfm.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 0 +# Probability of Geometric Inputs with Depths +depth_prob: 0 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 1 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 0 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 0 diff --git a/configs/model/task/posed_sfm_non_metric.yaml b/configs/model/task/posed_sfm_non_metric.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05e96622612d5f16d95cd603d12f4ce3f12ce373 --- /dev/null +++ b/configs/model/task/posed_sfm_non_metric.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 0 +# Probability of Geometric Inputs with Depths +depth_prob: 0 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 1 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 0 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 1 diff --git a/configs/model/task/registration.yaml b/configs/model/task/registration.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..a34588baa5910781a1af715394886390e916b790 --- /dev/null +++ b/configs/model/task/registration.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 1 +# Probability of Geometric Inputs with Depths +depth_prob: 1 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 0 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 0 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 0 diff --git a/configs/model/task/registration_sparse.yaml b/configs/model/task/registration_sparse.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c2fd0da53475885bd2df1b69071c99a2360c96da --- /dev/null +++ b/configs/model/task/registration_sparse.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 1 +# Probability of Geometric Inputs with Depths +depth_prob: 1 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 0 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 1 +# Percentage of the valid depth to remove if the probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0.9 +# Probability for skipping input of the metric scale norm factor for the input metric high quality gt depth +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0 +# Probability for skipping input of the metric scale norm factor for the input metric pose +# If 0, the metric scale norm factor will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 0 diff --git a/configs/model/task/registration_training.yaml b/configs/model/task/registration_training.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f30435d26f93bc1dc4f36ac87c42e44f6e71f39e --- /dev/null +++ b/configs/model/task/registration_training.yaml @@ -0,0 +1,23 @@ +defaults: + - default + +# Overall Probability of Geometric Inputs +overall_prob: 1 +# Dropout Probability of Geometric Inputs (for each sample across batch size and number of views) +dropout_prob: 0 +# Probability of Geometric Inputs with Ray Directions +ray_dirs_prob: 1 +# Probability of Geometric Inputs with Depths +depth_prob: 1 +# Probability of Geometric Inputs with Camera Poses +cam_prob: 0 +# Probability of sparsely sampling the high quality gt depth +sparse_depth_prob: 0.5 +# Percentage of the valid depth to remove if the 
probability of using sparse depth is greater than 0 (Range: [0, 1]) +sparsification_removal_percent: 0.9 +# Probability for skipping input of the metric scale quantities for the input metric high quality gt depth +# If 0, the metric scale quantities will be provided as input to the model for all the metric scale conditionings +depth_scale_norm_all_prob: 0.05 +# Probability for skipping input of the metric scale quantities for the input metric pose +# If 0, the metric scale quantities will be provided as input to the model for all the metric scale conditionings +pose_scale_norm_all_prob: 0 diff --git a/configs/model/vggt.yaml b/configs/model/vggt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe637acc2f7d37b19480e8e63fe197b2567fe3f0 --- /dev/null +++ b/configs/model/vggt.yaml @@ -0,0 +1,17 @@ +# String for model factory +model_str: "vggt" +# Model config +model_config: + name: "vggt" + # Load pre-trained weights + load_pretrained_weights: true + # Load custom checkpoint + load_custom_ckpt: false + # Custom checkpoint path + custom_ckpt_path: null +# Image Normalization Type +data_norm_type: "identity" +# VGGT checkpoint is already loaded in the inference wrapper +pretrained: null +# Torch hub force reload +torch_hub_force_reload: false diff --git a/configs/model/vggt_commercial.yaml b/configs/model/vggt_commercial.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3947977319fb5c16d275c71434f76f82fd7f69be --- /dev/null +++ b/configs/model/vggt_commercial.yaml @@ -0,0 +1,17 @@ +# String for model factory +model_str: "vggt" +# Model config +model_config: + name: "vggt" + # Load pre-trained weights + load_pretrained_weights: true + # Load custom checkpoint + load_custom_ckpt: true + # Custom checkpoint path + custom_ckpt_path: "${root_pretrained_checkpoints_dir}/vggt_1B_commercial.pt" +# Image Normalization Type +data_norm_type: "identity" +# VGGT checkpoint is already loaded in the inference wrapper +pretrained: null +# Torch hub force reload +torch_hub_force_reload: false diff --git a/configs/model/vggt_non_pretrained.yaml b/configs/model/vggt_non_pretrained.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7c17f88844eed48f5c009af417ecaed5b243c624 --- /dev/null +++ b/configs/model/vggt_non_pretrained.yaml @@ -0,0 +1,17 @@ +# String for model factory +model_str: "vggt" +# Model config +model_config: + name: "vggt" + # Load pre-trained weights + load_pretrained_weights: false + # Load custom checkpoint + load_custom_ckpt: false + # Custom checkpoint path + custom_ckpt_path: null +# Image Normalization Type +data_norm_type: "identity" +# VGGT checkpoint is already loaded in the inference wrapper +pretrained: null +# Torch hub force reload +torch_hub_force_reload: false diff --git a/configs/rmvd_benchmark.yaml b/configs/rmvd_benchmark.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3c07521c8b47846bb98ae8a40e340b416c445db3 --- /dev/null +++ b/configs/rmvd_benchmark.yaml @@ -0,0 +1,34 @@ +defaults: + - machine: aws + - model: default + - dataset: default + - _self_ + +# Path Settings +output_dir: ${hydra:run.dir} +external_benchmark_data_root_data_dir: ${machine.external_benchmark_data_root_data_dir} +mapanything_dataset_metadata_dir: ${machine.mapanything_dataset_metadata_dir} +root_pretrained_checkpoints_dir: ${machine.root_pretrained_checkpoints_dir} +root_experiments_dir: ${machine.root_experiments_dir} +root_uniception_pretrained_checkpoints_dir: 
${machine.root_uniception_pretrained_checkpoints_dir} + +### Benchmarking args +seed: 0 +# Disable CUDNN Benchmark (Disable for variable resolution & number of view training) +disable_cudnn_benchmark: true +# Batch size for inference (Metrics are computed per multi-view set and averaged, not per batch of multi-view sets) +batch_size: 10 +# Use mixed precision for inference +amp: 1 +# Floating point type to use for mixed precision +amp_dtype: "bf16" +# Choose from eth3d, kitti, scannet +eval_dataset: eth3d +# Choose from img, img+intrinsics, img+intrinsics+pose +evaluation_conditioning: img +# Choose from "median", "none" +evaluation_alignment: median +# Choose from "multi_view", "single_view" +evaluation_views: multi_view +# Resolution at which to run inference with the selected model. +evaluation_resolution: ${dataset.resolution_options.518_1_33_ar} diff --git a/configs/train.yaml b/configs/train.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b35f6b9e9d848422d90d92db4807b3cf102c1961 --- /dev/null +++ b/configs/train.yaml @@ -0,0 +1,15 @@ +defaults: + - machine: aws + - model: default + - dataset: default + - loss: default + - train_params: default + - distributed: default + - _self_ + +output_dir: ${hydra:run.dir} +root_data_dir: ${machine.root_data_dir} +mapanything_dataset_metadata_dir: ${machine.mapanything_dataset_metadata_dir} +root_pretrained_checkpoints_dir: ${machine.root_pretrained_checkpoints_dir} +root_experiments_dir: ${machine.root_experiments_dir} +root_uniception_pretrained_checkpoints_dir: ${machine.root_uniception_pretrained_checkpoints_dir} diff --git a/configs/train_params/default.yaml b/configs/train_params/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..28af2bbf8810776b9ea02f14fb6f9264c9ee986a --- /dev/null +++ b/configs/train_params/default.yaml @@ -0,0 +1,41 @@ +# Random Seed +seed: 0 +# Maximum number of images per GPU (changes based on available GPU memory) +max_num_of_imgs_per_gpu: 48 +# Accumulate gradient iterations (for increasing the effective batch size under memory constraints) +accum_iter: 1 +# Maximum number of epochs for the scheduler +epochs: 100 +## Default Optimizer parameters +# Learning rate (absolute lr) +lr: 0.0001 +# Lower lr bound for cyclic schedulers that hit 0 +min_lr: 1e-06 +# Epochs to warm up the LR +warmup_epochs: 10 +# Weight decay +weight_decay: 0.05 +# LR schedule type +schedule_type: "linear_warmup_half_cycle_cosine_decay" +# Warn if model params are not in the submodule_configs below +warn_not_in_submodule: False +# Optimizer parameters specific to submodules +submodule_configs: {} +# Use Automatic Mixed Precision for training +amp: 1 +# Floating point type to use for mixed precision training +amp_dtype: "bf16" +# Disable CUDNN Benchmark (Disable for variable resolution & number of view training) +disable_cudnn_benchmark: true +# Freeze the validation samples across all epochs +freeze_val_samples_across_all_epochs: true +# Test loss evaluation frequency +eval_freq: 1 +# Frequency (number of epochs) to save checkpoint in checkpoint-last.pth +save_freq: 1 +# Frequency (number of epochs) to save checkpoint in checkpoint-%d.pth +keep_freq: 10 +# Frequency (number of iterations) to print info while training (includes tensorboard logging) +print_freq: 20 +# Resume Training from last checkpoint +resume: True diff --git a/configs/train_params/finetune_with_lower_encoder_lr.yaml b/configs/train_params/finetune_with_lower_encoder_lr.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..54710c25f7ec51b9cc8063b6b82dc9ae08e1b913 --- /dev/null +++ b/configs/train_params/finetune_with_lower_encoder_lr.yaml @@ -0,0 +1,16 @@ +defaults: + - default + +# Use 20x lower lr for finetuning +lr: 5e-06 +min_lr: 5e-08 + +# Optimizer parameters specific to submodules +submodule_configs: + # Encoder + encoder: + lr: 2.5e-07 + min_lr: 2.5e-09 + warmup_epochs: ${train_params.warmup_epochs} + weight_decay: ${train_params.weight_decay} + schedule_type: ${train_params.schedule_type} diff --git a/configs/train_params/finetune_with_lower_encoder_lr_64g.yaml b/configs/train_params/finetune_with_lower_encoder_lr_64g.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8dc9cf2ceeea3dcc609bcaae5a495ba337c8fe4e --- /dev/null +++ b/configs/train_params/finetune_with_lower_encoder_lr_64g.yaml @@ -0,0 +1,16 @@ +defaults: + - default + +# Use 20x lower lr (relative to the 2e-04 base lr of the 64-GPU configs) for finetuning +lr: 1e-05 +min_lr: 1e-07 + +# Optimizer parameters specific to submodules +submodule_configs: + # Encoder + encoder: + lr: 5e-07 + min_lr: 5e-09 + warmup_epochs: ${train_params.warmup_epochs} + weight_decay: ${train_params.weight_decay} + schedule_type: ${train_params.schedule_type} diff --git a/configs/train_params/freeze_encoder.yaml b/configs/train_params/freeze_encoder.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a20d13d08e82ae18b6f34403c614e121109fe442 --- /dev/null +++ b/configs/train_params/freeze_encoder.yaml @@ -0,0 +1,8 @@ +defaults: + - default + +# Optimizer parameters specific to submodules +submodule_configs: + # Encoder + encoder: + lr: 0 diff --git a/configs/train_params/lower_encoder_lr.yaml b/configs/train_params/lower_encoder_lr.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4225312d1da1f4a9108e5cb97ea4a02b84b1c32e --- /dev/null +++ b/configs/train_params/lower_encoder_lr.yaml @@ -0,0 +1,12 @@ +defaults: + - default + +# Optimizer parameters specific to submodules +submodule_configs: + # Encoder + encoder: + lr: 5e-06 + min_lr: 5e-08 + warmup_epochs: ${train_params.warmup_epochs} + weight_decay: ${train_params.weight_decay} + schedule_type: ${train_params.schedule_type} diff --git a/configs/train_params/lower_encoder_lr_64g.yaml b/configs/train_params/lower_encoder_lr_64g.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d7e426d217f67887cc0fc3d5d8bb1ef6312c1e4b --- /dev/null +++ b/configs/train_params/lower_encoder_lr_64g.yaml @@ -0,0 +1,16 @@ +defaults: + - default + +# Use 2x higher lr for 8x higher effective batch size +lr: 2e-04 +min_lr: 2e-07 + +# Optimizer parameters specific to submodules +submodule_configs: + # Encoder + encoder: + lr: 1e-05 + min_lr: 1e-08 + warmup_epochs: ${train_params.warmup_epochs} + weight_decay: ${train_params.weight_decay} + schedule_type: ${train_params.schedule_type} diff --git a/configs/train_params/moge2_finetune.yaml b/configs/train_params/moge2_finetune.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f8cedc03f8bd95a1ddcef4823d3a3fc3bf91b1ab --- /dev/null +++ b/configs/train_params/moge2_finetune.yaml @@ -0,0 +1,6 @@ +defaults: + - default + +# Use lower lr for finetuning +lr: 1e-05 +min_lr: 1e-07 diff --git a/configs/train_params/pi3_finetune.yaml b/configs/train_params/pi3_finetune.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cd03510fc3b1ded194965d5230a6763ed440ef82 --- /dev/null +++ b/configs/train_params/pi3_finetune.yaml @@ -0,0 +1,16 @@ +defaults: + - default + +# Use lower lr for 
finetuning +lr: 1e-05 +min_lr: 1e-07 + +# Optimizer parameters specific to submodules +submodule_configs: + # DINOv2 + model.encoder: + lr: 5e-07 + min_lr: 5e-09 + warmup_epochs: ${train_params.warmup_epochs} + weight_decay: ${train_params.weight_decay} + schedule_type: ${train_params.schedule_type} diff --git a/configs/train_params/vggt_finetune.yaml b/configs/train_params/vggt_finetune.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1c0ae4ee46d62cbf0fb07188d4bfbe64c70e34a4 --- /dev/null +++ b/configs/train_params/vggt_finetune.yaml @@ -0,0 +1,16 @@ +defaults: + - default + +# Use lower lr for finetuning +lr: 1e-05 +min_lr: 1e-07 + +# Optimizer parameters specific to submodules +submodule_configs: + # DINOv2 + model.aggregator.patch_embed: + lr: 5e-07 + min_lr: 5e-09 + warmup_epochs: ${train_params.warmup_epochs} + weight_decay: ${train_params.weight_decay} + schedule_type: ${train_params.schedule_type}
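
The three registration task configs differ only in how aggressively they condition the model on geometric inputs: each field is an independent probability (always provide rays and depth; never poses; sparsify depth never, always, or half the time). A minimal sketch of how such per-view gating could be sampled; the function name, dict layout, and the per-view sampling granularity are assumptions for illustration, not the repo's actual code:

```python
import random

def sample_geometric_inputs(cfg, num_views):
    """Decide, per view, which geometric inputs accompany the image.

    `cfg` holds the probability fields from the task configs (overall_prob,
    dropout_prob, ray_dirs_prob, depth_prob, cam_prob, sparse_depth_prob).
    """
    views = []
    for _ in range(num_views):
        use_geometric = random.random() < cfg["overall_prob"]
        dropped = random.random() < cfg["dropout_prob"]
        if not use_geometric or dropped:
            # Image-only view: no geometric conditioning at all
            views.append(dict(ray_dirs=False, depth=False,
                              cam_pose=False, sparse_depth=False))
            continue
        depth = random.random() < cfg["depth_prob"]
        views.append(dict(
            ray_dirs=random.random() < cfg["ray_dirs_prob"],
            depth=depth,
            cam_pose=random.random() < cfg["cam_prob"],
            # Sparsification only applies when depth is provided at all
            sparse_depth=depth and random.random() < cfg["sparse_depth_prob"],
        ))
    return views

# Example with the registration_training.yaml values
cfg = {"overall_prob": 1, "dropout_prob": 0, "ray_dirs_prob": 1,
       "depth_prob": 1, "cam_prob": 0, "sparse_depth_prob": 0.5}
print(sample_geometric_inputs(cfg, num_views=4))
```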
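`sparsification_removal_percent` pairs with `sparse_depth_prob`: when sparse sampling triggers, a fixed fraction of the valid depth pixels is dropped (0.9 leaves roughly 10% of the GT depth). A sketch of that operation, assuming a dense depth map where zeros mark invalid pixels; the helper name is hypothetical:

```python
import numpy as np

def sparsify_depth(depth, removal_percent=0.9, rng=None):
    """Zero out `removal_percent` of the valid (> 0) pixels of a dense
    depth map, mimicking sparse high-quality GT depth input."""
    rng = np.random.default_rng() if rng is None else rng
    sparse = depth.copy()
    valid = np.flatnonzero(sparse > 0)
    drop = rng.choice(valid, size=int(len(valid) * removal_percent), replace=False)
    sparse.flat[drop] = 0.0
    return sparse

depth = np.random.default_rng(0).uniform(0.5, 5.0, size=(4, 6))
print((sparsify_depth(depth) > 0).mean())  # ~0.1 of the pixels remain valid
```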
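`rmvd_benchmark.yaml` and `train.yaml` are Hydra entry-point configs: the `defaults` list pulls in the `machine`, `model`, `dataset`, etc. groups, and any field can be overridden at launch time. A quick way to inspect what a composition resolves to, using Hydra's compose API; running this requires the full `configs/` tree from this diff on disk, and the overrides are just examples:

```python
from hydra import compose, initialize

# Compose configs/rmvd_benchmark.yaml the same way a CLI invocation
# like `... model=vggt eval_dataset=kitti` would.
with initialize(version_base=None, config_path="configs"):
    cfg = compose(
        config_name="rmvd_benchmark",
        overrides=["model=vggt", "eval_dataset=kitti",
                   "evaluation_conditioning=img+intrinsics"],
    )
print(cfg.eval_dataset, cfg.batch_size, cfg.model.model_str)
```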
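`train_params/default.yaml` names the schedule `linear_warmup_half_cycle_cosine_decay`: linear warmup from 0 to `lr` over `warmup_epochs`, then a half cosine cycle down to `min_lr` at `epochs`. A sketch of the usual form of that schedule; the repo's implementation may step per iteration rather than per epoch:

```python
import math

def lr_at_epoch(epoch, lr, min_lr, warmup_epochs, total_epochs):
    """Linear warmup to `lr`, then half-cycle cosine decay to `min_lr`."""
    if epoch < warmup_epochs:
        return lr * epoch / max(warmup_epochs, 1)
    progress = (epoch - warmup_epochs) / max(total_epochs - warmup_epochs, 1)
    return min_lr + (lr - min_lr) * 0.5 * (1.0 + math.cos(math.pi * progress))

# With the defaults (lr=1e-4, min_lr=1e-6, warmup_epochs=10, epochs=100)
for e in (0, 5, 10, 55, 100):
    print(e, f"{lr_at_epoch(e, 1e-4, 1e-6, 10, 100):.2e}")
```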
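The `submodule_configs` maps in the train_params variants (`freeze_encoder`, `lower_encoder_lr*`, `*_finetune`) key per-submodule optimizer settings by parameter-name prefix, e.g. `encoder` or `model.aggregator.patch_embed`. A sketch of how such a mapping could translate into PyTorch optimizer param groups; the prefix-matching rule is an assumption, not the repo's actual optimizer construction:

```python
import torch

def build_param_groups(model, base_lr, submodule_configs):
    """Split parameters into optimizer groups keyed by name prefix; params
    not matching any configured prefix fall back to the base lr."""
    matched, default_params = {}, []
    for name, param in model.named_parameters():
        prefix = next((p for p in submodule_configs if name.startswith(p)), None)
        if prefix is None:
            default_params.append(param)
        else:
            matched.setdefault(prefix, []).append(param)
    groups = [{"params": default_params, "lr": base_lr}]
    for prefix, params in matched.items():
        # lr: 0 (as in freeze_encoder.yaml) leaves the submodule effectively frozen
        groups.append({"params": params, "lr": submodule_configs[prefix]["lr"]})
    return groups

model = torch.nn.ModuleDict({"encoder": torch.nn.Linear(8, 8),
                             "head": torch.nn.Linear(8, 1)})
optimizer = torch.optim.AdamW(
    build_param_groups(model, base_lr=1e-4, submodule_configs={"encoder": {"lr": 0}}),
    weight_decay=0.05)
print([g["lr"] for g in optimizer.param_groups])  # [0.0001, 0]
```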