Spaces:
Runtime error
Runtime error
# ---------------------------------------------------------------------------
# Runtime defaults
# ---------------------------------------------------------------------------
# Registry scope used to resolve un-prefixed `type=` names.
default_scope = 'mmdet3d'

# NOTE: `default_hooks` is assigned a second time further down this file; in
# plain Python the later assignment replaces this dict rather than merging.
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=-1),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='Det3DVisualizationHook'),
)

# Process / backend environment settings.
env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(
        mp_start_method='fork',
        opencv_num_threads=0,
    ),
    dist_cfg=dict(backend='nccl'),
)

# Logging: same window size as the logger hook interval above.
log_processor = dict(
    type='LogProcessor',
    window_size=50,
    by_epoch=True,
)
log_level = 'INFO'

# No checkpoint is loaded and training does not resume by default.
load_from = None
resume = False

# Project module imported so that the DETR3D components get registered.
custom_imports = dict(imports=['projects.DETR3D.detr3d'])
# ---------------------------------------------------------------------------
# Shared constants referenced by the model and the data pipelines
# ---------------------------------------------------------------------------
# If the point cloud range is changed, every component that receives
# `point_cloud_range` / `pc_range` below must be changed accordingly.
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
voxel_size = [0.2, 0.2, 8]

# Image normalisation, unpacked into the model's data preprocessor.
# NOTE(review): mean-only normalisation (std = 1.0, no BGR->RGB swap) is the
# Caffe-style ResNet convention, while `model.img_backbone` in this file is a
# RegNet initialised from 'open-mmlab://regnetx_4.0gf'. Confirm that these
# statistics match what the pretrained RegNet weights expect before relying
# on them; they are left unchanged here so existing checkpoints stay valid.
img_norm_cfg = dict(
    mean=[103.530, 116.280, 123.675],
    std=[1.0, 1.0, 1.0],
    bgr_to_rgb=False,
)

# For nuScenes we usually do 10-class detection.
class_names = [
    'car',
    'truck',
    'construction_vehicle',
    'bus',
    'trailer',
    'barrier',
    'motorcycle',
    'bicycle',
    'pedestrian',
    'traffic_cone',
]

# Camera-only setup: every other modality flag is switched off.
input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=False,
)
# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# With this scope, type='DETR3D' will be processed as 'mmdet3d.DETR3D'.
default_scope = 'mmdet3d'

model = dict(
    type='DETR3D',
    use_grid_mask=True,
    # Image normalisation comes from `img_norm_cfg` defined earlier.
    data_preprocessor=dict(
        type='Det3DDataPreprocessor',
        **img_norm_cfg,
        pad_size_divisor=32,
    ),
    # Image backbone: all four stages are exported.
    img_backbone=dict(
        type='mmdet.RegNet',
        arch='regnetx_4.0gf',
        out_indices=(0, 1, 2, 3),
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://regnetx_4.0gf',
        ),
    ),
    # Neck: one `in_channels` entry per exported backbone stage.
    img_neck=dict(
        type='mmdet.FPN',
        in_channels=[80, 240, 560, 1360],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=4,
        relu_before_extra_convs=True,
    ),
    pts_bbox_head=dict(
        type='DETR3DHead',
        num_query=900,
        num_classes=10,
        in_channels=256,
        sync_cls_avg_factor=True,
        with_box_refine=True,
        as_two_stage=False,
        transformer=dict(
            type='Detr3DTransformer',
            decoder=dict(
                type='Detr3DTransformerDecoder',
                num_layers=6,
                return_intermediate=True,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    # Two attention configs, consumed in the order given by
                    # `operation_order`: self-attention, then cross-attention.
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',  # mmcv.
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.1,
                        ),
                        dict(
                            type='Detr3DCrossAtten',
                            pc_range=point_cloud_range,
                            num_points=4,
                            embed_dims=256,
                        ),
                    ],
                    feedforward_channels=512,
                    ffn_dropout=0.1,
                    operation_order=(
                        'self_attn',
                        'norm',
                        'cross_attn',
                        'norm',
                        'ffn',
                        'norm',
                    ),
                ),
            ),
        ),
        bbox_coder=dict(
            type='NMSFreeCoder',
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            pc_range=point_cloud_range,
            max_num=300,
            voxel_size=voxel_size,
            num_classes=10,
        ),
        positional_encoding=dict(
            type='mmdet.SinePositionalEncoding',
            num_feats=128,
            normalize=True,
            offset=-0.5,
        ),
        loss_cls=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=2.0,
        ),
        loss_bbox=dict(type='mmdet.L1Loss', loss_weight=0.25),
        loss_iou=dict(type='mmdet.GIoULoss', loss_weight=0.5),
    ),
    # Model training and testing settings.
    train_cfg=dict(
        pts=dict(
            grid_size=[512, 512, 1],
            voxel_size=voxel_size,
            point_cloud_range=point_cloud_range,
            out_size_factor=2,
            assigner=dict(
                type='HungarianAssigner3D',
                cls_cost=dict(type='mmdet.FocalLossCost', weight=2.0),
                reg_cost=dict(type='BBox3DL1Cost', weight=0.5),
                # Placeholder cost (weight 0.0); it is only present to stay
                # compatible with the DETR head.
                iou_cost=dict(type='mmdet.IoUCost', weight=0.0),
                pc_range=point_cloud_range,
            ),
        ),
    ),
)
# ---------------------------------------------------------------------------
# Dataset location and per-image transforms
# ---------------------------------------------------------------------------
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'

# Transforms shared by the train and test pipelines: a fixed-ratio resize
# (ratio_range is pinned to 1.0) to the (800, 450) scale.
test_transforms = [
    dict(
        type='RandomResize3D',
        scale=(800, 450),
        ratio_range=(1., 1.),
        keep_ratio=True,
    ),
]
# Alternative (disabled): larger, randomly scaled resize.
# test_transforms = [
#     dict(
#         type='RandomResize3D',
#         scale=(1400, 800),
#         ratio_range=(0.8, 1.2),
#         keep_ratio=True
#     ),
# ]

# Training adds photometric distortion in front of the shared transforms.
train_transforms = [dict(type='PhotoMetricDistortion3D'), *test_transforms]
# Alternative (disabled): additional flip and global rotation/scale.
# train_transforms = [
#     dict(type='PhotoMetricDistortion3D'),
#     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
#     dict(
#         type='GlobalRotScaleTrans',
#         rot_range=[-0.3925, 0.3925],
#         scale_ratio_range=[0.9, 1.1],
#         translation_std=[0, 0, 0]
#     ),
# ] + test_transforms

# File backend arguments forwarded to loaders, datasets and evaluators.
backend_args = None
# ---------------------------------------------------------------------------
# Data pipelines
# ---------------------------------------------------------------------------
# Training: load the 6 camera views and 3D annotations, apply the per-view
# transforms, filter objects by range and class name, then pack the inputs.
train_pipeline = [
    dict(
        type='LoadMultiViewImageFromFiles',
        to_float32=True,
        num_views=6,
        backend_args=backend_args,
    ),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        with_attr_label=False,
    ),
    dict(type='MultiViewWrapper', transforms=train_transforms),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectNameFilter', classes=class_names),
    dict(
        type='Pack3DDetInputs',
        keys=['img', 'gt_bboxes_3d', 'gt_labels_3d'],
    ),
]

# Testing: images only, with no annotation loading and no object filtering.
test_pipeline = [
    dict(
        type='LoadMultiViewImageFromFiles',
        to_float32=True,
        num_views=6,
        backend_args=backend_args,
    ),
    dict(type='MultiViewWrapper', transforms=test_transforms),
    dict(type='Pack3DDetInputs', keys=['img']),
]
# ---------------------------------------------------------------------------
# Dataloaders
# ---------------------------------------------------------------------------
metainfo = dict(classes=class_names)

# Sub-directory for each camera; the point-cloud prefix is left empty.
data_prefix = dict(
    pts='',
    CAM_FRONT='samples/CAM_FRONT',
    CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
    CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
    CAM_BACK='samples/CAM_BACK',
    CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
    CAM_BACK_LEFT='samples/CAM_BACK_LEFT',
)

train_dataloader = dict(
    batch_size=2,
    num_workers=8,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=True),
    # Alternative (disabled): class-balanced sampling.
    # sampler=dict(
    #     type='ClassBalancedDataset',
    #     dataset=dict(type='DefaultSampler', shuffle=True),
    #     oversample_thr=0.001),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        load_type='frame_based',
        metainfo=metainfo,
        modality=input_modality,
        test_mode=False,
        data_prefix=data_prefix,
        # We use box_type_3d='LiDAR' for the kitti and nuscenes datasets
        # and box_type_3d='Depth' for the sunrgbd and scannet datasets.
        box_type_3d='LiDAR',
        backend_args=backend_args,
    ),
)

val_dataloader = dict(
    batch_size=2,
    num_workers=8,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='nuscenes_infos_val.pkl',
        load_type='frame_based',
        pipeline=test_pipeline,
        metainfo=metainfo,
        modality=input_modality,
        test_mode=True,
        data_prefix=data_prefix,
        box_type_3d='LiDAR',
        backend_args=backend_args,
    ),
)

# Testing reuses the validation dataloader object as-is.
test_dataloader = val_dataloader
# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
# The annotation path is `data_root` joined with the validation info file.
val_evaluator = dict(
    type='NuScenesMetric',
    data_root=data_root,
    ann_file=f'{data_root}nuscenes_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args,
)

# Testing reuses the validation evaluator object as-is.
test_evaluator = val_evaluator
# ---------------------------------------------------------------------------
# Optimisation and schedule
# ---------------------------------------------------------------------------
# Single source for the training length; reused by the cosine schedule and
# the training loop below.
total_epochs = 50

# AdamW with gradient-norm clipping; parameters under `img_backbone` use a
# 0.1x learning-rate multiplier.
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(
        type='AdamW',
        lr=1e-4,
        weight_decay=0.01,
    ),
    paramwise_cfg=dict(
        custom_keys={
            'img_backbone': dict(lr_mult=0.1),
        },
    ),
    clip_grad=dict(max_norm=35, norm_type=2),
)

# Learning policy: an iteration-based linear schedule starting at 1/3 of the
# base value (steps 0-14000), plus an epoch-based cosine schedule spanning
# the whole run.
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.0 / 3,
        by_epoch=False,
        begin=0,
        end=14000,
    ),
    dict(
        type='CosineAnnealingLR',
        by_epoch=True,
        begin=0,
        end=total_epochs,
        T_max=total_epochs,
        eta_min_ratio=1e-3,
    ),
]

# Loops: validation is triggered with val_interval=2.
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=total_epochs,
    val_interval=2,
)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# Checkpoint policy for this experiment: CheckpointHook with interval=1,
# max_keep_ckpts=1 and save_last=True.
#
# `default_hooks` is already assigned near the top of this file. A second
# plain assignment rebinds the name instead of merging into that first dict
# (dict merging only applies to values inherited through `_base_`), so a
# checkpoint-only dict here would discard the logger interval and the
# visualization hook configured above. The other hook entries are therefore
# restated unchanged, and only `checkpoint` differs from the earlier dict.
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(
        type='CheckpointHook',
        interval=1,
        max_keep_ckpts=1,
        save_last=True,
    ),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='Det3DVisualizationHook'),
)
# Example checkpoint to start from (disabled):
# load_from = 'work_dirs/detr3d_nuscenes/epoch_30.pth'

# Environment note: setuptools 65 was downgraded to 58.
# On mmlab-node, setuptools 61 is used and no errors occur.

# ---------------------------------------------------------------------------
# Visualisation
# ---------------------------------------------------------------------------
vis_backends = [dict(type='TensorboardVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer',
    vis_backends=vis_backends,
    name='visualizer',
)