Spaces:

yaghi27
/

ImageToBEV-lightweight

Runtime error

App Files Files Community

yaghi27 committed on Aug 9

Commit

701f963

1 Parent(s): f037527

Upload petr_vovnet_gridmask_p4_800x320.py

Browse files

Files changed (1) hide show

model/PETR/petr_vovnet_gridmask_p4_800x320.py +737 -0

model/PETR/petr_vovnet_gridmask_p4_800x320.py ADDED Viewed

	@@ -0,0 +1,737 @@

+auto_scale_lr = dict(base_batch_size=32, enable=False)  # enable=False: LR auto-scaling is switched off
+backbone_norm_cfg = dict(requires_grad=True, type='LN')  # 'LN' appears nowhere else in this file; no other entry here uses it
+backend_args = None
+class_names = [  # the 10 class labels; this same list is repeated wherever classes are needed below
+    'car',
+    'truck',
+    'construction_vehicle',
+    'bus',
+    'trailer',
+    'barrier',
+    'motorcycle',
+    'bicycle',
+    'pedestrian',
+    'traffic_cone',
+]
+custom_imports = dict(imports=[  # presumably imported so the PETR* types named below get registered — confirm
+    'projects.PETR.petr',
+])
+data_prefix = dict(img='', pts='samples/LIDAR_TOP', sweeps='sweeps/LIDAR_TOP')  # the dataloaders below carry their own data_prefix with per-camera entries added
+data_root = 'data/nuscenes/'  # same literal is repeated in db_sampler, the dataloaders and the evaluators
+dataset_type = 'NuScenesDataset'
+db_sampler = dict(  # ground-truth database sampler settings; no pipeline visible in this file contains a db_sampler entry
+    backend_args=None,
+    classes=[  # same 10 labels as class_names
+        'car',
+        'truck',
+        'construction_vehicle',
+        'bus',
+        'trailer',
+        'barrier',
+        'motorcycle',
+        'bicycle',
+        'pedestrian',
+        'traffic_cone',
+    ],
+    data_root='data/nuscenes/',
+    info_path='data/nuscenes/nuscenes_dbinfos_train.pkl',
+    points_loader=dict(
+        backend_args=None,
+        coord_type='LIDAR',
+        load_dim=5,
+        type='LoadPointsFromFile',
+        use_dim=[  # all five loaded dimensions (0-4) are kept
+            0,
+            1,
+            2,
+            3,
+            4,
+        ]),
+    prepare=dict(
+        filter_by_difficulty=[
+            -1,
+        ],
+        filter_by_min_points=dict(  # every class uses the same threshold of 5
+            barrier=5,
+            bicycle=5,
+            bus=5,
+            car=5,
+            construction_vehicle=5,
+            motorcycle=5,
+            pedestrian=5,
+            traffic_cone=5,
+            trailer=5,
+            truck=5)),
+    rate=1.0,
+    sample_groups=dict(  # per-class values; construction_vehicle has the largest (7)
+        barrier=2,
+        bicycle=6,
+        bus=4,
+        car=2,
+        construction_vehicle=7,
+        motorcycle=6,
+        pedestrian=2,
+        traffic_cone=2,
+        trailer=6,
+        truck=3))
+default_hooks = dict(  # hook configurations keyed by role
+    checkpoint=dict(interval=-1, type='CheckpointHook'),  # NOTE(review): interval=-1 — confirm whether periodic checkpoint saving is meant to be disabled
+    logger=dict(interval=50, type='LoggerHook'),  # interval matches log_processor.window_size (50)
+    param_scheduler=dict(type='ParamSchedulerHook'),
+    sampler_seed=dict(type='DistSamplerSeedHook'),
+    timer=dict(type='IterTimerHook'),
+    visualization=dict(type='Det3DVisualizationHook'))
+default_scope = 'mmdet3d'  # types elsewhere that belong to another scope are written with an explicit 'mmdet.' prefix
+env_cfg = dict(  # runtime environment settings
+    cudnn_benchmark=False,
+    dist_cfg=dict(backend='nccl'),
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
+eval_pipeline = [  # point-cloud-only pipeline; the dataloaders in this file define their own image pipelines and do not contain these steps
+    dict(
+        backend_args=None,
+        coord_type='LIDAR',
+        load_dim=5,
+        type='LoadPointsFromFile',
+        use_dim=5),
+    dict(
+        backend_args=None,
+        sweeps_num=10,
+        test_mode=True,
+        type='LoadPointsFromMultiSweeps'),
+    dict(keys=[  # only 'points' is packed
+        'points',
+    ], type='Pack3DDetInputs'),
+]
+find_unused_parameters = False
+ida_aug_conf = dict(  # image augmentation settings; the same values are repeated as data_aug_conf in every ResizeCropFlipImage step below
+    H=900,
+    W=1600,
+    bot_pct_lim=(
+        0.0,
+        0.0,
+    ),
+    final_dim=(  # 320 x 800, matching the "800x320" in the file name
+        320,
+        800,
+    ),
+    rand_flip=True,
+    resize_lim=(
+        0.47,
+        0.625,
+    ),
+    rot_lim=(  # both limits are 0.0
+        0.0,
+        0.0,
+    ))
+img_norm_cfg = dict(  # same mean/std values as model.data_preprocessor; to_rgb=False pairs with bgr_to_rgb=False there
+    mean=[
+        103.53,
+        116.28,
+        123.675,
+    ],
+    std=[
+        57.375,
+        57.12,
+        58.395,
+    ],
+    to_rgb=False)
+input_modality = dict(use_camera=True, use_lidar=True)  # NOTE(review): use_lidar=True although the dataloader pipelines in this file load only images — confirm intended
+launcher = 'none'
+load_from = None  # no checkpoint path is set here
+log_level = 'INFO'
+log_processor = dict(by_epoch=True, type='LogProcessor', window_size=50)
+lr = 0.0001  # same value as optim_wrapper.optimizer.lr
+metainfo = dict(classes=[  # same 10 labels as class_names
+    'car',
+    'truck',
+    'construction_vehicle',
+    'bus',
+    'trailer',
+    'barrier',
+    'motorcycle',
+    'bicycle',
+    'pedestrian',
+    'traffic_cone',
+])
+model = dict(  # PETR detector definition: data preprocessor, image backbone and neck, detection head, training config
+    data_preprocessor=dict(  # same mean/std values as img_norm_cfg
+        bgr_to_rgb=False,
+        mean=[
+            103.53,
+            116.28,
+            123.675,
+        ],
+        pad_size_divisor=32,
+        std=[
+            57.375,
+            57.12,
+            58.395,
+        ],
+        type='Det3DDataPreprocessor'),
+    img_backbone=dict(  # NOTE(review): the file name mentions vovnet, but the backbone configured here is mmdet.RegNet — confirm
+        arch='regnetx_4.0gf',
+        init_cfg=dict(
+            checkpoint='open-mmlab://regnetx_4.0gf', type='Pretrained'),
+        out_indices=(  # two backbone outputs are requested
+            2,
+            3,
+        ),
+        type='mmdet.RegNet'),
+    img_neck=dict(
+        in_channels=[  # two inputs, matching the two out_indices of img_backbone
+            560,
+            1360,
+        ], num_outs=2, out_channels=256, type='CPFPN'),
+    pts_bbox_head=dict(  # detection head (type='PETRHead')
+        LID=True,
+        bbox_coder=dict(
+            max_num=300,
+            num_classes=10,
+            pc_range=[  # same values as the top-level point_cloud_range
+                -51.2,
+                -51.2,
+                -5.0,
+                51.2,
+                51.2,
+                3.0,
+            ],
+            post_center_range=[  # same values as position_range below
+                -61.2,
+                -61.2,
+                -10.0,
+                61.2,
+                61.2,
+                10.0,
+            ],
+            type='NMSFreeCoder',
+            voxel_size=[  # same values as the top-level voxel_size
+                0.2,
+                0.2,
+                8,
+            ]),
+        in_channels=256,  # equals img_neck.out_channels
+        loss_bbox=dict(loss_weight=0.25, type='mmdet.L1Loss'),  # weight matches assigner reg_cost (0.25)
+        loss_cls=dict(  # weight matches assigner cls_cost (2.0)
+            alpha=0.25,
+            gamma=2.0,
+            loss_weight=2.0,
+            type='mmdet.FocalLoss',
+            use_sigmoid=True),
+        loss_iou=dict(loss_weight=0.0, type='mmdet.GIoULoss'),  # zero weight, as is the assigner iou_cost
+        normedlinear=False,
+        num_classes=10,  # length of class_names
+        num_query=900,
+        position_range=[
+            -61.2,
+            -61.2,
+            -10.0,
+            61.2,
+            61.2,
+            10.0,
+        ],
+        positional_encoding=dict(
+            normalize=True, num_feats=128, type='SinePositionalEncoding3D'),
+        transformer=dict(
+            decoder=dict(
+                num_layers=6,
+                return_intermediate=True,
+                transformerlayers=dict(
+                    attn_cfgs=[  # two attention configs with identical hyper-parameters; only the type differs
+                        dict(
+                            attn_drop=0.1,
+                            dropout_layer=dict(drop_prob=0.1, type='Dropout'),
+                            embed_dims=256,
+                            num_heads=8,
+                            type='MultiheadAttention'),
+                        dict(
+                            attn_drop=0.1,
+                            dropout_layer=dict(drop_prob=0.1, type='Dropout'),
+                            embed_dims=256,
+                            num_heads=8,
+                            type='PETRMultiheadAttention'),
+                    ],
+                    feedforward_channels=2048,
+                    ffn_dropout=0.1,
+                    operation_order=(  # presumably the two attn_cfgs map to 'self_attn' and 'cross_attn' in order — confirm
+                        'self_attn',
+                        'norm',
+                        'cross_attn',
+                        'norm',
+                        'ffn',
+                        'norm',
+                    ),
+                    type='PETRTransformerDecoderLayer'),
+                type='PETRTransformerDecoder'),
+            type='PETRTransformer'),
+        type='PETRHead',
+        with_multiview=True,
+        with_position=True),
+    train_cfg=dict(
+        pts=dict(
+            assigner=dict(
+                cls_cost=dict(type='FocalLossCost', weight=2.0),
+                iou_cost=dict(type='IoUCost', weight=0.0),
+                pc_range=[
+                    -51.2,
+                    -51.2,
+                    -5.0,
+                    51.2,
+                    51.2,
+                    3.0,
+                ],
+                reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
+                type='HungarianAssigner3D'),
+            grid_size=[  # consistent with point_cloud_range / voxel_size: 102.4 / 0.2 = 512 in x and y, 8 / 8 = 1 in z
+                512,
+                512,
+                1,
+            ],
+            out_size_factor=4,
+            point_cloud_range=[
+                -51.2,
+                -51.2,
+                -5.0,
+                51.2,
+                51.2,
+                3.0,
+            ],
+            voxel_size=[
+                0.2,
+                0.2,
+                8,
+            ])),
+    type='PETR',
+    use_grid_mask=True)
+num_epochs = 30  # same value as train_cfg.max_epochs and the CosineAnnealingLR T_max below
+optim_wrapper = dict(  # optimizer settings: AdamW with gradient clipping
+    clip_grad=dict(max_norm=35, norm_type=2),
+    optimizer=dict(lr=0.0001, type='AdamW', weight_decay=0.01),  # lr equals the top-level lr
+    paramwise_cfg=dict(custom_keys=dict(img_backbone=dict(lr_mult=0.1))),  # img_backbone gets an LR multiplier of 0.1
+    type='OptimWrapper')
+param_scheduler = [  # two schedulers: an iteration-based LinearLR and an epoch-based CosineAnnealingLR
+    dict(  # LinearLR spanning iterations 0 to 1500 (by_epoch=False) with start_factor 1/3
+        begin=0,
+        by_epoch=False,
+        end=1500,
+        start_factor=0.3333333333333333,
+        type='LinearLR'),
+    dict(T_max=30, by_epoch=True, type='CosineAnnealingLR'),  # T_max equals num_epochs
+]
+point_cloud_range = [  # x/y span -51.2..51.2, z spans -5.0..3.0; repeated in the model and ObjectRangeFilter entries
+    -51.2,
+    -51.2,
+    -5.0,
+    51.2,
+    51.2,
+    3.0,
+]
+randomness = dict(deterministic=False, diff_rank_seed=False, seed=1)  # fixed seed of 1; deterministic mode is off
+resume = False
+test_cfg = dict()  # empty
+test_dataloader = dict(  # test loader over the validation info file; its contents are identical to val_dataloader
+    batch_size=1,
+    dataset=dict(
+        ann_file='nuscenes_infos_val.pkl',  # bare file name; the evaluator below spells out the same file with the data_root prefix
+        backend_args=None,
+        box_type_3d='LiDAR',
+        data_prefix=dict(  # one sub-directory per camera, plus the img/pts/sweeps entries from the top-level data_prefix
+            CAM_BACK='samples/CAM_BACK',
+            CAM_BACK_LEFT='samples/CAM_BACK_LEFT',
+            CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
+            CAM_FRONT='samples/CAM_FRONT',
+            CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
+            CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
+            img='',
+            pts='samples/LIDAR_TOP',
+            sweeps='sweeps/LIDAR_TOP'),
+        data_root='data/nuscenes/',
+        metainfo=dict(classes=[
+            'car',
+            'truck',
+            'construction_vehicle',
+            'bus',
+            'trailer',
+            'barrier',
+            'motorcycle',
+            'bicycle',
+            'pedestrian',
+            'traffic_cone',
+        ]),
+        modality=dict(use_camera=True, use_lidar=True),
+        pipeline=[  # same sequence of steps as the top-level test_pipeline
+            dict(
+                backend_args=None,
+                to_float32=True,
+                type='LoadMultiViewImageFromFiles'),
+            dict(
+                data_aug_conf=dict(  # same values as ida_aug_conf
+                    H=900,
+                    W=1600,
+                    bot_pct_lim=(
+                        0.0,
+                        0.0,
+                    ),
+                    final_dim=(
+                        320,
+                        800,
+                    ),
+                    rand_flip=True,  # NOTE(review): presumably ignored because training=False below — confirm against ResizeCropFlipImage
+                    resize_lim=(
+                        0.47,
+                        0.625,
+                    ),
+                    rot_lim=(
+                        0.0,
+                        0.0,
+                    )),
+                training=False,
+                type='ResizeCropFlipImage'),
+            dict(keys=[  # only 'img' is packed
+                'img',
+            ], type='Pack3DDetInputs'),
+        ],
+        test_mode=True,
+        type='NuScenesDataset',
+        use_valid_flag=True),
+    drop_last=False,
+    num_workers=1,
+    persistent_workers=True,
+    sampler=dict(shuffle=False, type='DefaultSampler'))
+test_evaluator = dict(  # identical to val_evaluator
+    ann_file='data/nuscenes/nuscenes_infos_val.pkl',  # data_root + the dataset's ann_file, written as one path
+    backend_args=None,
+    data_root='data/nuscenes/',
+    metric='bbox',
+    type='NuScenesMetric')
+test_pipeline = [  # load multi-view images, resize/crop with training=False, pack 'img'; repeated inline in test_dataloader and val_dataloader
+    dict(
+        backend_args=None, to_float32=True,
+        type='LoadMultiViewImageFromFiles'),
+    dict(
+        data_aug_conf=dict(  # same values as ida_aug_conf
+            H=900,
+            W=1600,
+            bot_pct_lim=(
+                0.0,
+                0.0,
+            ),
+            final_dim=(
+                320,
+                800,
+            ),
+            rand_flip=True,  # NOTE(review): presumably ignored because training=False below — confirm against ResizeCropFlipImage
+            resize_lim=(
+                0.47,
+                0.625,
+            ),
+            rot_lim=(
+                0.0,
+                0.0,
+            )),
+        training=False,
+        type='ResizeCropFlipImage'),
+    dict(keys=[
+        'img',
+    ], type='Pack3DDetInputs'),
+]
+train_cfg = dict(by_epoch=True, max_epochs=30, val_interval=3)  # epoch-based loop: 30 epochs, val_interval=3
+train_dataloader = dict(  # training loader; unlike the val/test loaders it shuffles and uses 4 workers
+    batch_size=1,
+    dataset=dict(
+        ann_file='nuscenes_infos_train.pkl',
+        backend_args=None,
+        box_type_3d='LiDAR',
+        data_prefix=dict(  # one sub-directory per camera, plus the img/pts/sweeps entries from the top-level data_prefix
+            CAM_BACK='samples/CAM_BACK',
+            CAM_BACK_LEFT='samples/CAM_BACK_LEFT',
+            CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
+            CAM_FRONT='samples/CAM_FRONT',
+            CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
+            CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
+            img='',
+            pts='samples/LIDAR_TOP',
+            sweeps='sweeps/LIDAR_TOP'),
+        data_root='data/nuscenes/',
+        metainfo=dict(classes=[
+            'car',
+            'truck',
+            'construction_vehicle',
+            'bus',
+            'trailer',
+            'barrier',
+            'motorcycle',
+            'bicycle',
+            'pedestrian',
+            'traffic_cone',
+        ]),
+        modality=dict(use_camera=True, use_lidar=True),
+        pipeline=[  # same sequence of steps as the top-level train_pipeline
+            dict(
+                backend_args=None,
+                to_float32=True,
+                type='LoadMultiViewImageFromFiles'),
+            dict(  # 3D boxes and labels are requested; attribute labels are not (with_attr_label=False)
+                type='LoadAnnotations3D',
+                with_attr_label=False,
+                with_bbox_3d=True,
+                with_label_3d=True),
+            dict(
+                point_cloud_range=[  # same values as the top-level point_cloud_range
+                    -51.2,
+                    -51.2,
+                    -5.0,
+                    51.2,
+                    51.2,
+                    3.0,
+                ],
+                type='ObjectRangeFilter'),
+            dict(
+                classes=[  # same 10 labels as class_names
+                    'car',
+                    'truck',
+                    'construction_vehicle',
+                    'bus',
+                    'trailer',
+                    'barrier',
+                    'motorcycle',
+                    'bicycle',
+                    'pedestrian',
+                    'traffic_cone',
+                ],
+                type='ObjectNameFilter'),
+            dict(
+                data_aug_conf=dict(  # same values as ida_aug_conf
+                    H=900,
+                    W=1600,
+                    bot_pct_lim=(
+                        0.0,
+                        0.0,
+                    ),
+                    final_dim=(
+                        320,
+                        800,
+                    ),
+                    rand_flip=True,
+                    resize_lim=(
+                        0.47,
+                        0.625,
+                    ),
+                    rot_lim=(
+                        0.0,
+                        0.0,
+                    )),
+                training=True,
+                type='ResizeCropFlipImage'),
+            dict(
+                reverse_angle=False,
+                rot_range=[  # symmetric range around zero
+                    -0.3925,
+                    0.3925,
+                ],
+                scale_ratio_range=[
+                    0.95,
+                    1.05,
+                ],
+                training=True,
+                translation_std=[  # all zeros
+                    0,
+                    0,
+                    0,
+                ],
+                type='GlobalRotScaleTransImage'),
+            dict(
+                keys=[  # NOTE(review): 'attr_labels' is listed although LoadAnnotations3D above sets with_attr_label=False — confirm the key is optional
+                    'img',
+                    'gt_bboxes',
+                    'gt_bboxes_labels',
+                    'attr_labels',
+                    'gt_bboxes_3d',
+                    'gt_labels_3d',
+                    'centers_2d',
+                    'depths',
+                ],
+                type='Pack3DDetInputs'),
+        ],
+        test_mode=False,
+        type='NuScenesDataset',
+        use_valid_flag=True),
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(shuffle=True, type='DefaultSampler'))
+train_pipeline = [  # training steps: load images and 3D annotations, filter objects, augment, pack; repeated inline in train_dataloader
+    dict(
+        backend_args=None, to_float32=True,
+        type='LoadMultiViewImageFromFiles'),
+    dict(  # 3D boxes and labels are requested; attribute labels are not (with_attr_label=False)
+        type='LoadAnnotations3D',
+        with_attr_label=False,
+        with_bbox_3d=True,
+        with_label_3d=True),
+    dict(
+        point_cloud_range=[  # same values as the top-level point_cloud_range
+            -51.2,
+            -51.2,
+            -5.0,
+            51.2,
+            51.2,
+            3.0,
+        ],
+        type='ObjectRangeFilter'),
+    dict(
+        classes=[  # same 10 labels as class_names
+            'car',
+            'truck',
+            'construction_vehicle',
+            'bus',
+            'trailer',
+            'barrier',
+            'motorcycle',
+            'bicycle',
+            'pedestrian',
+            'traffic_cone',
+        ],
+        type='ObjectNameFilter'),
+    dict(
+        data_aug_conf=dict(  # same values as ida_aug_conf
+            H=900,
+            W=1600,
+            bot_pct_lim=(
+                0.0,
+                0.0,
+            ),
+            final_dim=(
+                320,
+                800,
+            ),
+            rand_flip=True,
+            resize_lim=(
+                0.47,
+                0.625,
+            ),
+            rot_lim=(
+                0.0,
+                0.0,
+            )),
+        training=True,
+        type='ResizeCropFlipImage'),
+    dict(
+        reverse_angle=False,
+        rot_range=[  # symmetric range around zero
+            -0.3925,
+            0.3925,
+        ],
+        scale_ratio_range=[
+            0.95,
+            1.05,
+        ],
+        training=True,
+        translation_std=[  # all zeros
+            0,
+            0,
+            0,
+        ],
+        type='GlobalRotScaleTransImage'),
+    dict(
+        keys=[  # NOTE(review): 'attr_labels' is listed although LoadAnnotations3D above sets with_attr_label=False — confirm the key is optional
+            'img',
+            'gt_bboxes',
+            'gt_bboxes_labels',
+            'attr_labels',
+            'gt_bboxes_3d',
+            'gt_labels_3d',
+            'centers_2d',
+            'depths',
+        ],
+        type='Pack3DDetInputs'),
+]
+val_cfg = dict()  # empty
+val_dataloader = dict(  # validation loader; its contents are identical to test_dataloader
+    batch_size=1,
+    dataset=dict(
+        ann_file='nuscenes_infos_val.pkl',  # bare file name; the evaluator below spells out the same file with the data_root prefix
+        backend_args=None,
+        box_type_3d='LiDAR',
+        data_prefix=dict(  # one sub-directory per camera, plus the img/pts/sweeps entries from the top-level data_prefix
+            CAM_BACK='samples/CAM_BACK',
+            CAM_BACK_LEFT='samples/CAM_BACK_LEFT',
+            CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
+            CAM_FRONT='samples/CAM_FRONT',
+            CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
+            CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
+            img='',
+            pts='samples/LIDAR_TOP',
+            sweeps='sweeps/LIDAR_TOP'),
+        data_root='data/nuscenes/',
+        metainfo=dict(classes=[
+            'car',
+            'truck',
+            'construction_vehicle',
+            'bus',
+            'trailer',
+            'barrier',
+            'motorcycle',
+            'bicycle',
+            'pedestrian',
+            'traffic_cone',
+        ]),
+        modality=dict(use_camera=True, use_lidar=True),
+        pipeline=[  # same sequence of steps as the top-level test_pipeline
+            dict(
+                backend_args=None,
+                to_float32=True,
+                type='LoadMultiViewImageFromFiles'),
+            dict(
+                data_aug_conf=dict(  # same values as ida_aug_conf
+                    H=900,
+                    W=1600,
+                    bot_pct_lim=(
+                        0.0,
+                        0.0,
+                    ),
+                    final_dim=(
+                        320,
+                        800,
+                    ),
+                    rand_flip=True,  # NOTE(review): presumably ignored because training=False below — confirm against ResizeCropFlipImage
+                    resize_lim=(
+                        0.47,
+                        0.625,
+                    ),
+                    rot_lim=(
+                        0.0,
+                        0.0,
+                    )),
+                training=False,
+                type='ResizeCropFlipImage'),
+            dict(keys=[  # only 'img' is packed
+                'img',
+            ], type='Pack3DDetInputs'),
+        ],
+        test_mode=True,
+        type='NuScenesDataset',
+        use_valid_flag=True),
+    drop_last=False,
+    num_workers=1,
+    persistent_workers=True,
+    sampler=dict(shuffle=False, type='DefaultSampler'))
+val_evaluator = dict(  # identical to test_evaluator
+    ann_file='data/nuscenes/nuscenes_infos_val.pkl',  # data_root + the dataset's ann_file, written as one path
+    backend_args=None,
+    data_root='data/nuscenes/',
+    metric='bbox',
+    type='NuScenesMetric')
+vis_backends = [  # a single local backend; the same entry is repeated inside visualizer
+    dict(type='LocalVisBackend'),
+]
+visualizer = dict(
+    name='visualizer',
+    type='Det3DLocalVisualizer',
+    vis_backends=[
+        dict(type='LocalVisBackend'),
+    ])
+voxel_size = [  # 0.2 in x and y; 8 in z equals the full z extent of point_cloud_range (-5.0..3.0)
+    0.2,
+    0.2,
+    8,
+]
+work_dir = 'work_dirs/detr3d_nuscenes'  # NOTE(review): directory name says detr3d while model.type is 'PETR' — confirm this output path is intended