run_id: 0903_libero_goal_augsteps_0_wo_flash_attention_wo_augsteps_two_view_action_chunk_16_pretrained_vlm
run_root_dir: ./playground/Checkpoints
seed: 42
trackers:
  - jsonl
  - wandb
wandb_entity: michaelyu-1101-fudanuniversity
wandb_project: Internvla
is_debug: false

framework:
  framework_py: InternVLA-M1
  qwenvl:
    base_vlm: Qwen/Qwen2.5-VL-3B-Instruct
    attn_implementation: flash_attention_2
    vl_hidden_dim: 2048
  dino:
    dino_backbone: dinov2_vitl14
  layer_qformer:
    qformer_end_layer: 37
    qformer_start_layer: 36
    num_query_tokens: 64
    input_dim: 2048
    ouptput_dim: 768
    grad_scale: 0.5
  action_model:
    action_model_type: DiT-B
    action_hidden_dim: 768
    action_dim: 7
    input_dim: 2048
    ouptput_dim: 768
    use_ema: false
    future_action_window_size: 7
    past_action_window_size: 0
    repeated_diffusion_steps: 8
    reduce_in_full_precision: true

datasets:
  vlm_data:
    dataformat: llava_json
    dataset_use: asv2_conversation_en,asv2_detailed_description_en,asv2_region_captioning_en,coco_internvl_longcap_en,coco_karpathy_train_567_en,coco_negative_gpt4o_en,coco_poetry_zh,coco_rem_en_zh,cocorem_exist_yorn_en,cocotextv2_en,cocotextv2_gpt4o_en,okvqa_en,refcoco_grounding_aug_en,refcoco_grounding_en,tallyqa_coco_en,toloka_grounding_aug_en,vqav2_en,vsr_en
    eval_dataset: aokvqa_cauldron_llava_format
    data_flatten: false
    base_interval: 2
    max_pixels: 50176
    min_pixels: 784
    fix_image_size:
      - 224
      - 224
    model_max_length: 1024
    model_type: qwen2.5vl
    per_device_batch_size: 4
  vla_data:
    dataset_py: lerobot_libero
    data_root_dir: playground/Datasets/LEROBOT_LIBERO_DATA
    data_mix: libero_goal
    action_type: delta_qpos
    CoT_prompt: Your task is {instruction}. To identify the key objects for your task. Locate their bounding boxes in [x1,y1,x2,y2] format.
    CoT_answer: bbox
    default_image_resolution:
      - 3
      - 224
      - 224
    per_device_batch_size: 16
    load_all_data_for_training: true
    obs:
      - image_0

trainer:
  epochs: 100
  max_train_steps: 100000
  num_warmup_steps: 5000
  save_interval: 10000
  eval_interval: 1000
  learning_rate:
    base: 2.5e-05
  lr_scheduler_type: cosine_with_min_lr
  scheduler_specific_kwargs:
    min_lr: 1.0e-06
  freeze_modules: ''
  loss_scale:
    vla: 1.0
    vlm: 0.1
  max_grad_norm: 1.0
  warmup_ratio: 0.1
  weight_decay: 0.0
  logging_frequency: 10
  gradient_clipping: 1.0
  gradient_accumulation_steps: 1
  optimizer:
    name: AdamW
    betas:
      - 0.9
      - 0.95
    eps: 1.0e-08
    weight_decay: 1.0e-08
  is_resume: false
  resume_epoch: null
  resume_step: null
  enable_gradient_checkpointing: true
  enable_mixed_precision_training: true

output_dir: ./playground/Checkpoints/0903_libero_goal_augsteps_0_wo_flash_attention_wo_augsteps_two_view_action_chunk_16_pretrained_vlm
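A minimal sketch of reading this run configuration in Python, assuming the YAML above is saved as config.yaml; the filename, the use of PyYAML, and the exact nesting of sections are assumptions for illustration, not part of the InternVLA-M1 codebase:

import yaml

# Load the run configuration (filename is hypothetical).
with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

# Read a few of the values set above, assuming the nesting shown.
print(cfg["framework"]["qwenvl"]["base_vlm"])    # Qwen/Qwen2.5-VL-3B-Instruct
print(cfg["trainer"]["learning_rate"]["base"])   # 2.5e-05
print(cfg["datasets"]["vla_data"]["data_mix"])   # libero_goal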