move all constants out of utils.py
Browse files- app.py +1 -1
- constants.py +137 -1
- utils.py +9 -135
    	
        app.py
    CHANGED
    
    | @@ -72,7 +72,7 @@ with gr.Blocks() as block: | |
| 72 | 
             
                                value=list(default_loader.SUPER_GROUPS.keys())[0]
         | 
| 73 | 
             
                            )
         | 
| 74 | 
             
                            model_group_selector = gr.Radio(
         | 
| 75 | 
            -
                                choices=list( | 
| 76 | 
             
                                label="Select a model group",
         | 
| 77 | 
             
                                value="All"
         | 
| 78 | 
             
                            )
         | 
|  | |
| 72 | 
             
                                value=list(default_loader.SUPER_GROUPS.keys())[0]
         | 
| 73 | 
             
                            )
         | 
| 74 | 
             
                            model_group_selector = gr.Radio(
         | 
| 75 | 
            +
                                choices=list(BASE_MODEL_GROUPS.keys()),
         | 
| 76 | 
             
                                label="Select a model group",
         | 
| 77 | 
             
                                value="All"
         | 
| 78 | 
             
                            )
         | 
    	
        constants.py
    CHANGED
    
    | @@ -76,4 +76,140 @@ SUBMIT_INTRODUCTION = """# Submit on MEGA-Bench Leaderboard | |
| 76 |  | 
| 77 | 
             
            Our evaluation pipeline is released on our [GitHub repository](https://github.com/TIGER-AI-Lab/MEGA-Bench). We will provide details on how to submit third-party results to this leaderboard.
         | 
| 78 |  | 
| 79 | 
            -
            """
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 76 |  | 
| 77 | 
             
            Our evaluation pipeline is released on our [GitHub repository](https://github.com/TIGER-AI-Lab/MEGA-Bench). We will provide details on how to submit third-party results to this leaderboard.
         | 
| 78 |  | 
| 79 | 
            +
            """
         | 
| 80 | 
            +
             | 
| 81 | 
            +
             | 
| 82 | 
            +
             | 
| 83 | 
            +
            ## Constants related to the leaderboard display
         | 
| 84 | 
            +
             | 
| 85 | 
            +
             | 
| 86 | 
            +
            # Keep all the constant mappings outside the class
         | 
| 87 | 
            +
            MODEL_NAME_MAP = {
         | 
| 88 | 
            +
                "Claude_3.5_new": "Claude-3.5-Sonnet (1022)",
         | 
| 89 | 
            +
                "GPT_4o": "GPT-4o (0513)",
         | 
| 90 | 
            +
                "Claude_3.5": "Claude-3.5-Sonnet (0620)",
         | 
| 91 | 
            +
                "Gemini_1.5_pro_002": "Gemini-1.5-Pro-002",
         | 
| 92 | 
            +
                "InternVL2_76B": "InternVL2-Llama3-76B",
         | 
| 93 | 
            +
                "Qwen2_VL_72B": "Qwen2-VL-72B",
         | 
| 94 | 
            +
                "llava_onevision_72B": "Llava-OneVision-72B",
         | 
| 95 | 
            +
                "NVLM": "NVLM-D-72B",
         | 
| 96 | 
            +
                "GPT_4o_mini": "GPT-4o mini",
         | 
| 97 | 
            +
                "Gemini_1.5_flash_002": "Gemini-1.5-Flash-002",
         | 
| 98 | 
            +
                "Pixtral_12B": "Pixtral 12B",
         | 
| 99 | 
            +
                "Aria": "Aria-MoE-25B",
         | 
| 100 | 
            +
                "Qwen2_VL_7B": "Qwen2-VL-7B",
         | 
| 101 | 
            +
                "InternVL2_8B": "InternVL2-8B",
         | 
| 102 | 
            +
                "llava_onevision_7B": "Llava-OneVision-7B",
         | 
| 103 | 
            +
                "Llama_3_2_11B": "Llama-3.2-11B",
         | 
| 104 | 
            +
                "Phi-3.5-vision": "Phi-3.5-Vision",
         | 
| 105 | 
            +
                "MiniCPM_v2.6": "MiniCPM-V2.6",
         | 
| 106 | 
            +
                "Idefics3": "Idefics3-8B-Llama3",
         | 
| 107 | 
            +
                "Aquila_VL_2B": "Aquila-VL-2B-llava-qwen",
         | 
| 108 | 
            +
                "POINTS_7B": "POINTS-Qwen2.5-7B",
         | 
| 109 | 
            +
                "Qwen2_VL_2B": "Qwen2-VL-2B",
         | 
| 110 | 
            +
                "InternVL2_2B": "InternVL2-2B",
         | 
| 111 | 
            +
                "Molmo_7B_D": "Molmo-7B-D-0924",
         | 
| 112 | 
            +
                "Molmo_72B": "Molmo-72B-0924",
         | 
| 113 | 
            +
                "Mammoth_VL": "Mammoth-VL-8B",
         | 
| 114 | 
            +
                "SmolVLM": "SmolVLM-1.7B",
         | 
| 115 | 
            +
                "POINTS_15_7B": "POINTS-1.5-8B",
         | 
| 116 | 
            +
                "InternVL2_5_78B": "InternVL2.5-78B",
         | 
| 117 | 
            +
                "InternVL2_5_2B": "InternVL2.5-2B",
         | 
| 118 | 
            +
            }
         | 
| 119 | 
            +
             | 
| 120 | 
            +
            DIMENSION_NAME_MAP = {
         | 
| 121 | 
            +
                "skills": "Skills",
         | 
| 122 | 
            +
                "input_format": "Input Format",
         | 
| 123 | 
            +
                "output_format": "Output Format",
         | 
| 124 | 
            +
                "input_num": "Visual Input Number",
         | 
| 125 | 
            +
                "app": "Application"
         | 
| 126 | 
            +
            }
         | 
| 127 | 
            +
             | 
| 128 | 
            +
            KEYWORD_NAME_MAP = {
         | 
| 129 | 
            +
                # Skills
         | 
| 130 | 
            +
                "Object Recognition and Classification": "Object Recognition",
         | 
| 131 | 
            +
                "Text Recognition (OCR)": "OCR",
         | 
| 132 | 
            +
                "Language Understanding and Generation": "Language",
         | 
| 133 | 
            +
                "Scene and Event Understanding": "Scene/Event",
         | 
| 134 | 
            +
                "Mathematical and Logical Reasoning": "Math/Logic",
         | 
| 135 | 
            +
                "Commonsense and Social Reasoning": "Commonsense",
         | 
| 136 | 
            +
                "Ethical and Safety Reasoning": "Ethics/Safety",
         | 
| 137 | 
            +
                "Domain-Specific Knowledge and Skills": "Domain-Specific",
         | 
| 138 | 
            +
                "Spatial and Temporal Reasoning": "Spatial/Temporal",
         | 
| 139 | 
            +
                "Planning and Decision Making": "Planning/Decision",
         | 
| 140 | 
            +
                # Input Format
         | 
| 141 | 
            +
                'User Interface Screenshots': "UI related", 
         | 
| 142 | 
            +
                'Text-Based Images and Documents': "Documents", 
         | 
| 143 | 
            +
                'Diagrams and Data Visualizations': "Infographics", 
         | 
| 144 | 
            +
                'Videos': "Videos", 
         | 
| 145 | 
            +
                'Artistic and Creative Content': "Arts/Creative", 
         | 
| 146 | 
            +
                'Photographs': "Photographs", 
         | 
| 147 | 
            +
                '3D Models and Aerial Imagery': "3D related",
         | 
| 148 | 
            +
                # Application
         | 
| 149 | 
            +
                'Information_Extraction': "Info Extraction", 
         | 
| 150 | 
            +
                'Planning' : "Planning", 
         | 
| 151 | 
            +
                'Coding': "Coding", 
         | 
| 152 | 
            +
                'Perception': "Perception", 
         | 
| 153 | 
            +
                'Metrics': "Metrics", 
         | 
| 154 | 
            +
                'Science': "Science", 
         | 
| 155 | 
            +
                'Knowledge': "Knowledge", 
         | 
| 156 | 
            +
                'Mathematics': "Math",
         | 
| 157 | 
            +
                # Output format
         | 
| 158 | 
            +
    'contextual_formatted_text': "Contextual", 
         | 
| 159 | 
            +
                'structured_output': "Structured", 
         | 
| 160 | 
            +
                'exact_text': "Exact", 
         | 
| 161 | 
            +
                'numerical_data': "Numerical", 
         | 
| 162 | 
            +
                'open_ended_output': "Open-ended", 
         | 
| 163 | 
            +
                'multiple_choice': "MC",
         | 
| 164 | 
            +
                "6-8 images": "6-8 imgs",
         | 
| 165 | 
            +
                "1-image": "1 img",
         | 
| 166 | 
            +
                "2-3 images": "2-3 imgs",
         | 
| 167 | 
            +
                "4-5 images": "4-5 imgs",
         | 
| 168 | 
            +
                "9-image or more": "9+ imgs",
         | 
| 169 | 
            +
                "video": "Video",
         | 
| 170 | 
            +
            }
         | 
| 171 | 
            +
             | 
| 172 | 
            +
            MODEL_URLS = {
         | 
| 173 | 
            +
                "Claude_3.5_new": "https://www.anthropic.com/news/3-5-models-and-computer-use",
         | 
| 174 | 
            +
                "GPT_4o": "https://platform.openai.com/docs/models/gpt-4o",
         | 
| 175 | 
            +
                "Claude_3.5": "https://www.anthropic.com/news/claude-3-5-sonnet", 
         | 
| 176 | 
            +
                "Gemini_1.5_pro_002": "https://ai.google.dev/gemini-api/docs/models/gemini",
         | 
| 177 | 
            +
                "Gemini_1.5_flash_002": "https://ai.google.dev/gemini-api/docs/models/gemini",
         | 
| 178 | 
            +
                "GPT_4o_mini": "https://platform.openai.com/docs/models#gpt-4o-mini",
         | 
| 179 | 
            +
                "Qwen2_VL_72B": "https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct",
         | 
| 180 | 
            +
                "InternVL2_76B": "https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B",
         | 
| 181 | 
            +
                "llava_onevision_72B": "https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov-chat",
         | 
| 182 | 
            +
                "NVLM": "https://huggingface.co/nvidia/NVLM-D-72B",
         | 
| 183 | 
            +
                "Molmo_72B": "https://huggingface.co/allenai/Molmo-72B-0924",
         | 
| 184 | 
            +
                "Qwen2_VL_7B": "https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct",
         | 
| 185 | 
            +
                "Pixtral_12B": "https://huggingface.co/mistralai/Pixtral-12B-2409",
         | 
| 186 | 
            +
                "Aria": "https://huggingface.co/rhymes-ai/Aria",
         | 
| 187 | 
            +
                "InternVL2_8B": "https://huggingface.co/OpenGVLab/InternVL2-8B",
         | 
| 188 | 
            +
                "Phi-3.5-vision": "https://huggingface.co/microsoft/Phi-3.5-vision-instruct",
         | 
| 189 | 
            +
                "MiniCPM_v2.6": "https://huggingface.co/openbmb/MiniCPM-V-2_6",
         | 
| 190 | 
            +
                "llava_onevision_7B": "https://huggingface.co/lmms-lab/llava-onevision-qwen2-7b-ov",
         | 
| 191 | 
            +
                "Llama_3_2_11B": "https://huggingface.co/meta-llama/Llama-3.2-11B-Vision",
         | 
| 192 | 
            +
                "Idefics3": "https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3",
         | 
| 193 | 
            +
                "Molmo_7B_D": "https://huggingface.co/allenai/Molmo-7B-D-0924",
         | 
| 194 | 
            +
                "Aquila_VL_2B": "https://huggingface.co/BAAI/Aquila-VL-2B-llava-qwen",
         | 
| 195 | 
            +
                "POINTS_7B": "https://huggingface.co/WePOINTS/POINTS-Qwen-2-5-7B-Chat",
         | 
| 196 | 
            +
                "Qwen2_VL_2B": "https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct",
         | 
| 197 | 
            +
                "InternVL2_2B": "https://huggingface.co/OpenGVLab/InternVL2-2B",
         | 
| 198 | 
            +
                "POINTS_7B": "https://huggingface.co/WePOINTS/POINTS-Qwen-2-5-7B-Chat",
         | 
| 199 | 
            +
                "POINTS_15_7B": "https://huggingface.co/WePOINTS/POINTS-1-5-Qwen-2-5-7B-Chat",
         | 
| 200 | 
            +
                "SmolVLM": "https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct",
         | 
| 201 | 
            +
                "Mammoth_VL": "https://huggingface.co/MAmmoTH-VL/MAmmoTH-VL-8B",
         | 
| 202 | 
            +
                "InternVL2_5_78B": "https://huggingface.co/OpenGVLab/InternVL2_5-78B",
         | 
| 203 | 
            +
                "InternVL2_5_2B": "https://huggingface.co/OpenGVLab/InternVL2_5-2B",
         | 
| 204 | 
            +
            }
         | 
| 205 | 
            +
             | 
| 206 | 
            +
            # Define the base MODEL_GROUPS structure
         | 
| 207 | 
            +
            BASE_MODEL_GROUPS = {
         | 
| 208 | 
            +
                "All": list(MODEL_NAME_MAP.keys()),
         | 
| 209 | 
            +
                "Flagship Models": ['Claude_3.5_new', 'GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002', 'Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B', 'NVLM', 'Molmo_72B', 'InternVL2_5_78B'],
         | 
| 210 | 
            +
                "Efficiency Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini', 'Qwen2_VL_7B', 'Pixtral_12B', 'Aria', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3', 'Molmo_7B_D', "Aquila_VL_2B", "POINTS_7B", "Qwen2_VL_2B", "InternVL2_2B", "InternVL2_5_2B"],
         | 
| 211 | 
            +
                "Proprietary Flagship models": ['Claude_3.5_new', 'GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002'],
         | 
| 212 | 
            +
                "Proprietary Efficiency Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini'],
         | 
| 213 | 
            +
                "Open-source Flagship Models": ['Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B', 'NVLM', "Molmo_72B", "InternVL2_5_78B"],
         | 
| 214 | 
            +
                "Open-source Efficiency Models": ['Qwen2_VL_7B', 'Pixtral_12B', 'Aria', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3', 'Molmo_7B_D', "Aquila_VL_2B", "POINTS_7B", "Qwen2_VL_2B", "InternVL2_2B", "InternVL2_5_2B"]
         | 
| 215 | 
            +
            }
         | 
    	
        utils.py
    CHANGED
    
    | @@ -2,139 +2,15 @@ import pandas as pd | |
| 2 | 
             
            import json
         | 
| 3 | 
             
            from typing import Dict, Any, Tuple
         | 
| 4 | 
             
            import os
         | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
                 | 
| 9 | 
            -
                 | 
| 10 | 
            -
                 | 
| 11 | 
            -
             | 
| 12 | 
            -
                "InternVL2_76B": "InternVL2-Llama3-76B",
         | 
| 13 | 
            -
                "Qwen2_VL_72B": "Qwen2-VL-72B",
         | 
| 14 | 
            -
                "llava_onevision_72B": "Llava-OneVision-72B",
         | 
| 15 | 
            -
                "NVLM": "NVLM-D-72B",
         | 
| 16 | 
            -
                "GPT_4o_mini": "GPT-4o mini",
         | 
| 17 | 
            -
                "Gemini_1.5_flash_002": "Gemini-1.5-Flash-002",
         | 
| 18 | 
            -
                "Pixtral_12B": "Pixtral 12B",
         | 
| 19 | 
            -
                "Aria": "Aria-MoE-25B",
         | 
| 20 | 
            -
                "Qwen2_VL_7B": "Qwen2-VL-7B",
         | 
| 21 | 
            -
                "InternVL2_8B": "InternVL2-8B",
         | 
| 22 | 
            -
                "llava_onevision_7B": "Llava-OneVision-7B",
         | 
| 23 | 
            -
                "Llama_3_2_11B": "Llama-3.2-11B",
         | 
| 24 | 
            -
                "Phi-3.5-vision": "Phi-3.5-Vision",
         | 
| 25 | 
            -
                "MiniCPM_v2.6": "MiniCPM-V2.6",
         | 
| 26 | 
            -
                "Idefics3": "Idefics3-8B-Llama3",
         | 
| 27 | 
            -
                "Aquila_VL_2B": "Aquila-VL-2B-llava-qwen",
         | 
| 28 | 
            -
                "POINTS_7B": "POINTS-Qwen2.5-7B",
         | 
| 29 | 
            -
                "Qwen2_VL_2B": "Qwen2-VL-2B",
         | 
| 30 | 
            -
                "InternVL2_2B": "InternVL2-2B",
         | 
| 31 | 
            -
                "Molmo_7B_D": "Molmo-7B-D-0924",
         | 
| 32 | 
            -
                "Molmo_72B": "Molmo-72B-0924",
         | 
| 33 | 
            -
                "Mammoth_VL": "Mammoth-VL-8B",
         | 
| 34 | 
            -
                "SmolVLM": "SmolVLM-1.7B",
         | 
| 35 | 
            -
                "POINTS_15_7B": "POINTS-1.5-8B",
         | 
| 36 | 
            -
                "InternVL2_5_78B": "InternVL2.5-78B",
         | 
| 37 | 
            -
                "InternVL2_5_2B": "InternVL2.5-2B",
         | 
| 38 | 
            -
            }
         | 
| 39 | 
            -
             | 
| 40 | 
            -
            DIMENSION_NAME_MAP = {
         | 
| 41 | 
            -
                "skills": "Skills",
         | 
| 42 | 
            -
                "input_format": "Input Format",
         | 
| 43 | 
            -
                "output_format": "Output Format",
         | 
| 44 | 
            -
                "input_num": "Visual Input Number",
         | 
| 45 | 
            -
                "app": "Application"
         | 
| 46 | 
            -
            }
         | 
| 47 | 
            -
             | 
| 48 | 
            -
            KEYWORD_NAME_MAP = {
         | 
| 49 | 
            -
                # Skills
         | 
| 50 | 
            -
                "Object Recognition and Classification": "Object Recognition",
         | 
| 51 | 
            -
                "Text Recognition (OCR)": "OCR",
         | 
| 52 | 
            -
                "Language Understanding and Generation": "Language",
         | 
| 53 | 
            -
                "Scene and Event Understanding": "Scene/Event",
         | 
| 54 | 
            -
                "Mathematical and Logical Reasoning": "Math/Logic",
         | 
| 55 | 
            -
                "Commonsense and Social Reasoning": "Commonsense",
         | 
| 56 | 
            -
                "Ethical and Safety Reasoning": "Ethics/Safety",
         | 
| 57 | 
            -
                "Domain-Specific Knowledge and Skills": "Domain-Specific",
         | 
| 58 | 
            -
                "Spatial and Temporal Reasoning": "Spatial/Temporal",
         | 
| 59 | 
            -
                "Planning and Decision Making": "Planning/Decision",
         | 
| 60 | 
            -
                # Input Format
         | 
| 61 | 
            -
                'User Interface Screenshots': "UI related", 
         | 
| 62 | 
            -
                'Text-Based Images and Documents': "Documents", 
         | 
| 63 | 
            -
                'Diagrams and Data Visualizations': "Infographics", 
         | 
| 64 | 
            -
                'Videos': "Videos", 
         | 
| 65 | 
            -
                'Artistic and Creative Content': "Arts/Creative", 
         | 
| 66 | 
            -
                'Photographs': "Photographs", 
         | 
| 67 | 
            -
                '3D Models and Aerial Imagery': "3D related",
         | 
| 68 | 
            -
                # Application
         | 
| 69 | 
            -
                'Information_Extraction': "Info Extraction", 
         | 
| 70 | 
            -
                'Planning' : "Planning", 
         | 
| 71 | 
            -
                'Coding': "Coding", 
         | 
| 72 | 
            -
                'Perception': "Perception", 
         | 
| 73 | 
            -
                'Metrics': "Metrics", 
         | 
| 74 | 
            -
                'Science': "Science", 
         | 
| 75 | 
            -
                'Knowledge': "Knowledge", 
         | 
| 76 | 
            -
                'Mathematics': "Math",
         | 
| 77 | 
            -
                # Output format
         | 
| 78 | 
            -
                'contextual_formatted_text': "Contexual", 
         | 
| 79 | 
            -
                'structured_output': "Structured", 
         | 
| 80 | 
            -
                'exact_text': "Exact", 
         | 
| 81 | 
            -
                'numerical_data': "Numerical", 
         | 
| 82 | 
            -
                'open_ended_output': "Open-ended", 
         | 
| 83 | 
            -
                'multiple_choice': "MC",
         | 
| 84 | 
            -
                "6-8 images": "6-8 imgs",
         | 
| 85 | 
            -
                "1-image": "1 img",
         | 
| 86 | 
            -
                "2-3 images": "2-3 imgs",
         | 
| 87 | 
            -
                "4-5 images": "4-5 imgs",
         | 
| 88 | 
            -
                "9-image or more": "9+ imgs",
         | 
| 89 | 
            -
                "video": "Video",
         | 
| 90 | 
            -
            }
         | 
| 91 | 
            -
             | 
| 92 | 
            -
            MODEL_URLS = {
         | 
| 93 | 
            -
                "Claude_3.5_new": "https://www.anthropic.com/news/3-5-models-and-computer-use",
         | 
| 94 | 
            -
                "GPT_4o": "https://platform.openai.com/docs/models/gpt-4o",
         | 
| 95 | 
            -
                "Claude_3.5": "https://www.anthropic.com/news/claude-3-5-sonnet", 
         | 
| 96 | 
            -
                "Gemini_1.5_pro_002": "https://ai.google.dev/gemini-api/docs/models/gemini",
         | 
| 97 | 
            -
                "Gemini_1.5_flash_002": "https://ai.google.dev/gemini-api/docs/models/gemini",
         | 
| 98 | 
            -
                "GPT_4o_mini": "https://platform.openai.com/docs/models#gpt-4o-mini",
         | 
| 99 | 
            -
                "Qwen2_VL_72B": "https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct",
         | 
| 100 | 
            -
                "InternVL2_76B": "https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B",
         | 
| 101 | 
            -
                "llava_onevision_72B": "https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov-chat",
         | 
| 102 | 
            -
                "NVLM": "https://huggingface.co/nvidia/NVLM-D-72B",
         | 
| 103 | 
            -
                "Molmo_72B": "https://huggingface.co/allenai/Molmo-72B-0924",
         | 
| 104 | 
            -
                "Qwen2_VL_7B": "https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct",
         | 
| 105 | 
            -
                "Pixtral_12B": "https://huggingface.co/mistralai/Pixtral-12B-2409",
         | 
| 106 | 
            -
                "Aria": "https://huggingface.co/rhymes-ai/Aria",
         | 
| 107 | 
            -
                "InternVL2_8B": "https://huggingface.co/OpenGVLab/InternVL2-8B",
         | 
| 108 | 
            -
                "Phi-3.5-vision": "https://huggingface.co/microsoft/Phi-3.5-vision-instruct",
         | 
| 109 | 
            -
                "MiniCPM_v2.6": "https://huggingface.co/openbmb/MiniCPM-V-2_6",
         | 
| 110 | 
            -
                "llava_onevision_7B": "https://huggingface.co/lmms-lab/llava-onevision-qwen2-7b-ov",
         | 
| 111 | 
            -
                "Llama_3_2_11B": "https://huggingface.co/meta-llama/Llama-3.2-11B-Vision",
         | 
| 112 | 
            -
                "Idefics3": "https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3",
         | 
| 113 | 
            -
                "Molmo_7B_D": "https://huggingface.co/allenai/Molmo-7B-D-0924",
         | 
| 114 | 
            -
                "Aquila_VL_2B": "https://huggingface.co/BAAI/Aquila-VL-2B-llava-qwen",
         | 
| 115 | 
            -
                "POINTS_7B": "https://huggingface.co/WePOINTS/POINTS-Qwen-2-5-7B-Chat",
         | 
| 116 | 
            -
                "Qwen2_VL_2B": "https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct",
         | 
| 117 | 
            -
                "InternVL2_2B": "https://huggingface.co/OpenGVLab/InternVL2-2B",
         | 
| 118 | 
            -
                "POINTS_7B": "https://huggingface.co/WePOINTS/POINTS-Qwen-2-5-7B-Chat",
         | 
| 119 | 
            -
                "POINTS_15_7B": "https://huggingface.co/WePOINTS/POINTS-1-5-Qwen-2-5-7B-Chat",
         | 
| 120 | 
            -
                "SmolVLM": "https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct",
         | 
| 121 | 
            -
                "Mammoth_VL": "https://huggingface.co/MAmmoTH-VL/MAmmoTH-VL-8B",
         | 
| 122 | 
            -
                "InternVL2_5_78B": "https://huggingface.co/OpenGVLab/InternVL2_5-78B",
         | 
| 123 | 
            -
                "InternVL2_5_2B": "https://huggingface.co/OpenGVLab/InternVL2_5-2B",
         | 
| 124 | 
            -
            }
         | 
| 125 |  | 
| 126 | 
             
            class BaseDataLoader:
         | 
| 127 | 
            -
                # Define the base MODEL_GROUPS structure
         | 
| 128 | 
            -
                BASE_MODEL_GROUPS = {
         | 
| 129 | 
            -
                    "All": list(MODEL_NAME_MAP.keys()),
         | 
| 130 | 
            -
                    "Flagship Models": ['Claude_3.5_new', 'GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002', 'Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B', 'NVLM', 'Molmo_72B', 'InternVL2_5_78B'],
         | 
| 131 | 
            -
                    "Efficiency Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini', 'Qwen2_VL_7B', 'Pixtral_12B', 'Aria', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3', 'Molmo_7B_D', "Aquila_VL_2B", "POINTS_7B", "Qwen2_VL_2B", "InternVL2_2B", "InternVL2_5_2B"],
         | 
| 132 | 
            -
                    "Proprietary Flagship models": ['Claude_3.5_new', 'GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002'],
         | 
| 133 | 
            -
                    "Proprietary Efficiency Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini'],
         | 
| 134 | 
            -
                    "Open-source Flagship Models": ['Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B', 'NVLM', "Molmo_72B", "InternVL2_5_78B"],
         | 
| 135 | 
            -
                    "Open-source Efficiency Models": ['Qwen2_VL_7B', 'Pixtral_12B', 'Aria', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3', 'Molmo_7B_D', "Aquila_VL_2B", "POINTS_7B", "Qwen2_VL_2B", "InternVL2_2B", "InternVL2_5_2B"]
         | 
| 136 | 
            -
                }
         | 
| 137 | 
            -
             | 
| 138 | 
             
                def __init__(self):
         | 
| 139 | 
             
                    self.MODEL_DATA = self._load_model_data()
         | 
| 140 | 
             
                    self.SUMMARY_DATA = self._load_summary_data()
         | 
| @@ -174,17 +50,15 @@ class BaseDataLoader: | |
| 174 | 
             
                    return {k: groups[k] for k in order if k in groups}
         | 
| 175 |  | 
| 176 | 
             
                def _initialize_model_groups(self) -> Dict[str, list]:
         | 
| 177 | 
            -
                    # Get the list of available models from the loaded data
         | 
| 178 | 
             
                    available_models = set(self.MODEL_DATA.keys())
         | 
| 179 |  | 
| 180 | 
            -
                    # Create filtered groups based on available models
         | 
| 181 | 
             
                    filtered_groups = {}
         | 
| 182 | 
            -
                    for group_name, models in  | 
| 183 | 
             
                        if group_name == "All":
         | 
| 184 | 
             
                            filtered_groups[group_name] = sorted(list(available_models))
         | 
| 185 | 
             
                        else:
         | 
| 186 | 
             
                            filtered_models = [model for model in models if model in available_models]
         | 
| 187 | 
            -
                            if filtered_models: | 
| 188 | 
             
                                filtered_groups[group_name] = filtered_models
         | 
| 189 |  | 
| 190 | 
             
                    return filtered_groups
         | 
|  | |
| 2 | 
             
            import json
         | 
| 3 | 
             
            from typing import Dict, Any, Tuple
         | 
| 4 | 
             
            import os
         | 
| 5 | 
            +
            from constants import (
         | 
| 6 | 
            +
                MODEL_NAME_MAP,
         | 
| 7 | 
            +
                DIMENSION_NAME_MAP,
         | 
| 8 | 
            +
                KEYWORD_NAME_MAP,
         | 
| 9 | 
            +
                MODEL_URLS,
         | 
| 10 | 
            +
                BASE_MODEL_GROUPS
         | 
| 11 | 
            +
            )
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 12 |  | 
| 13 | 
             
            class BaseDataLoader:
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 14 | 
             
                def __init__(self):
         | 
| 15 | 
             
                    self.MODEL_DATA = self._load_model_data()
         | 
| 16 | 
             
                    self.SUMMARY_DATA = self._load_summary_data()
         | 
|  | |
| 50 | 
             
                    return {k: groups[k] for k in order if k in groups}
         | 
| 51 |  | 
| 52 | 
             
                def _initialize_model_groups(self) -> Dict[str, list]:
         | 
|  | |
| 53 | 
             
                    available_models = set(self.MODEL_DATA.keys())
         | 
| 54 |  | 
|  | |
| 55 | 
             
                    filtered_groups = {}
         | 
| 56 | 
            +
                    for group_name, models in BASE_MODEL_GROUPS.items():
         | 
| 57 | 
             
                        if group_name == "All":
         | 
| 58 | 
             
                            filtered_groups[group_name] = sorted(list(available_models))
         | 
| 59 | 
             
                        else:
         | 
| 60 | 
             
                            filtered_models = [model for model in models if model in available_models]
         | 
| 61 | 
            +
                            if filtered_models:
         | 
| 62 | 
             
                                filtered_groups[group_name] = filtered_models
         | 
| 63 |  | 
| 64 | 
             
                    return filtered_groups
         | 
 
			
