move all constants out of utils.py
Browse files- app.py +1 -1
- constants.py +137 -1
- utils.py +9 -135
    	
        app.py
    CHANGED
    
    | @@ -72,7 +72,7 @@ with gr.Blocks() as block: | |
| 72 | 
             
                                value=list(default_loader.SUPER_GROUPS.keys())[0]
         | 
| 73 | 
             
                            )
         | 
| 74 | 
             
                            model_group_selector = gr.Radio(
         | 
| 75 | 
            -
                                choices=list( | 
| 76 | 
             
                                label="Select a model group",
         | 
| 77 | 
             
                                value="All"
         | 
| 78 | 
             
                            )
         | 
|  | |
| 72 | 
             
                                value=list(default_loader.SUPER_GROUPS.keys())[0]
         | 
| 73 | 
             
                            )
         | 
| 74 | 
             
                            model_group_selector = gr.Radio(
         | 
| 75 | 
            +
                                choices=list(BASE_MODEL_GROUPS.keys()),
         | 
| 76 | 
             
                                label="Select a model group",
         | 
| 77 | 
             
                                value="All"
         | 
| 78 | 
             
                            )
         | 
    	
        constants.py
    CHANGED
    
    | @@ -76,4 +76,140 @@ SUBMIT_INTRODUCTION = """# Submit on MEGA-Bench Leaderboard | |
| 76 |  | 
| 77 | 
             
            Our evaluation pipeline is released on our [GitHub repository](https://github.com/TIGER-AI-Lab/MEGA-Bench). We will provide details on how to submit third-party results to this leaderboard.
         | 
| 78 |  | 
| 79 | 
            -
            """
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 76 |  | 
| 77 | 
             
            Our evaluation pipeline is released on our [GitHub repository](https://github.com/TIGER-AI-Lab/MEGA-Bench). We will provide details on how to submit third-party results to this leaderboard.
         | 
| 78 |  | 
| 79 | 
            +
            """
         | 
| 80 | 
            +
             | 
| 81 | 
            +
             | 
| 82 | 
            +
             | 
| 83 | 
            +
            ## Constants related to the leaderboard display
         | 
| 84 | 
            +
             | 
| 85 | 
            +
             | 
| 86 | 
            +
            # Keep all the constant mappings outside the class
         | 
| 87 | 
            +
            MODEL_NAME_MAP = {
         | 
| 88 | 
            +
                "Claude_3.5_new": "Claude-3.5-Sonnet (1022)",
         | 
| 89 | 
            +
                "GPT_4o": "GPT-4o (0513)",
         | 
| 90 | 
            +
                "Claude_3.5": "Claude-3.5-Sonnet (0620)",
         | 
| 91 | 
            +
                "Gemini_1.5_pro_002": "Gemini-1.5-Pro-002",
         | 
| 92 | 
            +
                "InternVL2_76B": "InternVL2-Llama3-76B",
         | 
| 93 | 
            +
                "Qwen2_VL_72B": "Qwen2-VL-72B",
         | 
| 94 | 
            +
                "llava_onevision_72B": "Llava-OneVision-72B",
         | 
| 95 | 
            +
                "NVLM": "NVLM-D-72B",
         | 
| 96 | 
            +
                "GPT_4o_mini": "GPT-4o mini",
         | 
| 97 | 
            +
                "Gemini_1.5_flash_002": "Gemini-1.5-Flash-002",
         | 
| 98 | 
            +
                "Pixtral_12B": "Pixtral 12B",
         | 
| 99 | 
            +
                "Aria": "Aria-MoE-25B",
         | 
| 100 | 
            +
                "Qwen2_VL_7B": "Qwen2-VL-7B",
         | 
| 101 | 
            +
                "InternVL2_8B": "InternVL2-8B",
         | 
| 102 | 
            +
                "llava_onevision_7B": "Llava-OneVision-7B",
         | 
| 103 | 
            +
                "Llama_3_2_11B": "Llama-3.2-11B",
         | 
| 104 | 
            +
                "Phi-3.5-vision": "Phi-3.5-Vision",
         | 
| 105 | 
            +
                "MiniCPM_v2.6": "MiniCPM-V2.6",
         | 
| 106 | 
            +
                "Idefics3": "Idefics3-8B-Llama3",
         | 
| 107 | 
            +
                "Aquila_VL_2B": "Aquila-VL-2B-llava-qwen",
         | 
| 108 | 
            +
                "POINTS_7B": "POINTS-Qwen2.5-7B",
         | 
| 109 | 
            +
                "Qwen2_VL_2B": "Qwen2-VL-2B",
         | 
| 110 | 
            +
                "InternVL2_2B": "InternVL2-2B",
         | 
| 111 | 
            +
                "Molmo_7B_D": "Molmo-7B-D-0924",
         | 
| 112 | 
            +
                "Molmo_72B": "Molmo-72B-0924",
         | 
| 113 | 
            +
                "Mammoth_VL": "Mammoth-VL-8B",
         | 
| 114 | 
            +
                "SmolVLM": "SmolVLM-1.7B",
         | 
| 115 | 
            +
                "POINTS_15_7B": "POINTS-1.5-8B",
         | 
| 116 | 
            +
                "InternVL2_5_78B": "InternVL2.5-78B",
         | 
| 117 | 
            +
                "InternVL2_5_2B": "InternVL2.5-2B",
         | 
| 118 | 
            +
            }
         | 
| 119 | 
            +
             | 
| 120 | 
            +
            DIMENSION_NAME_MAP = {
         | 
| 121 | 
            +
                "skills": "Skills",
         | 
| 122 | 
            +
                "input_format": "Input Format",
         | 
| 123 | 
            +
                "output_format": "Output Format",
         | 
| 124 | 
            +
                "input_num": "Visual Input Number",
         | 
| 125 | 
            +
                "app": "Application"
         | 
| 126 | 
            +
            }
         | 
| 127 | 
            +
             | 
| 128 | 
            +
            KEYWORD_NAME_MAP = {
         | 
| 129 | 
            +
                # Skills
         | 
| 130 | 
            +
                "Object Recognition and Classification": "Object Recognition",
         | 
| 131 | 
            +
                "Text Recognition (OCR)": "OCR",
         | 
| 132 | 
            +
                "Language Understanding and Generation": "Language",
         | 
| 133 | 
            +
                "Scene and Event Understanding": "Scene/Event",
         | 
| 134 | 
            +
                "Mathematical and Logical Reasoning": "Math/Logic",
         | 
| 135 | 
            +
                "Commonsense and Social Reasoning": "Commonsense",
         | 
| 136 | 
            +
                "Ethical and Safety Reasoning": "Ethics/Safety",
         | 
| 137 | 
            +
                "Domain-Specific Knowledge and Skills": "Domain-Specific",
         | 
| 138 | 
            +
                "Spatial and Temporal Reasoning": "Spatial/Temporal",
         | 
| 139 | 
            +
                "Planning and Decision Making": "Planning/Decision",
         | 
| 140 | 
            +
                # Input Format
         | 
| 141 | 
            +
                'User Interface Screenshots': "UI related", 
         | 
| 142 | 
            +
                'Text-Based Images and Documents': "Documents", 
         | 
| 143 | 
            +
                'Diagrams and Data Visualizations': "Infographics", 
         | 
| 144 | 
            +
                'Videos': "Videos", 
         | 
| 145 | 
            +
                'Artistic and Creative Content': "Arts/Creative", 
         | 
| 146 | 
            +
                'Photographs': "Photographs", 
         | 
| 147 | 
            +
                '3D Models and Aerial Imagery': "3D related",
         | 
| 148 | 
            +
                # Application
         | 
| 149 | 
            +
                'Information_Extraction': "Info Extraction", 
         | 
| 150 | 
            +
                'Planning' : "Planning", 
         | 
| 151 | 
            +
                'Coding': "Coding", 
         | 
| 152 | 
            +
                'Perception': "Perception", 
         | 
| 153 | 
            +
                'Metrics': "Metrics", 
         | 
| 154 | 
            +
                'Science': "Science", 
         | 
| 155 | 
            +
                'Knowledge': "Knowledge", 
         | 
| 156 | 
            +
                'Mathematics': "Math",
         | 
| 157 | 
            +
                # Output format
         | 
| 158 | 
            +
    'contextual_formatted_text': "Contextual", 
         | 
| 159 | 
            +
                'structured_output': "Structured", 
         | 
| 160 | 
            +
                'exact_text': "Exact", 
         | 
| 161 | 
            +
                'numerical_data': "Numerical", 
         | 
| 162 | 
            +
                'open_ended_output': "Open-ended", 
         | 
| 163 | 
            +
                'multiple_choice': "MC",
         | 
| 164 | 
            +
                "6-8 images": "6-8 imgs",
         | 
| 165 | 
            +
                "1-image": "1 img",
         | 
| 166 | 
            +
                "2-3 images": "2-3 imgs",
         | 
| 167 | 
            +
                "4-5 images": "4-5 imgs",
         | 
| 168 | 
            +
                "9-image or more": "9+ imgs",
         | 
| 169 | 
            +
                "video": "Video",
         | 
| 170 | 
            +
            }
         | 
| 171 | 
            +
             | 
| 172 | 
            +
            MODEL_URLS = {
         | 
| 173 | 
            +
                "Claude_3.5_new": "https://www.anthropic.com/news/3-5-models-and-computer-use",
         | 
| 174 | 
            +
                "GPT_4o": "https://platform.openai.com/docs/models/gpt-4o",
         | 
| 175 | 
            +
                "Claude_3.5": "https://www.anthropic.com/news/claude-3-5-sonnet", 
         | 
| 176 | 
            +
                "Gemini_1.5_pro_002": "https://ai.google.dev/gemini-api/docs/models/gemini",
         | 
| 177 | 
            +
                "Gemini_1.5_flash_002": "https://ai.google.dev/gemini-api/docs/models/gemini",
         | 
| 178 | 
            +
                "GPT_4o_mini": "https://platform.openai.com/docs/models#gpt-4o-mini",
         | 
| 179 | 
            +
                "Qwen2_VL_72B": "https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct",
         | 
| 180 | 
            +
                "InternVL2_76B": "https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B",
         | 
| 181 | 
            +
                "llava_onevision_72B": "https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov-chat",
         | 
| 182 | 
            +
                "NVLM": "https://huggingface.co/nvidia/NVLM-D-72B",
         | 
| 183 | 
            +
                "Molmo_72B": "https://huggingface.co/allenai/Molmo-72B-0924",
         | 
| 184 | 
            +
                "Qwen2_VL_7B": "https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct",
         | 
| 185 | 
            +
                "Pixtral_12B": "https://huggingface.co/mistralai/Pixtral-12B-2409",
         | 
| 186 | 
            +
                "Aria": "https://huggingface.co/rhymes-ai/Aria",
         | 
| 187 | 
            +
                "InternVL2_8B": "https://huggingface.co/OpenGVLab/InternVL2-8B",
         | 
| 188 | 
            +
                "Phi-3.5-vision": "https://huggingface.co/microsoft/Phi-3.5-vision-instruct",
         | 
| 189 | 
            +
                "MiniCPM_v2.6": "https://huggingface.co/openbmb/MiniCPM-V-2_6",
         | 
| 190 | 
            +
                "llava_onevision_7B": "https://huggingface.co/lmms-lab/llava-onevision-qwen2-7b-ov",
         | 
| 191 | 
            +
                "Llama_3_2_11B": "https://huggingface.co/meta-llama/Llama-3.2-11B-Vision",
         | 
| 192 | 
            +
                "Idefics3": "https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3",
         | 
| 193 | 
            +
                "Molmo_7B_D": "https://huggingface.co/allenai/Molmo-7B-D-0924",
         | 
| 194 | 
            +
                "Aquila_VL_2B": "https://huggingface.co/BAAI/Aquila-VL-2B-llava-qwen",
         | 
| 195 | 
            +
                "POINTS_7B": "https://huggingface.co/WePOINTS/POINTS-Qwen-2-5-7B-Chat",
         | 
| 196 | 
            +
                "Qwen2_VL_2B": "https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct",
         | 
| 197 | 
            +
                "InternVL2_2B": "https://huggingface.co/OpenGVLab/InternVL2-2B",
         | 
| 198 | 
            +
                "POINTS_7B": "https://huggingface.co/WePOINTS/POINTS-Qwen-2-5-7B-Chat",
         | 
| 199 | 
            +
                "POINTS_15_7B": "https://huggingface.co/WePOINTS/POINTS-1-5-Qwen-2-5-7B-Chat",
         | 
| 200 | 
            +
                "SmolVLM": "https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct",
         | 
| 201 | 
            +
                "Mammoth_VL": "https://huggingface.co/MAmmoTH-VL/MAmmoTH-VL-8B",
         | 
| 202 | 
            +
                "InternVL2_5_78B": "https://huggingface.co/OpenGVLab/InternVL2_5-78B",
         | 
| 203 | 
            +
                "InternVL2_5_2B": "https://huggingface.co/OpenGVLab/InternVL2_5-2B",
         | 
| 204 | 
            +
            }
         | 
| 205 | 
            +
             | 
| 206 | 
            +
            # Define the base MODEL_GROUPS structure
         | 
| 207 | 
            +
            BASE_MODEL_GROUPS = {
         | 
| 208 | 
            +
                "All": list(MODEL_NAME_MAP.keys()),
         | 
| 209 | 
            +
                "Flagship Models": ['Claude_3.5_new', 'GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002', 'Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B', 'NVLM', 'Molmo_72B', 'InternVL2_5_78B'],
         | 
| 210 | 
            +
                "Efficiency Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini', 'Qwen2_VL_7B', 'Pixtral_12B', 'Aria', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3', 'Molmo_7B_D', "Aquila_VL_2B", "POINTS_7B", "Qwen2_VL_2B", "InternVL2_2B", "InternVL2_5_2B"],
         | 
| 211 | 
            +
                "Proprietary Flagship models": ['Claude_3.5_new', 'GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002'],
         | 
| 212 | 
            +
                "Proprietary Efficiency Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini'],
         | 
| 213 | 
            +
                "Open-source Flagship Models": ['Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B', 'NVLM', "Molmo_72B", "InternVL2_5_78B"],
         | 
| 214 | 
            +
                "Open-source Efficiency Models": ['Qwen2_VL_7B', 'Pixtral_12B', 'Aria', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3', 'Molmo_7B_D', "Aquila_VL_2B", "POINTS_7B", "Qwen2_VL_2B", "InternVL2_2B", "InternVL2_5_2B"]
         | 
| 215 | 
            +
            }
         | 
    	
        utils.py
    CHANGED
    
    | @@ -2,139 +2,15 @@ import pandas as pd | |
| 2 | 
             
            import json
         | 
| 3 | 
             
            from typing import Dict, Any, Tuple
         | 
| 4 | 
             
            import os
         | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
                 | 
| 9 | 
            -
                 | 
| 10 | 
            -
                 | 
| 11 | 
            -
             | 
| 12 | 
            -
                "InternVL2_76B": "InternVL2-Llama3-76B",
         | 
| 13 | 
            -
                "Qwen2_VL_72B": "Qwen2-VL-72B",
         | 
| 14 | 
            -
                "llava_onevision_72B": "Llava-OneVision-72B",
         | 
| 15 | 
            -
                "NVLM": "NVLM-D-72B",
         | 
| 16 | 
            -
                "GPT_4o_mini": "GPT-4o mini",
         | 
| 17 | 
            -
                "Gemini_1.5_flash_002": "Gemini-1.5-Flash-002",
         | 
| 18 | 
            -
                "Pixtral_12B": "Pixtral 12B",
         | 
| 19 | 
            -
                "Aria": "Aria-MoE-25B",
         | 
| 20 | 
            -
                "Qwen2_VL_7B": "Qwen2-VL-7B",
         | 
| 21 | 
            -
                "InternVL2_8B": "InternVL2-8B",
         | 
| 22 | 
            -
                "llava_onevision_7B": "Llava-OneVision-7B",
         | 
| 23 | 
            -
                "Llama_3_2_11B": "Llama-3.2-11B",
         | 
| 24 | 
            -
                "Phi-3.5-vision": "Phi-3.5-Vision",
         | 
| 25 | 
            -
                "MiniCPM_v2.6": "MiniCPM-V2.6",
         | 
| 26 | 
            -
                "Idefics3": "Idefics3-8B-Llama3",
         | 
| 27 | 
            -
                "Aquila_VL_2B": "Aquila-VL-2B-llava-qwen",
         | 
| 28 | 
            -
                "POINTS_7B": "POINTS-Qwen2.5-7B",
         | 
| 29 | 
            -
                "Qwen2_VL_2B": "Qwen2-VL-2B",
         | 
| 30 | 
            -
                "InternVL2_2B": "InternVL2-2B",
         | 
| 31 | 
            -
                "Molmo_7B_D": "Molmo-7B-D-0924",
         | 
| 32 | 
            -
                "Molmo_72B": "Molmo-72B-0924",
         | 
| 33 | 
            -
                "Mammoth_VL": "Mammoth-VL-8B",
         | 
| 34 | 
            -
                "SmolVLM": "SmolVLM-1.7B",
         | 
| 35 | 
            -
                "POINTS_15_7B": "POINTS-1.5-8B",
         | 
| 36 | 
            -
                "InternVL2_5_78B": "InternVL2.5-78B",
         | 
| 37 | 
            -
                "InternVL2_5_2B": "InternVL2.5-2B",
         | 
| 38 | 
            -
            }
         | 
| 39 | 
            -
             | 
| 40 | 
            -
            DIMENSION_NAME_MAP = {
         | 
| 41 | 
            -
                "skills": "Skills",
         | 
| 42 | 
            -
                "input_format": "Input Format",
         | 
| 43 | 
            -
                "output_format": "Output Format",
         | 
| 44 | 
            -
                "input_num": "Visual Input Number",
         | 
| 45 | 
            -
                "app": "Application"
         | 
| 46 | 
            -
            }
         | 
| 47 | 
            -
             | 
| 48 | 
            -
            KEYWORD_NAME_MAP = {
         | 
| 49 | 
            -
                # Skills
         | 
| 50 | 
            -
                "Object Recognition and Classification": "Object Recognition",
         | 
| 51 | 
            -
                "Text Recognition (OCR)": "OCR",
         | 
| 52 | 
            -
                "Language Understanding and Generation": "Language",
         | 
| 53 | 
            -
                "Scene and Event Understanding": "Scene/Event",
         | 
| 54 | 
            -
                "Mathematical and Logical Reasoning": "Math/Logic",
         | 
| 55 | 
            -
                "Commonsense and Social Reasoning": "Commonsense",
         | 
| 56 | 
            -
                "Ethical and Safety Reasoning": "Ethics/Safety",
         | 
| 57 | 
            -
                "Domain-Specific Knowledge and Skills": "Domain-Specific",
         | 
| 58 | 
            -
                "Spatial and Temporal Reasoning": "Spatial/Temporal",
         | 
| 59 | 
            -
                "Planning and Decision Making": "Planning/Decision",
         | 
| 60 | 
            -
                # Input Format
         | 
| 61 | 
            -
                'User Interface Screenshots': "UI related", 
         | 
| 62 | 
            -
                'Text-Based Images and Documents': "Documents", 
         | 
| 63 | 
            -
                'Diagrams and Data Visualizations': "Infographics", 
         | 
| 64 | 
            -
                'Videos': "Videos", 
         | 
| 65 | 
            -
                'Artistic and Creative Content': "Arts/Creative", 
         | 
| 66 | 
            -
                'Photographs': "Photographs", 
         | 
| 67 | 
            -
                '3D Models and Aerial Imagery': "3D related",
         | 
| 68 | 
            -
                # Application
         | 
| 69 | 
            -
                'Information_Extraction': "Info Extraction", 
         | 
| 70 | 
            -
                'Planning' : "Planning", 
         | 
| 71 | 
            -
                'Coding': "Coding", 
         | 
| 72 | 
            -
                'Perception': "Perception", 
         | 
| 73 | 
            -
                'Metrics': "Metrics", 
         | 
| 74 | 
            -
                'Science': "Science", 
         | 
| 75 | 
            -
                'Knowledge': "Knowledge", 
         | 
| 76 | 
            -
                'Mathematics': "Math",
         | 
| 77 | 
            -
                # Output format
         | 
| 78 | 
            -
                'contextual_formatted_text': "Contexual", 
         | 
| 79 | 
            -
                'structured_output': "Structured", 
         | 
| 80 | 
            -
                'exact_text': "Exact", 
         | 
| 81 | 
            -
                'numerical_data': "Numerical", 
         | 
| 82 | 
            -
                'open_ended_output': "Open-ended", 
         | 
| 83 | 
            -
                'multiple_choice': "MC",
         | 
| 84 | 
            -
                "6-8 images": "6-8 imgs",
         | 
| 85 | 
            -
                "1-image": "1 img",
         | 
| 86 | 
            -
                "2-3 images": "2-3 imgs",
         | 
| 87 | 
            -
                "4-5 images": "4-5 imgs",
         | 
| 88 | 
            -
                "9-image or more": "9+ imgs",
         | 
| 89 | 
            -
                "video": "Video",
         | 
| 90 | 
            -
            }
         | 
| 91 | 
            -
             | 
| 92 | 
            -
            MODEL_URLS = {
         | 
| 93 | 
            -
                "Claude_3.5_new": "https://www.anthropic.com/news/3-5-models-and-computer-use",
         | 
| 94 | 
            -
                "GPT_4o": "https://platform.openai.com/docs/models/gpt-4o",
         | 
| 95 | 
            -
                "Claude_3.5": "https://www.anthropic.com/news/claude-3-5-sonnet", 
         | 
| 96 | 
            -
                "Gemini_1.5_pro_002": "https://ai.google.dev/gemini-api/docs/models/gemini",
         | 
| 97 | 
            -
                "Gemini_1.5_flash_002": "https://ai.google.dev/gemini-api/docs/models/gemini",
         | 
| 98 | 
            -
                "GPT_4o_mini": "https://platform.openai.com/docs/models#gpt-4o-mini",
         | 
| 99 | 
            -
                "Qwen2_VL_72B": "https://huggingface.co/Qwen/Qwen2-VL-72B-Instruct",
         | 
| 100 | 
            -
                "InternVL2_76B": "https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B",
         | 
| 101 | 
            -
                "llava_onevision_72B": "https://huggingface.co/lmms-lab/llava-onevision-qwen2-72b-ov-chat",
         | 
| 102 | 
            -
                "NVLM": "https://huggingface.co/nvidia/NVLM-D-72B",
         | 
| 103 | 
            -
                "Molmo_72B": "https://huggingface.co/allenai/Molmo-72B-0924",
         | 
| 104 | 
            -
                "Qwen2_VL_7B": "https://huggingface.co/Qwen/Qwen2-VL-7B-Instruct",
         | 
| 105 | 
            -
                "Pixtral_12B": "https://huggingface.co/mistralai/Pixtral-12B-2409",
         | 
| 106 | 
            -
                "Aria": "https://huggingface.co/rhymes-ai/Aria",
         | 
| 107 | 
            -
                "InternVL2_8B": "https://huggingface.co/OpenGVLab/InternVL2-8B",
         | 
| 108 | 
            -
                "Phi-3.5-vision": "https://huggingface.co/microsoft/Phi-3.5-vision-instruct",
         | 
| 109 | 
            -
                "MiniCPM_v2.6": "https://huggingface.co/openbmb/MiniCPM-V-2_6",
         | 
| 110 | 
            -
                "llava_onevision_7B": "https://huggingface.co/lmms-lab/llava-onevision-qwen2-7b-ov",
         | 
| 111 | 
            -
                "Llama_3_2_11B": "https://huggingface.co/meta-llama/Llama-3.2-11B-Vision",
         | 
| 112 | 
            -
                "Idefics3": "https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3",
         | 
| 113 | 
            -
                "Molmo_7B_D": "https://huggingface.co/allenai/Molmo-7B-D-0924",
         | 
| 114 | 
            -
                "Aquila_VL_2B": "https://huggingface.co/BAAI/Aquila-VL-2B-llava-qwen",
         | 
| 115 | 
            -
                "POINTS_7B": "https://huggingface.co/WePOINTS/POINTS-Qwen-2-5-7B-Chat",
         | 
| 116 | 
            -
                "Qwen2_VL_2B": "https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct",
         | 
| 117 | 
            -
                "InternVL2_2B": "https://huggingface.co/OpenGVLab/InternVL2-2B",
         | 
| 118 | 
            -
                "POINTS_7B": "https://huggingface.co/WePOINTS/POINTS-Qwen-2-5-7B-Chat",
         | 
| 119 | 
            -
                "POINTS_15_7B": "https://huggingface.co/WePOINTS/POINTS-1-5-Qwen-2-5-7B-Chat",
         | 
| 120 | 
            -
                "SmolVLM": "https://huggingface.co/HuggingFaceTB/SmolVLM-Instruct",
         | 
| 121 | 
            -
                "Mammoth_VL": "https://huggingface.co/MAmmoTH-VL/MAmmoTH-VL-8B",
         | 
| 122 | 
            -
                "InternVL2_5_78B": "https://huggingface.co/OpenGVLab/InternVL2_5-78B",
         | 
| 123 | 
            -
                "InternVL2_5_2B": "https://huggingface.co/OpenGVLab/InternVL2_5-2B",
         | 
| 124 | 
            -
            }
         | 
| 125 |  | 
| 126 | 
             
            class BaseDataLoader:
         | 
| 127 | 
            -
                # Define the base MODEL_GROUPS structure
         | 
| 128 | 
            -
                BASE_MODEL_GROUPS = {
         | 
| 129 | 
            -
                    "All": list(MODEL_NAME_MAP.keys()),
         | 
| 130 | 
            -
                    "Flagship Models": ['Claude_3.5_new', 'GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002', 'Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B', 'NVLM', 'Molmo_72B', 'InternVL2_5_78B'],
         | 
| 131 | 
            -
                    "Efficiency Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini', 'Qwen2_VL_7B', 'Pixtral_12B', 'Aria', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3', 'Molmo_7B_D', "Aquila_VL_2B", "POINTS_7B", "Qwen2_VL_2B", "InternVL2_2B", "InternVL2_5_2B"],
         | 
| 132 | 
            -
                    "Proprietary Flagship models": ['Claude_3.5_new', 'GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002'],
         | 
| 133 | 
            -
                    "Proprietary Efficiency Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini'],
         | 
| 134 | 
            -
                    "Open-source Flagship Models": ['Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B', 'NVLM', "Molmo_72B", "InternVL2_5_78B"],
         | 
| 135 | 
            -
                    "Open-source Efficiency Models": ['Qwen2_VL_7B', 'Pixtral_12B', 'Aria', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3', 'Molmo_7B_D', "Aquila_VL_2B", "POINTS_7B", "Qwen2_VL_2B", "InternVL2_2B", "InternVL2_5_2B"]
         | 
| 136 | 
            -
                }
         | 
| 137 | 
            -
             | 
| 138 | 
             
                def __init__(self):
         | 
| 139 | 
             
                    self.MODEL_DATA = self._load_model_data()
         | 
| 140 | 
             
                    self.SUMMARY_DATA = self._load_summary_data()
         | 
| @@ -174,17 +50,15 @@ class BaseDataLoader: | |
| 174 | 
             
                    return {k: groups[k] for k in order if k in groups}
         | 
| 175 |  | 
| 176 | 
             
                def _initialize_model_groups(self) -> Dict[str, list]:
         | 
| 177 | 
            -
                    # Get the list of available models from the loaded data
         | 
| 178 | 
             
                    available_models = set(self.MODEL_DATA.keys())
         | 
| 179 |  | 
| 180 | 
            -
                    # Create filtered groups based on available models
         | 
| 181 | 
             
                    filtered_groups = {}
         | 
| 182 | 
            -
                    for group_name, models in  | 
| 183 | 
             
                        if group_name == "All":
         | 
| 184 | 
             
                            filtered_groups[group_name] = sorted(list(available_models))
         | 
| 185 | 
             
                        else:
         | 
| 186 | 
             
                            filtered_models = [model for model in models if model in available_models]
         | 
| 187 | 
            -
                            if filtered_models: | 
| 188 | 
             
                                filtered_groups[group_name] = filtered_models
         | 
| 189 |  | 
| 190 | 
             
                    return filtered_groups
         | 
|  | |
| 2 | 
             
            import json
         | 
| 3 | 
             
            from typing import Dict, Any, Tuple
         | 
| 4 | 
             
            import os
         | 
| 5 | 
            +
            from constants import (
         | 
| 6 | 
            +
                MODEL_NAME_MAP,
         | 
| 7 | 
            +
                DIMENSION_NAME_MAP,
         | 
| 8 | 
            +
                KEYWORD_NAME_MAP,
         | 
| 9 | 
            +
                MODEL_URLS,
         | 
| 10 | 
            +
                BASE_MODEL_GROUPS
         | 
| 11 | 
            +
            )
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 12 |  | 
| 13 | 
             
            class BaseDataLoader:
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 14 | 
             
                def __init__(self):
         | 
| 15 | 
             
                    self.MODEL_DATA = self._load_model_data()
         | 
| 16 | 
             
                    self.SUMMARY_DATA = self._load_summary_data()
         | 
|  | |
| 50 | 
             
                    return {k: groups[k] for k in order if k in groups}
         | 
| 51 |  | 
| 52 | 
             
                def _initialize_model_groups(self) -> Dict[str, list]:
         | 
|  | |
| 53 | 
             
                    available_models = set(self.MODEL_DATA.keys())
         | 
| 54 |  | 
|  | |
| 55 | 
             
                    filtered_groups = {}
         | 
| 56 | 
            +
                    for group_name, models in BASE_MODEL_GROUPS.items():
         | 
| 57 | 
             
                        if group_name == "All":
         | 
| 58 | 
             
                            filtered_groups[group_name] = sorted(list(available_models))
         | 
| 59 | 
             
                        else:
         | 
| 60 | 
             
                            filtered_models = [model for model in models if model in available_models]
         | 
| 61 | 
            +
                            if filtered_models:
         | 
| 62 | 
             
                                filtered_groups[group_name] = filtered_models
         | 
| 63 |  | 
| 64 | 
             
                    return filtered_groups
         | 
 
			
