"""Gradio app that renders the MEGA-Bench leaderboard tables."""

import os

import gradio as gr

from utils import MEGABenchEvalDataLoader
# Star import kept as-is; this file reads BASE_MODEL_GROUPS,
# CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL, TABLE_INTRODUCTION,
# LEADERBOARD_INTRODUCTION, DATA_INFO and SUBMIT_INTRODUCTION from it.
from constants import *

# Directory containing this file; all static assets are resolved against it
# so the app does not depend on the process's working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))

base_css_file = os.path.join(current_dir, "static", "css", "style.css")
table_css_file = os.path.join(current_dir, "static", "css", "table.css")

# Read the stylesheets once at import time. The encoding is explicit so the
# result does not vary with the platform's default locale encoding.
with open(base_css_file, "r", encoding="utf-8") as f:
    base_css = f.read()
with open(table_css_file, "r", encoding="utf-8") as f:
    table_css = f.read()

# One loader per leaderboard table: the default (all tasks) results and the
# single-image ("SI") results. Paths are anchored at current_dir, matching
# how the CSS files above are located.
default_loader = MEGABenchEvalDataLoader(
    os.path.join(current_dir, "static", "eval_results", "Default")
)
si_loader = MEGABenchEvalDataLoader(
    os.path.join(current_dir, "static", "eval_results", "SI")
)
						|  | with gr.Blocks() as block: | 
					
						
						|  |  | 
					
						
						|  | css_style = gr.HTML( | 
					
						
						|  | f"<style>{base_css}\n{table_css}</style>", | 
					
						
						|  | visible=False | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | with gr.Tabs(elem_classes="tab-buttons") as tabs: | 
					
						
						|  | with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=0): | 
					
						
						|  |  | 
					
						
						|  | default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors." | 
					
						
						|  |  | 
					
						
						|  | single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added." | 
					
						
						|  |  | 
					
						
						|  | caption_component = gr.Markdown( | 
					
						
						|  | value=default_caption, | 
					
						
						|  | elem_classes="table-caption", | 
					
						
						|  | latex_delimiters=[{"left": "$", "right": "$", "display": False}], | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | with gr.Row(): | 
					
						
						|  | super_group_selector = gr.Radio( | 
					
						
						|  | choices=list(default_loader.SUPER_GROUPS.keys()), | 
					
						
						|  | label="Select a dimension to display breakdown results. We use different column colors to distinguish the overall benchmark scores and breakdown results.", | 
					
						
						|  | value=list(default_loader.SUPER_GROUPS.keys())[0] | 
					
						
						|  | ) | 
					
						
						|  | model_group_selector = gr.Radio( | 
					
						
						|  | choices=list(BASE_MODEL_GROUPS.keys()), | 
					
						
						|  | label="Select a model group", | 
					
						
						|  | value="All" | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | initial_headers, initial_data = default_loader.get_leaderboard_data(list(default_loader.SUPER_GROUPS.keys())[0], "All") | 
					
						
						|  | data_component = gr.Dataframe( | 
					
						
						|  | value=initial_data, | 
					
						
						|  | headers=initial_headers, | 
					
						
						|  | datatype=["number", "html"] + ["number"] * (len(initial_headers) - 2), | 
					
						
						|  | interactive=True, | 
					
						
						|  | elem_classes="custom-dataframe", | 
					
						
						|  | max_height=2400, | 
					
						
						|  | column_widths=["100px", "240px"] + ["160px"] * 3 + ["210px"] * (len(initial_headers) - 5), | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | def update_table_and_caption(table_type, super_group, model_group): | 
					
						
						|  | if table_type == "Default": | 
					
						
						|  | headers, data = default_loader.get_leaderboard_data(super_group, model_group) | 
					
						
						|  | caption = default_caption | 
					
						
						|  | else: | 
					
						
						|  | headers, data = si_loader.get_leaderboard_data(super_group, model_group) | 
					
						
						|  | caption = single_image_caption | 
					
						
						|  |  | 
					
						
						|  | return [ | 
					
						
						|  | gr.Dataframe( | 
					
						
						|  | value=data, | 
					
						
						|  | headers=headers, | 
					
						
						|  | datatype=["number", "html"] + ["number"] * (len(headers) - 2), | 
					
						
						|  | interactive=True, | 
					
						
						|  | column_widths=["100px", "240px"] + ["160px"] * 3 + ["210px"] * (len(headers) - 5), | 
					
						
						|  | ), | 
					
						
						|  | caption, | 
					
						
						|  | f"<style>{base_css}\n{table_css}</style>" | 
					
						
						|  | ] | 
					
						
						|  |  | 
					
						
						|  | with gr.Row(): | 
					
						
						|  | with gr.Accordion("Citation", open=False): | 
					
						
						|  | citation_button = gr.Textbox( | 
					
						
						|  | value=CITATION_BUTTON_TEXT, | 
					
						
						|  | label=CITATION_BUTTON_LABEL, | 
					
						
						|  | elem_id="citation-button", | 
					
						
						|  | lines=10, | 
					
						
						|  | ) | 
					
						
						|  | gr.Markdown( | 
					
						
						|  | TABLE_INTRODUCTION | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | with gr.Row(): | 
					
						
						|  | table_selector = gr.Radio( | 
					
						
						|  | choices=["Default", "Single Image"], | 
					
						
						|  | label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.", | 
					
						
						|  | value="Default" | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | def update_selectors(table_type): | 
					
						
						|  | loader = default_loader if table_type == "Default" else si_loader | 
					
						
						|  | return [ | 
					
						
						|  | gr.Radio(choices=list(loader.SUPER_GROUPS.keys())), | 
					
						
						|  | gr.Radio(choices=list(loader.MODEL_GROUPS.keys())) | 
					
						
						|  | ] | 
					
						
						|  |  | 
					
						
						|  | refresh_button = gr.Button("Refresh") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | refresh_button.click( | 
					
						
						|  | fn=update_table_and_caption, | 
					
						
						|  | inputs=[table_selector, super_group_selector, model_group_selector], | 
					
						
						|  | outputs=[data_component, caption_component, css_style] | 
					
						
						|  | ) | 
					
						
						|  | super_group_selector.change( | 
					
						
						|  | fn=update_table_and_caption, | 
					
						
						|  | inputs=[table_selector, super_group_selector, model_group_selector], | 
					
						
						|  | outputs=[data_component, caption_component, css_style] | 
					
						
						|  | ) | 
					
						
						|  | model_group_selector.change( | 
					
						
						|  | fn=update_table_and_caption, | 
					
						
						|  | inputs=[table_selector, super_group_selector, model_group_selector], | 
					
						
						|  | outputs=[data_component, caption_component, css_style] | 
					
						
						|  | ) | 
					
						
						|  | table_selector.change( | 
					
						
						|  | fn=update_selectors, | 
					
						
						|  | inputs=[table_selector], | 
					
						
						|  | outputs=[super_group_selector, model_group_selector] | 
					
						
						|  | ).then( | 
					
						
						|  | fn=update_table_and_caption, | 
					
						
						|  | inputs=[table_selector, super_group_selector, model_group_selector], | 
					
						
						|  | outputs=[data_component, caption_component, css_style] | 
					
						
						|  | ) | 
					
						
						|  | with gr.TabItem("📚 Introduction", elem_id="intro-tab", id=1): | 
					
						
						|  | gr.Markdown( | 
					
						
						|  | LEADERBOARD_INTRODUCTION | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2): | 
					
						
						|  | gr.Markdown(DATA_INFO, elem_classes="markdown-text") | 
					
						
						|  |  | 
					
						
						|  | with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3): | 
					
						
						|  | with gr.Row(): | 
					
						
						|  | gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text") | 
					
						
						|  |  | 
					
						
						|  | if __name__ == "__main__": | 
					
						
						|  | block.launch(share=True, show_api=False) |