Spaces:

open-llm-leaderboard
/

open_llm_leaderboard

Running on CPU Upgrade

App Files Community

1147

chriscanal committed on Sep 26, 2023

Commit

e872e8a

1 Parent(s): 65fc294

Updated main to include title in the graph function parameters

Browse files

Files changed (1) hide show

app.py +11 -7

app.py CHANGED Viewed

@@ -105,7 +105,6 @@ else:
 original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
 models = original_df["model_name_for_query"].tolist() # needed for the models' backlinks to the leaderboard
 plot_df = create_plot_df(create_scores_df(join_model_info_with_results(original_df)))
 to_be_dumped = f"models = {repr(models)}\n"
 # with open("models_backlinks.py", "w") as f:
@@ -476,16 +475,21 @@ with demo:
         with gr.TabItem("📈 Benchmark Graphs", elem_id="llm-benchmark-tab-table", id=4):
             with gr.Row():
                 with gr.Column():
-                    chart = create_metric_plot_obj(plot_df, ["Average ⬆️"], HUMAN_BASELINES).properties(
-                        title="Average of Top Scores and Human Baseline Over Time"
                     )
                     gr.Plot(value=chart, interactive=False, width=500, height=500)
                 with gr.Column():
                     chart = create_metric_plot_obj(
-                        plot_df, ["ARC", "HellaSwag", "MMLU", "TruthfulQA"], HUMAN_BASELINES
-                    ).properties(title="Top Scores and Human Baseline Over Time")
                     gr.Plot(value=chart, interactive=False, width=500, height=500)
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
@@ -608,4 +612,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(concurrency_count=40).launch()

 original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
 models = original_df["model_name_for_query"].tolist() # needed for the models' backlinks to the leaderboard
 plot_df = create_plot_df(create_scores_df(join_model_info_with_results(original_df)))
 to_be_dumped = f"models = {repr(models)}\n"
 # with open("models_backlinks.py", "w") as f:
         with gr.TabItem("📈 Benchmark Graphs", elem_id="llm-benchmark-tab-table", id=4):
             with gr.Row():
                 with gr.Column():
+                    chart = create_metric_plot_obj(
+                        plot_df,
+                        ["Average ⬆️"],
+                        HUMAN_BASELINES,
+                        title="Average of Top Scores and Human Baseline Over Time",
                     )
                     gr.Plot(value=chart, interactive=False, width=500, height=500)
                 with gr.Column():
                     chart = create_metric_plot_obj(
+                        plot_df,
+                        ["ARC", "HellaSwag", "MMLU", "TruthfulQA"],
+                        HUMAN_BASELINES,
+                        title="Top Scores and Human Baseline Over Time",
+                    )
                     gr.Plot(value=chart, interactive=False, width=500, height=500)
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
+demo.queue(concurrency_count=40).launch()