Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
e872e8a
1
Parent(s):
65fc294
Updated main to include title in the graph function parameters
Browse files
app.py
CHANGED
|
@@ -105,7 +105,6 @@ else:
|
|
| 105 |
original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
|
| 106 |
models = original_df["model_name_for_query"].tolist() # needed for model backlinks in their to the leaderboard
|
| 107 |
plot_df = create_plot_df(create_scores_df(join_model_info_with_results(original_df)))
|
| 108 |
-
|
| 109 |
to_be_dumped = f"models = {repr(models)}\n"
|
| 110 |
|
| 111 |
# with open("models_backlinks.py", "w") as f:
|
|
@@ -476,16 +475,21 @@ with demo:
|
|
| 476 |
with gr.TabItem("📈 Benchmark Graphs", elem_id="llm-benchmark-tab-table", id=4):
|
| 477 |
with gr.Row():
|
| 478 |
with gr.Column():
|
| 479 |
-
chart = create_metric_plot_obj(
|
| 480 |
-
|
|
|
|
|
|
|
|
|
|
| 481 |
)
|
| 482 |
gr.Plot(value=chart, interactive=False, width=500, height=500)
|
| 483 |
with gr.Column():
|
| 484 |
chart = create_metric_plot_obj(
|
| 485 |
-
plot_df,
|
| 486 |
-
|
|
|
|
|
|
|
|
|
|
| 487 |
gr.Plot(value=chart, interactive=False, width=500, height=500)
|
| 488 |
-
|
| 489 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
| 490 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 491 |
|
|
@@ -608,4 +612,4 @@ with demo:
|
|
| 608 |
scheduler = BackgroundScheduler()
|
| 609 |
scheduler.add_job(restart_space, "interval", seconds=1800)
|
| 610 |
scheduler.start()
|
| 611 |
-
demo.queue(concurrency_count=40).launch()
|
|
|
|
| 105 |
original_df = get_leaderboard_df(eval_results, eval_results_private, COLS, BENCHMARK_COLS)
|
| 106 |
models = original_df["model_name_for_query"].tolist() # needed for model backlinks in their to the leaderboard
|
| 107 |
plot_df = create_plot_df(create_scores_df(join_model_info_with_results(original_df)))
|
|
|
|
| 108 |
to_be_dumped = f"models = {repr(models)}\n"
|
| 109 |
|
| 110 |
# with open("models_backlinks.py", "w") as f:
|
|
|
|
| 475 |
with gr.TabItem("📈 Benchmark Graphs", elem_id="llm-benchmark-tab-table", id=4):
|
| 476 |
with gr.Row():
|
| 477 |
with gr.Column():
|
| 478 |
+
chart = create_metric_plot_obj(
|
| 479 |
+
plot_df,
|
| 480 |
+
["Average ⬆️"],
|
| 481 |
+
HUMAN_BASELINES,
|
| 482 |
+
title="Average of Top Scores and Human Baseline Over Time",
|
| 483 |
)
|
| 484 |
gr.Plot(value=chart, interactive=False, width=500, height=500)
|
| 485 |
with gr.Column():
|
| 486 |
chart = create_metric_plot_obj(
|
| 487 |
+
plot_df,
|
| 488 |
+
["ARC", "HellaSwag", "MMLU", "TruthfulQA"],
|
| 489 |
+
HUMAN_BASELINES,
|
| 490 |
+
title="Top Scores and Human Baseline Over Time",
|
| 491 |
+
)
|
| 492 |
gr.Plot(value=chart, interactive=False, width=500, height=500)
|
|
|
|
| 493 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
| 494 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
| 495 |
|
|
|
|
| 612 |
scheduler = BackgroundScheduler()
|
| 613 |
scheduler.add_job(restart_space, "interval", seconds=1800)
|
| 614 |
scheduler.start()
|
| 615 |
+
demo.queue(concurrency_count=40).launch()
|