IL-TUR-Leaderboard

Runtime error

App Files Files Community

abhinav-joshi committed on May 29, 2024

Commit

a92fba7

1 Parent(s): 679c7e6

add tasks

Browse files

Files changed (2) hide show

app.py +27 -19
src/about.py +11 -2

app.py CHANGED Viewed

@@ -24,7 +24,7 @@ from src.display.utils import (
     ModelType,
     fields,
     WeightType,
-    Precision
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
@@ -34,17 +34,28 @@ from src.submission.submit import add_new_eval
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
 try:
     print(EVAL_REQUESTS_PATH)
     snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
     )
 except Exception:
     restart_space()
 try:
     print(EVAL_RESULTS_PATH)
     snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
     )
 except Exception:
     restart_space()
@@ -86,9 +97,7 @@ def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
         AutoEvalColumn.model.name,
     ]
     # We use COLS to maintain sorting
-    filtered_df = df[
-        always_here_cols + [c for c in COLS if c in df.columns and c in columns]
-    ]
     return filtered_df
@@ -138,7 +147,7 @@ with demo:
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             with gr.Row():
                 with gr.Column():
                     with gr.Row():
@@ -149,11 +158,7 @@ with demo:
                         )
                     with gr.Row():
                         shown_columns = gr.CheckboxGroup(
-                            choices=[
-                                c.name
-                                for c in fields(AutoEvalColumn)
-                                if not c.hidden and not c.never_hidden
-                            ],
                             value=[
                                 c.name
                                 for c in fields(AutoEvalColumn)
@@ -168,7 +173,7 @@ with demo:
                             value=False, label="Show gated/private/deleted models", interactive=True
                         )
                 with gr.Column(min_width=320):
-                    #with gr.Box(elem_id="box-filter"):
                     filter_columns_type = gr.CheckboxGroup(
                         label="Model types",
                         choices=[t.to_str() for t in ModelType],
@@ -192,10 +197,7 @@ with demo:
                     )
             leaderboard_table = gr.components.Dataframe(
-                value=leaderboard_df[
-                    [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
-                    + shown_columns.value
-                ],
                 headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
                 datatype=TYPES,
                 elem_id="leaderboard-table",
@@ -223,7 +225,13 @@ with demo:
                 ],
                 leaderboard_table,
             )
-            for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility]:
                 selector.change(
                     update_table,
                     [
@@ -342,4 +350,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()

     ModelType,
     fields,
     WeightType,
+    Precision,
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
 try:
     print(EVAL_REQUESTS_PATH)
     snapshot_download(
+        repo_id=QUEUE_REPO,
+        local_dir=EVAL_REQUESTS_PATH,
+        repo_type="dataset",
+        tqdm_class=None,
+        etag_timeout=30,
+        token=TOKEN,
     )
 except Exception:
     restart_space()
 try:
     print(EVAL_RESULTS_PATH)
     snapshot_download(
+        repo_id=RESULTS_REPO,
+        local_dir=EVAL_RESULTS_PATH,
+        repo_type="dataset",
+        tqdm_class=None,
+        etag_timeout=30,
+        token=TOKEN,
     )
 except Exception:
     restart_space()
         AutoEvalColumn.model.name,
     ]
     # We use COLS to maintain sorting
+    filtered_df = df[always_here_cols + [c for c in COLS if c in df.columns and c in columns]]
     return filtered_df
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("🏅 IL-TUR Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             with gr.Row():
                 with gr.Column():
                     with gr.Row():
                         )
                     with gr.Row():
                         shown_columns = gr.CheckboxGroup(
+                            choices=[c.name for c in fields(AutoEvalColumn) if not c.hidden and not c.never_hidden],
                             value=[
                                 c.name
                                 for c in fields(AutoEvalColumn)
                             value=False, label="Show gated/private/deleted models", interactive=True
                         )
                 with gr.Column(min_width=320):
+                    # with gr.Box(elem_id="box-filter"):
                     filter_columns_type = gr.CheckboxGroup(
                         label="Model types",
                         choices=[t.to_str() for t in ModelType],
                     )
             leaderboard_table = gr.components.Dataframe(
+                value=leaderboard_df[[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value],
                 headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
                 datatype=TYPES,
                 elem_id="leaderboard-table",
                 ],
                 leaderboard_table,
             )
+            for selector in [
+                shown_columns,
+                filter_columns_type,
+                filter_columns_precision,
+                filter_columns_size,
+                deleted_models_visibility,
+            ]:
                 selector.change(
                     update_table,
                     [
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
+demo.queue(default_concurrency_limit=40).launch()

src/about.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from dataclasses import dataclass
 from enum import Enum
 @dataclass
 class Task:
     benchmark: str
@@ -11,14 +12,22 @@ class Task:
 # Select your tasks here
 # ---------------------------------------------------
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("anli_r1", "acc", "Legal Named Entity Recognition (L-NER)")
     task1 = Task("logiqa", "acc_norm", "Rhetorical Role Prediction (RR)")
-NUM_FEWSHOT = 0 # Change with your few shot
 # ---------------------------------------------------
 # Your leaderboard name
 TITLE = """<h1 align="center" id="space-title">IL-TUR leaderboard</h1>"""

 from dataclasses import dataclass
 from enum import Enum
 @dataclass
 class Task:
     benchmark: str
 # Select your tasks here
 # ---------------------------------------------------
 class Tasks(Enum):
+    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("anli_r1", "acc", "Legal Named Entity Recognition (L-NER)")
     task1 = Task("logiqa", "acc_norm", "Rhetorical Role Prediction (RR)")
+    task2 = Task("logiqa", "acc_norm", "Court Judgment Prediction and Explanation (CJPE)")
+    task3 = Task("logiqa", "acc_norm", "Bail Prediction (BAIL)")
+    task4 = Task("logiqa", "acc_norm", "Legal Statute Identification (LSI)")
+    task5 = Task("logiqa", "acc_norm", "Prior Case Retrieval (PCR)")
+    task6 = Task("logiqa", "acc_norm", "Summarization (SUMM)")
 # ---------------------------------------------------
+NUM_FEWSHOT = 0  # Change with your few shot
+# ---------------------------------------------------
 # Your leaderboard name
 TITLE = """<h1 align="center" id="space-title">IL-TUR leaderboard</h1>"""