Commit d628d76 · ycy committed
Parent(s): 346dc5e

test
Files changed:
- README.md +4 -1
- app.py +1 -0
- src/about.py +1 -0
- src/display/utils.py +1 -0
- src/populate.py +1 -0
README.md
CHANGED

@@ -41,4 +41,7 @@ If you encounter problem on the space, don't hesitate to restart it to remove th
 You'll find
 - the main table' columns names and properties in `src/display/utils.py`
 - the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
-- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
+- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
+
+
+src.display.util handles the button options (the checkbox area)
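The note added to the README refers to the column-selection checkboxes above the results table. As a rough, hypothetical illustration (not the space's actual code), that checkbox area is typically a `gr.CheckboxGroup` whose choices come from the column definitions in `src/display/utils.py`; the column names below are placeholders:

```python
import gradio as gr

# Minimal sketch of a column-selection checkbox area, assuming the choices
# are fed from the column metadata in src/display/utils.py. The column
# names here are hypothetical placeholders.
with gr.Blocks() as demo:
    shown_columns = gr.CheckboxGroup(
        choices=["Model", "Average ⬆️", "ANLI"],  # columns a user can toggle
        value=["Model", "Average ⬆️"],            # columns checked by default
        label="Select columns to show",
    )

demo.launch()
```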
app.py
CHANGED

@@ -96,6 +96,7 @@ with demo:
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+            # TODO
            leaderboard = init_leaderboard(LEADERBOARD_DF)
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
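For context, `init_leaderboard` builds the leaderboard component from `LEADERBOARD_DF`. A minimal, self-contained sketch of that pattern, with a plain `gr.Dataframe` standing in for the template's richer leaderboard component and a hypothetical one-row frame:

```python
import gradio as gr
import pandas as pd

def init_leaderboard(dataframe: pd.DataFrame) -> gr.Dataframe:
    # Guard against an empty frame, then render it as a read-only table.
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    return gr.Dataframe(value=dataframe, interactive=False)

# Hypothetical stand-in for the real LEADERBOARD_DF built by src/populate.py.
LEADERBOARD_DF = pd.DataFrame({"Model": ["model-a"], "Average ⬆️": [61.3]})

with gr.Blocks() as demo:
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            leaderboard = init_leaderboard(LEADERBOARD_DF)

demo.launch()
```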
src/about.py
CHANGED

@@ -10,6 +10,7 @@ class Task:
 
 # Select your tasks here
 # ---------------------------------------------------
+# TODO: metrics
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("anli_r1", "acc", "ANLI")
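The `# TODO: metrics` marks where benchmark metrics are declared. Each `Tasks` member maps a key in the result files to a metric and a display name. A sketch under that assumption, with a second, hypothetical entry showing how another metric would be added:

```python
from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # task_key in the results json
    metric: str     # metric_key in the results json
    col_name: str   # name to display in the leaderboard

class Tasks(Enum):
    task0 = Task("anli_r1", "acc", "ANLI")
    # Hypothetical second entry illustrating the pattern:
    task1 = Task("logiqa", "acc_norm", "LogiQA")
```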
src/display/utils.py
CHANGED

@@ -22,6 +22,7 @@ class ColumnContent:
 
 ## Leaderboard columns
 auto_eval_column_dict = []
+# TODO
 # Init
 auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
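The `# TODO` lands where leaderboard columns are registered. Each entry pairs a field name with a `ColumnContent` describing the header, cell type, and default visibility; the list is then turned into a dataclass so columns can be referenced by attribute. A minimal sketch, assuming the template's `ColumnContent` fields:

```python
from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)
class ColumnContent:
    name: str                   # header shown in the table
    type: str                   # cell type, e.g. "str" or "markdown"
    displayed_by_default: bool  # whether the column starts checked
    hidden: bool = False
    never_hidden: bool = False  # pinned columns the user cannot hide

auto_eval_column_dict = []
auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])

# Each [field_name, field_type, default] triple becomes a dataclass field,
# so columns are addressed as AutoEvalColumn.model.name, and so on.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
```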
src/populate.py
CHANGED

@@ -14,6 +14,7 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     all_data_json = [v.to_dict() for v in raw_data]
 
     df = pd.DataFrame.from_records(all_data_json)
+
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
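The touched function flattens the parsed result objects into a DataFrame, sorts by the average column, and rounds for display. A self-contained sketch of that flow, with hypothetical records shaped like the dicts that `v.to_dict()` yields:

```python
import pandas as pd

# Hypothetical records, shaped like the dicts produced from the parsed
# result files in src/leaderboard/read_evals.py.
all_data_json = [
    {"Model": "model-a", "Average ⬆️": 61.347, "ANLI": 55.213},
    {"Model": "model-b", "Average ⬆️": 72.815, "ANLI": 70.084},
]

df = pd.DataFrame.from_records(all_data_json)
df = df.sort_values(by=["Average ⬆️"], ascending=False)  # best model first
df = df[["Model", "Average ⬆️", "ANLI"]].round(decimals=2)
print(df)
```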