ycy committed
Commit d55f05b · 1 Parent(s): c90946a
app.py CHANGED
@@ -33,6 +33,7 @@ def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
 ### Space initialisation
+# load the evaluation requests and results locally
 try:
     print(EVAL_REQUESTS_PATH)
     snapshot_download(
@@ -96,7 +97,7 @@ with demo:
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            #TODO
+
             leaderboard = init_leaderboard(LEADERBOARD_DF)
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
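The new comment marks the startup step where the Space mirrors the requests and results datasets locally via `snapshot_download`. Below is a minimal sketch of that initialisation, assuming the local target paths and the repo constants from `src/envs.py` (the literal values are hypothetical expansions of the f-strings, not the committed code):

# Sketch only: constants are assumed stand-ins for those defined in src/envs.py
from huggingface_hub import snapshot_download

QUEUE_REPO = "yan111222/requests"      # assumed expansion of f"{OWNER}/requests"
RESULTS_REPO = "yan111222/results"     # assumed expansion of f"{OWNER}/results"
EVAL_REQUESTS_PATH = "./eval-queue"    # assumed local mirror directories
EVAL_RESULTS_PATH = "./eval-results"

def mirror_eval_datasets() -> None:
    # Pull the evaluation requests dataset into the Space's local cache
    snapshot_download(repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset")
    # Pull the evaluation results dataset the leaderboard is built from
    snapshot_download(repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset")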
src/envs.py CHANGED
@@ -10,8 +10,8 @@ OWNER = "yan111222" # Change to your org - don't forget to create a results and
 # ----------------------------------
 
 REPO_ID = f"{OWNER}/CapArena_Auto_1"
-QUEUE_REPO = "demo-leaderboard-backend/requests"
-RESULTS_REPO = "demo-leaderboard-backend/results"
+QUEUE_REPO = f"{OWNER}/requests"
+RESULTS_REPO = f"{OWNER}/results"
 
 # If you setup a cache later, just change HF_HOME
 CACHE_PATH=os.getenv("HF_HOME", ".")
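The comment on the OWNER line already warns that the `requests` and `results` datasets must exist under the owner's namespace before the Space can sync them. A one-off setup sketch, assuming a write token in the HF_TOKEN environment variable (the names below are assumptions, not part of the commit):

# One-off setup sketch; OWNER and the token variable are assumed values
import os
from huggingface_hub import HfApi

OWNER = "yan111222"
api = HfApi(token=os.environ.get("HF_TOKEN"))

for name in ("requests", "results"):
    # Create the dataset repo if it is missing; harmless if it already exists
    api.create_repo(repo_id=f"{OWNER}/{name}", repo_type="dataset", exist_ok=True)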
src/leaderboard/read_evals.py CHANGED
@@ -156,6 +156,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
 
 def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
+    """Get the path of the results folder and the requests folder, and return a list of EvalResult objects"""
     model_result_filepaths = []
 
     for root, _, files in os.walk(results_path):
@@ -163,7 +164,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
         if len(files) == 0 or any([not f.endswith(".json") for f in files]):
             continue
 
-        # Sort the files by date
+        # Sort the files by date (this is where the qualifying result files are obtained)
         try:
             files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
         except dateutil.parser._parser.ParserError:
@@ -171,7 +172,8 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
 
         for file in files:
             model_result_filepaths.append(os.path.join(root, file))
-
+
+    # get all of the proper eval result paths
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
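The sort key in this hunk strips the `results_` prefix, the `.json` suffix, and the last seven characters (a `.microseconds` tail) before comparing timestamps. A small self-contained illustration of what it extracts; the filenames are made-up examples of that naming convention:

# Illustration only; filenames are hypothetical results_<timestamp>.<microseconds>.json examples
files = [
    "results_2025-03-01T09-15-00.000001.json",
    "results_2025-02-28T12-34-56.000000.json",
]

sort_key = lambda x: x.removesuffix(".json").removeprefix("results_")[:-7]
# sort_key("results_2025-02-28T12-34-56.000000.json") == "2025-02-28T12-34-56"

files.sort(key=sort_key)
print(files[0])  # oldest result file first: results_2025-02-28T12-34-56.000000.json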
src/populate.py CHANGED
@@ -11,6 +11,45 @@ from src.leaderboard.read_evals import get_raw_eval_results
 def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
     """Creates a dataframe from all the individual experiment results"""
     raw_data = get_raw_eval_results(results_path, requests_path)
+
+    # Example of raw_data
+    """raw_data = [
+        EvalResult(
+            model_name="org1/model1",
+            model_dtype="float32",
+            model_sha="commit_hash1",
+            results={
+                "task1": {"metric1": 0.85, "metric2": 0.90},
+                "task2": {"metric1": 0.75, "metric2": 0.80}
+            },
+            model_type="Pretrained",
+            weight_type="Original",
+            license="MIT",
+            likes=100,
+            params=123456789,
+            submitted_time="2025-02-28T12:34:56Z",
+            status="FINISHED",
+            precision="float32"
+        ),
+        EvalResult(
+            model_name="org2/model2",
+            model_dtype="float32",
+            model_sha="commit_hash2",
+            results={
+                "task1": {"metric1": 0.88, "metric2": 0.92},
+                "task2": {"metric1": 0.78, "metric2": 0.82}
+            },
+            model_type="Fine-tuned",
+            weight_type="Adapter",
+            license="Apache-2.0",
+            likes=200,
+            params=987654321,
+            submitted_time="2025-02-28T12:34:56Z",
+            status="FINISHED",
+            precision="float32"
+        )
+    ]
+    """
     all_data_json = [v.to_dict() for v in raw_data]
 
     df = pd.DataFrame.from_records(all_data_json)
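The commented example documents the shape of `raw_data` before it is flattened into records and loaded into pandas. A standalone sketch of that records-to-DataFrame step follows; the field names are illustrative rather than the template's exact columns:

# Sketch of the flattening step; records mimic what EvalResult.to_dict() might return
import pandas as pd

all_data_json = [
    {"model": "org1/model1", "precision": "float32", "task1": 0.85, "task2": 0.75},
    {"model": "org2/model2", "precision": "float32", "task1": 0.88, "task2": 0.78},
]

df = pd.DataFrame.from_records(all_data_json)
# A leaderboard usually sorts by an aggregate score; task1 stands in for it here
df = df.sort_values(by="task1", ascending=False)
print(df)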
src/submission/submit.py CHANGED
@@ -14,6 +14,8 @@ from src.submission.check_validity import (
 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None
 
+
+
 def add_new_eval(
     model: str,
     base_model: str,
@@ -22,6 +24,7 @@
     weight_type: str,
     model_type: str,
 ):
+    """Submitting a model to the evaluation queue automatically saves its information to the requests dataset"""
     global REQUESTED_MODELS
     global USERS_TO_SUBMISSION_DATES
     if not REQUESTED_MODELS:
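The new docstring states what `add_new_eval` does: the submission is recorded in the requests dataset. A rough sketch of that save step under the same assumptions; the entry fields, file paths, and the `API`/`QUEUE_REPO` values here are illustrative, not the committed code:

# Sketch only; field names, paths, and the API/QUEUE_REPO objects are assumptions
import json
import os
from datetime import datetime, timezone
from huggingface_hub import HfApi

API = HfApi(token=os.environ.get("HF_TOKEN"))
QUEUE_REPO = "yan111222/requests"  # assumed expansion of f"{OWNER}/requests"

eval_entry = {
    "model": "org/model",
    "base_model": "",
    "revision": "main",
    "precision": "float16",
    "weight_type": "Original",
    "model_type": "Pretrained",
    "status": "PENDING",
    "submitted_time": datetime.now(timezone.utc).isoformat(),
}

# Write the request locally, then push it into the requests dataset
local_path = "eval_request.json"
with open(local_path, "w") as f:
    json.dump(eval_entry, f, indent=2)

API.upload_file(
    path_or_fileobj=local_path,
    path_in_repo="org/model_eval_request.json",
    repo_id=QUEUE_REPO,
    repo_type="dataset",
    commit_message="Add org/model to eval queue",
)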