Maharshi Gor
committed on
Commit
·
3283369
1
Parent(s):
5a11597
Add explicit column names. Update requirements.
Browse files
- requirements.txt +1 -1
- src/populate.py +18 -2
requirements.txt
CHANGED
|
@@ -9,7 +9,7 @@ gradio_client
|
|
| 9 |
huggingface-hub>=0.18.0
|
| 10 |
matplotlib
|
| 11 |
numpy<2.0.0
|
| 12 |
-
pandas
|
| 13 |
python-dateutil
|
| 14 |
tqdm
|
| 15 |
transformers
|
|
|
|
| 9 |
huggingface-hub>=0.18.0
|
| 10 |
matplotlib
|
| 11 |
numpy<2.0.0
|
| 12 |
+
pandas>=2.0.0
|
| 13 |
python-dateutil
|
| 14 |
tqdm
|
| 15 |
transformers
|
src/populate.py
CHANGED
|
@@ -47,12 +47,25 @@ def get_tossups_leaderboard_df(repo_dir: str, eval_split: str) -> pd.DataFrame:
|
|
| 47 |
if "human_win_rate" in metrics:
|
| 48 |
row["Win Rate w/ Humans"] = metrics["human_win_rate"]
|
| 49 |
row["Win Rate w/ Humans (Aggressive)"] = metrics["human_win_rate_strict"]
|
|
|
|
|
|
|
|
|
|
| 50 |
eval_results.append(row)
|
| 51 |
except Exception as e:
|
| 52 |
logger.error(f"Error processing model result '{username}/{model_name}': {e}")
|
| 53 |
continue
|
| 54 |
|
| 55 |
-
return pd.DataFrame(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
|
| 58 |
def get_bonuses_leaderboard_df(repo_dir: str, eval_split: str) -> pd.DataFrame:
|
|
@@ -75,7 +88,10 @@ def get_bonuses_leaderboard_df(repo_dir: str, eval_split: str) -> pd.DataFrame:
|
|
| 75 |
logger.error(f"Error processing model result '{username}/{model_name}': {e}")
|
| 76 |
continue
|
| 77 |
|
| 78 |
-
return pd.DataFrame(
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
|
| 81 |
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|
|
|
|
| 47 |
if "human_win_rate" in metrics:
|
| 48 |
row["Win Rate w/ Humans"] = metrics["human_win_rate"]
|
| 49 |
row["Win Rate w/ Humans (Aggressive)"] = metrics["human_win_rate_strict"]
|
| 50 |
+
else:
|
| 51 |
+
row["Win Rate w/ Humans"] = None
|
| 52 |
+
row["Win Rate w/ Humans (Aggressive)"] = None
|
| 53 |
eval_results.append(row)
|
| 54 |
except Exception as e:
|
| 55 |
logger.error(f"Error processing model result '{username}/{model_name}': {e}")
|
| 56 |
continue
|
| 57 |
|
| 58 |
+
return pd.DataFrame(
|
| 59 |
+
eval_results,
|
| 60 |
+
columns=[
|
| 61 |
+
"Submission",
|
| 62 |
+
"Avg Score ⬆️",
|
| 63 |
+
"Buzz Accuracy",
|
| 64 |
+
"Buzz Position",
|
| 65 |
+
"Win Rate w/ Humans",
|
| 66 |
+
"Win Rate w/ Humans (Aggressive)",
|
| 67 |
+
],
|
| 68 |
+
)
|
| 69 |
|
| 70 |
|
| 71 |
def get_bonuses_leaderboard_df(repo_dir: str, eval_split: str) -> pd.DataFrame:
|
|
|
|
| 88 |
logger.error(f"Error processing model result '{username}/{model_name}': {e}")
|
| 89 |
continue
|
| 90 |
|
| 91 |
+
return pd.DataFrame(
|
| 92 |
+
eval_results,
|
| 93 |
+
columns=["Submission", "Question Accuracy", "Part Accuracy"],
|
| 94 |
+
)
|
| 95 |
|
| 96 |
|
| 97 |
def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
|