Maria Castellanos committed on
Commit 26bb373 · 1 Parent(s): 764fa75

leaderboard code v2

Files changed (2):
  1. cld.py +3 -3
  2. final_lb.py +46 -21
cld.py CHANGED
@@ -4,10 +4,10 @@ import tqdm
 import pandas as pd
 from itertools import product
 
-# Make large CLD alphabet with lowercase, uppercase and double letter combinations
+# Make large CLD alphabet
 single_chars = list(ascii_lowercase) + list(ascii_uppercase)
-double_chars = [''.join(p) for p in product(single_chars, repeat=2)]
-CLD_ALPHABET = single_chars + double_chars  # length is 2756
+underscore_chars = [''.join(p) for p in product(['_'], single_chars)]
+CLD_ALPHABET = single_chars + underscore_chars
 
 def asserts_non_significance(col: list[bool], i: int, j: int) -> bool:
     """Assert whether i and j are represented as non-significant in the column
final_lb.py CHANGED
@@ -5,7 +5,7 @@ from utils import (
     map_metric_to_stats,
     fetch_dataset_df,
 )
-from about import ENDPOINTS, LB_COLS, LB_AVG, results_repo_test
+from about import ENDPOINTS, LB_COLS, results_repo_test
 
 from loguru import logger
 import pandas as pd
@@ -35,36 +35,59 @@ def build_leaderboard(df_results, df_results_raw, avg_only=True):
 df['hf_username'] = df['hf_username'].apply(lambda s: s.lower())
 df = df.sort_values(by="submission time", ascending=False, kind="stable")
 df = df.drop_duplicates(subset=['hf_username'], keep='first')
+
 # Sort by MAE-RAE
 sorted_df = df.sort_values(by='mean_MA-RAE', ascending=True, kind="stable")
 sorted_df = map_metric_to_stats(sorted_df, average=True)
-avg_leaderboard = sorted_df.copy()
-avg_cols = LB_AVG
-# Add CLD
+
+# Make sure Hugging Face username exists, if not, delete the row
+sorted_df['user_real'] = sorted_df['hf_username'].apply(validate_hf_username)
+sorted_df_clean = sorted_df[sorted_df['user_real']].reset_index(drop=True)
+# Add ranking column
+sorted_df_clean['rank'] = np.arange(1, len(sorted_df_clean) + 1)
+avg_leaderboard = sorted_df_clean.copy()
+
+# Clean raw data as well
 df_raw = df_results_raw[df_results_raw["Endpoint"] == ep].copy()
 df_raw = df_raw.rename(columns={"RAE": "MA-RAE"})
+
+df_raw['hf_username'] = df_raw['hf_username'].apply(lambda s: s.lower())
+df_raw = df_raw.sort_values(by="submission_time", ascending=False, kind="stable")
+df_raw = df_raw.drop_duplicates(subset=['hf_username','Sample'], keep='first')
+
+valid_usernames = sorted_df_clean['hf_username'].unique()
+df_raw_clean = df_raw[df_raw['hf_username'].isin(valid_usernames)].reset_index(drop=True)
+
+# Make sure order of raw dataframe is the same as sorted dataframe
+username_order = sorted_df['hf_username'].unique()
+df_raw_sorted = df_raw_clean.copy()
+df_raw_sorted['hf_username'] = pd.Categorical(
+    df_raw_sorted['hf_username'],
+    categories=username_order,
+    ordered=True
+)
+df_raw_sorted = df_raw_sorted.sort_values(
+    by=['hf_username', 'Sample'],
+    ascending=[True, True]
+)
+df_raw_sorted['hf_username'] = df_raw_sorted['hf_username'].astype(str)
+df_raw_sorted = df_raw_sorted.reset_index(drop=True)
+
 avg_leaderboard = add_cld_to_leaderboard(
-    sorted_df,
-    df_raw,
+    sorted_df_clean,
+    df_raw_sorted,
     "MA-RAE",
 )
 avg_cols = ["rank",
-            "user",
-            "CLD",
-            "MA-RAE",
-            "R2",
-            "Spearman R",
-            "Kendall's Tau",
-            "model details"]
-
-# Make sure Hugging Face username exists, if not, delete the row
-avg_leaderboard['user_real'] = avg_leaderboard['hf_username'].apply(validate_hf_username)
-avg_leaderboard_clean = avg_leaderboard[avg_leaderboard['user_real']]
-
-# Add ranking column
-avg_leaderboard_clean['rank'] = np.arange(1, len(avg_leaderboard_clean) + 1)
+            "user",
+            "CLD",
+            "MA-RAE",
+            "R2",
+            "Spearman R",
+            "Kendall's Tau",
+            "model details"]
 
-per_ep[ep] = avg_leaderboard_clean[avg_cols]
+per_ep[ep] = avg_leaderboard[avg_cols]
 
 else:
     if avg_only:
@@ -109,6 +132,8 @@ def prepare_lb_csv(save_folder:str, avg_only:bool):
     per_ep_df = build_leaderboard(df_latest, df_latest_raw, avg_only)
     logger.info("Saving leaderboard")
     for ep in ALL_EPS:
+        if ep != "Average" and avg_only:
+            continue
         df_lb = per_ep_df[ep]
         save_path = Path(save_folder) / f"{ep}_leaderboard.csv"
         df_lb.to_csv(save_path, index=False)
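Note: the keep-latest-submission-per-user pattern, now applied to both df and df_raw, is a stable descending sort on submission time followed by drop_duplicates(keep='first'). A toy illustration (column names simplified; not the real frames):

import pandas as pd

df = pd.DataFrame({
    'hf_username': ['Alice', 'alice', 'bob'],
    'submission_time': ['2024-02-01', '2024-01-01', '2024-01-15'],
    'MA-RAE': [0.7, 0.9, 0.8],
})

# Lowercase so 'Alice' and 'alice' collapse to one user, then keep each
# user's most recent row: the stable sort puts it first within the group.
df['hf_username'] = df['hf_username'].apply(lambda s: s.lower())
df = df.sort_values(by='submission_time', ascending=False, kind='stable')
df = df.drop_duplicates(subset=['hf_username'], keep='first')
print(df)  # alice's 2024-02-01 row and bob's single row survive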
 
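Note: the pd.Categorical step is what hands add_cld_to_leaderboard the raw rows in the same user order as the ranked leaderboard, since an ordered Categorical makes sort_values follow a custom category order instead of the alphabetical one. A minimal standalone illustration with toy data:

import pandas as pd

username_order = ['carol', 'alice', 'bob']  # e.g. leaderboard rank order
df_raw = pd.DataFrame({
    'hf_username': ['alice', 'bob', 'carol', 'alice'],
    'Sample': [2, 1, 1, 1],
})

# Ordered categories override alphabetical sorting; ties break on Sample.
df_raw['hf_username'] = pd.Categorical(
    df_raw['hf_username'], categories=username_order, ordered=True
)
df_raw = df_raw.sort_values(by=['hf_username', 'Sample'], ascending=[True, True])
df_raw['hf_username'] = df_raw['hf_username'].astype(str)
print(df_raw.reset_index(drop=True))  # rows: carol, alice (Sample 1), alice (Sample 2), bob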