Commit 49574e5
Parent(s): d90c0f8

update leaderboard
serve/leaderboard.py  +46 -38
serve/leaderboard.py
CHANGED
@@ -95,7 +95,7 @@ def load_leaderboard_table_csv(filename, add_hyperlink=True):
     df = df.drop(df[df["Key"].isnull()].index)
     for col in df.columns:
         if "Elo rating" in col:
-            print(col, df[col], type(df[col]), df[col] is not np.NaN)
+            # print(col, df[col], type(df[col]), df[col] is not np.NaN)
             df[col] = df[col].apply(lambda x: int(x) if (x != "-" and pd.notna(x)) else np.NaN)
 
         if add_hyperlink and col == "Model":
@@ -174,10 +174,10 @@ def get_arena_table(arena_dfs, model_table_df):
         # elo rating
         num_battles = 0
         for dim in arena_dfs.keys():
-            try:
-                print(arena_dfs[dim].loc[model_name])
-            except:
-                continue
+            # try:
+            #     print(arena_dfs[dim].loc[model_name])
+            # except:
+            #     continue
             row.append(round(arena_dfs[dim].loc[model_name]["rating"], 2))
             upper_diff = round(arena_dfs[dim].loc[model_name]["rating_q975"] - arena_dfs[dim].loc[model_name]["rating"])
             lower_diff = round(arena_dfs[dim].loc[model_name]["rating"] - arena_dfs[dim].loc[model_name]["rating_q025"])
@@ -215,25 +215,26 @@ def make_arena_leaderboard_md(elo_results):
     total_models = len(arena_df)
 
     leaderboard_md = f"""
-
-
-
-(Note: Only anonymous votes are considered here.)
-
+Total #models: **{total_models}**. \n
+Total #votes: **{int(total_votes)}** (Anonymous Votes only). \n
+Last updated: {last_updated}. \n
 Contribute the votes 🗳️ at [3DGen-Arena](https://huggingface.co/spaces/ZhangYuhan/3DGen-Arena)!
-
 """
     return leaderboard_md
 
 def make_full_leaderboard_md(elo_results):
-
-
-
-
+    total_votes = 0
+    for dim in elo_results.keys():
+        arena_df = elo_results[dim]["leaderboard_table_df"]
+        last_updated = elo_results[dim]["last_updated_datetime"]
+        total_votes += sum(arena_df["num_battles"].fillna(0)) // 2
+    total_models = len(arena_df)
 
     leaderboard_md = f"""
-Total #models: **{total_models}
-
+Total #models: **{total_models}**. \n
+Total #votes: **{int(total_votes)}** (Anonymous + Named Votes). \n
+Last updated: {last_updated}.\n
+Contribute the votes 🗳️ at [3DGen-Arena](https://huggingface.co/spaces/ZhangYuhan/3DGen-Arena)!
 """
     return leaderboard_md
 
@@ -251,7 +252,7 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False
     else:
         with open(elo_results_file, "rb") as fin:
             elo_results = pickle.load(fin)
-
+
     # print(elo_results)
     # print(elo_results.keys())
     anony_elo_results, full_elo_results = {}, {}
@@ -266,6 +267,8 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False
         p2[dim] = anony_elo_results[dim]["battle_count_heatmap"]
         p3[dim] = anony_elo_results[dim]["bootstrap_elo_rating"]
         p4[dim] = anony_elo_results[dim]["average_win_rate_bar"]
+        print(anony_arena_dfs[dim])
+        print(full_arena_dfs[dim])
 
     md = make_leaderboard_md(anony_elo_results)
 
@@ -273,10 +276,11 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False
 
     if leaderboard_table_file:
         model_table_df = load_leaderboard_table_csv(leaderboard_table_file)
+        model_table_df_full = load_leaderboard_table_csv(str(leaderboard_table_file).replace('.csv', '_full.csv'))
         with gr.Tabs() as tabs:
             # arena table
             arena_table_vals = get_arena_table(anony_arena_dfs, model_table_df)
-            with gr.Tab("Arena
+            with gr.Tab("Anony. Arena", id=0):
                 md = make_arena_leaderboard_md(anony_elo_results)
                 gr.Markdown(md, elem_id="leaderboard_markdown")
                 gr.Dataframe(
@@ -308,25 +312,29 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=False
                     column_widths=[50, 200, 100, 100, 100, 100, 100, 100, 100],
                     wrap=True,
                 )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            with gr.Tab("Full Arena", id=1):
+                md = make_full_leaderboard_md(full_elo_results)
+                gr.Markdown(md, elem_id="leaderboard_markdown")
+                full_table_vals = get_arena_table(full_arena_dfs, model_table_df_full)
+                gr.Dataframe(
+                    headers=["Rank", "🤖 Model"] + [f"📈 {dim} Elo" for dim in anony_arena_dfs.keys()] + ["⭐ Avg. Arena Elo Ranking", "📮 Votes"],
+                    datatype=[
+                        "str",
+                        "markdown",
+                        "number",
+                        "number",
+                        "number",
+                        "number",
+                        "number",
+                        "number",
+                        "number"
+                    ],
+                    value=full_table_vals,
+                    elem_id="full_leaderboard_dataframe",
+                    column_widths=[50, 200, 100, 100, 100, 100, 100, 100, 100],
+                    height=700,
+                    wrap=True,
+                )
     if not show_plot:
         gr.Markdown(
             """ ## We are still collecting more votes on more models. The ranking will be updated very fruquently. Please stay tuned!
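A quick note on the new vote total in make_full_leaderboard_md: each battle increments num_battles for both participating models, so halving the summed column counts every vote once. A minimal sketch with an invented toy table (not data from this repo):

```python
# Minimal sketch, assuming a toy per-dimension leaderboard table with the
# same "num_battles" column the commit reads; the values are made up.
import pandas as pd

arena_df = pd.DataFrame(
    {"num_battles": [10, 7, 3]},            # per-model battle counts
    index=["model_a", "model_b", "model_c"],
)

# Every battle is counted once for each of its two participants,
# so the halved column sum recovers the number of battles (votes).
total_votes = sum(arena_df["num_battles"].fillna(0)) // 2
print(total_votes)  # -> 10
```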
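The per-dimension Elo columns shown in both tabs come from the rating / rating_q975 / rating_q025 fields touched in the get_arena_table hunk above. A rough illustration of that arithmetic with invented numbers (the real values come from the pickled elo_results tables):

```python
# Invented example values; the commit reads these from
# arena_dfs[dim].loc[model_name] instead of defining them inline.
rating = 1024.37       # point estimate for one model in one dimension
rating_q975 = 1040.2   # 97.5% quantile of the bootstrap ratings
rating_q025 = 1010.9   # 2.5% quantile

upper_diff = round(rating_q975 - rating)   # -> 16
lower_diff = round(rating - rating_q025)   # -> 13
print(f"{round(rating, 2)} (+{upper_diff} / -{lower_diff})")  # 1024.37 (+16 / -13)
```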