|
|
from statsmodels.stats.multicomp import pairwise_tukeyhsd |
|
|
from cld import cld |
|
|
from utils import ( |
|
|
check_page_exists, |
|
|
map_metric_to_stats, |
|
|
fetch_dataset_df, |
|
|
) |
|
|
from about import ENDPOINTS, LB_COLS, results_repo_test, METRICS |
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
|
|
|
def validate_hf_username(username, delay=1, max_retries=10):
    """Check whether *username* resolves to an existing Hugging Face profile.

    Args:
        username: Candidate username; coerced to ``str`` and stripped of
            surrounding whitespace before building the profile URL.
        delay: Seconds to wait between retries (forwarded to
            ``check_page_exists``; default preserves the original behavior).
        max_retries: Maximum lookup attempts (forwarded to
            ``check_page_exists``; default preserves the original behavior).

    Returns:
        The result of ``check_page_exists`` for the profile URL —
        presumably a bool indicating the page exists; confirm in utils.
    """
    username = str(username).strip()
    hf_url = f"https://huggingface.co/{username}"
    return check_page_exists(hf_url, delay=delay, max_retries=max_retries)
|
|
|
|
|
|
|
|
def validate_model_details(tag):
    """Validate a submitted model-report link.

    Applied per cell to the 'model details' column, where missing values
    arrive from pandas as NaN (not ``None``), so both are treated as
    "Not submitted".

    Args:
        tag: The raw cell value — a URL string, ``None``, or NaN.

    Returns:
        "Not submitted" for missing values, "Invalid link" for non-https
        or unreachable URLs, otherwise the stripped URL itself.
    """
    # NaN guard: pandas missing values are float('nan'), which the original
    # `is None` check let through and misclassified as "Invalid link".
    if tag is None or (isinstance(tag, float) and pd.isna(tag)):
        return "Not submitted"
    safe_tag = str(tag).strip()
    # Cheap syntactic check first so we only hit the network for
    # plausible-looking links.
    if not safe_tag.startswith("https://"):
        return "Invalid link"
    is_real_url = check_page_exists(safe_tag, delay=2)
    if not is_real_url:
        return "Invalid link"
    return safe_tag
|
|
|
|
|
|
|
|
|
|
|
def make_intermediate_lb():
    """Build the intermediate leaderboard CSV from the latest submissions.

    Pipeline:
      1. Download the results dataset (test repo) and lower-case usernames.
      2. Drop rows whose Hugging Face username does not resolve to a real
         profile page.
      3. Keep only each user's most recent 'Average'-endpoint submission.
      4. Run Tukey's HSD on RAE across users and derive compact letter
         display (CLD) groupings.
      5. Attach per-metric "mean ± std" columns, validate model-report
         links, and write ``leaderboard_cld_results.csv``.

    Side effects: network requests (dataset download, URL validation),
    console output, and a CSV written to the current working directory.
    """
    df_latest, df_latest_raw = fetch_dataset_df(
        download_raw=True,
        test_repo=results_repo_test
    )

    # Usernames are matched case-insensitively against Hugging Face.
    df_latest_raw["hf_username"] = df_latest_raw["hf_username"].str.lower()

    # Validate each distinct username once to avoid redundant HTTP lookups.
    hf_usernames = df_latest_raw["hf_username"].unique()
    valid_hf_usernames = {username: validate_hf_username(username) for username in hf_usernames}

    for username, is_valid in valid_hf_usernames.items():
        print(f"Username: {username}, Valid: {is_valid}")

    # Keep only rows belonging to verified Hugging Face accounts.
    df_latest_raw["hf_user_valid"] = df_latest_raw["hf_username"].map(valid_hf_usernames)
    df_latest_raw = df_latest_raw[df_latest_raw["hf_user_valid"]].reset_index(drop=True)

    # Restrict to the 'Average' endpoint and, per user, keep every row that
    # shares that user's most recent submission timestamp.
    df_latest_raw["submission_time"] = pd.to_datetime(df_latest_raw["submission_time"])
    df_latest_raw = df_latest_raw.query("Endpoint == 'Average'")
    df_latest_raw['latest_time_per_user'] = df_latest_raw.groupby('hf_username')['submission_time'].transform('max')
    latest_submissions_df = df_latest_raw[df_latest_raw['submission_time'] == df_latest_raw['latest_time_per_user']].copy()

    # Rank users by mean RAE (ascending: lower is better), with 'Sample' as
    # a secondary key for a stable ordering.
    latest_submissions_df['mean_RAE'] = latest_submissions_df.groupby('hf_username')['RAE'].transform('mean')
    latest_submissions_df = latest_submissions_df.sort_values(
        by=['mean_RAE', 'Sample'], ascending=True
    ).reset_index(drop=True)

    # Prefix each user with a zero-padded rank (e.g. '001___alice') so the
    # Tukey/CLD group labels sort in leaderboard order.
    unique_users_ordered = latest_submissions_df['user'].unique()
    user_mapping = {}
    for idx, user in enumerate(unique_users_ordered):
        prefix = f"{idx + 1:03d}"
        prefixed_user = f"{prefix}___{user}"
        user_mapping[user] = prefixed_user
    latest_submissions_df['user'] = latest_submissions_df['user'].map(user_mapping)

    # Pairwise Tukey HSD over per-sample RAE, grouped by (prefixed) user.
    tukey = pairwise_tukeyhsd(endog=latest_submissions_df['RAE'], groups=latest_submissions_df['user'], alpha=0.05)
    # Use the public summary() API rather than the private _results_table
    # attribute; both expose the same SimpleTable (header row + data rows).
    summary_table = tukey.summary()
    tukey_df = pd.DataFrame(data=summary_table.data[1:],
                            columns=summary_table.data[0])

    # Compact letter display: users sharing a letter are statistically
    # indistinguishable under Tukey HSD.
    cld_dict = cld(tukey_df)
    cld_df = pd.DataFrame(cld_dict.items(), columns=["group", "letter"]).sort_values("group")
    cld_df.letter = [",".join(x) for x in cld_df.letter]
    cld_df["user"] = cld_df.group

    def clean_up(ser):
        # Normalize a comma-joined letter string for display.
        # NOTE(review): the '@' handling assumes specific list shapes coming
        # out of cld() (lengths 2 and 4) — confirm against cld's output.
        ser = ser.split(",")
        if "@" in ser and len(ser) == 2:
            let = "@" + ser[1]
        elif "@" in ser and len(ser) == 4:
            let = "@" + ser[2] + "," + "@" + ser[3]
        else:
            let = ",".join(ser)
        return let

    cld_df["fixed_letter"] = cld_df["letter"].apply(clean_up)
    report_cols = latest_submissions_df[['user', 'model_report']].drop_duplicates(keep='first')

    # Per-metric mean/std columns plus a human-readable "mean ± std" string.
    for metric in METRICS:
        metric_stats = latest_submissions_df.groupby('user')[metric].agg(['mean', 'std']).reset_index()
        metric_stats = metric_stats.rename(columns={'mean': f'{metric}_mean', 'std': f'{metric}_std'})
        metric_stats[f"{metric}_display"] = metric_stats.apply(
            lambda row: f"{row[f'{metric}_mean']:.4f} ± {row[f'{metric}_std']:.4f}", axis=1
        )
        cld_df = metric_stats[['user', f'{metric}_mean', f'{metric}_std', f'{metric}_display']].merge(cld_df, on='user', how='left')

    # Final ordering is by mean RAE; assumes 'RAE' is one of METRICS so the
    # 'RAE_mean' column exists after the loop above.
    cld_df = cld_df.sort_values(by='RAE_mean', ascending=True).reset_index(drop=True)
    cld_df = cld_df.merge(report_cols, on='user', how='inner')
    # Strip the rank prefix added above, restoring the display username.
    cld_df['user'] = cld_df['user'].str.split('___').str[1]

    cld_subset = cld_df[['user', 'fixed_letter'] + [f'{metric}_display' for metric in METRICS] + ['model_report']]
    cld_subset = cld_subset.rename(columns={'user': 'user', 'fixed_letter': 'CLD', 'model_report': 'model details'})
    cld_subset['model details'] = cld_subset['model details'].apply(validate_model_details).astype(str)

    print(cld_subset.head())
    cld_subset.to_csv("leaderboard_cld_results.csv", index=False)
|
|
|
|
|
# Script entry point: rebuild and export the intermediate leaderboard CSV.
if __name__ == "__main__":


    make_intermediate_lb()