import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.repocard import metadata_load

path = "https://huggingface.co/api/spaces"
TASKS = [
    "BitextMining",
    "Classification",
    "Clustering",
    "PairClassification",
    "Reranking",
    "Retrieval",
    "STS",
    "Summarization",
]

TASK_LIST_CLASSIFICATION = [
    "AmazonCounterfactualClassification (en)",
    "AmazonPolarityClassification",
    "AmazonReviewsClassification (en)",
    "Banking77Classification",
    "EmotionClassification",
    "ImdbClassification",
    "MassiveIntentClassification (en)",
    "MassiveScenarioClassification (en)",
    "MTOPDomainClassification (en)",
    "MTOPIntentClassification (en)",
    "ToxicConversationsClassification",
    "TweetSentimentExtractionClassification",
]

TASK_LIST_CLUSTERING = [
    "ArxivClusteringP2P",
    "ArxivClusteringS2S",
    "BiorxivClusteringP2P",
    "BiorxivClusteringS2S",
    "MedrxivClusteringP2P",
    "MedrxivClusteringS2S",
    "RedditClustering",
    "RedditClusteringP2P",
    "StackExchangeClustering",
    "StackExchangeClusteringP2P",
    "TwentyNewsgroupsClustering",
]

TASK_LIST_PAIR_CLASSIFICATION = [
    "SprintDuplicateQuestions",
    "TwitterSemEval2015",
    "TwitterURLCorpus",
]

TASK_LIST_RERANKING = [
    "AskUbuntuDupQuestions",
    "MindSmallReranking",
    "SciDocsRR",
    "StackOverflowDupQuestions",
]

TASK_LIST_RETRIEVAL = [
    "ArguAna",
    "ClimateFEVER",
    "CQADupstackRetrieval",
    "DBPedia",
    "FEVER",
    "FiQA2018",
    "HotpotQA",
    "MSMARCO",
    "NFCorpus",
    "NQ",
    "QuoraRetrieval",
    "SCIDOCS",
    "SciFact",
    "Touche2020",
    "TRECCOVID",
]

TASK_LIST_STS = [
    "BIOSSES",
    "SICK-R",
    "STS12",
    "STS13",
    "STS14",
    "STS15",
    "STS16",
    "STS17 (en-en)",
    "STS22 (en)",
    "STSBenchmark",
]

TASK_LIST_SUMMARIZATION = [
    "SummEval",
]
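
# All English tasks combined, used to compute the overall "Average" column.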
TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION

TASK_TO_TASK_LIST = {}
def make_clickable_model(model_name):
    # Remove user from model name
    model_name_show = " ".join(model_name.split("/")[1:])
    link = "https://huggingface.co/" + model_name
    return (
        f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name_show}</a>'
    )
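
# Main metric reported for each task type; used below to pick the score
# out of each model card's metadata.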
TASK_TO_METRIC = {
    "BitextMining": "f1",
    "Clustering": "v_measure",
    "Classification": "accuracy",
    "PairClassification": "cos_sim_ap",
    "Reranking": "map",
    "Retrieval": "ndcg_at_10",
    "STS": "cos_sim_spearman",
    "Summarization": "cos_sim_spearman",
}
def get_mteb_data(tasks=["Clustering"], metric="v_measure", langs=[], cast_to_str=True, task_to_metric=TASK_TO_METRIC):
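    """Collect MTEB results from model cards on the Hugging Face Hub.

    tasks: task types to keep (e.g. ["Clustering"]).
    metric: kept for the Gradio callers; the score actually reported per task
        comes from task_to_metric.
    langs: dataset configs to keep besides "default" (e.g. ["en", "en-en"]).
    cast_to_str: cast scores to str so Gradio renders them reliably.
    Returns a DataFrame with one row per model and one column per dataset.
    """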
    api = HfApi()
    models = api.list_models(filter="mteb")
    df_list = []
    for model in models:
        readme_path = hf_hub_download(model.modelId, filename="README.md")
        meta = metadata_load(readme_path)
        # meta['model-index'][0]["results"] is a list of elements like:
        # {
        #     "task": {"type": "Classification"},
        #     "dataset": {
        #         "type": "mteb/amazon_massive_intent",
        #         "name": "MTEB MassiveIntentClassification (nb)",
        #         "config": "nb",
        #         "split": "test",
        #     },
        #     "metrics": [
        #         {"type": "accuracy", "value": 39.81506388702084},
        #         {"type": "f1", "value": 38.809586587791664},
        #     ],
        # },
        # Use "get" instead of dict indexing to skip incompatible metadata instead of erroring out
        #if langs is None:
        task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and (sub_res.get("dataset", {}).get("config", "default") in ("default", *langs))]
        out = [{res["dataset"]["name"].replace("MTEB ", ""): [round(score["value"], 2) for score in res["metrics"] if score["type"] == task_to_metric.get(res["task"]["type"])][0]} for res in task_results]
        #else:
        #     # Multilingual
        #     out = list(
        #         map(
        #             lambda x: {
        #                 x["dataset"]["name"].replace("MTEB ", ""): round(
        #                     list(filter(lambda x: x["type"] == metric, x["metrics"]))[0]["value"], 2
        #                 )
        #             },
        #             filter(
        #                 lambda x: (x.get("task", {}).get("type", "") in tasks)
        #                 and (x.get("dataset", {}).get("config", "") in ("default", *langs)),
        #                 meta["model-index"][0]["results"],
        #             ),
        #         )
        #     )
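        # Merge the per-dataset dicts into a single row for this model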
        out = {k: v for d in out for k, v in d.items()}
        out["Model"] = make_clickable_model(model.modelId)
        df_list.append(out)
    df = pd.DataFrame(df_list)
    # Put 'Model' column first
    cols = sorted(list(df.columns))
    cols.insert(0, cols.pop(cols.index("Model")))
    df = df[cols]
    # df.insert(1, "Average", df.mean(axis=1, skipna=False))
    df.fillna("", inplace=True)
    if cast_to_str:
        return df.astype(str)  # Cast to str as Gradio does not accept floats
    return df
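
# Build the overall English table once at startup; per-category views and
# average columns are derived from it below.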
DATA_OVERALL = get_mteb_data(
    tasks=[
        "Classification",
        "Clustering",
        "PairClassification",
        "Reranking",
        "Retrieval",
        "STS",
        "Summarization",
    ],
    langs=["en", "en-en"],
    cast_to_str=False,
)

DATA_OVERALL.insert(1, "Average", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False))
DATA_OVERALL.insert(2, "Classification Average", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False))
DATA_OVERALL.insert(3, "Clustering Average", DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
DATA_OVERALL.insert(4, "Pair Classification Average", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
DATA_OVERALL.insert(5, "Reranking Average", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
DATA_OVERALL.insert(6, "Retrieval Average", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
DATA_OVERALL.insert(7, "STS Average", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
DATA_OVERALL.insert(8, "Summarization Average", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
DATA_OVERALL = DATA_OVERALL.round(2).astype(str)

DATA_CLASSIFICATION_EN = DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION]
DATA_CLUSTERING = DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING]
DATA_PAIR_CLASSIFICATION = DATA_OVERALL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION]
DATA_RERANKING = DATA_OVERALL[["Model"] + TASK_LIST_RERANKING]
DATA_RETRIEVAL = DATA_OVERALL[["Model"] + TASK_LIST_RETRIEVAL]
DATA_STS_EN = DATA_OVERALL[["Model"] + TASK_LIST_STS]
DATA_SUMMARIZATION = DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION]
DATA_OVERALL = DATA_OVERALL[["Model", "Average", "Classification Average", "Clustering Average", "Pair Classification Average", "Reranking Average", "Retrieval Average", "STS Average", "Summarization Average"]]
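
# Gradio UI: one tab per task category, each showing a results table and a
# Refresh button that re-queries the Hub via get_mteb_data.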
block = gr.Blocks()

with block:
    gr.Markdown(
        """Leaderboard for the Massive Text Embedding Benchmark (MTEB). To learn more, see the <a href="https://github.com/embeddings-benchmark/mteb" target="_blank" style="text-decoration: underline">MTEB repository</a>"""
    )
    with gr.Tabs():
        with gr.TabItem("Overall"):
            with gr.Row():
                gr.Markdown("""Average Scores""")
            with gr.Row():
                data_overall = gr.components.Dataframe(
                    DATA_OVERALL,
                    datatype="markdown",
                    type="pandas",
                    col_count=(len(DATA_OVERALL.columns), "fixed"),
                    wrap=True,
                )
        with gr.TabItem("Classification"):
            with gr.TabItem("English"):
                with gr.Row():
                    gr.Markdown("""Leaderboard for Classification""")
                with gr.Row():
                    data_classification_en = gr.components.Dataframe(
                        DATA_CLASSIFICATION_EN,
                        datatype="markdown",
                        type="pandas",
                        col_count=(len(DATA_CLASSIFICATION_EN.columns), "fixed"),
                    )
                with gr.Row():
                    data_run = gr.Button("Refresh")
                    task_classification_en = gr.Variable(value="Classification")
                    metric_classification_en = gr.Variable(value="accuracy")
                    lang_classification_en = gr.Variable(value=["en"])
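                    # The Variable values are forwarded to get_mteb_data as the
                    # tasks, metric and langs arguments when Refresh is clicked.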
                    data_run.click(
                        get_mteb_data,
                        inputs=[
                            task_classification_en,
                            metric_classification_en,
                            lang_classification_en,
                        ],
                        outputs=data_classification_en,
                    )
            with gr.TabItem("Multilingual"):
                with gr.Row():
                    gr.Markdown("""Multilingual Classification""")
                with gr.Row():
                    data_classification = gr.components.Dataframe(
                        datatype=["markdown"] * 500,
                        type="pandas",
                    )
                with gr.Row():
                    data_run = gr.Button("Refresh")
                    task_classification = gr.Variable(value="Classification")
                    metric_classification = gr.Variable(value="accuracy")
                    data_run.click(
                        get_mteb_data,
                        inputs=[task_classification, metric_classification],
                        outputs=data_classification,
                    )
        with gr.TabItem("Clustering"):
            with gr.Row():
                gr.Markdown("""Leaderboard for Clustering""")
            with gr.Row():
                data_clustering = gr.components.Dataframe(
                    datatype=["markdown"] * 500,
                    type="pandas",
                )
            with gr.Row():
                data_run = gr.Button("Refresh")
                task_clustering = gr.Variable(value="Clustering")
                metric_clustering = gr.Variable(value="v_measure")
                data_run.click(
                    get_mteb_data,
                    inputs=[task_clustering, metric_clustering],
                    outputs=data_clustering,
                )
        with gr.TabItem("Retrieval"):
            with gr.Row():
                gr.Markdown("""Leaderboard for Retrieval""")
            with gr.Row():
                data_retrieval = gr.components.Dataframe(
                    datatype=["markdown"] * 500,
                    type="pandas",
                )
            with gr.Row():
                data_run = gr.Button("Refresh")
                task_retrieval = gr.Variable(value="Retrieval")
                metric_retrieval = gr.Variable(value="ndcg_at_10")
                data_run.click(
                    get_mteb_data, inputs=[task_retrieval, metric_retrieval], outputs=data_retrieval
                )
        with gr.TabItem("Reranking"):
            with gr.Row():
                gr.Markdown("""Leaderboard for Reranking""")
            with gr.Row():
                data_reranking = gr.components.Dataframe(
                    datatype=["markdown"] * 500,
                    type="pandas",
                    # col_count=(12, "fixed"),
                )
            with gr.Row():
                data_run = gr.Button("Refresh")
                task_reranking = gr.Variable(value="Reranking")
                metric_reranking = gr.Variable(value="map")
                data_run.click(
                    get_mteb_data, inputs=[task_reranking, metric_reranking], outputs=data_reranking
                )
        with gr.TabItem("STS"):
            with gr.TabItem("English"):
                with gr.Row():
                    gr.Markdown("""Leaderboard for STS""")
                with gr.Row():
                    data_sts_en = gr.components.Dataframe(
                        datatype=["markdown"] * 500,
                        type="pandas",
                    )
                with gr.Row():
                    data_run_en = gr.Button("Refresh")
                    task_sts_en = gr.Variable(value="STS")
                    metric_sts_en = gr.Variable(value="cos_sim_spearman")
                    lang_sts_en = gr.Variable(value=["en", "en-en"])
                    data_run_en.click(
                        get_mteb_data,
                        inputs=[task_sts_en, metric_sts_en, lang_sts_en],
                        outputs=data_sts_en,
                    )
            with gr.TabItem("Multilingual"):
                with gr.Row():
                    gr.Markdown("""Leaderboard for STS""")
                with gr.Row():
                    data_sts = gr.components.Dataframe(
                        datatype=["markdown"] * 500,
                        type="pandas",
                    )
                with gr.Row():
                    data_run = gr.Button("Refresh")
                    task_sts = gr.Variable(value="STS")
                    metric_sts = gr.Variable(value="cos_sim_spearman")
                    data_run.click(get_mteb_data, inputs=[task_sts, metric_sts], outputs=data_sts)
        with gr.TabItem("Summarization"):
            with gr.Row():
                gr.Markdown("""Leaderboard for Summarization""")
            with gr.Row():
                data_summarization = gr.components.Dataframe(
                    datatype=["markdown"] * 500,
                    type="pandas",
                )
            with gr.Row():
                data_run = gr.Button("Refresh")
                task_summarization = gr.Variable(value="Summarization")
                metric_summarization = gr.Variable(value="cos_sim_spearman")
                data_run.click(
                    get_mteb_data,
                    inputs=[task_summarization, metric_summarization],
                    outputs=data_summarization,
                )
    # running the function on page load in addition to when the button is clicked
    #block.load(
    #    get_mteb_data,
    #    inputs=[task_classification_en, metric_classification_en],
    #    outputs=data_classification_en,
    #    show_progress=False,
    #)
    block.load(
        get_mteb_data,
        inputs=[task_classification, metric_classification],
        outputs=data_classification,
    )
    block.load(get_mteb_data, inputs=[task_clustering, metric_clustering], outputs=data_clustering)
    block.load(get_mteb_data, inputs=[task_retrieval, metric_retrieval], outputs=data_retrieval)
    block.load(get_mteb_data, inputs=[task_reranking, metric_reranking], outputs=data_reranking)
    block.load(get_mteb_data, inputs=[task_sts, metric_sts], outputs=data_sts)
    block.load(
        get_mteb_data, inputs=[task_summarization, metric_summarization], outputs=data_summarization
    )

block.launch()