Spaces:
Runtime error
Runtime error
| from huggingface_hub import get_collection, Collection, CollectionItem | |
| from toolz import groupby, valmap | |
| from typing import Dict, List | |
| import pandas as pd | |
| from huggingface_hub import model_info | |
| import gradio as gr | |
| from functools import lru_cache | |
# Slug of the Hugging Face collection that this app loads and summarises.
test_slug = "HF-IA-archiving/models-to-archive-65006a7fdadb8c628f33aac9"
def group_collection_by_repo_type(
    collection_slug: str,
) -> Dict[str, List[CollectionItem]]:
    """Fetch a Hub collection and group its items by item type.

    Args:
        collection_slug: Slug identifying the collection, passed straight to
            ``get_collection``.

    Returns:
        A dict mapping each type key (for example ``"model"``) to the list of
        collection items of that type.
    """

    def item_kind(item):
        # Keep grouping on ``repoType`` when the item exposes it, but fall
        # back to the documented ``item_type`` attribute so that items
        # lacking ``repoType`` no longer raise AttributeError.
        kind = getattr(item, "repoType", None)
        return kind if kind is not None else item.item_type

    collection = get_collection(collection_slug)
    return groupby(item_kind, collection.items)
def render_model_hub_link(hub_id):
    """Return an HTML anchor that opens the Hub page of ``hub_id`` in a new tab.

    The link text is the id itself; the inline style gives it a dotted
    underline in the theme's link colour.
    """
    link = f"https://huggingface.co/{hub_id}"
    opening_tag = (
        f'<a target="_blank" href="{link}" style="color: var(--link-text-color);'
        ' text-decoration: underline;text-decoration-style: dotted;">'
    )
    return f"{opening_tag}{hub_id}</a>"
def load_to_dataframe(data):
    """Build a DataFrame of collection items enriched with model-card metadata.

    Args:
        data: Iterable of objects whose instance attributes (``__dict__``)
            hold the item fields. Only the columns listed below are kept,
            and only when present on the item.

    Returns:
        A DataFrame with one row per item. ``item_id`` is rendered as an HTML
        link, and the ``language``/``tags``/``license``/``datasets`` columns
        are read from each model's card data (``None`` when it cannot be
        read). An empty input yields an empty DataFrame that still carries
        every column.
    """
    # Columns to keep
    columns = [
        "item_id",
        "downloads",
        "author",
        "likes",
        "pipeline_tag",
        "lastModified",
    ]
    required_info_keys = ["language", "tags", "license", "datasets"]

    # Convert to dicts, keeping only the wanted keys that are present.
    filtered_data = [
        {key: fields[key] for key in columns if key in fields}
        for fields in (item.__dict__ for item in data)
    ]
    if not filtered_data:
        # pd.DataFrame([]) has no columns at all, so the "item_id" lookup at
        # the end would raise KeyError; return the expected schema instead.
        return pd.DataFrame(columns=columns + required_info_keys)

    for item in filtered_data:
        try:
            card = model_info(item["item_id"]).cardData
            card_values = {key: card.get(key) for key in required_info_keys}
        except AttributeError as e:
            # Best effort: presumably raised when the model has no card data
            # (``cardData`` is None); the metadata stays empty for this item.
            print(e)
            card_values = dict.fromkeys(required_info_keys)
        item.update(card_values)

    # Load into a DataFrame
    df = pd.DataFrame(filtered_data)
    df["item_id"] = df["item_id"].apply(render_model_hub_link)
    return df
def summary_of_na_values(df):
    """Summarise missing values per column of ``df``.

    Returns a DataFrame with one row for each column that has at least one
    missing value, giving the column name (``Metadata``), the number of
    missing entries and that number as a percentage of all rows, rounded to
    two decimals.
    """
    missing = df.isna().sum()
    # Columns without any missing value are left out of the summary.
    missing = missing.loc[missing.gt(0)]
    share = (missing / len(df) * 100).round(2)
    summary = pd.DataFrame({"Missing Count": missing, "Missing Percent": share})
    summary = summary.rename_axis(index="Metadata")
    return summary.reset_index()
def value_counts(df, column_name):
    """Count how often each distinct value occurs in column ``column_name``."""
    column = df[column_name]
    return column.value_counts()
def load_data():
    """Fetch the collection and build the table of its models.

    Returns:
        A tuple ``(repos_grouped_by_type, column_names, df)``: the collection
        items grouped by type, the column names of the models table, and the
        models table itself.
    """
    grouped = group_collection_by_repo_type(test_slug)
    # Only the "model" group is turned into a table.
    models_frame = load_to_dataframe(grouped["model"])
    return grouped, models_frame.columns.to_list(), models_frame
def generate_markdown_summary_of_collection(
    grouped_collection: Dict[str, List[CollectionItem]]
):
    """Render a Markdown bullet list with the item count of each group.

    Args:
        grouped_collection: Mapping from a type name (e.g. ``"model"``) to
            the items of that type.

    Returns:
        A Markdown string: a heading line followed by one ``- <count> <type>``
        bullet per group, in the mapping's iteration order. The type name is
        pluralised with a trailing "s" unless the count is exactly one.
    """
    lines = ["This collection contains the following items:\n"]
    for kind, items in grouped_collection.items():
        count = len(items)
        # Avoid "1 models": only add the plural suffix when it applies.
        suffix = "" if count == 1 else "s"
        lines.append(f"- {count} {kind}{suffix}\n")
    return "".join(lines)
# Load the collection once at import time; the summary, the missing-values
# table and the column picker below are built from these module-level values.
repos_grouped_by_type, column_names, df = load_data()
def filter_df(columns_to_show=None):
    """Return the models table, optionally restricted to the given columns.

    Every call goes through ``load_data()`` again rather than reusing the
    module-level table.

    Args:
        columns_to_show: Column names to keep, or ``None`` for all columns.
    """
    frame = load_data()[-1]
    if columns_to_show is None:
        return frame
    return frame[columns_to_show]
# Assemble the page: collection summary, missing-metadata table, then the
# models table with a column picker.
# NOTE(review): the original indentation was lost in extraction; the nesting
# under the accordion is reconstructed — confirm against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown(value="## Info about models in this collection")
    gr.Markdown(
        value=generate_markdown_summary_of_collection(repos_grouped_by_type)
    )
    gr.Markdown(value="### Summary of missing metadata values")
    gr.DataFrame(value=summary_of_na_values(df))
    gr.Markdown(value="# Models in this collection")
    with gr.Accordion(label="Models", open=True):
        # All columns are selected initially.
        columns_to_show = gr.Dropdown(
            choices=column_names,
            value=column_names,
            label="Columns to show",
            multiselect=True,
        )
        # Passing the function (not its result) as the value; cells are
        # rendered as markdown so the item_id links show as links.
        models_df = gr.DataFrame(value=filter_df, datatype="markdown")
        # Re-filter the table whenever the column selection changes.
        columns_to_show.change(
            fn=filter_df, inputs=columns_to_show, outputs=models_df
        )

demo.launch(debug=True)