Spaces:
Running
Running
| from __future__ import annotations | |
| import json | |
| import shutil | |
| import subprocess | |
| import tempfile | |
| from datetime import datetime, timedelta | |
| from functools import lru_cache | |
| from pathlib import Path | |
| from huggingface_hub import hf_hub_download | |
| import gradio as gr | |
| from modular_graph_and_candidates import build_graph_json, generate_html, build_timeline_json, generate_timeline_html, filter_graph_by_threshold | |
def _escape_srcdoc(text: str) -> str:
    """Escape *text* for inclusion inside an <iframe srcdoc="…"> attribute.

    The five HTML-significant characters (``&``, ``<``, ``>``, ``"`` and
    ``'``) are replaced by character references, so the result can be placed
    between double quotes in an attribute value without terminating it.

    Args:
        text: Raw HTML document to embed.

    Returns:
        The escaped string; text without any of those characters is returned
        unchanged.
    """
    # Local import keeps this helper self-contained. html.escape handles "&"
    # first, so references it emits are never escaped a second time.
    import html

    return html.escape(text, quote=True)
# Upstream repository URL; used as the default value of the "Repo / fork URL" inputs.
HF_MAIN_REPO = "https://github.com/huggingface/transformers"
# Hub dataset repo from which latest.json and the precomputed graph/timeline/LOC
# files are downloaded.
CACHE_REPO = "Molbap/hf_cached_embeds_log"
def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimodal: bool):
    """Load a precomputed result from the cache repo and prepare it for the UI.

    Reads ``latest.json`` from ``CACHE_REPO`` to find the current ``sha``,
    downloads ``{kind}/{sha}/{sim_method}-m{0|1}.json``, filters it with
    ``filter_graph_by_threshold`` and renders the filtered data to HTML.

    Args:
        kind: ``"timeline"`` selects the timeline renderer and file suffix;
            any other value is rendered with ``generate_html``.
        sim_method: Similarity method name, used in the cached file name.
        threshold: Value passed to ``filter_graph_by_threshold``.
        multimodal: Encoded as ``m1``/``m0`` in the cached file name.

    Returns:
        A ``(iframe_html, json_path)`` tuple: the rendered page wrapped in an
        ``<iframe srcdoc=...>`` element, and the path of a temporary file
        holding the filtered data as JSON.

    Raises:
        KeyError: If ``latest.json`` has no ``"sha"`` entry.
    """
    latest_fp = hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset")
    info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
    # Fail here (as run_loc does) rather than requesting a bogus "None/..." path.
    sha = info["sha"]
    key = f"{sha}/{sim_method}-m{int(multimodal)}"

    json_fp = hf_hub_download(repo_id=CACHE_REPO, filename=f"{kind}/{key}.json", repo_type="dataset")
    raw_data = json.loads(Path(json_fp).read_text(encoding="utf-8"))
    filtered_data = filter_graph_by_threshold(raw_data, threshold)

    is_timeline = kind == "timeline"
    render = generate_timeline_html if is_timeline else generate_html
    raw_html = render(filtered_data)
    iframe_html = f'<iframe style="width:100%;height:85vh;border:none;" srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'

    # delete=False keeps the file on disk after the handle is closed, so the
    # returned path stays valid; the context manager closes the descriptor
    # that a bare mkstemp() call would leave open.
    suffix = "_timeline.json" if is_timeline else ".json"
    with tempfile.NamedTemporaryFile("w", suffix=suffix, delete=False, encoding="utf-8") as tmp:
        json.dump(filtered_data, tmp)
    return iframe_html, tmp.name
def run_loc(sim_method: str, multimodal: bool):
    """Return an iframe embedding the cached LOC page for the latest ``sha``.

    The ``sha`` is read from ``latest.json`` in ``CACHE_REPO``; the page is
    downloaded from ``loc/{sha}/{sim_method}-m{0|1}.html`` in the same repo.
    """
    pointer_path = hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset")
    pointer = json.loads(Path(pointer_path).read_text(encoding="utf-8"))
    sha = pointer["sha"]
    key = f"{sha}/{sim_method}-m{int(multimodal)}"

    page_path = hf_hub_download(repo_id=CACHE_REPO, filename=f"loc/{key}.html", repo_type="dataset")
    escaped_page = _escape_srcdoc(Path(page_path).read_text(encoding="utf-8"))
    return f'<iframe style="width:100%;height:85vh;border:none;" srcdoc="{escaped_page}"></iframe>'
def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
    """Return the cached "graph" result as ``(iframe_html, json_path)``.

    ``repo_url`` is accepted but not used: the result always comes from the
    cache repo.
    """
    return _fetch_from_cache_repo(
        kind="graph",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
    )
def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
    """Return the cached "timeline" result as ``(iframe_html, json_path)``.

    ``repo_url` is accepted but not used: the result always comes from the
    cache repo.
    """
    return _fetch_from_cache_repo(
        kind="timeline",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
    )
# ───────────────────────────── UI ────────────────────────────────────────────────
# Extra CSS passed to gr.Blocks: sizes the iframes inside the #graph_html and
# #timeline_html components to full width and 85vh height, without a border.
CUSTOM_CSS = """
#graph_html iframe, #timeline_html iframe {height:85vh !important; width:100% !important; border:none;}
"""
# Three-tab UI: timeline, LOC growth and dependency graph. Every button handler
# reads precomputed results from the cache repo.
# NOTE(review): the emoji in the title and the "≥" in the slider labels were
# reconstructed from mis-encoded text — confirm against the deployed app.
# NOTE(review): the original indentation was lost; which components sit inside
# each gr.Row() is assumed (inputs and button in the row, outputs below) — confirm.
with gr.Blocks(css=CUSTOM_CSS) as demo:
    gr.Markdown("## 🔍 Modular-candidate explorer for 🤗 Transformers")

    # Capture the Tabs as a component so we can control selection.
    with gr.Tabs() as tabs:
        with gr.Tab("Chronological Timeline", id="timeline"):
            with gr.Row():
                timeline_repo_in = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL")
                timeline_thresh = gr.Slider(0.50, 0.95, value=0.5, step=0.01, label="Similarity ≥")
                timeline_multi_cb = gr.Checkbox(label="Only multimodal models")
                gr.Markdown("**Embedding method:** TBD")
                timeline_btn = gr.Button("Build timeline")
            timeline_html_out = gr.HTML(elem_id="timeline_html", show_label=False)
            timeline_json_out = gr.File(label="Download timeline.json")
            # This tab has no metric selector, so the similarity method is fixed to "jaccard".
            timeline_btn.click(
                lambda repo, thresh, multi: run_timeline(repo, thresh, multi, "jaccard"),
                [timeline_repo_in, timeline_thresh, timeline_multi_cb],
                [timeline_html_out, timeline_json_out],
            )

        with gr.Tab("LOC Growth", id="loc"):
            sim_radio2 = gr.Radio(["jaccard", "embedding"], value="jaccard", label="Similarity metric")
            multi_cb2 = gr.Checkbox(label="Only multimodal models")
            go_loc = gr.Button("Show LOC growth")
            loc_html = gr.HTML(show_label=False)
            go_loc.click(run_loc, [sim_radio2, multi_cb2], loc_html)

        with gr.Tab("Dependency Graph", id="graph"):
            with gr.Row():
                repo_in = gr.Text(value=HF_MAIN_REPO, label="Repo / fork URL")
                thresh = gr.Slider(0.50, 0.95, value=0.5, step=0.01, label="Similarity ≥")
                multi_cb = gr.Checkbox(label="Only multimodal models")
                gr.Markdown("**Embedding method:** TBD")
                go_btn = gr.Button("Build graph")
            graph_html_out = gr.HTML(elem_id="graph_html", show_label=False)
            graph_json_out = gr.File(label="Download graph.json")
            # As on the timeline tab, the similarity method is fixed to "jaccard".
            go_btn.click(
                lambda repo, thresh, multi: run_graph(repo, thresh, multi, "jaccard"),
                [repo_in, thresh, multi_cb],
                [graph_html_out, graph_json_out],
            )

    def _select_tab_on_load(req: gr.Request):
        """Select the tab named by the ``?tab=`` query parameter, if it is a known id."""
        tab = (req.query_params or {}).get("tab")
        if tab in {"timeline", "loc", "graph"}:
            return gr.update(selected=tab)
        # Unknown or missing value: leave the current selection untouched.
        return gr.update()

    # Registered inside the Blocks context so it runs on every page load.
    demo.load(_select_tab_on_load, outputs=tabs)
if __name__ == "__main__":
    # Launch exactly once: the original called launch() twice with identical
    # arguments. "static" is passed as an allowed path for file serving.
    demo.launch(allowed_paths=["static"])