Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,20 +1,21 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
import json
|
| 4 |
-
import shutil
|
| 5 |
-
import subprocess
|
| 6 |
import tempfile
|
| 7 |
-
from datetime import datetime, timedelta
|
| 8 |
-
from functools import lru_cache
|
| 9 |
from pathlib import Path
|
| 10 |
-
from huggingface_hub import hf_hub_download
|
| 11 |
|
| 12 |
import gradio as gr
|
|
|
|
| 13 |
|
| 14 |
-
from modular_graph_and_candidates import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
def _escape_srcdoc(text: str) -> str:
|
| 17 |
-
"""Escape for inclusion inside an <iframe srcdoc="β¦"> attribute."""
|
| 18 |
return (
|
| 19 |
text.replace("&", "&")
|
| 20 |
.replace("\"", """)
|
|
@@ -23,12 +24,10 @@ def _escape_srcdoc(text: str) -> str:
|
|
| 23 |
.replace(">", ">")
|
| 24 |
)
|
| 25 |
|
| 26 |
-
|
| 27 |
HF_MAIN_REPO = "https://github.com/huggingface/transformers"
|
| 28 |
-
|
| 29 |
CACHE_REPO = "Molbap/hf_cached_embeds_log"
|
| 30 |
|
| 31 |
-
def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimodal: bool):
|
| 32 |
repo_id = CACHE_REPO
|
| 33 |
latest_fp = hf_hub_download(repo_id=repo_id, filename="latest.json", repo_type="dataset")
|
| 34 |
info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
|
|
@@ -40,49 +39,42 @@ def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimo
|
|
| 40 |
filtered_data = filter_graph_by_threshold(raw_data, threshold)
|
| 41 |
|
| 42 |
if kind == "timeline":
|
| 43 |
-
from modular_graph_and_candidates import generate_timeline_html
|
| 44 |
raw_html = generate_timeline_html(filtered_data)
|
| 45 |
else:
|
| 46 |
raw_html = generate_html(filtered_data)
|
| 47 |
|
| 48 |
-
iframe_html = f'<iframe style="width:100%;height:
|
| 49 |
tmp = Path(tempfile.mkstemp(suffix=("_timeline.json" if kind == "timeline" else ".json"))[1])
|
| 50 |
tmp.write_text(json.dumps(filtered_data), encoding="utf-8")
|
| 51 |
return iframe_html, str(tmp)
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
def run_loc(sim_method: str, multimodal: bool):
|
| 56 |
latest_fp = hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset")
|
| 57 |
info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
|
| 58 |
sha = info["sha"]
|
| 59 |
key = f"{sha}/{sim_method}-m{int(multimodal)}"
|
| 60 |
html_fp = hf_hub_download(repo_id=CACHE_REPO, filename=f"loc/{key}.html", repo_type="dataset")
|
| 61 |
raw_html = Path(html_fp).read_text(encoding="utf-8")
|
| 62 |
-
iframe_html = f'<iframe style="width:100%;height:
|
| 63 |
return iframe_html
|
| 64 |
|
|
|
|
|
|
|
| 65 |
|
| 66 |
-
def
|
| 67 |
-
return _fetch_from_cache_repo("
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str):
|
| 71 |
-
return _fetch_from_cache_repo("timeline", sim_method, threshold, multimodal)
|
| 72 |
-
|
| 73 |
-
|
| 74 |
|
| 75 |
# ───────────────────────────── UI ────────────────────────────────────────────────
|
| 76 |
|
| 77 |
-
|
| 78 |
CUSTOM_CSS = """
|
| 79 |
#graph_html iframe, #timeline_html iframe {height:85vh !important; width:100% !important; border:none;}
|
| 80 |
"""
|
|
|
|
| 81 |
TAB_INDEX = {"timeline": 0, "loc": 1, "graph": 2}
|
| 82 |
|
| 83 |
with gr.Blocks(css=CUSTOM_CSS) as demo:
|
| 84 |
header = gr.Markdown("## π Modular-candidate explorer for π€ Transformers")
|
| 85 |
-
|
| 86 |
with gr.Tabs() as tabs:
|
| 87 |
with gr.Tab("Chronological Timeline", id="timeline"):
|
| 88 |
with gr.Row():
|
|
@@ -124,33 +116,38 @@ with gr.Blocks(css=CUSTOM_CSS) as demo:
|
|
| 124 |
[repo_in, thresh, multi_cb],
|
| 125 |
[graph_html_out, graph_json_out],
|
| 126 |
)
|
| 127 |
-
|
|
|
|
|
|
|
| 128 |
|
| 129 |
def _on_load(req: gr.Request):
|
| 130 |
qp = req.query_params or {}
|
| 131 |
tab_key = (qp.get("tab") or "").lower()
|
| 132 |
embed = (qp.get("embed") == "1")
|
| 133 |
-
|
| 134 |
tab_sel = TAB_INDEX.get(tab_key, 0)
|
| 135 |
-
|
| 136 |
if embed:
|
|
|
|
| 137 |
if tab_key == "graph":
|
| 138 |
-
html, _ = run_graph(HF_MAIN_REPO, 0.7, False, "jaccard")
|
| 139 |
elif tab_key == "timeline":
|
| 140 |
-
html, _ = run_timeline(HF_MAIN_REPO, 0.7, False, "jaccard")
|
| 141 |
-
else:
|
| 142 |
-
html = run_loc("jaccard", False)
|
| 143 |
-
|
| 144 |
return (
|
| 145 |
-
gr.update(visible=False),
|
| 146 |
-
gr.update(visible=False),
|
| 147 |
-
gr.update(value=html, visible=True),
|
| 148 |
)
|
| 149 |
-
|
| 150 |
return (
|
| 151 |
-
gr.update(visible=True),
|
| 152 |
-
gr.update(visible=True, selected=tab_sel),
|
| 153 |
-
gr.update(visible=False),
|
| 154 |
)
|
| 155 |
-
|
| 156 |
demo.load(_on_load, outputs=[header, tabs, embed_html])
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
import json
|
|
|
|
|
|
|
| 4 |
import tempfile
|
|
|
|
|
|
|
| 5 |
from pathlib import Path
|
|
|
|
| 6 |
|
| 7 |
import gradio as gr
|
| 8 |
+
from huggingface_hub import hf_hub_download
|
| 9 |
|
| 10 |
+
from modular_graph_and_candidates import (
|
| 11 |
+
build_graph_json,
|
| 12 |
+
generate_html,
|
| 13 |
+
build_timeline_json,
|
| 14 |
+
generate_timeline_html,
|
| 15 |
+
filter_graph_by_threshold,
|
| 16 |
+
)
|
| 17 |
|
| 18 |
def _escape_srcdoc(text: str) -> str:
|
|
|
|
| 19 |
return (
|
| 20 |
text.replace("&", "&")
|
| 21 |
.replace("\"", """)
|
|
|
|
| 24 |
.replace(">", ">")
|
| 25 |
)
|
| 26 |
|
|
|
|
| 27 |
HF_MAIN_REPO = "https://github.com/huggingface/transformers"
|
|
|
|
| 28 |
CACHE_REPO = "Molbap/hf_cached_embeds_log"
|
| 29 |
|
| 30 |
+
def _fetch_from_cache_repo(kind: str, sim_method: str, threshold: float, multimodal: bool, *, height_vh: int = 85):
|
| 31 |
repo_id = CACHE_REPO
|
| 32 |
latest_fp = hf_hub_download(repo_id=repo_id, filename="latest.json", repo_type="dataset")
|
| 33 |
info = json.loads(Path(latest_fp).read_text(encoding="utf-8"))
|
|
|
|
| 39 |
filtered_data = filter_graph_by_threshold(raw_data, threshold)
|
| 40 |
|
| 41 |
if kind == "timeline":
|
|
|
|
| 42 |
raw_html = generate_timeline_html(filtered_data)
|
| 43 |
else:
|
| 44 |
raw_html = generate_html(filtered_data)
|
| 45 |
|
| 46 |
+
iframe_html = f'<iframe style="width:100%;height:{height_vh}vh;border:none;" srcdoc="{_escape_srcdoc(raw_html)}"></iframe>'
|
| 47 |
tmp = Path(tempfile.mkstemp(suffix=("_timeline.json" if kind == "timeline" else ".json"))[1])
|
| 48 |
tmp.write_text(json.dumps(filtered_data), encoding="utf-8")
|
| 49 |
return iframe_html, str(tmp)
|
| 50 |
|
| 51 |
+
def run_loc(sim_method: str, multimodal: bool, *, height_vh: int = 85):
    """Return an ``<iframe>`` snippet that embeds the cached LOC report.

    Two files are downloaded from the ``CACHE_REPO`` dataset: ``latest.json``,
    whose ``"sha"`` entry selects the artifact folder, and then
    ``loc/<sha>/<sim_method>-m<0|1>.html``, the pre-rendered report itself.

    Args:
        sim_method: Similarity method name; used verbatim in the file name.
        multimodal: Encoded as ``m1``/``m0`` in the file name.
        height_vh: Height of the iframe, in ``vh`` units.

    Returns:
        HTML for an iframe whose ``srcdoc`` holds the escaped report.
    """
    # Resolve which sha the cache currently points at.
    pointer_path = hf_hub_download(repo_id=CACHE_REPO, filename="latest.json", repo_type="dataset")
    sha = json.loads(Path(pointer_path).read_text(encoding="utf-8"))["sha"]

    # Fetch the pre-rendered report stored for that sha / method / modality.
    report_name = f"loc/{sha}/{sim_method}-m{int(multimodal)}.html"
    report_path = hf_hub_download(repo_id=CACHE_REPO, filename=report_name, repo_type="dataset")
    srcdoc = _escape_srcdoc(Path(report_path).read_text(encoding="utf-8"))

    return f'<iframe style="width:100%;height:{height_vh}vh;border:none;" srcdoc="{srcdoc}"></iframe>'
|
| 60 |
|
| 61 |
+
def run_graph(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Build the graph view from the cache repository.

    ``repo_url`` is accepted but not used by this function; the remaining
    arguments are forwarded to ``_fetch_from_cache_repo`` with
    ``kind="graph"``, and its result is returned unchanged.
    """
    return _fetch_from_cache_repo(
        kind="graph",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )
|
| 63 |
|
| 64 |
+
def run_timeline(repo_url: str, threshold: float, multimodal: bool, sim_method: str, *, height_vh: int = 85):
    """Build the timeline view from the cache repository.

    ``repo_url`` is accepted but not used by this function; the remaining
    arguments are forwarded to ``_fetch_from_cache_repo`` with
    ``kind="timeline"``, and its result is returned unchanged.
    """
    return _fetch_from_cache_repo(
        kind="timeline",
        sim_method=sim_method,
        threshold=threshold,
        multimodal=multimodal,
        height_vh=height_vh,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
# ───────────────────────────── UI ────────────────────────────────────────────────
|
| 68 |
|
|
|
|
| 69 |
CUSTOM_CSS = """
|
| 70 |
#graph_html iframe, #timeline_html iframe {height:85vh !important; width:100% !important; border:none;}
|
| 71 |
"""
|
| 72 |
+
|
| 73 |
TAB_INDEX = {"timeline": 0, "loc": 1, "graph": 2}
|
| 74 |
|
| 75 |
with gr.Blocks(css=CUSTOM_CSS) as demo:
|
| 76 |
header = gr.Markdown("## π Modular-candidate explorer for π€ Transformers")
|
| 77 |
+
|
| 78 |
with gr.Tabs() as tabs:
|
| 79 |
with gr.Tab("Chronological Timeline", id="timeline"):
|
| 80 |
with gr.Row():
|
|
|
|
| 116 |
[repo_in, thresh, multi_cb],
|
| 117 |
[graph_html_out, graph_json_out],
|
| 118 |
)
|
| 119 |
+
|
| 120 |
+
# make embed_html a sibling of Tabs (not a child), so we can hide Tabs but show this
|
| 121 |
+
embed_html = gr.HTML(visible=False)
|
| 122 |
|
| 123 |
def _on_load(req: gr.Request):
    """Choose between the full UI and a single embedded view on page load.

    Two query parameters are read from the request:

    * ``tab``: lower-cased and looked up in ``TAB_INDEX``; a missing or
      unknown value falls back to index 0 in the full UI.
    * ``embed``: when exactly ``"1"``, the header and the tabs are hidden
      and one pre-rendered view is shown instead.

    Returns:
        A 3-tuple of component updates, in the order the handler is wired
        to its outputs: header, tabs, embed_html.
    """
    qp = req.query_params or {}
    tab_key = (qp.get("tab") or "").lower()
    embed = qp.get("embed") == "1"

    # ``gr.update`` is used instead of the per-component ``X.update``
    # classmethods, which are not available on newer Gradio releases.
    if not embed:
        # NOTE(review): ``selected`` receives an integer from TAB_INDEX while
        # the visible tab is declared with a string id ("timeline") - confirm
        # that the index actually selects the intended tab.
        return (
            gr.update(visible=True),
            gr.update(visible=True, selected=TAB_INDEX.get(tab_key, 0)),
            gr.update(visible=False),
        )

    # Embedded view: render a shorter iframe (60vh instead of the default 85vh).
    if tab_key == "graph":
        html, _ = run_graph(HF_MAIN_REPO, 0.7, False, "jaccard", height_vh=60)
    elif tab_key == "timeline":
        html, _ = run_timeline(HF_MAIN_REPO, 0.7, False, "jaccard", height_vh=60)
    else:
        # Any other value, including a missing ``tab``, shows the LOC view.
        html = run_loc("jaccard", False, height_vh=60)

    return (
        gr.update(visible=False),  # header
        gr.update(visible=False),  # tabs
        gr.update(value=html, visible=True),  # embed_html
    )
|
| 149 |
+
|
| 150 |
demo.load(_on_load, outputs=[header, tabs, embed_html])
|
| 151 |
+
|
| 152 |
+
if __name__ == "__main__":
|
| 153 |
+
demo.launch(allowed_paths=["static"])
|