Linker1907 committed on
Commit
7532d9b
·
1 Parent(s): 72e68df

pull from lighteval every time

Browse files
Files changed (2) hide show
  1. app.py +23 -41
  2. tasks_index.json +0 -0
app.py CHANGED
@@ -15,22 +15,19 @@ This file stays outside the lighteval src tree, per request.
15
  """
16
 
17
  import ast
18
- import json
19
  import os
20
  import re
21
  from collections import Counter
22
- from dataclasses import asdict, dataclass
23
 
24
  import gradio as gr
25
 
26
 
27
- REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
28
  TASK_DIRS = [
29
- os.path.join(REPO_ROOT, "src", "lighteval", "tasks", "tasks"),
30
- os.path.join(REPO_ROOT, "src", "lighteval", "tasks", "multilingual", "tasks"),
31
  ]
32
- # place cache inside repo root to avoid dirname('') issue
33
- CACHE_PATH = "tasks_index.json"
34
 
35
 
36
  star_benchmarks = [
@@ -112,7 +109,9 @@ def split_list_field(value: str) -> list[str]:
112
 
113
  def discover_task_files() -> list[str]:
114
  files: list[str] = []
 
115
  for base in TASK_DIRS:
 
116
  if not os.path.isdir(base):
117
  continue
118
  # Top-level python files in the directory
@@ -167,42 +166,11 @@ def index_tasks() -> tuple[list[TaskDoc], list[str], list[str]]:
167
  return docs, languages_sorted, tags_sorted
168
 
169
 
170
- def save_index(path: str, tasks: list[TaskDoc], langs: list[str], tags: list[str]) -> None:
171
- data = {
172
- "tasks": [asdict(t) for t in tasks],
173
- "languages": list(langs),
174
- "tags": list(tags),
175
- }
176
- os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
177
- with open(path, "w", encoding="utf-8") as f:
178
- json.dump(data, f, ensure_ascii=False, indent=2)
179
 
180
 
181
- def load_index(path: str) -> tuple[list[TaskDoc], list[str], list[str]] | None:
182
- if not os.path.exists(path):
183
- return None
184
- with open(path, "r", encoding="utf-8") as f:
185
- data = json.load(f)
186
- tasks = [TaskDoc(**t) for t in data.get("tasks", [])]
187
- langs = list(data.get("languages", []))
188
- tags = list(data.get("tags", []))
189
- return tasks, langs, tags
190
-
191
-
192
- def build_and_cache_index() -> tuple[list[TaskDoc], list[str], list[str]]:
193
- tasks, langs, tags = index_tasks()
194
- save_index(CACHE_PATH, tasks, langs, tags)
195
- return tasks, langs, tags
196
-
197
-
198
- _loaded = load_index(CACHE_PATH)
199
- if _loaded is None:
200
- print("Building and caching index...")
201
- ALL_TASKS, ALL_LANGS, ALL_TAGS = build_and_cache_index()
202
- else:
203
- print("Loading index from cache...")
204
- ALL_TASKS, ALL_LANGS, ALL_TAGS = _loaded
205
- print(f"Loaded {len(ALL_TASKS)} tasks from cache")
206
  TOP_LANGS = ALL_LANGS[:8] # show more by default
207
 
208
 
@@ -273,6 +241,7 @@ def render_cards(tasks: list[TaskDoc]) -> str:
273
  fallback_name = parts[-2] if base_no_ext == "main" and len(parts) >= 2 else base_no_ext
274
  task_name = (t.name or fallback_name).replace("_", " ").title()
275
  mod_path = t.module.replace("\\", "/")
 
276
  source_html = f'<a href="https://github.com/huggingface/lighteval/blob/main/{mod_path}" target="_blank" rel="noopener">source</a>'
277
  paper_html = f'<a href="{t.paper}" target="_blank" rel="noopener">paper</a>' if t.paper else ""
278
  tags_html = " ".join([f'<span class=\"chip\" title=\"tag: {tag}\">{tag}</span>' for tag in t.tags]) if t.tags else ""
@@ -446,5 +415,18 @@ with gr.Blocks(title="Lighteval Tasks Explorer", css=None) as demo:
446
 
447
 
448
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
449
  # Run with `python benchmark_finder/app.py`
450
  demo.launch()
 
15
  """
16
 
17
  import ast
 
18
  import os
19
  import re
20
  from collections import Counter
21
+ from dataclasses import dataclass
22
 
23
  import gradio as gr
24
 
25
 
26
+ REPO_ROOT = "."
27
  TASK_DIRS = [
28
+ os.path.join(REPO_ROOT, "lighteval", "src", "lighteval", "tasks", "tasks"),
29
+ os.path.join(REPO_ROOT, "lighteval", "src", "lighteval", "tasks", "multilingual", "tasks"),
30
  ]
 
 
31
 
32
 
33
  star_benchmarks = [
 
109
 
110
  def discover_task_files() -> list[str]:
111
  files: list[str] = []
112
+ print(f"Discovering task files in: {TASK_DIRS}")
113
  for base in TASK_DIRS:
114
+ print(f"Discovering task files in: {base}")
115
  if not os.path.isdir(base):
116
  continue
117
  # Top-level python files in the directory
 
166
  return docs, languages_sorted, tags_sorted
167
 
168
 
169
+ def build_index() -> tuple[list[TaskDoc], list[str], list[str]]:
170
+ return index_tasks()
 
 
 
 
 
 
 
171
 
172
 
173
+ ALL_TASKS, ALL_LANGS, ALL_TAGS = build_index()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  TOP_LANGS = ALL_LANGS[:8] # show more by default
175
 
176
 
 
241
  fallback_name = parts[-2] if base_no_ext == "main" and len(parts) >= 2 else base_no_ext
242
  task_name = (t.name or fallback_name).replace("_", " ").title()
243
  mod_path = t.module.replace("\\", "/")
244
+ mod_path = mod_path.split("/", 1)[1]
245
  source_html = f'<a href="https://github.com/huggingface/lighteval/blob/main/{mod_path}" target="_blank" rel="noopener">source</a>'
246
  paper_html = f'<a href="{t.paper}" target="_blank" rel="noopener">paper</a>' if t.paper else ""
247
  tags_html = " ".join([f'<span class=\"chip\" title=\"tag: {tag}\">{tag}</span>' for tag in t.tags]) if t.tags else ""
 
415
 
416
 
417
  if __name__ == "__main__":
418
+ from git import Repo # pip install gitpython
419
+
420
+ git_url = "https://github.com/huggingface/lighteval.git"
421
+ repo_dir = "./lighteval"
422
+
423
+ if os.path.exists(repo_dir) and os.path.isdir(os.path.join(repo_dir, ".git")):
424
+ print(f"Pulling latest changes from {git_url}...")
425
+ repo = Repo(repo_dir)
426
+ repo.remotes.origin.pull()
427
+ else:
428
+ print(f"Cloning {git_url} to {repo_dir}...")
429
+ Repo.clone_from(git_url, repo_dir)
430
+
431
  # Run with `python benchmark_finder/app.py`
432
  demo.launch()
tasks_index.json DELETED
The diff for this file is too large to render. See raw diff