Commit 3c37eb3 · 1 parent e2e1ee9 · move things around

Files changed:
- app.py (+16 -21)
- src/assets/text_content.py (+5 -5)
app.py
CHANGED
@@ -27,13 +27,13 @@ LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
 OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN", None)
 
 ALL_COLUMNS_MAPPING = {
-    "weight_class": "Class 🏋️",
-    "model_type": "Type 🤗",
-    #
     "backend.name": "Backend 🏭",
     "backend.torch_dtype": "Dtype 📥",
-    "quantization": "Quantization 🗜️",
     "optimizations": "Optimizations 🛠️",
+    "quantization": "Quantization 🗜️",
+    #
+    "weight_class": "Class 🏋️",
+    "model_type": "Type 🤗",
     #
     "generate.peak_memory(MB)": "Memory (MB) ⬇️",
     "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",

@@ -45,9 +45,9 @@ ALL_COLUMNS_MAPPING = {
 ALL_COLUMNS_DATATYPES = [
     "str",
     "str",
-    #
     "str",
     "str",
+    #
     "str",
     "str",
     #
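For context: get_benchmark_table (second hunk below) applies this mapping with a plain pandas rename. A minimal, self-contained sketch of that step, using made-up values and only a subset of the real columns; the final reordering by the dict's values is illustrative, not taken from app.py:

import pandas as pd

# Hypothetical subset of the benchmark columns, with made-up values.
raw = pd.DataFrame(
    {
        "backend.name": ["pytorch"],
        "backend.torch_dtype": ["float16"],
        "optimizations": ["None"],
        "quantization": ["GPTQ.4bit"],
    }
)

ALL_COLUMNS_MAPPING = {
    "backend.name": "Backend 🏭",
    "backend.torch_dtype": "Dtype 📥",
    "optimizations": "Optimizations 🛠️",
    "quantization": "Quantization 🗜️",
}

# Same call as in get_benchmark_table: keys become display headers.
# The column reordering below is just for the sketch.
table = raw.rename(columns=ALL_COLUMNS_MAPPING)[list(ALL_COLUMNS_MAPPING.values())]
print(table)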
@@ -85,21 +85,16 @@ def get_benchmark_df(benchmark="Succeeded-1xA100-80GB"):
     merged_df["quantization"] = merged_df["backend.quantization_strategy"].apply(
         lambda x: "BnB.4bit" if x == "bnb" else ("GPTQ.4bit" if x == "gptq" else "None")
     )
-    # distance to 100% score
-    score_distance = 100 - merged_df["best_score"]
-    # distance to 0s latency
-    latency_distance = merged_df["generate.latency(s)"]
-    # distance to 0MB memory
-    memory_distance = merged_df["forward.peak_memory(MB)"]
-    # add perf distance
-    merged_df["perf_distance"] = (
-        score_distance**2 + latency_distance**2 + memory_distance**2
-    ) ** 0.5
+    # # distance to 100% score
+    # score_distance = 100 - merged_df["best_score"]
+    # # distance to 0s latency
+    # latency_distance = merged_df["generate.latency(s)"]
+    # # distance to 0MB memory
+    # memory_distance = merged_df["forward.peak_memory(MB)"]
+    # # add perf distance
+    # merged_df["perf_distance"] = (
+    #     score_distance**2 + latency_distance**2 + memory_distance**2
+    # ) ** 0.5
 
     return merged_df

@@ -121,7 +116,7 @@ def get_benchmark_table(bench_df):
     # rename
     copy_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
     # transform
-    copy_df["
+    copy_df["Type 🤗"] = copy_df["Type 🤗"].apply(process_model_type)
     copy_df["Best Scored LLM 🏆"] = copy_df["Best Scored LLM 🏆"].apply(
         process_model_name
    )
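The block being commented out computed a composite "perf distance": the Euclidean distance of each model from an ideal point of 100% score, 0 s latency, and 0 MB memory. A standalone sketch of that calculation, with made-up numbers (the column names follow the diff; this does not run against the real llm-perf dataset):

import pandas as pd

# Made-up sample rows; the real columns come from the llm-perf dataset.
merged_df = pd.DataFrame(
    {
        "best_score": [55.0, 60.0],
        "generate.latency(s)": [12.0, 20.0],
        "forward.peak_memory(MB)": [9000.0, 15000.0],
    }
)

# Euclidean distance to the ideal point (100% score, 0s latency, 0MB memory),
# mirroring the commented-out block above.
score_distance = 100 - merged_df["best_score"]
latency_distance = merged_df["generate.latency(s)"]
memory_distance = merged_df["forward.peak_memory(MB)"]
merged_df["perf_distance"] = (
    score_distance**2 + latency_distance**2 + memory_distance**2
) ** 0.5
print(merged_df["perf_distance"])

Note that the three terms sit on very different scales, so without normalization the memory term dominates the sum.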
src/assets/text_content.py
CHANGED
@@ -1,14 +1,14 @@
-TITLE = """<h1 align="center" id="space-title">🤗
+TITLE = """<h1 align="center" id="space-title">🤗 LLM-Perf Leaderboard 🏋️</h1>"""
 
 INTRODUCTION_TEXT = f"""
-The 🤗
+The 🤗 LLM-Perf Leaderboard 🏋️ aims to benchmark the performance (latency, throughput & memory) of Large Language Models (LLMs) with different hardwares, backends and optimizations using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) and [Optimum](https://github.com/huggingface/optimum) flavors.
 
 Anyone from the community can request a model or a hardware/backend/optimization configuration for automated benchmarking:
-- Model evaluation requests should be made in the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) and will be added to the 🤗
+- Model evaluation requests should be made in the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) and will be added to the 🤗 LLM-Perf Leaderboard 🏋️ automatically.
 - Hardware/Backend/Optimization performance requests should be made in the [community discussions](https://huggingface.co/spaces/optimum/llm-perf-leaderboard/discussions) to assess their relevance and feasibility.
 """
 
-ABOUT_TEXT = """<h3>About the 🤗
+ABOUT_TEXT = """<h3>About the 🤗 LLM-Perf Leaderboard 🏋️</h3>
 <ul>
 <li>To avoid communication-dependent results, only one GPU is used.</li>
 <li>LLMs are evaluated on a singleton batch with a prompt size of 512 and generating 1000 tokens.</li>

@@ -63,7 +63,7 @@ benchmark:
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results."
 CITATION_BUTTON_TEXT = r"""@misc{open-llm-perf-leaderboard,
   author = {Ilyas Moutawwakil, Régis Pierrard},
-  title = {
+  title = {LLM-Perf Leaderboard},
   year = {2023},
   publisher = {Hugging Face},
   howpublished = "\url{https://huggingface.co/spaces/optimum/llm-perf-leaderboard}",
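For readers who want to reproduce the generation setting described in ABOUT_TEXT (singleton batch, 512-token prompt, 1000 generated tokens), a rough transformers-only sketch. The leaderboard's actual harness is Optimum-Benchmark; the model choice and timing method here are illustrative assumptions:

import time
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Any causal LM with enough context works; opt-125m is just a small stand-in.
model_id = "facebook/opt-125m"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Singleton batch with a 512-token prompt, as described in ABOUT_TEXT.
input_ids = torch.randint(0, tokenizer.vocab_size, (1, 512))

start = time.perf_counter()
output = model.generate(
    input_ids,
    max_new_tokens=1000,  # generate 1000 tokens
    min_new_tokens=1000,  # force the full length for a stable measurement
    do_sample=False,
)
latency = time.perf_counter() - start
print(f"{latency:.1f}s, {1000 / latency:.1f} tokens/s")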