Commit 4cfc121 (1 parent: d3abea5): updated plot

Files changed:
- app.py (+8 -8)
- src/utils.py (+3 -3)
app.py

@@ -1,7 +1,7 @@
-import plotly.express as px
 import os
 import gradio as gr
 import pandas as pd
+import plotly.express as px
 from apscheduler.schedulers.background import BackgroundScheduler
 
 from src.assets.text_content import TITLE, INTRODUCTION_TEXT, SINGLE_A100_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT
@@ -19,7 +19,7 @@ COLUMNS_MAPPING = {
     "backend.torch_dtype": "Datatype 📥",
     "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
     "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
-    "h4_score": "H4 Score ⬆️",
+    "h4_score": "Average H4 Score ⬆️",
 }
 COLUMNS_DATATYPES = ["markdown", "str", "str", "number", "number", "markdown"]
 SORTING_COLUMN = ["Throughput (tokens/s) ⬆️"]
@@ -66,7 +66,7 @@ def get_benchmark_plot(benchmark):
     scores_df = pd.read_csv(
         f"./llm-perf-dataset/reports/additional_data.csv")
     bench_df = bench_df.merge(scores_df, on="model", how="left")
-
+
     bench_df = bench_df[bench_df["generate.latency(s)"] < 100]
 
     fig = px.scatter(
@@ -85,11 +85,11 @@ def get_benchmark_plot(benchmark):
         },
         xaxis_title="Average H4 Score",
         yaxis_title="Latency per 1000 Tokens (s)",
-        legend_title="Model Type",
+        legend_title="Model Type, Backend",
         width=1200,
         height=600,
     )
-
+
     fig.update_traces(
         hovertemplate="<br>".join([
             "Model: %{customdata[0]}",
@@ -174,7 +174,7 @@ with demo:
             max_rows=None,
             visible=False,
         )
-
+
         submit_button.click(
             submit_query,
             [
@@ -187,14 +187,14 @@ with demo:
         with gr.TabItem("🖥️ A100-80GB Plot 📊", id=1):
             # Original leaderboard plot
             gr.HTML(SINGLE_A100_TEXT)
-
+
             # Original leaderboard plot
             single_A100_plotly = gr.components.Plot(
                 value=single_A100_plot,
                 elem_id="1xA100-plot",
                 show_label=False,
             )
-
+
         with gr.Row():
             with gr.Accordion("📙 Citation", open=False):
                 citation_button = gr.Textbox(
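For context on the legend_title change: the scatter traces are colored by a key that combines model type and backend, so the legend title now names both fields. A minimal, self-contained sketch of the Plotly pattern involved, using hypothetical data and column names (the real frame is assembled in get_benchmark_plot):

import pandas as pd
import plotly.express as px

# Hypothetical stand-in for the merged benchmark frame.
df = pd.DataFrame({
    "model": ["llama-7b", "falcon-7b", "mpt-7b"],
    "h4_score": [0.52, 0.49, 0.47],
    "latency_per_1k_tokens": [18.3, 21.7, 19.9],
    "model_type": ["llama", "falcon", "mpt"],
    "backend": ["pytorch", "pytorch", "onnxruntime"],
})
# Color by a combined key so each legend entry reads "type, backend".
df["group"] = df["model_type"] + ", " + df["backend"]

fig = px.scatter(
    df,
    x="h4_score",
    y="latency_per_1k_tokens",
    color="group",
    custom_data=["model"],  # exposed to the hover template below
)
fig.update_layout(
    xaxis_title="Average H4 Score",
    yaxis_title="Latency per 1000 Tokens (s)",
    legend_title="Model Type, Backend",
    width=1200,
    height=600,
)
fig.update_traces(hovertemplate="<br>".join(["Model: %{customdata[0]}"]))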
src/utils.py

@@ -73,16 +73,16 @@ def extract_score_from_clickable(clickable_score) -> float:
 
 
 def submit_query(text, backends, datatypes, threshold, raw_df):
-    raw_df["H4 Score ⬆️"] = raw_df["H4 Score ⬆️"].apply(
+    raw_df["Average H4 Score ⬆️"] = raw_df["Average H4 Score ⬆️"].apply(
         extract_score_from_clickable)
 
     filtered_df = raw_df[
         raw_df["Model 🤗"].str.lower().str.contains(text.lower()) &
         raw_df["Backend 🏭"].isin(backends) &
         raw_df["Datatype 📥"].isin(datatypes) &
-        (raw_df["H4 Score ⬆️"] >= threshold)
+        (raw_df["Average H4 Score ⬆️"] >= threshold)
     ]
 
-    filtered_df["H4 Score ⬆️"] = filtered_df["H4 Score ⬆️"].apply(
+    filtered_df["Average H4 Score ⬆️"] = filtered_df["Average H4 Score ⬆️"].apply(
         make_clickable_score)
     return filtered_df
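The renamed "Average H4 Score ⬆️" column holds scores rendered as clickable markdown links, so submit_query strips the links before comparing against the threshold and re-wraps them afterwards. The two helpers are defined elsewhere in src/utils.py and are not shown in this diff; a rough sketch of what that round-trip could look like, with an assumed link target:

# Hypothetical sketch of the helpers submit_query relies on; the real
# implementations live elsewhere in src/utils.py and may differ.
def make_clickable_score(score: float) -> str:
    # Wrap a raw score in a markdown link (link target assumed here).
    return f"[{score}](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)"

def extract_score_from_clickable(clickable_score: str) -> float:
    # "[0.52](https://...)" -> 0.52
    return float(clickable_score.split("](")[0].lstrip("["))

# Round-trip: the threshold filter needs floats, the rendered table needs links.
assert extract_score_from_clickable(make_clickable_score(0.52)) == 0.52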