Spaces:
Running
Running
Commit
·
a18f8de
1
Parent(s):
bee5389
added single and multi gpu inference
Browse files
app.py
CHANGED
|
@@ -26,16 +26,13 @@ SORTING_COLUMN = ["Throughput (tokens/s) ⬇️"]
|
|
| 26 |
llm_perf_dataset_repo = load_dataset_repo(LLM_PERF_DATASET_REPO, OPTIMUM_TOKEN)
|
| 27 |
|
| 28 |
|
| 29 |
-
def get_benchmark_df():
|
| 30 |
if llm_perf_dataset_repo:
|
| 31 |
llm_perf_dataset_repo.git_pull()
|
| 32 |
|
| 33 |
# load
|
| 34 |
df = pd.read_csv(
|
| 35 |
-
"./llm-perf-dataset/reports/
|
| 36 |
-
|
| 37 |
-
# remove quantized models
|
| 38 |
-
df = df[df["backend.quantization"].isnull()]
|
| 39 |
|
| 40 |
# preprocess
|
| 41 |
df["model"] = df["model"].apply(make_clickable_model)
|
|
@@ -61,19 +58,34 @@ with demo:
|
|
| 61 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
| 62 |
|
| 63 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 64 |
-
with gr.TabItem("
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
-
|
| 68 |
-
-
|
| 69 |
-
|
| 70 |
-
gr.
|
| 71 |
-
|
|
|
|
| 72 |
leaderboard_table_lite = gr.components.Dataframe(
|
| 73 |
-
value=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
datatype=COLUMNS_DATATYPES,
|
| 75 |
headers=NEW_COLUMNS,
|
| 76 |
-
elem_id="
|
| 77 |
)
|
| 78 |
|
| 79 |
with gr.Row():
|
|
|
|
| 26 |
llm_perf_dataset_repo = load_dataset_repo(LLM_PERF_DATASET_REPO, OPTIMUM_TOKEN)
|
| 27 |
|
| 28 |
|
| 29 |
+
def get_benchmark_df(benchmark):
|
| 30 |
if llm_perf_dataset_repo:
|
| 31 |
llm_perf_dataset_repo.git_pull()
|
| 32 |
|
| 33 |
# load
|
| 34 |
df = pd.read_csv(
|
| 35 |
+
f"./llm-perf-dataset/reports/{benchmark}/inference_report.csv")
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
# preprocess
|
| 38 |
df["model"] = df["model"].apply(make_clickable_model)
|
|
|
|
| 58 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
| 59 |
|
| 60 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 61 |
+
with gr.TabItem("🖥️ A100-80GB Benchmark 🏋️", elem_id="A100-benchmark", id=0):
|
| 62 |
+
|
| 63 |
+
SINGLE_A100_TEXT = """<h4>Specifications:</h4>
|
| 64 |
+
- Single-GPU (1)
|
| 65 |
+
- Singleton Batch (1)
|
| 66 |
+
- Thousand Tokens (1000)"""
|
| 67 |
+
gr.HTML(SINGLE_A100_TEXT)
|
| 68 |
+
|
| 69 |
+
single_A100_df = get_benchmark_df(benchmark="1xA100-80GB")
|
| 70 |
leaderboard_table_lite = gr.components.Dataframe(
|
| 71 |
+
value=single_A100_df,
|
| 72 |
+
datatype=COLUMNS_DATATYPES,
|
| 73 |
+
headers=NEW_COLUMNS,
|
| 74 |
+
elem_id="1xA100-table",
|
| 75 |
+
)
|
| 76 |
+
|
| 77 |
+
MULTI_A100_TEXT = """<h4>Specifications:</h4>
|
| 78 |
+
- Multi-GPU (4)
|
| 79 |
+
- Singleton Batch (1)
|
| 80 |
+
- Thousand Tokens (1000)"""
|
| 81 |
+
gr.HTML(MULTI_A100_TEXT)
|
| 82 |
+
|
| 83 |
+
multi_A100_df = get_benchmark_df(benchmark="4xA100-80GB")
|
| 84 |
+
leaderboard_table_full = gr.components.Dataframe(
|
| 85 |
+
value=multi_A100_df,
|
| 86 |
datatype=COLUMNS_DATATYPES,
|
| 87 |
headers=NEW_COLUMNS,
|
| 88 |
+
elem_id="4xA100-table",
|
| 89 |
)
|
| 90 |
|
| 91 |
with gr.Row():
|