Spaces:
Running
Running
| import gradio as gr | |
| from src.llm_perf import get_llm_perf_df | |
| from src.leaderboard import get_leaderboard_df | |
| from src.latency_score_memory import get_lat_score_mem_fig | |
| from src.bettertransformer import get_bt_prefill_fig, get_bt_decode_fig | |
| from src.flashattentionv2 import get_fa2_prefill_fig, get_fa2_decode_fig | |
| from src.quantization_kernels import get_quant_prefill_fig, get_quant_decode_fig | |
def create_control_panel(machine: str = "hf-dgx-01"):
    """Build the collapsible filter panel and hand back its components.

    Widgets are instantiated in the order they should appear, grouped into
    three rows inside a single accordion that starts closed.

    Args:
        machine: Initial value of the hidden textbox returned as
            ``machine_textbox``.

    Returns:
        A tuple ``(filter_button, machine_textbox, score_slider,
        memory_slider, backend_checkboxes, datatype_checkboxes,
        optimization_checkboxes, quantization_checkboxes)``.
    """
    # Option lists; every option is ticked by default.
    max_memory_mb = 80 * 1024
    backend_options = ["pytorch"]
    dtype_options = ["float32", "float16", "bfloat16"]
    optimization_options = ["None", "BetterTransformer", "FlashAttentionV2"]
    quantization_options = [
        "None",
        "BnB.4bit",
        "BnB.8bit",
        "GPTQ.4bit",
        "GPTQ.4bit+ExllamaV1",
        "GPTQ.4bit+ExllamaV2",
        "AWQ.4bit+GEMM",
        "AWQ.4bit+GEMV",
    ]

    # Invisible textbox holding the machine name.
    machine_textbox = gr.Textbox(value=machine, visible=False)

    with gr.Accordion("Control Panel ποΈ", open=False, elem_id="control-panel"):
        # Row 1: score threshold, memory cap, backends.
        with gr.Row():
            with gr.Column(scale=1, variant="panel"):
                score_slider = gr.Slider(
                    label="Open LLM Score (%) π",
                    info="ποΈ Slide to minimum Open LLM score",
                    value=0,
                    elem_id="threshold-slider",
                )
            with gr.Column(scale=1, variant="panel"):
                memory_slider = gr.Slider(
                    label="Peak Memory (MB) π",
                    info="ποΈ Slide to maximum Peak Memory",
                    minimum=0,
                    maximum=max_memory_mb,
                    value=max_memory_mb,
                    elem_id="memory-slider",
                )
            with gr.Column(scale=1):
                backend_checkboxes = gr.CheckboxGroup(
                    label="Backends π",
                    choices=backend_options,
                    value=list(backend_options),
                    info="βοΈ Select the backends",
                    elem_id="backend-checkboxes",
                )

        # Row 2: dtypes, optimizations, quantization schemes.
        with gr.Row():
            with gr.Column(scale=1, variant="panel"):
                datatype_checkboxes = gr.CheckboxGroup(
                    label="Load DTypes π₯",
                    choices=dtype_options,
                    value=list(dtype_options),
                    info="βοΈ Select the load data types",
                    elem_id="dtype-checkboxes",
                )
            with gr.Column(scale=1, variant="panel"):
                optimization_checkboxes = gr.CheckboxGroup(
                    label="Optimizations π οΈ",
                    choices=optimization_options,
                    value=list(optimization_options),
                    info="βοΈ Select the optimization",
                    elem_id="optimization-checkboxes",
                )
            with gr.Column(scale=2):
                quantization_checkboxes = gr.CheckboxGroup(
                    label="Quantizations ποΈ",
                    choices=quantization_options,
                    value=list(quantization_options),
                    info="βοΈ Select the quantization schemes",
                    elem_id="quantization-checkboxes",
                    elem_classes="boxed-option",
                )

        # Row 3: the button that applies the filters.
        with gr.Row():
            filter_button = gr.Button(
                value="Filter π",
                elem_id="filter-button",
                elem_classes="boxed-option",
            )

    panel_components = (
        filter_button,
        machine_textbox,
        score_slider,
        memory_slider,
        backend_checkboxes,
        datatype_checkboxes,
        optimization_checkboxes,
        quantization_checkboxes,
    )
    return panel_components
def filter_fn(
    machine,
    # inputs
    score,
    memory,
    backends,
    datatypes,
    optimizations,
    quantizations,
    # interactive
    columns,
    search,
):
    """Apply the control-panel filters and rebuild the table and all plots.

    The leaderboard table is derived from the same filtered rows as the
    figures, so every output reflects the selected filters.

    Args:
        machine: Passed to ``get_llm_perf_df`` to load the data.
        score: Inclusive lower bound on "Open LLM Score (%)".
        memory: Inclusive upper bound on "Allocated Memory (MB)".
        backends: Backend labels to keep.
        datatypes: Load dtype labels to keep.
        optimizations: Optimization labels to keep.
        quantizations: Quantization labels to keep.
        columns: Leaderboard columns to display.
        search: Case-insensitive text matched against the model column. It is
            used as a regular expression when it compiles, and as literal
            text otherwise.

    Returns:
        A list holding, in order: the filtered leaderboard table, the
        latency/score/memory figure, the BetterTransformer prefill and decode
        figures, the FlashAttentionV2 prefill and decode figures, and the
        quantization prefill and decode figures.
    """
    import re  # local import keeps this function self-contained

    # NOTE(review): the emoji suffixes in the column labels below look
    # mis-encoded in this copy of the file; they must match the labels
    # produced by get_llm_perf_df / get_leaderboard_df -- confirm.
    raw_df = get_llm_perf_df(machine=machine)
    row_mask = (
        raw_df["Backend π"].isin(backends)
        & raw_df["DType π₯"].isin(datatypes)
        & raw_df["Optimization π οΈ"].isin(optimizations)
        & raw_df["Quantization ποΈ"].isin(quantizations)
        & (raw_df["Open LLM Score (%)"] >= score)
        & (raw_df["Allocated Memory (MB)"] <= memory)
    )
    filtered_df = raw_df[row_mask]

    # Build the table from the filtered rows; starting again from the raw
    # data would make the table ignore every filter above.
    leaderboard_df = get_leaderboard_df(filtered_df)

    # The search text is typed by the user, so it may not be a valid regex.
    try:
        re.compile(search)
        use_regex = True
    except re.error:
        use_regex = False

    # Compute the mask before subsetting columns so the search still works
    # when the model column itself is not among the displayed columns.
    name_matches = leaderboard_df["Model π€"].str.contains(
        search, case=False, na=False, regex=use_regex
    )
    filtered_leaderboard_df = leaderboard_df.loc[name_matches, columns]

    filtered_lat_score_mem_fig = get_lat_score_mem_fig(filtered_df)
    filtered_bt_prefill_fig = get_bt_prefill_fig(filtered_df)
    filtered_bt_decode_fig = get_bt_decode_fig(filtered_df)
    filtered_fa2_prefill_fig = get_fa2_prefill_fig(filtered_df)
    filtered_fa2_decode_fig = get_fa2_decode_fig(filtered_df)
    filtered_quant_prefill_fig = get_quant_prefill_fig(filtered_df)
    filtered_quant_decode_fig = get_quant_decode_fig(filtered_df)

    return [
        filtered_leaderboard_df,
        filtered_lat_score_mem_fig,
        filtered_bt_prefill_fig,
        filtered_bt_decode_fig,
        filtered_fa2_prefill_fig,
        filtered_fa2_decode_fig,
        filtered_quant_prefill_fig,
        filtered_quant_decode_fig,
    ]
def create_control_callback(
    # button
    filter_button,
    # fixed
    machine_textbox,
    # inputs
    score_slider,
    memory_slider,
    backend_checkboxes,
    datatype_checkboxes,
    optimization_checkboxes,
    quantization_checkboxes,
    # interactive
    columns_checkboxes,
    search_bar,
    # outputs
    leaderboard_table,
    lat_score_mem_plot,
    bt_prefill_plot,
    bt_decode_plot,
    fa2_prefill_plot,
    fa2_decode_plot,
    quant_prefill_plot,
    quant_decode_plot,
):
    """Register ``filter_fn`` as the click handler of ``filter_button``.

    The input components are listed in the positional order of
    ``filter_fn``'s parameters, and the output components in the order of the
    list ``filter_fn`` returns.
    """
    # Order mirrors filter_fn's signature: machine, filters, then table state.
    click_inputs = [
        machine_textbox,
        score_slider,
        memory_slider,
        backend_checkboxes,
        datatype_checkboxes,
        optimization_checkboxes,
        quantization_checkboxes,
        columns_checkboxes,
        search_bar,
    ]
    # Order mirrors filter_fn's return list: table first, then the figures.
    click_outputs = [
        leaderboard_table,
        lat_score_mem_plot,
        bt_prefill_plot,
        bt_decode_plot,
        fa2_prefill_plot,
        fa2_decode_plot,
        quant_prefill_plot,
        quant_decode_plot,
    ]
    filter_button.click(fn=filter_fn, inputs=click_inputs, outputs=click_outputs)
def select_fn(machine, columns, search):
    """Return the leaderboard restricted to ``columns`` and matching ``search``.

    Args:
        machine: Passed to ``get_llm_perf_df`` to load the data.
        columns: Leaderboard columns to display.
        search: Case-insensitive text matched against the model column. It is
            used as a regular expression when it compiles, and as literal
            text otherwise.

    Returns:
        The leaderboard rows whose model name matches ``search``, limited to
        ``columns``.
    """
    import re  # local import keeps this function self-contained

    raw_df = get_llm_perf_df(machine=machine)
    leaderboard_df = get_leaderboard_df(raw_df)

    # The search text is typed by the user, so it may not be a valid regex
    # (e.g. a lone "(" while typing); fall back to literal matching then.
    try:
        re.compile(search)
        use_regex = True
    except re.error:
        use_regex = False

    # NOTE(review): the emoji suffix in the column label looks mis-encoded in
    # this copy of the file; it must match the label produced by
    # get_leaderboard_df -- confirm.
    # Compute the mask before subsetting columns so the search still works
    # when the model column itself is not among the displayed columns.
    name_matches = leaderboard_df["Model π€"].str.contains(
        search, case=False, na=False, regex=use_regex
    )
    return leaderboard_df.loc[name_matches, columns]
def create_select_callback(
    # fixed
    machine_textbox,
    # interactive
    columns_checkboxes,
    search_bar,
    # outputs
    leaderboard_table,
):
    """Refresh the leaderboard table when the columns or search text change.

    ``select_fn`` is registered as the ``change`` handler of both
    ``columns_checkboxes`` and ``search_bar``, with identical inputs and
    ``leaderboard_table`` as the only output.
    """
    # Same registration order as before: column selector first, then search.
    for trigger in (columns_checkboxes, search_bar):
        trigger.change(
            fn=select_fn,
            # Fresh lists per registration, as in the unrolled form.
            inputs=[machine_textbox, columns_checkboxes, search_bar],
            outputs=[leaderboard_table],
        )