Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -352,58 +352,21 @@ def set_example_metric(metric_name):
|
|
| 352 |
return [
|
| 353 |
DEFAULT_EVAL_PROMPT,
|
| 354 |
DEFAULT_INPUT,
|
| 355 |
-
DEFAULT_RESPONSE
|
| 356 |
-
metric_name # Return the selected metric name
|
| 357 |
]
|
| 358 |
|
| 359 |
metric_data = EXAMPLE_METRICS[metric_name]
|
| 360 |
return [
|
| 361 |
metric_data["prompt"],
|
| 362 |
metric_data["input"],
|
| 363 |
-
metric_data["response"]
|
| 364 |
-
metric_name # Return the selected metric name
|
| 365 |
]
|
| 366 |
|
| 367 |
# Select random metric at startup
|
| 368 |
def get_random_metric():
|
| 369 |
metrics = list(EXAMPLE_METRICS.keys())
|
| 370 |
-
|
| 371 |
-
return set_example_metric(selected_metric)
|
| 372 |
-
|
| 373 |
-
# Add this CSS to your CSS_STYLES constant or create it if it doesn't exist
|
| 374 |
-
CSS_STYLES = """
|
| 375 |
-
... existing styles ...
|
| 376 |
-
.selected-button {
|
| 377 |
-
background-color: #2B3A55 !important;
|
| 378 |
-
color: white !important;
|
| 379 |
-
}
|
| 380 |
-
"""
|
| 381 |
-
|
| 382 |
-
# Modify the set_example_metric function to return the metric name
|
| 383 |
-
def set_example_metric(metric_name):
|
| 384 |
-
if metric_name == "Custom":
|
| 385 |
-
return [
|
| 386 |
-
DEFAULT_EVAL_PROMPT,
|
| 387 |
-
DEFAULT_INPUT,
|
| 388 |
-
DEFAULT_RESPONSE,
|
| 389 |
-
metric_name # Return the selected metric name
|
| 390 |
-
]
|
| 391 |
-
|
| 392 |
-
metric_data = EXAMPLE_METRICS[metric_name]
|
| 393 |
-
return [
|
| 394 |
-
metric_data["prompt"],
|
| 395 |
-
metric_data["input"],
|
| 396 |
-
metric_data["response"],
|
| 397 |
-
metric_name # Return the selected metric name
|
| 398 |
-
]
|
| 399 |
-
|
| 400 |
-
# Modify get_random_metric to return the selected metric
|
| 401 |
-
def get_random_metric():
|
| 402 |
-
metrics = list(EXAMPLE_METRICS.keys())
|
| 403 |
-
selected_metric = random.choice(metrics)
|
| 404 |
-
return set_example_metric(selected_metric)
|
| 405 |
|
| 406 |
-
# In your Gradio interface setup, add a State for tracking the selected metric
|
| 407 |
with gr.Blocks(theme='default', css=CSS_STYLES) as demo:
|
| 408 |
judge_id = gr.State(get_new_session_id())
|
| 409 |
gr.Markdown(MAIN_TITLE)
|
|
@@ -678,53 +641,41 @@ with gr.Blocks(theme='default', css=CSS_STYLES) as demo:
|
|
| 678 |
outputs=[leaderboard_table, stats_display]
|
| 679 |
)
|
| 680 |
|
| 681 |
-
#
|
| 682 |
-
def update_button_states(metric_name):
|
| 683 |
-
results = set_example_metric(metric_name)
|
| 684 |
-
button_states = {
|
| 685 |
-
"Custom": False,
|
| 686 |
-
"Hallucination": False,
|
| 687 |
-
"Precision": False,
|
| 688 |
-
"Recall": False,
|
| 689 |
-
"Logical coherence": False,
|
| 690 |
-
"Faithfulness": False
|
| 691 |
-
}
|
| 692 |
-
button_states[metric_name] = True
|
| 693 |
-
return [
|
| 694 |
-
results[0], # eval_prompt
|
| 695 |
-
results[1], # variable_rows[0][1]
|
| 696 |
-
results[2], # variable_rows[1][1]
|
| 697 |
-
*[gr.update(variant="primary" if button_states[m] else "secondary")
|
| 698 |
-
for m in button_states.keys()]
|
| 699 |
-
]
|
| 700 |
-
|
| 701 |
-
# Update the button click handlers
|
| 702 |
custom_btn.click(
|
| 703 |
-
fn=lambda:
|
| 704 |
-
outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]
|
| 705 |
-
custom_btn, hallucination_btn, precision_btn, recall_btn,
|
| 706 |
-
coherence_btn, faithfulness_btn]
|
| 707 |
)
|
| 708 |
|
| 709 |
-
# Repeat for other buttons...
|
| 710 |
hallucination_btn.click(
|
| 711 |
-
fn=lambda:
|
| 712 |
-
outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]
|
| 713 |
-
|
| 714 |
-
|
|
|
|
|
|
|
|
|
|
| 715 |
)
|
| 716 |
|
| 717 |
-
|
|
|
|
|
|
|
|
|
|
| 718 |
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 722 |
|
|
|
|
| 723 |
demo.load(
|
| 724 |
-
fn=
|
| 725 |
-
outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]
|
| 726 |
-
custom_btn, hallucination_btn, precision_btn, recall_btn,
|
| 727 |
-
coherence_btn, faithfulness_btn]
|
| 728 |
)
|
| 729 |
|
| 730 |
demo.launch()
|
|
|
|
| 352 |
return [
|
| 353 |
DEFAULT_EVAL_PROMPT,
|
| 354 |
DEFAULT_INPUT,
|
| 355 |
+
DEFAULT_RESPONSE
|
|
|
|
| 356 |
]
|
| 357 |
|
| 358 |
metric_data = EXAMPLE_METRICS[metric_name]
|
| 359 |
return [
|
| 360 |
metric_data["prompt"],
|
| 361 |
metric_data["input"],
|
| 362 |
+
metric_data["response"]
|
|
|
|
| 363 |
]
|
| 364 |
|
| 365 |
# Select random metric at startup
|
| 366 |
def get_random_metric():
|
| 367 |
metrics = list(EXAMPLE_METRICS.keys())
|
| 368 |
+
return set_example_metric(random.choice(metrics))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
|
|
|
|
| 370 |
with gr.Blocks(theme='default', css=CSS_STYLES) as demo:
|
| 371 |
judge_id = gr.State(get_new_session_id())
|
| 372 |
gr.Markdown(MAIN_TITLE)
|
|
|
|
| 641 |
outputs=[leaderboard_table, stats_display]
|
| 642 |
)
|
| 643 |
|
| 644 |
+
# Add click handlers for metric buttons
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
custom_btn.click(
|
| 646 |
+
fn=lambda: set_example_metric("Custom"),
|
| 647 |
+
outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
|
|
|
|
|
|
|
| 648 |
)
|
| 649 |
|
|
|
|
| 650 |
hallucination_btn.click(
|
| 651 |
+
fn=lambda: set_example_metric("Hallucination"),
|
| 652 |
+
outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
|
| 653 |
+
)
|
| 654 |
+
|
| 655 |
+
precision_btn.click(
|
| 656 |
+
fn=lambda: set_example_metric("Precision"),
|
| 657 |
+
outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
|
| 658 |
)
|
| 659 |
|
| 660 |
+
recall_btn.click(
|
| 661 |
+
fn=lambda: set_example_metric("Recall"),
|
| 662 |
+
outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
|
| 663 |
+
)
|
| 664 |
|
| 665 |
+
coherence_btn.click(
|
| 666 |
+
fn=lambda: set_example_metric("Logical coherence"),
|
| 667 |
+
outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
|
| 668 |
+
)
|
| 669 |
+
|
| 670 |
+
faithfulness_btn.click(
|
| 671 |
+
fn=lambda: set_example_metric("Faithfulness"),
|
| 672 |
+
outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
|
| 673 |
+
)
|
| 674 |
|
| 675 |
+
# Set random metric at startup
|
| 676 |
demo.load(
|
| 677 |
+
fn=get_random_metric,
|
| 678 |
+
outputs=[eval_prompt, variable_rows[0][1], variable_rows[1][1]]
|
|
|
|
|
|
|
| 679 |
)
|
| 680 |
|
| 681 |
demo.launch()
|