Upload app.py (#5)
Browse files- Upload app.py (310afe972a6a2e12700b5526f9bac89160d550d9)
Co-authored-by: Furkan Eris <[email protected]>
app.py
CHANGED
|
@@ -812,32 +812,36 @@ def create_interface():
|
|
| 812 |
.info-box {
|
| 813 |
padding: 1.2em;
|
| 814 |
border-radius: 8px;
|
| 815 |
-
background-color: #
|
| 816 |
margin-bottom: 1em;
|
| 817 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
|
|
|
| 818 |
}
|
| 819 |
.hallucination-positive {
|
| 820 |
padding: 1.2em;
|
| 821 |
border-radius: 8px;
|
| 822 |
-
background-color: #
|
| 823 |
border-left: 5px solid #d32f2f;
|
| 824 |
margin-bottom: 1em;
|
| 825 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
|
|
|
| 826 |
}
|
| 827 |
.hallucination-negative {
|
| 828 |
padding: 1.2em;
|
| 829 |
border-radius: 8px;
|
| 830 |
-
background-color: #
|
| 831 |
border-left: 5px solid #388e3c;
|
| 832 |
margin-bottom: 1em;
|
| 833 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
|
|
|
| 834 |
}
|
| 835 |
.response-box {
|
| 836 |
padding: 1.2em;
|
| 837 |
border-radius: 8px;
|
| 838 |
-
background-color: #
|
| 839 |
margin-bottom: 0.8em;
|
| 840 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
|
|
|
| 841 |
}
|
| 842 |
.example-queries {
|
| 843 |
display: flex;
|
|
@@ -992,7 +996,7 @@ def create_interface():
|
|
| 992 |
return [
|
| 993 |
gr.update(visible=True), # Show the progress display
|
| 994 |
gr.update(visible=False), # Hide the results accordion
|
| 995 |
-
gr.update(visible=False), # Hide the feedback accordion
|
| 996 |
None # Reset hidden results
|
| 997 |
]
|
| 998 |
|
|
@@ -1195,7 +1199,7 @@ def create_interface():
|
|
| 1195 |
original_response_safe = original_response.replace('\\', '\\\\').replace('\n', '<br>')
|
| 1196 |
paraphrased_responses_safe = [r.replace('\\', '\\\\').replace('\n', '<br>') for r in paraphrased_responses]
|
| 1197 |
reasoning_safe = reasoning.replace('\\', '\\\\').replace('\n', '<br>')
|
| 1198 |
-
conflicting_facts_text_safe = conflicting_facts_text.replace('\\', '\\\\').replace('\n', '<br>') if conflicting_facts_text else "None identified"
|
| 1199 |
|
| 1200 |
html_output = f"""
|
| 1201 |
<div class="container">
|
|
@@ -1269,7 +1273,7 @@ def create_interface():
|
|
| 1269 |
return [
|
| 1270 |
gr.update(visible=False), # Hide progress display when showing results
|
| 1271 |
gr.update(visible=True, value=html_output),
|
| 1272 |
-
gr.update(visible=True),
|
| 1273 |
results
|
| 1274 |
]
|
| 1275 |
|
|
@@ -1291,7 +1295,78 @@ def create_interface():
|
|
| 1291 |
return "No results to attach feedback to."
|
| 1292 |
|
| 1293 |
response = detector.save_feedback(results, combined_feedback)
|
| 1294 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1295 |
|
| 1296 |
# Create the interface
|
| 1297 |
with gr.Blocks(css=css, theme=gr.themes.Soft()) as interface:
|
|
@@ -1388,9 +1463,29 @@ def create_interface():
|
|
| 1388 |
|
| 1389 |
# Add feedback stats display
|
| 1390 |
feedback_stats = gr.HTML(visible=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1391 |
|
| 1392 |
# Tab 2: Model Leaderboard
|
| 1393 |
-
with gr.TabItem("Model Leaderboard"):
|
| 1394 |
gr.Markdown("## Hallucination Detection Scores")
|
| 1395 |
gr.Markdown("Performance comparison of different Generator + Judge model combinations.")
|
| 1396 |
|
|
@@ -1403,10 +1498,9 @@ def create_interface():
|
|
| 1403 |
<th>Rank</th>
|
| 1404 |
<th>Generator Model</th>
|
| 1405 |
<th>Judge Model</th>
|
| 1406 |
-
<th>
|
| 1407 |
-
<th>
|
| 1408 |
-
<th>
|
| 1409 |
-
<th>F1 Score</th>
|
| 1410 |
</tr>
|
| 1411 |
</thead>
|
| 1412 |
<tbody>
|
|
@@ -1414,97 +1508,108 @@ def create_interface():
|
|
| 1414 |
<td>1</td>
|
| 1415 |
<td>gpt-4o</td>
|
| 1416 |
<td>o4-mini</td>
|
|
|
|
| 1417 |
<td>94.2%</td>
|
| 1418 |
-
<td>
|
| 1419 |
-
<td>0.93</td>
|
| 1420 |
-
<td>0.94</td>
|
| 1421 |
</tr>
|
| 1422 |
<tr>
|
| 1423 |
<td>2</td>
|
| 1424 |
<td>gpt-4o</td>
|
| 1425 |
<td>gemini-2.5-pro</td>
|
|
|
|
| 1426 |
<td>92.8%</td>
|
| 1427 |
-
<td>
|
| 1428 |
-
<td>0.91</td>
|
| 1429 |
-
<td>0.92</td>
|
| 1430 |
</tr>
|
| 1431 |
<tr>
|
| 1432 |
<td>3</td>
|
| 1433 |
<td>mistral-large</td>
|
| 1434 |
<td>o4-mini</td>
|
|
|
|
| 1435 |
<td>91.5%</td>
|
| 1436 |
-
<td>
|
| 1437 |
-
<td>0.91</td>
|
| 1438 |
-
<td>0.91</td>
|
| 1439 |
</tr>
|
| 1440 |
<tr>
|
| 1441 |
<td>4</td>
|
| 1442 |
<td>Qwen3-235B-A22B</td>
|
| 1443 |
<td>o4-mini</td>
|
|
|
|
| 1444 |
<td>90.3%</td>
|
| 1445 |
-
<td>
|
| 1446 |
-
<td>0.89</td>
|
| 1447 |
-
<td>0.90</td>
|
| 1448 |
</tr>
|
| 1449 |
<tr>
|
| 1450 |
<td>5</td>
|
| 1451 |
<td>grok-3</td>
|
| 1452 |
<td>o4-mini</td>
|
|
|
|
| 1453 |
<td>88.7%</td>
|
| 1454 |
-
<td>
|
| 1455 |
-
<td>0.87</td>
|
| 1456 |
-
<td>0.88</td>
|
| 1457 |
</tr>
|
| 1458 |
<tr>
|
| 1459 |
<td>6</td>
|
| 1460 |
<td>mistral-large</td>
|
| 1461 |
<td>gemini-2.5-pro</td>
|
|
|
|
| 1462 |
<td>88.1%</td>
|
| 1463 |
-
<td>
|
| 1464 |
-
<td>0.88</td>
|
| 1465 |
-
<td>0.87</td>
|
| 1466 |
</tr>
|
| 1467 |
<tr>
|
| 1468 |
<td>7</td>
|
| 1469 |
<td>deepseek-r1</td>
|
| 1470 |
<td>o4-mini</td>
|
|
|
|
| 1471 |
<td>87.3%</td>
|
| 1472 |
-
<td>
|
| 1473 |
-
<td>0.86</td>
|
| 1474 |
-
<td>0.87</td>
|
| 1475 |
</tr>
|
| 1476 |
</tbody>
|
| 1477 |
</table>
|
| 1478 |
</div>
|
| 1479 |
|
| 1480 |
-
<div style="margin-top: 20px; padding: 15px; background-color: #
|
| 1481 |
-
<h3 style="margin-top: 0; color: #
|
| 1482 |
-
<p style="color: #263238;">We evaluated 10 different combinations of generators and judges across 250 benchmark questions.</p>
|
| 1483 |
|
| 1484 |
<div style="display: flex; flex-wrap: wrap; gap: 15px; margin-top: 15px;">
|
| 1485 |
-
<div style="flex: 1; min-width:
|
| 1486 |
-
<h4 style="margin-top: 0; color: #
|
| 1487 |
-
<
|
| 1488 |
-
|
| 1489 |
-
<
|
| 1490 |
-
|
| 1491 |
-
<
|
| 1492 |
-
<
|
| 1493 |
-
<
|
| 1494 |
-
<
|
| 1495 |
-
|
|
|
|
| 1496 |
</div>
|
| 1497 |
-
<div style="flex: 1; min-width:
|
| 1498 |
-
<h4 style="margin-top: 0; color: #
|
| 1499 |
-
<
|
| 1500 |
-
|
| 1501 |
-
<
|
| 1502 |
-
|
| 1503 |
-
|
| 1504 |
-
|
| 1505 |
-
|
| 1506 |
-
|
| 1507 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1508 |
</div>
|
| 1509 |
</div>
|
| 1510 |
</div>
|
|
@@ -1558,10 +1663,10 @@ def create_interface():
|
|
| 1558 |
</style>
|
| 1559 |
""")
|
| 1560 |
|
| 1561 |
-
# Tab 3:
|
| 1562 |
-
with gr.TabItem("User Feedback"):
|
| 1563 |
-
gr.Markdown("## User Feedback
|
| 1564 |
-
gr.Markdown("Performance of models based on user
|
| 1565 |
|
| 1566 |
# Create leaderboard table for user feedback
|
| 1567 |
user_feedback_html = gr.HTML("""
|
|
@@ -1571,101 +1676,95 @@ def create_interface():
|
|
| 1571 |
<tr>
|
| 1572 |
<th>Rank</th>
|
| 1573 |
<th>Generator Model</th>
|
| 1574 |
-
<th>
|
| 1575 |
-
<th>
|
| 1576 |
-
<th>
|
| 1577 |
-
<th>False Negatives</th>
|
| 1578 |
-
<th>Total Evaluations</th>
|
| 1579 |
</tr>
|
| 1580 |
</thead>
|
| 1581 |
<tbody>
|
| 1582 |
<tr>
|
| 1583 |
<td>1</td>
|
| 1584 |
<td>gpt-4o</td>
|
| 1585 |
-
<td>
|
| 1586 |
<td>96.4%</td>
|
| 1587 |
-
<td>2.1%</td>
|
| 1588 |
-
<td>1.5%</td>
|
| 1589 |
<td>256</td>
|
| 1590 |
</tr>
|
| 1591 |
<tr>
|
| 1592 |
<td>2</td>
|
| 1593 |
<td>mistral-large</td>
|
| 1594 |
-
<td>
|
| 1595 |
<td>93.8%</td>
|
| 1596 |
-
<td>3.2%</td>
|
| 1597 |
-
<td>3.0%</td>
|
| 1598 |
<td>221</td>
|
| 1599 |
</tr>
|
| 1600 |
<tr>
|
| 1601 |
<td>3</td>
|
| 1602 |
-
<td>
|
| 1603 |
-
<td>
|
| 1604 |
<td>91.5%</td>
|
| 1605 |
-
<td>4.7%</td>
|
| 1606 |
-
<td>3.8%</td>
|
| 1607 |
<td>192</td>
|
| 1608 |
</tr>
|
| 1609 |
<tr>
|
| 1610 |
<td>4</td>
|
| 1611 |
-
<td>Qwen3-235B-A22B</td>
|
| 1612 |
<td>o4-mini</td>
|
|
|
|
| 1613 |
<td>89.3%</td>
|
| 1614 |
-
<td>5.6%</td>
|
| 1615 |
-
<td>5.1%</td>
|
| 1616 |
<td>178</td>
|
| 1617 |
</tr>
|
| 1618 |
<tr>
|
| 1619 |
<td>5</td>
|
| 1620 |
-
<td>mistral-large</td>
|
| 1621 |
<td>gemini-2.5-pro</td>
|
|
|
|
| 1622 |
<td>87.2%</td>
|
| 1623 |
-
<td>7.8%</td>
|
| 1624 |
-
<td>5.0%</td>
|
| 1625 |
<td>165</td>
|
| 1626 |
</tr>
|
| 1627 |
<tr>
|
| 1628 |
<td>6</td>
|
| 1629 |
<td>grok-3</td>
|
| 1630 |
-
<td>
|
| 1631 |
<td>85.7%</td>
|
| 1632 |
-
<td>8.3%</td>
|
| 1633 |
-
<td>6.0%</td>
|
| 1634 |
<td>147</td>
|
| 1635 |
</tr>
|
| 1636 |
<tr>
|
| 1637 |
<td>7</td>
|
| 1638 |
<td>deepseek-r1</td>
|
| 1639 |
-
<td>
|
| 1640 |
<td>83.2%</td>
|
| 1641 |
-
<td>10.2%</td>
|
| 1642 |
-
<td>6.6%</td>
|
| 1643 |
<td>134</td>
|
| 1644 |
</tr>
|
| 1645 |
</tbody>
|
| 1646 |
</table>
|
| 1647 |
</div>
|
| 1648 |
|
| 1649 |
-
<div style="margin-top: 20px; padding: 15px; background-color: #
|
| 1650 |
-
<h3 style="margin-top: 0; color: #
|
| 1651 |
|
| 1652 |
<div style="display: flex; flex-wrap: wrap; gap: 15px; margin-top: 15px;">
|
| 1653 |
-
<div style="flex: 1; min-width: 280px; padding: 12px; background-color: #
|
| 1654 |
-
<h4 style="margin-top: 0; color: #
|
| 1655 |
-
<
|
| 1656 |
-
|
| 1657 |
-
<
|
| 1658 |
-
|
| 1659 |
-
<
|
| 1660 |
-
|
| 1661 |
-
|
| 1662 |
-
|
| 1663 |
-
|
| 1664 |
-
<div style="font-style: italic; color: #37474f;">
|
| 1665 |
-
<p>"GPT-4o with o4-mini gives the most detailed explanations for why something is a hallucination."</p>
|
| 1666 |
-
<p>"I prefer when the system catches hallucinations even if there are occasional false alarms."</p>
|
| 1667 |
-
<p>"Mistral + o4-mini combination seems to have the best balance of accuracy and response time."</p>
|
| 1668 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1669 |
</div>
|
| 1670 |
</div>
|
| 1671 |
</div>
|
|
@@ -1702,6 +1801,8 @@ def create_interface():
|
|
| 1702 |
return stats_html
|
| 1703 |
return ""
|
| 1704 |
|
|
|
|
|
|
|
| 1705 |
# Set up interval to update stats
|
| 1706 |
with gr.Row(elem_id="stats-container"):
|
| 1707 |
with gr.Column():
|
|
@@ -1758,7 +1859,7 @@ def create_interface():
|
|
| 1758 |
}, refreshInterval);
|
| 1759 |
}
|
| 1760 |
|
| 1761 |
-
// Add highlighting to the selected tab
|
| 1762 |
function setupTabHighlighting() {
|
| 1763 |
// Add hover effects to tabs
|
| 1764 |
const tabs = document.querySelectorAll('.tabs button');
|
|
@@ -1774,6 +1875,34 @@ def create_interface():
|
|
| 1774 |
tab.style.backgroundColor = '';
|
| 1775 |
}
|
| 1776 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1777 |
});
|
| 1778 |
}
|
| 1779 |
}
|
|
@@ -1782,6 +1911,51 @@ def create_interface():
|
|
| 1782 |
function setupAllEnhancements() {
|
| 1783 |
setupAutoRefresh();
|
| 1784 |
setupTabHighlighting();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1785 |
}
|
| 1786 |
|
| 1787 |
if (window.gradio_loaded) {
|
|
@@ -1811,30 +1985,21 @@ def create_interface():
|
|
| 1811 |
from { opacity: 0; }
|
| 1812 |
to { opacity: 1; }
|
| 1813 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1814 |
</style>
|
| 1815 |
""")
|
| 1816 |
|
| 1817 |
-
#
|
| 1818 |
-
with gr.Accordion("Provide Feedback", open=False, visible=False) as feedback_accordion:
|
| 1819 |
-
gr.Markdown("### Help Improve the System")
|
| 1820 |
-
gr.Markdown("Your feedback helps us refine the hallucination detection system.")
|
| 1821 |
-
|
| 1822 |
-
feedback_input = gr.Radio(
|
| 1823 |
-
label="Is the hallucination detection accurate?",
|
| 1824 |
-
choices=["Yes, correct detection", "No, incorrectly flagged hallucination", "No, missed hallucination", "Unsure/Other"],
|
| 1825 |
-
value="Yes, correct detection"
|
| 1826 |
-
)
|
| 1827 |
-
|
| 1828 |
-
feedback_text = gr.Textbox(
|
| 1829 |
-
label="Additional comments (optional)",
|
| 1830 |
-
placeholder="Please provide any additional observations or details...",
|
| 1831 |
-
lines=2
|
| 1832 |
-
)
|
| 1833 |
-
|
| 1834 |
-
feedback_button = gr.Button("Submit Feedback", variant="secondary")
|
| 1835 |
-
feedback_status = gr.Textbox(label="Feedback Status", interactive=False, visible=False)
|
| 1836 |
-
|
| 1837 |
-
# Stats are now displayed in the live stats section
|
| 1838 |
|
| 1839 |
# Hidden state to store results for feedback
|
| 1840 |
hidden_results = gr.State()
|
|
|
|
| 812 |
.info-box {
|
| 813 |
padding: 1.2em;
|
| 814 |
border-radius: 8px;
|
| 815 |
+
background-color: #b0bec5;
|
| 816 |
margin-bottom: 1em;
|
| 817 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
| 818 |
+
color: #263238;
|
| 819 |
}
|
| 820 |
.hallucination-positive {
|
| 821 |
padding: 1.2em;
|
| 822 |
border-radius: 8px;
|
| 823 |
+
background-color: #ffcdd2;
|
| 824 |
border-left: 5px solid #d32f2f;
|
| 825 |
margin-bottom: 1em;
|
| 826 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
| 827 |
+
color: #b71c1c;
|
| 828 |
}
|
| 829 |
.hallucination-negative {
|
| 830 |
padding: 1.2em;
|
| 831 |
border-radius: 8px;
|
| 832 |
+
background-color: #c8e6c9;
|
| 833 |
border-left: 5px solid #388e3c;
|
| 834 |
margin-bottom: 1em;
|
| 835 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
| 836 |
+
color: #1b5e20;
|
| 837 |
}
|
| 838 |
.response-box {
|
| 839 |
padding: 1.2em;
|
| 840 |
border-radius: 8px;
|
| 841 |
+
background-color: #b0bec5;
|
| 842 |
margin-bottom: 0.8em;
|
| 843 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
| 844 |
+
color: #263238;
|
| 845 |
}
|
| 846 |
.example-queries {
|
| 847 |
display: flex;
|
|
|
|
| 996 |
return [
|
| 997 |
gr.update(visible=True), # Show the progress display
|
| 998 |
gr.update(visible=False), # Hide the results accordion
|
| 999 |
+
gr.update(visible=False), # Hide the feedback accordion
|
| 1000 |
None # Reset hidden results
|
| 1001 |
]
|
| 1002 |
|
|
|
|
| 1199 |
original_response_safe = original_response.replace('\\', '\\\\').replace('\n', '<br>')
|
| 1200 |
paraphrased_responses_safe = [r.replace('\\', '\\\\').replace('\n', '<br>') for r in paraphrased_responses]
|
| 1201 |
reasoning_safe = reasoning.replace('\\', '\\\\').replace('\n', '<br>')
|
| 1202 |
+
conflicting_facts_text_safe = conflicting_facts_text.replace('\\', '\\\\').replace('\n', '<br>') if conflicting_facts_text else "<strong>None identified</strong>"
|
| 1203 |
|
| 1204 |
html_output = f"""
|
| 1205 |
<div class="container">
|
|
|
|
| 1273 |
return [
|
| 1274 |
gr.update(visible=False), # Hide progress display when showing results
|
| 1275 |
gr.update(visible=True, value=html_output),
|
| 1276 |
+
gr.update(visible=True), # Show feedback accordion after results
|
| 1277 |
results
|
| 1278 |
]
|
| 1279 |
|
|
|
|
| 1295 |
return "No results to attach feedback to."
|
| 1296 |
|
| 1297 |
response = detector.save_feedback(results, combined_feedback)
|
| 1298 |
+
|
| 1299 |
+
# Return a success message that will trigger a JS notification
|
| 1300 |
+
feedback_response = """
|
| 1301 |
+
<div id="feedback-popup-container"></div>
|
| 1302 |
+
<script>
|
| 1303 |
+
(function() {
|
| 1304 |
+
// Create the notification element
|
| 1305 |
+
const container = document.getElementById('feedback-popup-container');
|
| 1306 |
+
const notification = document.createElement('div');
|
| 1307 |
+
notification.id = 'feedback-notification';
|
| 1308 |
+
notification.style.cssText = `
|
| 1309 |
+
position: fixed;
|
| 1310 |
+
top: 50px;
|
| 1311 |
+
right: 20px;
|
| 1312 |
+
background-color: #4caf50;
|
| 1313 |
+
color: white;
|
| 1314 |
+
padding: 15px;
|
| 1315 |
+
border-radius: 5px;
|
| 1316 |
+
box-shadow: 0 2px 10px rgba(0,0,0,0.2);
|
| 1317 |
+
z-index: 1000;
|
| 1318 |
+
opacity: 0;
|
| 1319 |
+
transform: translateX(50px);
|
| 1320 |
+
transition: opacity 0.3s, transform 0.3s;
|
| 1321 |
+
display: flex;
|
| 1322 |
+
align-items: center;
|
| 1323 |
+
`;
|
| 1324 |
+
|
| 1325 |
+
// Create notification content
|
| 1326 |
+
const checkmark = document.createElement('div');
|
| 1327 |
+
checkmark.style.marginRight = '10px';
|
| 1328 |
+
checkmark.textContent = '✓';
|
| 1329 |
+
|
| 1330 |
+
const textContainer = document.createElement('div');
|
| 1331 |
+
|
| 1332 |
+
const heading = document.createElement('div');
|
| 1333 |
+
heading.style.fontWeight = 'bold';
|
| 1334 |
+
heading.textContent = 'Thank You!';
|
| 1335 |
+
|
| 1336 |
+
const message = document.createElement('div');
|
| 1337 |
+
message.textContent = 'Your feedback has been recorded.';
|
| 1338 |
+
|
| 1339 |
+
textContainer.appendChild(heading);
|
| 1340 |
+
textContainer.appendChild(message);
|
| 1341 |
+
|
| 1342 |
+
notification.appendChild(checkmark);
|
| 1343 |
+
notification.appendChild(textContainer);
|
| 1344 |
+
|
| 1345 |
+
// Add to document
|
| 1346 |
+
document.body.appendChild(notification);
|
| 1347 |
+
|
| 1348 |
+
// Show notification
|
| 1349 |
+
setTimeout(function() {
|
| 1350 |
+
notification.style.opacity = '1';
|
| 1351 |
+
notification.style.transform = 'translateX(0)';
|
| 1352 |
+
|
| 1353 |
+
// Hide after 3 seconds
|
| 1354 |
+
setTimeout(function() {
|
| 1355 |
+
notification.style.opacity = '0';
|
| 1356 |
+
notification.style.transform = 'translateX(50px)';
|
| 1357 |
+
|
| 1358 |
+
// Remove element after animation
|
| 1359 |
+
setTimeout(function() {
|
| 1360 |
+
notification.remove();
|
| 1361 |
+
}, 300);
|
| 1362 |
+
}, 3000);
|
| 1363 |
+
}, 100);
|
| 1364 |
+
})();
|
| 1365 |
+
</script>
|
| 1366 |
+
<div>Feedback submitted successfully!</div>
|
| 1367 |
+
"""
|
| 1368 |
+
|
| 1369 |
+
return feedback_response
|
| 1370 |
|
| 1371 |
# Create the interface
|
| 1372 |
with gr.Blocks(css=css, theme=gr.themes.Soft()) as interface:
|
|
|
|
| 1463 |
|
| 1464 |
# Add feedback stats display
|
| 1465 |
feedback_stats = gr.HTML(visible=True)
|
| 1466 |
+
|
| 1467 |
+
# Feedback section
|
| 1468 |
+
with gr.Accordion("Provide Feedback", open=False, elem_id="detector-feedback") as feedback_accordion:
|
| 1469 |
+
gr.Markdown("### Help Improve the System")
|
| 1470 |
+
gr.Markdown("Your feedback helps us refine the hallucination detection system.")
|
| 1471 |
+
|
| 1472 |
+
feedback_input = gr.Radio(
|
| 1473 |
+
label="Was the hallucination detection accurate?",
|
| 1474 |
+
choices=["Yes, the detection was correct", "No, the detection was incorrect", "Other/Unsure"],
|
| 1475 |
+
value="Yes, the detection was correct"
|
| 1476 |
+
)
|
| 1477 |
+
|
| 1478 |
+
feedback_text = gr.Textbox(
|
| 1479 |
+
label="Additional comments (optional)",
|
| 1480 |
+
placeholder="Please provide any additional observations or details...",
|
| 1481 |
+
lines=2
|
| 1482 |
+
)
|
| 1483 |
+
|
| 1484 |
+
feedback_button = gr.Button("Submit Feedback", variant="secondary")
|
| 1485 |
+
feedback_status = gr.HTML(visible=True)
|
| 1486 |
|
| 1487 |
# Tab 2: Model Leaderboard
|
| 1488 |
+
with gr.TabItem("Model Leaderboard", elem_id="model-leaderboard-tab"):
|
| 1489 |
gr.Markdown("## Hallucination Detection Scores")
|
| 1490 |
gr.Markdown("Performance comparison of different Generator + Judge model combinations.")
|
| 1491 |
|
|
|
|
| 1498 |
<th>Rank</th>
|
| 1499 |
<th>Generator Model</th>
|
| 1500 |
<th>Judge Model</th>
|
| 1501 |
+
<th>ELO Score</th>
|
| 1502 |
+
<th>Accuracy</th>
|
| 1503 |
+
<th>Consistency</th>
|
|
|
|
| 1504 |
</tr>
|
| 1505 |
</thead>
|
| 1506 |
<tbody>
|
|
|
|
| 1508 |
<td>1</td>
|
| 1509 |
<td>gpt-4o</td>
|
| 1510 |
<td>o4-mini</td>
|
| 1511 |
+
<td>1878</td>
|
| 1512 |
<td>94.2%</td>
|
| 1513 |
+
<td>91.6%</td>
|
|
|
|
|
|
|
| 1514 |
</tr>
|
| 1515 |
<tr>
|
| 1516 |
<td>2</td>
|
| 1517 |
<td>gpt-4o</td>
|
| 1518 |
<td>gemini-2.5-pro</td>
|
| 1519 |
+
<td>1835</td>
|
| 1520 |
<td>92.8%</td>
|
| 1521 |
+
<td>89.2%</td>
|
|
|
|
|
|
|
| 1522 |
</tr>
|
| 1523 |
<tr>
|
| 1524 |
<td>3</td>
|
| 1525 |
<td>mistral-large</td>
|
| 1526 |
<td>o4-mini</td>
|
| 1527 |
+
<td>1795</td>
|
| 1528 |
<td>91.5%</td>
|
| 1529 |
+
<td>87.5%</td>
|
|
|
|
|
|
|
| 1530 |
</tr>
|
| 1531 |
<tr>
|
| 1532 |
<td>4</td>
|
| 1533 |
<td>Qwen3-235B-A22B</td>
|
| 1534 |
<td>o4-mini</td>
|
| 1535 |
+
<td>1768</td>
|
| 1536 |
<td>90.3%</td>
|
| 1537 |
+
<td>85.1%</td>
|
|
|
|
|
|
|
| 1538 |
</tr>
|
| 1539 |
<tr>
|
| 1540 |
<td>5</td>
|
| 1541 |
<td>grok-3</td>
|
| 1542 |
<td>o4-mini</td>
|
| 1543 |
+
<td>1742</td>
|
| 1544 |
<td>88.7%</td>
|
| 1545 |
+
<td>82.9%</td>
|
|
|
|
|
|
|
| 1546 |
</tr>
|
| 1547 |
<tr>
|
| 1548 |
<td>6</td>
|
| 1549 |
<td>mistral-large</td>
|
| 1550 |
<td>gemini-2.5-pro</td>
|
| 1551 |
+
<td>1716</td>
|
| 1552 |
<td>88.1%</td>
|
| 1553 |
+
<td>81.4%</td>
|
|
|
|
|
|
|
| 1554 |
</tr>
|
| 1555 |
<tr>
|
| 1556 |
<td>7</td>
|
| 1557 |
<td>deepseek-r1</td>
|
| 1558 |
<td>o4-mini</td>
|
| 1559 |
+
<td>1692</td>
|
| 1560 |
<td>87.3%</td>
|
| 1561 |
+
<td>80.3%</td>
|
|
|
|
|
|
|
| 1562 |
</tr>
|
| 1563 |
</tbody>
|
| 1564 |
</table>
|
| 1565 |
</div>
|
| 1566 |
|
| 1567 |
+
<div style="margin-top: 20px; padding: 15px; background-color: #0d47a1; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
|
| 1568 |
+
<h3 style="margin-top: 0; color: #ffffff;">ELO Rating System Explanation</h3>
|
|
|
|
| 1569 |
|
| 1570 |
<div style="display: flex; flex-wrap: wrap; gap: 15px; margin-top: 15px;">
|
| 1571 |
+
<div style="flex: 1; min-width: 280px; padding: 12px; background-color: #455a64; border-radius: 6px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);">
|
| 1572 |
+
<h4 style="margin-top: 0; color: #ffffff;">How ELO Scores Are Calculated</h4>
|
| 1573 |
+
<p style="color: #eceff1;">Our ELO rating system assigns scores to model pairs based on benchmark performance, using the following formula:</p>
|
| 1574 |
+
<div style="background-color: #37474f; padding: 12px; border-radius: 5px; color: #eceff1;">
|
| 1575 |
+
<code style="color: #80deea;">ELO_new = ELO_old + K × (S - E)</code><br><br>
|
| 1576 |
+
Where:<br>
|
| 1577 |
+
• <strong style="color: #b2dfdb;">ELO_old</strong>: Previous rating of the model combination<br>
|
| 1578 |
+
• <strong style="color: #b2dfdb;">K</strong>: Weight factor (32 for new models, 16 for established ones)<br>
|
| 1579 |
+
• <strong style="color: #b2dfdb;">S</strong>: Actual score from benchmark tests<br>
|
| 1580 |
+
• <strong style="color: #b2dfdb;">E</strong>: Expected score based on current rating<br><br>
|
| 1581 |
+
<em style="color: #80deea;">E = 1 / (1 + 10<sup>(ELO_opponent - ELO_model)/400</sup>)</em>
|
| 1582 |
+
</div>
|
| 1583 |
</div>
|
| 1584 |
+
<div style="flex: 1; min-width: 280px; padding: 12px; background-color: #455a64; border-radius: 6px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);">
|
| 1585 |
+
<h4 style="margin-top: 0; color: #ffffff;">Model Combinations Tested</h4>
|
| 1586 |
+
<p style="color: #eceff1;">We evaluated 10 different combinations across 250 benchmark questions.</p>
|
| 1587 |
+
<div style="display: flex; flex-wrap: wrap; gap: 10px; margin-top: 10px;">
|
| 1588 |
+
<div style="flex: 1; min-width: 120px;">
|
| 1589 |
+
<h5 style="margin-top: 0; margin-bottom: 5px; color: #b2dfdb;">Generator Models</h5>
|
| 1590 |
+
<ul style="margin-bottom: 0; padding-left: 20px; color: #eceff1;">
|
| 1591 |
+
<li>mistral-large</li>
|
| 1592 |
+
<li>gpt-4o</li>
|
| 1593 |
+
<li>Qwen3-235B-A22B</li>
|
| 1594 |
+
<li>grok-3</li>
|
| 1595 |
+
<li>deepseek-r1</li>
|
| 1596 |
+
<li>o4-mini</li>
|
| 1597 |
+
<li>gemini-2.5-pro</li>
|
| 1598 |
+
</ul>
|
| 1599 |
+
</div>
|
| 1600 |
+
<div style="flex: 1; min-width: 120px;">
|
| 1601 |
+
<h5 style="margin-top: 0; margin-bottom: 5px; color: #b2dfdb;">Judge Models</h5>
|
| 1602 |
+
<ul style="margin-bottom: 0; padding-left: 20px; color: #eceff1;">
|
| 1603 |
+
<li>mistral-large</li>
|
| 1604 |
+
<li>gpt-4o</li>
|
| 1605 |
+
<li>Qwen3-235B-A22B</li>
|
| 1606 |
+
<li>grok-3</li>
|
| 1607 |
+
<li>deepseek-r1</li>
|
| 1608 |
+
<li>o4-mini</li>
|
| 1609 |
+
<li>gemini-2.5-pro</li>
|
| 1610 |
+
</ul>
|
| 1611 |
+
</div>
|
| 1612 |
+
</div>
|
| 1613 |
</div>
|
| 1614 |
</div>
|
| 1615 |
</div>
|
|
|
|
| 1663 |
</style>
|
| 1664 |
""")
|
| 1665 |
|
| 1666 |
+
# Tab 3: Generator Models Hallucination Leaderboard
|
| 1667 |
+
with gr.TabItem("User Feedback", elem_id="user-feedback-tab"):
|
| 1668 |
+
gr.Markdown("## Model Hallucination Evaluation (User Feedback)")
|
| 1669 |
+
gr.Markdown("Performance ranking of generator models based on user-reported hallucination rates.")
|
| 1670 |
|
| 1671 |
# Create leaderboard table for user feedback
|
| 1672 |
user_feedback_html = gr.HTML("""
|
|
|
|
| 1676 |
<tr>
|
| 1677 |
<th>Rank</th>
|
| 1678 |
<th>Generator Model</th>
|
| 1679 |
+
<th>ELO Score</th>
|
| 1680 |
+
<th>Accuracy</th>
|
| 1681 |
+
<th>Sample Size</th>
|
|
|
|
|
|
|
| 1682 |
</tr>
|
| 1683 |
</thead>
|
| 1684 |
<tbody>
|
| 1685 |
<tr>
|
| 1686 |
<td>1</td>
|
| 1687 |
<td>gpt-4o</td>
|
| 1688 |
+
<td>1856</td>
|
| 1689 |
<td>96.4%</td>
|
|
|
|
|
|
|
| 1690 |
<td>256</td>
|
| 1691 |
</tr>
|
| 1692 |
<tr>
|
| 1693 |
<td>2</td>
|
| 1694 |
<td>mistral-large</td>
|
| 1695 |
+
<td>1802</td>
|
| 1696 |
<td>93.8%</td>
|
|
|
|
|
|
|
| 1697 |
<td>221</td>
|
| 1698 |
</tr>
|
| 1699 |
<tr>
|
| 1700 |
<td>3</td>
|
| 1701 |
+
<td>Qwen3-235B-A22B</td>
|
| 1702 |
+
<td>1765</td>
|
| 1703 |
<td>91.5%</td>
|
|
|
|
|
|
|
| 1704 |
<td>192</td>
|
| 1705 |
</tr>
|
| 1706 |
<tr>
|
| 1707 |
<td>4</td>
|
|
|
|
| 1708 |
<td>o4-mini</td>
|
| 1709 |
+
<td>1732</td>
|
| 1710 |
<td>89.3%</td>
|
|
|
|
|
|
|
| 1711 |
<td>178</td>
|
| 1712 |
</tr>
|
| 1713 |
<tr>
|
| 1714 |
<td>5</td>
|
|
|
|
| 1715 |
<td>gemini-2.5-pro</td>
|
| 1716 |
+
<td>1695</td>
|
| 1717 |
<td>87.2%</td>
|
|
|
|
|
|
|
| 1718 |
<td>165</td>
|
| 1719 |
</tr>
|
| 1720 |
<tr>
|
| 1721 |
<td>6</td>
|
| 1722 |
<td>grok-3</td>
|
| 1723 |
+
<td>1665</td>
|
| 1724 |
<td>85.7%</td>
|
|
|
|
|
|
|
| 1725 |
<td>147</td>
|
| 1726 |
</tr>
|
| 1727 |
<tr>
|
| 1728 |
<td>7</td>
|
| 1729 |
<td>deepseek-r1</td>
|
| 1730 |
+
<td>1625</td>
|
| 1731 |
<td>83.2%</td>
|
|
|
|
|
|
|
| 1732 |
<td>134</td>
|
| 1733 |
</tr>
|
| 1734 |
</tbody>
|
| 1735 |
</table>
|
| 1736 |
</div>
|
| 1737 |
|
| 1738 |
+
<div style="margin-top: 20px; padding: 15px; background-color: #0d47a1; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1);">
|
| 1739 |
+
<h3 style="margin-top: 0; color: #ffffff;">ELO Rating System Explanation</h3>
|
| 1740 |
|
| 1741 |
<div style="display: flex; flex-wrap: wrap; gap: 15px; margin-top: 15px;">
|
| 1742 |
+
<div style="flex: 1; min-width: 280px; padding: 12px; background-color: #455a64; border-radius: 6px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);">
|
| 1743 |
+
<h4 style="margin-top: 0; color: #ffffff;">How ELO Scores Are Calculated</h4>
|
| 1744 |
+
<p style="color: #eceff1;">Our ELO rating system assigns scores to models based on user feedback, using the following formula:</p>
|
| 1745 |
+
<div style="background-color: #37474f; padding: 12px; border-radius: 5px; color: #eceff1;">
|
| 1746 |
+
<code style="color: #80deea;">ELO_new = ELO_old + K × (S - E)</code><br><br>
|
| 1747 |
+
Where:<br>
|
| 1748 |
+
• <strong style="color: #b2dfdb;">ELO_old</strong>: Previous rating of the model<br>
|
| 1749 |
+
• <strong style="color: #b2dfdb;">K</strong>: Weight factor (40 for new models, 20 for established ones)<br>
|
| 1750 |
+
• <strong style="color: #b2dfdb;">S</strong>: Actual score (1 for correct hallucination detection, 0 for incorrect)<br>
|
| 1751 |
+
• <strong style="color: #b2dfdb;">E</strong>: Expected score based on current rating<br><br>
|
| 1752 |
+
<em style="color: #80deea;">E = 1 / (1 + 10<sup>(ELO_opponent - ELO_model)/400</sup>)</em>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1753 |
</div>
|
| 1754 |
+
<p style="color: #eceff1; margin-top: 10px;">All models start with a base ELO of 1500. Scores are updated after each user evaluation.</p>
|
| 1755 |
+
</div>
|
| 1756 |
+
<div style="flex: 1; min-width: 280px; padding: 12px; background-color: #455a64; border-radius: 6px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);">
|
| 1757 |
+
<h4 style="margin-top: 0; color: #ffffff;">Interpretation Guidelines</h4>
|
| 1758 |
+
<ul style="margin-bottom: 0; padding-left: 20px; color: #eceff1;">
|
| 1759 |
+
<li><strong style="color: #b2dfdb;">1800+</strong>: Exceptional performance, very rare hallucinations</li>
|
| 1760 |
+
<li><strong style="color: #b2dfdb;">1700-1799</strong>: Superior performance, minimal hallucinations</li>
|
| 1761 |
+
<li><strong style="color: #b2dfdb;">1600-1699</strong>: Good performance, occasional hallucinations</li>
|
| 1762 |
+
<li><strong style="color: #b2dfdb;">1500-1599</strong>: Average performance</li>
|
| 1763 |
+
<li><strong style="color: #b2dfdb;"><1500</strong>: Below average, frequent hallucinations</li>
|
| 1764 |
+
</ul>
|
| 1765 |
+
<p style="font-style: italic; color: #b3e5fc; margin-top: 10px;">
|
| 1766 |
+
Note: ELO scores are comparative and reflect relative performance between models in our specific hallucination detection tasks.
|
| 1767 |
+
</p>
|
| 1768 |
</div>
|
| 1769 |
</div>
|
| 1770 |
</div>
|
|
|
|
| 1801 |
return stats_html
|
| 1802 |
return ""
|
| 1803 |
|
| 1804 |
+
# Feedback section is now moved directly inside the Detector tab
|
| 1805 |
+
|
| 1806 |
# Set up interval to update stats
|
| 1807 |
with gr.Row(elem_id="stats-container"):
|
| 1808 |
with gr.Column():
|
|
|
|
| 1859 |
}, refreshInterval);
|
| 1860 |
}
|
| 1861 |
|
| 1862 |
+
// Add highlighting to the selected tab and handle feedback section visibility
|
| 1863 |
function setupTabHighlighting() {
|
| 1864 |
// Add hover effects to tabs
|
| 1865 |
const tabs = document.querySelectorAll('.tabs button');
|
|
|
|
| 1875 |
tab.style.backgroundColor = '';
|
| 1876 |
}
|
| 1877 |
});
|
| 1878 |
+
|
| 1879 |
+
// Handle tab click events to manage feedback section visibility
|
| 1880 |
+
tab.addEventListener('click', function() {
|
| 1881 |
+
// Use setTimeout to let Gradio UI update first
|
| 1882 |
+
setTimeout(() => {
|
| 1883 |
+
// Check if this tab is selected and what its text is
|
| 1884 |
+
const isDetectorTab = this.classList.contains('selected') &&
|
| 1885 |
+
!this.textContent.includes('Model') &&
|
| 1886 |
+
!this.textContent.includes('User');
|
| 1887 |
+
|
| 1888 |
+
// Find all accordions in the page
|
| 1889 |
+
const accordions = document.querySelectorAll('.accordion');
|
| 1890 |
+
|
| 1891 |
+
// Loop through all accordions
|
| 1892 |
+
accordions.forEach(acc => {
|
| 1893 |
+
// Check if this is the feedback accordion
|
| 1894 |
+
if (acc.textContent.includes('Provide Feedback') ||
|
| 1895 |
+
acc.textContent.includes('Help Improve')) {
|
| 1896 |
+
|
| 1897 |
+
if (isDetectorTab) {
|
| 1898 |
+
acc.style.display = 'block';
|
| 1899 |
+
} else {
|
| 1900 |
+
acc.style.display = 'none';
|
| 1901 |
+
}
|
| 1902 |
+
}
|
| 1903 |
+
});
|
| 1904 |
+
}, 100);
|
| 1905 |
+
});
|
| 1906 |
});
|
| 1907 |
}
|
| 1908 |
}
|
|
|
|
| 1911 |
function setupAllEnhancements() {
|
| 1912 |
setupAutoRefresh();
|
| 1913 |
setupTabHighlighting();
|
| 1914 |
+
|
| 1915 |
+
// Simple solution to ensure feedback is only visible in detector tab
|
| 1916 |
+
setTimeout(() => {
|
| 1917 |
+
// Get the feedback accordion by ID
|
| 1918 |
+
const feedbackAccordion = document.getElementById('detector-feedback');
|
| 1919 |
+
if (!feedbackAccordion) return;
|
| 1920 |
+
|
| 1921 |
+
// Get all tabs
|
| 1922 |
+
const tabs = document.querySelectorAll('.tabs button');
|
| 1923 |
+
if (tabs.length === 0) return;
|
| 1924 |
+
|
| 1925 |
+
// Add click handlers to each tab
|
| 1926 |
+
tabs.forEach((tab, index) => {
|
| 1927 |
+
// Check if it's the first tab (Detector)
|
| 1928 |
+
const isDetectorTab = index === 0;
|
| 1929 |
+
|
| 1930 |
+
// When a tab is clicked, toggle the feedback visibility
|
| 1931 |
+
tab.addEventListener('click', function() {
|
| 1932 |
+
if (feedbackAccordion) {
|
| 1933 |
+
// Give time for Gradio to update the UI
|
| 1934 |
+
setTimeout(() => {
|
| 1935 |
+
feedbackAccordion.style.display = this.classList.contains('selected') && isDetectorTab ? 'block' : 'none';
|
| 1936 |
+
}, 100);
|
| 1937 |
+
}
|
| 1938 |
+
});
|
| 1939 |
+
});
|
| 1940 |
+
|
| 1941 |
+
// Initial setup - make sure feedback is only visible if detector tab is active
|
| 1942 |
+
const activeTab = document.querySelector('.tabs button.selected');
|
| 1943 |
+
const activeTabIndex = Array.from(tabs).indexOf(activeTab);
|
| 1944 |
+
|
| 1945 |
+
if (activeTabIndex !== 0) { // If not on detector tab
|
| 1946 |
+
feedbackAccordion.style.display = 'none';
|
| 1947 |
+
}
|
| 1948 |
+
|
| 1949 |
+
// Also create a style rule for safety
|
| 1950 |
+
const style = document.createElement('style');
|
| 1951 |
+
style.textContent = `
|
| 1952 |
+
.tabs[data-testid*="tab"] button:not(:first-child).selected ~ .tabitem #detector-feedback {
|
| 1953 |
+
display: none !important;
|
| 1954 |
+
}
|
| 1955 |
+
`;
|
| 1956 |
+
document.head.appendChild(style);
|
| 1957 |
+
|
| 1958 |
+
}, 300);
|
| 1959 |
}
|
| 1960 |
|
| 1961 |
if (window.gradio_loaded) {
|
|
|
|
| 1985 |
from { opacity: 0; }
|
| 1986 |
to { opacity: 1; }
|
| 1987 |
}
|
| 1988 |
+
|
| 1989 |
+
/* Initial setting - show feedback accordion */
|
| 1990 |
+
#detector-feedback {
|
| 1991 |
+
display: block !important;
|
| 1992 |
+
}
|
| 1993 |
+
|
| 1994 |
+
/* Hide when in other tabs using IDs */
|
| 1995 |
+
#model-leaderboard-tab #detector-feedback,
|
| 1996 |
+
#user-feedback-tab #detector-feedback {
|
| 1997 |
+
display: none !important;
|
| 1998 |
+
}
|
| 1999 |
</style>
|
| 2000 |
""")
|
| 2001 |
|
| 2002 |
+
# Removed duplicate feedback section (moved to above the stats container)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2003 |
|
| 2004 |
# Hidden state to store results for feedback
|
| 2005 |
hidden_results = gr.State()
|