deepseek-reasoner prompt engineering and UI improvements (#8)
Browse files- deepseek-reasoner prompt engineering and UI improvements (806eaf0e3d06715c3aae050362fc88852f203145)
Co-authored-by: Furkan Eris <[email protected]>
app.py
CHANGED
|
@@ -230,14 +230,22 @@ class PAS2:
|
|
| 230 |
model_id = model_config["model_id"]
|
| 231 |
model_type = model_config["type"]
|
| 232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
messages = [
|
| 234 |
{
|
| 235 |
"role": "system",
|
| 236 |
-
"content":
|
| 237 |
},
|
| 238 |
{
|
| 239 |
"role": "user",
|
| 240 |
-
"content":
|
| 241 |
}
|
| 242 |
]
|
| 243 |
|
|
@@ -450,27 +458,49 @@ Your response should be a JSON with the following fields:
|
|
| 450 |
try:
|
| 451 |
logger.info("Sending judgment request to %s...", self.judge_model)
|
| 452 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
# Use the appropriate client and model based on the type
|
| 454 |
if model_type == "mistral":
|
| 455 |
response = client.chat.complete(
|
| 456 |
model=model_id,
|
| 457 |
messages=[
|
| 458 |
-
{"role": "system", "content":
|
| 459 |
-
{"role": "user", "content":
|
| 460 |
],
|
| 461 |
response_format={"type": "json_object"}
|
| 462 |
)
|
| 463 |
-
|
|
|
|
|
|
|
| 464 |
else: # openai-compatible API
|
| 465 |
response = client.chat.completions.create(
|
| 466 |
model=model_id,
|
| 467 |
messages=[
|
| 468 |
-
{"role": "system", "content":
|
| 469 |
-
{"role": "user", "content":
|
| 470 |
],
|
| 471 |
response_format={"type": "json_object"}
|
| 472 |
)
|
| 473 |
-
|
|
|
|
|
|
|
| 474 |
|
| 475 |
logger.debug("Received judgment response from %s: %s", self.judge_model, result_json)
|
| 476 |
|
|
@@ -1288,36 +1318,62 @@ def create_interface():
|
|
| 1288 |
.info-box {
|
| 1289 |
padding: 1.2em;
|
| 1290 |
border-radius: 8px;
|
| 1291 |
-
background-color: #
|
| 1292 |
margin-bottom: 1em;
|
| 1293 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1294 |
color: #263238;
|
|
|
|
| 1295 |
}
|
| 1296 |
.hallucination-positive {
|
| 1297 |
padding: 1.2em;
|
| 1298 |
border-radius: 8px;
|
| 1299 |
-
background-color: #
|
| 1300 |
-
border-left: 5px solid #
|
| 1301 |
margin-bottom: 1em;
|
| 1302 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
| 1303 |
-
color: #
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1304 |
}
|
| 1305 |
.hallucination-negative {
|
| 1306 |
padding: 1.2em;
|
| 1307 |
border-radius: 8px;
|
| 1308 |
-
background-color: #
|
| 1309 |
-
border-left: 5px solid #
|
| 1310 |
margin-bottom: 1em;
|
| 1311 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
| 1312 |
color: #1b5e20;
|
| 1313 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1314 |
.response-box {
|
| 1315 |
padding: 1.2em;
|
| 1316 |
border-radius: 8px;
|
| 1317 |
-
background-color: #
|
| 1318 |
margin-bottom: 0.8em;
|
| 1319 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
| 1320 |
-
color: #
|
|
|
|
|
|
|
| 1321 |
}
|
| 1322 |
.example-queries {
|
| 1323 |
display: flex;
|
|
@@ -1801,11 +1857,19 @@ def create_interface():
|
|
| 1801 |
|
| 1802 |
response = detector.save_feedback(results, combined_feedback)
|
| 1803 |
|
| 1804 |
-
#
|
| 1805 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1806 |
<div id="feedback-popup-container"></div>
|
| 1807 |
<script>
|
| 1808 |
-
(function() {
|
| 1809 |
// Create the notification element
|
| 1810 |
const container = document.getElementById('feedback-popup-container');
|
| 1811 |
const notification = document.createElement('div');
|
|
@@ -1814,7 +1878,7 @@ def create_interface():
|
|
| 1814 |
position: fixed;
|
| 1815 |
top: 50px;
|
| 1816 |
right: 20px;
|
| 1817 |
-
background-color:
|
| 1818 |
color: white;
|
| 1819 |
padding: 15px;
|
| 1820 |
border-radius: 5px;
|
|
@@ -1830,16 +1894,18 @@ def create_interface():
|
|
| 1830 |
// Create notification content
|
| 1831 |
const checkmark = document.createElement('div');
|
| 1832 |
checkmark.style.marginRight = '10px';
|
| 1833 |
-
checkmark.textContent = '
|
| 1834 |
|
| 1835 |
const textContainer = document.createElement('div');
|
| 1836 |
|
| 1837 |
const heading = document.createElement('div');
|
| 1838 |
heading.style.fontWeight = 'bold';
|
| 1839 |
-
heading.textContent = '
|
| 1840 |
|
| 1841 |
const message = document.createElement('div');
|
| 1842 |
-
message.textContent = '
|
|
|
|
|
|
|
| 1843 |
|
| 1844 |
textContainer.appendChild(heading);
|
| 1845 |
textContainer.appendChild(message);
|
|
@@ -1851,24 +1917,24 @@ def create_interface():
|
|
| 1851 |
document.body.appendChild(notification);
|
| 1852 |
|
| 1853 |
// Show notification
|
| 1854 |
-
setTimeout(function() {
|
| 1855 |
notification.style.opacity = '1';
|
| 1856 |
notification.style.transform = 'translateX(0)';
|
| 1857 |
|
| 1858 |
// Hide after 3 seconds
|
| 1859 |
-
setTimeout(function() {
|
| 1860 |
notification.style.opacity = '0';
|
| 1861 |
notification.style.transform = 'translateX(50px)';
|
| 1862 |
|
| 1863 |
// Remove element after animation
|
| 1864 |
-
setTimeout(function() {
|
| 1865 |
notification.remove();
|
| 1866 |
-
}, 300);
|
| 1867 |
-
}, 3000);
|
| 1868 |
-
}, 100);
|
| 1869 |
-
})();
|
| 1870 |
</script>
|
| 1871 |
-
<div>Feedback
|
| 1872 |
"""
|
| 1873 |
|
| 1874 |
return feedback_response
|
|
@@ -1970,7 +2036,7 @@ def create_interface():
|
|
| 1970 |
feedback_stats = gr.HTML(visible=True)
|
| 1971 |
|
| 1972 |
# Feedback section
|
| 1973 |
-
with gr.Accordion("Provide Feedback", open=
|
| 1974 |
gr.Markdown("### Help Improve the System")
|
| 1975 |
gr.Markdown("Your feedback helps us refine the hallucination detection system.")
|
| 1976 |
|
|
@@ -2113,7 +2179,7 @@ def create_interface():
|
|
| 2113 |
"* <strong style='color: #b2dfdb;'>K</strong>: Weight factor (24 for model pairs)<br>" +
|
| 2114 |
"* <strong style='color: #b2dfdb;'>S</strong>: Actual score from user feedback (1 for correct, 0 for incorrect)<br>" +
|
| 2115 |
"* <strong style='color: #b2dfdb;'>E</strong>: Expected score based on current rating<br><br>" +
|
| 2116 |
-
"<em style='color: #80deea;'>E = 1 / (1 + 10<sup>(1500 -
|
| 2117 |
"<div style='flex: 1; min-width: 280px; padding: 12px; background-color: #455a64; border-radius: 6px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);'>" +
|
| 2118 |
"<h4 style='margin-top: 0; color: #ffffff;'>Available Models</h4>" +
|
| 2119 |
"<p style='color: #eceff1;'>The system randomly selects from these models for each hallucination detection:</p>" +
|
|
@@ -2260,7 +2326,7 @@ def create_interface():
|
|
| 2260 |
"* <strong style='color: #b2dfdb;'>K</strong>: Weight factor (32 for individual models)<br>" +
|
| 2261 |
"* <strong style='color: #b2dfdb;'>S</strong>: Actual score (1 for correct judgment, 0 for incorrect)<br>" +
|
| 2262 |
"* <strong style='color: #b2dfdb;'>E</strong>: Expected score based on current rating<br><br>" +
|
| 2263 |
-
"<em style='color: #80deea;'>E = 1 / (1 + 10<sup>(1500 -
|
| 2264 |
"<p style='color: #eceff1; margin-top: 10px;'>All models start with a base ELO of 1500. Scores are updated after each user evaluation.</p></div>" +
|
| 2265 |
"<div style='flex: 1; min-width: 280px; padding: 12px; background-color: #455a64; border-radius: 6px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);'>" +
|
| 2266 |
"<h4 style='margin-top: 0; color: #ffffff;'>Interpretation Guidelines</h4>" +
|
|
@@ -2307,62 +2373,10 @@ def create_interface():
|
|
| 2307 |
return ""
|
| 2308 |
|
| 2309 |
# Feedback section is now moved directly inside the Detector tab
|
| 2310 |
-
|
| 2311 |
-
# Set up interval to update stats
|
| 2312 |
-
with gr.Row(elem_id="stats-container"):
|
| 2313 |
-
with gr.Column():
|
| 2314 |
-
gr.Markdown("### 📊 Live Prediction Accuracy")
|
| 2315 |
-
gr.Markdown("_Auto-refreshes every 5 seconds from MongoDB based on user feedback_")
|
| 2316 |
-
live_stats = gr.HTML(update_stats())
|
| 2317 |
-
|
| 2318 |
-
# Add loading animation style
|
| 2319 |
-
gr.HTML(
|
| 2320 |
-
"<style>" +
|
| 2321 |
-
"@keyframes pulse {" +
|
| 2322 |
-
"0% { opacity: 0.6; }" +
|
| 2323 |
-
"50% { opacity: 1; }" +
|
| 2324 |
-
"100% { opacity: 0.6; }" +
|
| 2325 |
-
"}" +
|
| 2326 |
-
".refreshing::after {" +
|
| 2327 |
-
"content: \"⟳\";" +
|
| 2328 |
-
"display: inline-block;" +
|
| 2329 |
-
"margin-left: 8px;" +
|
| 2330 |
-
"animation: pulse 1.5s infinite ease-in-out;" +
|
| 2331 |
-
"color: #2e7d32;" +
|
| 2332 |
-
"}" +
|
| 2333 |
-
"#stats-container {" +
|
| 2334 |
-
"border: 1px solid #b3e5fc;" +
|
| 2335 |
-
"border-radius: 10px;" +
|
| 2336 |
-
"padding: 15px;" +
|
| 2337 |
-
"margin: 10px 0;" +
|
| 2338 |
-
"background-color: #0277bd;" +
|
| 2339 |
-
"}" +
|
| 2340 |
-
"</style>" +
|
| 2341 |
-
"<div class=\"refreshing\" style=\"text-align: right; font-size: 0.8em; color: #eceff1;\">Auto-refreshing</div>"
|
| 2342 |
-
)
|
| 2343 |
|
| 2344 |
-
#
|
| 2345 |
-
refresh_btn = gr.Button("Refresh Stats", visible=False)
|
| 2346 |
-
refresh_btn.click(
|
| 2347 |
-
fn=update_stats,
|
| 2348 |
-
outputs=[live_stats]
|
| 2349 |
-
)
|
| 2350 |
-
|
| 2351 |
-
# Add JavaScript to auto-refresh the statistics and enhance the tabs
|
| 2352 |
gr.HTML("""
|
| 2353 |
<script>
|
| 2354 |
-
// Auto-refresh stats every 5 seconds
|
| 2355 |
-
function setupAutoRefresh() {
|
| 2356 |
-
const refreshInterval = 5000; // 5 seconds
|
| 2357 |
-
setInterval(() => {
|
| 2358 |
-
// Find the refresh button by its text and click it
|
| 2359 |
-
const refreshButtons = Array.from(document.querySelectorAll('button'));
|
| 2360 |
-
const refreshBtn = refreshButtons.find(btn => btn.textContent.includes('Refresh Stats'));
|
| 2361 |
-
if (refreshBtn) {
|
| 2362 |
-
refreshBtn.click();
|
| 2363 |
-
}
|
| 2364 |
-
}, refreshInterval);
|
| 2365 |
-
}
|
| 2366 |
|
| 2367 |
// Add highlighting to the selected tab and handle feedback section visibility
|
| 2368 |
function setupTabHighlighting() {
|
|
@@ -2414,7 +2428,6 @@ def create_interface():
|
|
| 2414 |
|
| 2415 |
// Set up all JavaScript enhancements after the page loads
|
| 2416 |
function setupAllEnhancements() {
|
| 2417 |
-
setupAutoRefresh();
|
| 2418 |
setupTabHighlighting();
|
| 2419 |
|
| 2420 |
// Simple solution to ensure feedback is only visible in detector tab
|
|
|
|
| 230 |
model_id = model_config["model_id"]
|
| 231 |
model_type = model_config["type"]
|
| 232 |
|
| 233 |
+
# Customize messages based on model
|
| 234 |
+
system_content = "You are a helpful AI assistant. Provide accurate, factual information in response to questions."
|
| 235 |
+
user_content = query
|
| 236 |
+
|
| 237 |
+
# Special handling for deepseek-reasoner
|
| 238 |
+
if model_id == "deepseek-reasoner":
|
| 239 |
+
user_content = f"Extract the following information and format it as JSON:\n\n{query}"
|
| 240 |
+
|
| 241 |
messages = [
|
| 242 |
{
|
| 243 |
"role": "system",
|
| 244 |
+
"content": system_content
|
| 245 |
},
|
| 246 |
{
|
| 247 |
"role": "user",
|
| 248 |
+
"content": user_content
|
| 249 |
}
|
| 250 |
]
|
| 251 |
|
|
|
|
| 458 |
try:
|
| 459 |
logger.info("Sending judgment request to %s...", self.judge_model)
|
| 460 |
|
| 461 |
+
# Customize the system prompt for deepseek-reasoner
|
| 462 |
+
customized_system_prompt = system_prompt
|
| 463 |
+
user_content = f"Evaluate these responses for hallucinations:\n\n{context}"
|
| 464 |
+
|
| 465 |
+
# Additional prompt engineering for deepseek-reasoner
|
| 466 |
+
if model_id == "deepseek-reasoner":
|
| 467 |
+
user_content = f"""Extract the following information and format it as JSON:
|
| 468 |
+
|
| 469 |
+
Evaluate these responses for hallucinations:\n\n{context}\n\n
|
| 470 |
+
- hallucination_detected: boolean indicating whether hallucinations were found
|
| 471 |
+
- confidence_score: number between 0 and 1 representing your confidence in the judgment
|
| 472 |
+
- conflicting_facts: an array of objects describing any conflicting information found
|
| 473 |
+
- reasoning: detailed explanation for your judgment
|
| 474 |
+
- summary: a concise summary of your analysis
|
| 475 |
+
|
| 476 |
+
Respond ONLY with valid JSON and no other text.
|
| 477 |
+
"""
|
| 478 |
+
|
| 479 |
# Use the appropriate client and model based on the type
|
| 480 |
if model_type == "mistral":
|
| 481 |
response = client.chat.complete(
|
| 482 |
model=model_id,
|
| 483 |
messages=[
|
| 484 |
+
{"role": "system", "content": customized_system_prompt},
|
| 485 |
+
{"role": "user", "content": user_content}
|
| 486 |
],
|
| 487 |
response_format={"type": "json_object"}
|
| 488 |
)
|
| 489 |
+
content = response.choices[0].message.content
|
| 490 |
+
# Normal JSON parsing for mistral
|
| 491 |
+
result_json = json.loads(content)
|
| 492 |
else: # openai-compatible API
|
| 493 |
response = client.chat.completions.create(
|
| 494 |
model=model_id,
|
| 495 |
messages=[
|
| 496 |
+
{"role": "system", "content": customized_system_prompt},
|
| 497 |
+
{"role": "user", "content": user_content}
|
| 498 |
],
|
| 499 |
response_format={"type": "json_object"}
|
| 500 |
)
|
| 501 |
+
content = response.choices[0].message.content
|
| 502 |
+
|
| 503 |
+
result_json = json.loads(content)
|
| 504 |
|
| 505 |
logger.debug("Received judgment response from %s: %s", self.judge_model, result_json)
|
| 506 |
|
|
|
|
| 1318 |
.info-box {
|
| 1319 |
padding: 1.2em;
|
| 1320 |
border-radius: 8px;
|
| 1321 |
+
background-color: #eceff1;
|
| 1322 |
margin-bottom: 1em;
|
| 1323 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
| 1324 |
+
color: #455a64;
|
| 1325 |
+
line-height: 1.5;
|
| 1326 |
+
border-left: 3px solid #607d8b;
|
| 1327 |
+
}
|
| 1328 |
+
.info-box p strong {
|
| 1329 |
color: #263238;
|
| 1330 |
+
font-weight: 600;
|
| 1331 |
}
|
| 1332 |
.hallucination-positive {
|
| 1333 |
padding: 1.2em;
|
| 1334 |
border-radius: 8px;
|
| 1335 |
+
background-color: #f8e8e8;
|
| 1336 |
+
border-left: 5px solid #c62828;
|
| 1337 |
margin-bottom: 1em;
|
| 1338 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
| 1339 |
+
color: #4d0c0c;
|
| 1340 |
+
}
|
| 1341 |
+
.hallucination-positive h3 {
|
| 1342 |
+
color: #c62828;
|
| 1343 |
+
margin-top: 0;
|
| 1344 |
+
margin-bottom: 0.5em;
|
| 1345 |
+
}
|
| 1346 |
+
.hallucination-positive p {
|
| 1347 |
+
color: #5d4141;
|
| 1348 |
+
line-height: 1.5;
|
| 1349 |
}
|
| 1350 |
.hallucination-negative {
|
| 1351 |
padding: 1.2em;
|
| 1352 |
border-radius: 8px;
|
| 1353 |
+
background-color: #e8f5e9;
|
| 1354 |
+
border-left: 5px solid #2e7d32;
|
| 1355 |
margin-bottom: 1em;
|
| 1356 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
| 1357 |
color: #1b5e20;
|
| 1358 |
}
|
| 1359 |
+
.hallucination-negative h3 {
|
| 1360 |
+
color: #2e7d32;
|
| 1361 |
+
margin-top: 0;
|
| 1362 |
+
margin-bottom: 0.5em;
|
| 1363 |
+
}
|
| 1364 |
+
.hallucination-negative p {
|
| 1365 |
+
color: #3e5e40;
|
| 1366 |
+
line-height: 1.5;
|
| 1367 |
+
}
|
| 1368 |
.response-box {
|
| 1369 |
padding: 1.2em;
|
| 1370 |
border-radius: 8px;
|
| 1371 |
+
background-color: #eceff1;
|
| 1372 |
margin-bottom: 0.8em;
|
| 1373 |
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
| 1374 |
+
color: #37474f;
|
| 1375 |
+
line-height: 1.5;
|
| 1376 |
+
border-left: 3px solid #78909c;
|
| 1377 |
}
|
| 1378 |
.example-queries {
|
| 1379 |
display: flex;
|
|
|
|
| 1857 |
|
| 1858 |
response = detector.save_feedback(results, combined_feedback)
|
| 1859 |
|
| 1860 |
+
# Check if this is a duplicate feedback submission message
|
| 1861 |
+
is_duplicate = "already provided feedback" in response
|
| 1862 |
+
notification_color = "#ff9800" if is_duplicate else "#4caf50"
|
| 1863 |
+
icon = "ℹ" if is_duplicate else "✓"
|
| 1864 |
+
heading_text = "Note" if is_duplicate else "Thank You!"
|
| 1865 |
+
message_text = response
|
| 1866 |
+
status_text = "already submitted" if is_duplicate else "submitted successfully"
|
| 1867 |
+
|
| 1868 |
+
# Return a message that will trigger a JS notification
|
| 1869 |
+
feedback_response = f"""
|
| 1870 |
<div id="feedback-popup-container"></div>
|
| 1871 |
<script>
|
| 1872 |
+
(function() {{
|
| 1873 |
// Create the notification element
|
| 1874 |
const container = document.getElementById('feedback-popup-container');
|
| 1875 |
const notification = document.createElement('div');
|
|
|
|
| 1878 |
position: fixed;
|
| 1879 |
top: 50px;
|
| 1880 |
right: 20px;
|
| 1881 |
+
background-color: {notification_color};
|
| 1882 |
color: white;
|
| 1883 |
padding: 15px;
|
| 1884 |
border-radius: 5px;
|
|
|
|
| 1894 |
// Create notification content
|
| 1895 |
const checkmark = document.createElement('div');
|
| 1896 |
checkmark.style.marginRight = '10px';
|
| 1897 |
+
checkmark.textContent = '{icon}';
|
| 1898 |
|
| 1899 |
const textContainer = document.createElement('div');
|
| 1900 |
|
| 1901 |
const heading = document.createElement('div');
|
| 1902 |
heading.style.fontWeight = 'bold';
|
| 1903 |
+
heading.textContent = '{heading_text}';
|
| 1904 |
|
| 1905 |
const message = document.createElement('div');
|
| 1906 |
+
message.textContent = '{message_text}';
|
| 1907 |
+
message.style.fontSize = '0.9em';
|
| 1908 |
+
message.style.marginTop = '2px';
|
| 1909 |
|
| 1910 |
textContainer.appendChild(heading);
|
| 1911 |
textContainer.appendChild(message);
|
|
|
|
| 1917 |
document.body.appendChild(notification);
|
| 1918 |
|
| 1919 |
// Show notification
|
| 1920 |
+
setTimeout(function() {{
|
| 1921 |
notification.style.opacity = '1';
|
| 1922 |
notification.style.transform = 'translateX(0)';
|
| 1923 |
|
| 1924 |
// Hide after 3 seconds
|
| 1925 |
+
setTimeout(function() {{
|
| 1926 |
notification.style.opacity = '0';
|
| 1927 |
notification.style.transform = 'translateX(50px)';
|
| 1928 |
|
| 1929 |
// Remove element after animation
|
| 1930 |
+
setTimeout(function() {{
|
| 1931 |
notification.remove();
|
| 1932 |
+
}}, 300);
|
| 1933 |
+
}}, 3000);
|
| 1934 |
+
}}, 100);
|
| 1935 |
+
}})();
|
| 1936 |
</script>
|
| 1937 |
+
<div>Feedback {status_text}!</div>
|
| 1938 |
"""
|
| 1939 |
|
| 1940 |
return feedback_response
|
|
|
|
| 2036 |
feedback_stats = gr.HTML(visible=True)
|
| 2037 |
|
| 2038 |
# Feedback section
|
| 2039 |
+
with gr.Accordion("Provide Feedback", open=True, elem_id="detector-feedback") as feedback_accordion:
|
| 2040 |
gr.Markdown("### Help Improve the System")
|
| 2041 |
gr.Markdown("Your feedback helps us refine the hallucination detection system.")
|
| 2042 |
|
|
|
|
| 2179 |
"* <strong style='color: #b2dfdb;'>K</strong>: Weight factor (24 for model pairs)<br>" +
|
| 2180 |
"* <strong style='color: #b2dfdb;'>S</strong>: Actual score from user feedback (1 for correct, 0 for incorrect)<br>" +
|
| 2181 |
"* <strong style='color: #b2dfdb;'>E</strong>: Expected score based on current rating<br><br>" +
|
| 2182 |
+
"<em style='color: #80deea;'>E = 1 / (1 + 10<sup>(1500 - ELO_model)/400</sup>)</em></div></div>" +
|
| 2183 |
"<div style='flex: 1; min-width: 280px; padding: 12px; background-color: #455a64; border-radius: 6px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);'>" +
|
| 2184 |
"<h4 style='margin-top: 0; color: #ffffff;'>Available Models</h4>" +
|
| 2185 |
"<p style='color: #eceff1;'>The system randomly selects from these models for each hallucination detection:</p>" +
|
|
|
|
| 2326 |
"* <strong style='color: #b2dfdb;'>K</strong>: Weight factor (32 for individual models)<br>" +
|
| 2327 |
"* <strong style='color: #b2dfdb;'>S</strong>: Actual score (1 for correct judgment, 0 for incorrect)<br>" +
|
| 2328 |
"* <strong style='color: #b2dfdb;'>E</strong>: Expected score based on current rating<br><br>" +
|
| 2329 |
+
"<em style='color: #80deea;'>E = 1 / (1 + 10<sup>(1500 - ELO_model)/400</sup>)</em></div>" +
|
| 2330 |
"<p style='color: #eceff1; margin-top: 10px;'>All models start with a base ELO of 1500. Scores are updated after each user evaluation.</p></div>" +
|
| 2331 |
"<div style='flex: 1; min-width: 280px; padding: 12px; background-color: #455a64; border-radius: 6px; box-shadow: 0 1px 3px rgba(0,0,0,0.12);'>" +
|
| 2332 |
"<h4 style='margin-top: 0; color: #ffffff;'>Interpretation Guidelines</h4>" +
|
|
|
|
| 2373 |
return ""
|
| 2374 |
|
| 2375 |
# Feedback section is now moved directly inside the Detector tab
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2376 |
|
| 2377 |
+
# Add JavaScript to enhance the tabs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2378 |
gr.HTML("""
|
| 2379 |
<script>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2380 |
|
| 2381 |
// Add highlighting to the selected tab and handle feedback section visibility
|
| 2382 |
function setupTabHighlighting() {
|
|
|
|
| 2428 |
|
| 2429 |
// Set up all JavaScript enhancements after the page loads
|
| 2430 |
function setupAllEnhancements() {
|
|
|
|
| 2431 |
setupTabHighlighting();
|
| 2432 |
|
| 2433 |
// Simple solution to ensure feedback is only visible in detector tab
|