Commit 5ca8d20

about

ycy committed
1 Parent(s): cce655b

Files changed: src/about.py (+13 -5)

src/about.py CHANGED
@@ -63,11 +63,19 @@ def get_INTRODUCTION_TEXT(model_num: int, LAST_UPDATED: str, paper_link="TODO"):
 
 #TODO
 INTRODUCE_BENCHMARK = f"""
-<details>
-
-
-</details>
-"""
+<details style="margin: 10px 0; padding: 10px;">
+<summary style="cursor: pointer; font-size: 18px; color: #2c3e50; font-weight: bold; transition: color 0.3s;">
+💬 Metric Explanations
+</summary>
+<div style="color: #2c3e50; border-left: 4px solid #2980b9; padding-left: 12px; margin-top: 8px;">
+<p>
+<strong>CapArena-Auto</strong> is an arena-style automated evaluation benchmark for detailed captioning.
+It includes <strong>600 evaluation images</strong> and assesses model performance through
+<em>pairwise battles</em> with three baseline models. The final score is calculated by <strong>GPT4o-as-a-Judge</strong>.
+</p>
+</div>
+</details>
+"""
 
 #TODO About
 LLM_BENCHMARKS_TEXT = f"""
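
For context on the scoring protocol the new text describes (each model's captions fight pairwise battles against three baseline models over 600 images, with GPT-4o as the judge), here is a minimal sketch of how such an arena-style score could be aggregated. The judge callable, baseline names, and normalization below are illustrative assumptions, not the Space's actual implementation.

# Hedged sketch of arena-style pairwise scoring as described in the diff
# above. All names here are hypothetical; only the overall shape (battle
# every candidate caption against three baselines, let a judge pick the
# winner, aggregate into one leaderboard number) follows the commit text.

from typing import Callable

BASELINES = ["baseline_a", "baseline_b", "baseline_c"]  # hypothetical names

def arena_score(
    candidate_captions: dict[str, str],             # image_id -> candidate caption
    baseline_captions: dict[str, dict[str, str]],   # baseline -> image_id -> caption
    judge: Callable[[str, str, str], int],          # (image_id, cap_a, cap_b) -> +1 / 0 / -1
) -> float:
    """Average pairwise-battle outcome over all images and baselines.

    +1 = candidate wins the battle, 0 = tie, -1 = candidate loses.
    """
    total, battles = 0, 0
    for image_id, caption in candidate_captions.items():
        for baseline in BASELINES:
            outcome = judge(image_id, caption, baseline_captions[baseline][image_id])
            total += outcome
            battles += 1
    # Normalize the summed outcomes to a single score in [-100, 100].
    return 100.0 * total / battles

In the real benchmark the judge step would be a GPT-4o call comparing the two captions for the same image; the sketch keeps it abstract as a callable so the aggregation logic stands on its own.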