Added selector of p_value
app.py CHANGED

@@ -133,6 +133,8 @@ def process_submission(*inputs):
    submit_prompt = gr.update(visible=True)
    submission_btn_yes = gr.update(interactive=True, visible=True)

+    # TODO: checkbox use_corrected_p_value
+
    pre_submit_leaderboard_table = gr.update(
        value=leaderboard_server.get_leaderboard(pre_submit=pre_submit, category=leaderboard_server.TASKS_CATEGORY_OVERALL_DETAILS),
        visible=True,

@@ -254,25 +256,29 @@ def fetch_model_detail(submission_id):
        gr.update(value=metadata['link_to_model'], visible=True)
    )

-def fetch_model_tournament_results_table(submission_id, category):
+def fetch_model_tournament_results_table(submission_id, category, use_corrected_p_value):
+    kind_of_p_value = leaderboard_server.KINDS_OF_P_VALUE[int(use_corrected_p_value)]
+
    if submission_id == None or category == None:
        return gr.update(
            visible=False,
        )
    else:
        return gr.update(
-            value=leaderboard_server.get_model_tournament_table(submission_id, category),
+            value=leaderboard_server.get_model_tournament_table(submission_id, category, kind_of_p_value=kind_of_p_value),
            visible=True,
        )

-def fetch_model_tournament_results_table_csv(submission_id, category):
+def fetch_model_tournament_results_table_csv(submission_id, category, use_corrected_p_value):
+    kind_of_p_value = leaderboard_server.KINDS_OF_P_VALUE[int(use_corrected_p_value)]
+
    if submission_id == None or category == None:
        return gr.update(
            visible=False,
        )
    else:
        return gr.update(
-            value=leaderboard_server.get_model_tournament_table_csv(submission_id, category),
+            value=leaderboard_server.get_model_tournament_table_csv(submission_id, category, kind_of_p_value=kind_of_p_value),
            visible=True,
        )

@@ -288,7 +294,7 @@ def create_task_abbreviation_legend_table(category):

    return task_abbreviation_legend_body

-def change_leaderboard_category(category, selected_submission_id):
+def change_leaderboard_category(category, use_corrected_p_value, selected_submission_id):
    if category == leaderboard_server.TASKS_CATEGORY_OVERALL:
        task_abbreviation_legend = gr.update(
            visible=False,

@@ -319,19 +325,21 @@ def change_leaderboard_category(category, selected_submission_id):
            visible=True,
        )

-    model_tournament_results_table = fetch_model_tournament_results_table(selected_submission_id, category)
-    model_tournament_results_table_csv = fetch_model_tournament_results_table_csv(selected_submission_id, category)
+    model_tournament_results_table = fetch_model_tournament_results_table(selected_submission_id, category, use_corrected_p_value)
+    model_tournament_results_table_csv = fetch_model_tournament_results_table_csv(selected_submission_id, category, use_corrected_p_value)
+
+    kind_of_p_value = leaderboard_server.KINDS_OF_P_VALUE[int(use_corrected_p_value)]

    leaderboard = gr.update(
-        value=leaderboard_server.get_leaderboard(category=category),
+        value=leaderboard_server.get_leaderboard(category=category, kind_of_p_value=kind_of_p_value),
        visible=True,
    )
    leaderboard_csv = gr.update(
-        value=leaderboard_server.get_leaderboard_csv(category=category),
+        value=leaderboard_server.get_leaderboard_csv(category=category, kind_of_p_value=kind_of_p_value),
        visible=True,
    )
    leaderboard_scatter_plot = gr.update(
-        value=leaderboard_server.get_leaderboard_scatter_plot(category=category),
+        value=leaderboard_server.get_leaderboard_scatter_plot(category=category, kind_of_p_value=kind_of_p_value),
        visible=True,
    )

@@ -552,6 +560,9 @@ const intervalId = setInterval(addTitleForEachRowOfLeaderboardTable, 1000);
</script>
"""

+CHECKBOX_USE_CORRECTED_P_VALUE_INFO = "Switch to False Discovery Rate (FDR) guarantees"
+CHECKBOX_USE_CORRECTED_P_VALUE_LABEL = "FDR guarantees"
+
def gradio_app():
    with gr.Blocks(theme=gr.themes.Soft(text_size=text_md), css=custom_css, head=custom_js) as main:
        check_significance_is_reachable_timer = gr.Timer(

@@ -654,6 +665,13 @@ def gradio_app():
                    interactive=True,
                )

+            with gr.Row():
+                leaderboard_use_corrected_p_value = gr.Checkbox(
+                    info=CHECKBOX_USE_CORRECTED_P_VALUE_INFO,
+                    label=CHECKBOX_USE_CORRECTED_P_VALUE_LABEL,
+                    interactive=True,
+                )
+
            with gr.Row():
                leaderboard_table = gr.DataFrame(
                    leaderboard_server.get_leaderboard(category=leaderboard_server.TASKS_CATEGORY_OVERALL),

@@ -725,29 +743,35 @@ def gradio_app():
                    visible=False,
                )

-            leaderboard_category_of_tasks.change(
-                fn=change_leaderboard_category,
-                inputs=[
-                    leaderboard_category_of_tasks,
-                    tournament_results_dropdown,
-                ],
-                outputs=[
-                    leaderboard_table,
-                    leaderboard_table_csv,
-                    leaderboard_table_legend,
-                    leaderboard_scatter_plot,
-                    tournament_results_title,
-                    tournament_results_dropdown,
-                    model_tournament_results_table,
-                    model_tournament_results_table_csv,
-                ],
-            )
+            for _leaderboard_form_input in [
+                leaderboard_category_of_tasks,
+                leaderboard_use_corrected_p_value,
+            ]:
+                _leaderboard_form_input.change(
+                    fn=change_leaderboard_category,
+                    inputs=[
+                        leaderboard_category_of_tasks,
+                        leaderboard_use_corrected_p_value,
+                        tournament_results_dropdown,
+                    ],
+                    outputs=[
+                        leaderboard_table,
+                        leaderboard_table_csv,
+                        leaderboard_table_legend,
+                        leaderboard_scatter_plot,
+                        tournament_results_title,
+                        tournament_results_dropdown,
+                        model_tournament_results_table,
+                        model_tournament_results_table_csv,
+                    ],
+                )

            tournament_results_dropdown.change(
                fn=fetch_model_tournament_results_table,
                inputs=[
                    tournament_results_dropdown,
                    leaderboard_category_of_tasks,
+                    leaderboard_use_corrected_p_value,
                ],
                outputs=model_tournament_results_table,
            ).then(

@@ -755,6 +779,7 @@ def gradio_app():
                inputs=[
                    tournament_results_dropdown,
                    leaderboard_category_of_tasks,
+                    leaderboard_use_corrected_p_value,
                ],
                outputs=model_tournament_results_table_csv,
            )

@@ -792,6 +817,14 @@ def gradio_app():
                    interactive=True,
                )

+            with gr.Row():
+                model_details_use_corrected_p_value = gr.Checkbox(
+                    info=CHECKBOX_USE_CORRECTED_P_VALUE_INFO,
+                    label=CHECKBOX_USE_CORRECTED_P_VALUE_LABEL,
+                    visible=False,
+                    interactive=True,
+                )
+
            with gr.Row():
                model_details_model_tournament_results_table = gr.DataFrame(
                    value=None,

@@ -820,11 +853,16 @@ def gradio_app():
                fn=lambda submission_id: gr.update(visible=True) if submission_id else gr.update(visible=False),
                inputs=model_details_model_dropdown,
                outputs=model_details_category_of_tasks
+            ).then(
+                fn=lambda submission_id: gr.update(visible=True) if submission_id else gr.update(visible=False),
+                inputs=model_details_model_dropdown,
+                outputs=model_details_use_corrected_p_value
            ).then(
                fn=fetch_model_tournament_results_table,
                inputs=[
                    model_details_model_dropdown,
                    model_details_category_of_tasks,
+                    model_details_use_corrected_p_value,
                ],
                outputs=model_details_model_tournament_results_table
            ).then(

@@ -832,25 +870,32 @@ def gradio_app():
                inputs=[
                    model_details_model_dropdown,
                    model_details_category_of_tasks,
+                    model_details_use_corrected_p_value,
                ],
                outputs=model_details_model_tournament_results_table_csv
            )

-            model_details_category_of_tasks.change(
-                fn=fetch_model_tournament_results_table,
-                inputs=[
-                    model_details_model_dropdown,
-                    model_details_category_of_tasks,
-                ],
-                outputs=model_details_model_tournament_results_table,
-            ).then(
-                fn=fetch_model_tournament_results_table_csv,
-                inputs=[
-                    model_details_model_dropdown,
-                    model_details_category_of_tasks,
-                ],
-                outputs=model_details_model_tournament_results_table_csv,
-            )
+            for _model_details_form_input in [
+                model_details_category_of_tasks,
+                model_details_use_corrected_p_value,
+            ]:
+                _model_details_form_input.change(
+                    fn=fetch_model_tournament_results_table,
+                    inputs=[
+                        model_details_model_dropdown,
+                        model_details_category_of_tasks,
+                        model_details_use_corrected_p_value,
+                    ],
+                    outputs=model_details_model_tournament_results_table,
+                ).then(
+                    fn=fetch_model_tournament_results_table_csv,
+                    inputs=[
+                        model_details_model_dropdown,
+                        model_details_category_of_tasks,
+                        model_details_use_corrected_p_value,
+                    ],
+                    outputs=model_details_model_tournament_results_table_csv,
+                )

        with gr.TabItem('Submission'):
            with gr.Column():
server.py CHANGED

@@ -277,6 +277,8 @@ class LeaderboardServer:
        self.CATEGORY_TO_TASK_ABBREVIATION_TO_DETAILS = self._prepare_category_to_task_abbr_to_details()
        self.MAX_LENGTH_OF_MODEL_TITLE = 28
        self.DIR_DATAFRAMES_CSV = "./dataframes_csv"
+        self.DEFAULT_KIND_OF_P_VALUE = "p_value"
+        self.KINDS_OF_P_VALUE = ["p_value", "corrected_p_value"]

        self.var_lock = ReadWriteLock()
        self.submission_ids = set()

@@ -326,45 +328,45 @@ class LeaderboardServer:

        categories = [self.TASKS_CATEGORY_OVERALL, self.TASKS_CATEGORY_OVERALL_DETAILS] + sorted(self.TASKS_CATEGORIES)

-        leaderboard_dataframes = {
-            category: self._get_leaderboard(category=category) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity'])
+        leaderboard_dataframes = {kind_of_p_value: {
+            category: self._get_leaderboard(category=category, kind_of_p_value=kind_of_p_value) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity'])
            for category in categories
-        }
+        } for kind_of_p_value in self.KINDS_OF_P_VALUE}

        with self.var_lock.ro:
            submission_ids = self.submission_ids

-        tournament_dataframes = {
+        tournament_dataframes = {kind_of_p_value: {
            submission_id: {
-                category: self._get_model_tournament_table(submission_id, category) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity'])
+                category: self._get_model_tournament_table(submission_id, category, kind_of_p_value=kind_of_p_value) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity'])
                for category in categories
            }
            for submission_id in submission_ids
-        }
+        } for kind_of_p_value in self.KINDS_OF_P_VALUE}

        with self.var_lock.rw:
            self.leaderboard_dataframes = leaderboard_dataframes
            self.tournament_dataframes = tournament_dataframes

-        leaderboard_dataframes_csv = {
+        leaderboard_dataframes_csv = {kind_of_p_value: {
            category: self._dataframe_to_csv(
-                self._get_leaderboard(category=category, to_csv=True) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity']),
-                f"Leaderboard - {category}.csv"
+                self._get_leaderboard(category=category, to_csv=True, kind_of_p_value=kind_of_p_value) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity']),
+                f"Leaderboard - {category}{' - FDR guarantees' if kind_of_p_value != self.DEFAULT_KIND_OF_P_VALUE else ''}.csv"
            )
            for category in categories
-        }
+        } for kind_of_p_value in self.KINDS_OF_P_VALUE}

        with self.var_lock.ro:
-            tournament_dataframes_csv = {
+            tournament_dataframes_csv = {kind_of_p_value: {
                submission_id: {
                    category: self._dataframe_to_csv(
-                        self._get_model_tournament_table(submission_id, category, to_csv=True) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity']),
-                        f"Tournament table - {self.submission_id_to_data[submission_id]['submission_metadata']['model_name'][:self.MAX_LENGTH_OF_MODEL_TITLE].replace('/', '_')} - {category}.csv",
+                        self._get_model_tournament_table(submission_id, category, to_csv=True, kind_of_p_value=kind_of_p_value) if not self.tournament_results_corrupted else pd.DataFrame(columns=['Corrupted, please check integrity']),
+                        f"Tournament table - {self.submission_id_to_data[submission_id]['submission_metadata']['model_name'][:self.MAX_LENGTH_OF_MODEL_TITLE].replace('/', '_')} - {category}{' - FDR guarantees' if kind_of_p_value != self.DEFAULT_KIND_OF_P_VALUE else ''}.csv",
                    )
                    for category in categories
                }
                for submission_id in submission_ids
-            }
+            } for kind_of_p_value in self.KINDS_OF_P_VALUE}

        with self.var_lock.rw:
            self.leaderboard_dataframes_csv = leaderboard_dataframes_csv

@@ -554,30 +556,36 @@ class LeaderboardServer:
                df_css.loc[i, c] = ''
        return df_css

-    def get_model_tournament_table_csv(self, submission_id, category, pre_submit=None):
+    def get_model_tournament_table_csv(self, submission_id, category, pre_submit=None, kind_of_p_value=None):
+        kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
+
        if pre_submit == None:
            with self.var_lock.ro:
-                return self.tournament_dataframes_csv[submission_id][category]
+                return self.tournament_dataframes_csv[kind_of_p_value][submission_id][category]
        else:
            return self._dataframe_to_csv(
-                self._get_model_tournament_table(submission_id, category, pre_submit=pre_submit, to_csv=True),
+                self._get_model_tournament_table(submission_id, category, pre_submit=pre_submit, to_csv=True, kind_of_p_value=kind_of_p_value),
                f"Tournament table - pre-submit - {category}.csv",
            )

-    def get_model_tournament_table(self, submission_id, category, pre_submit=None):
+    def get_model_tournament_table(self, submission_id, category, pre_submit=None, kind_of_p_value=None):
+        kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
+
        if pre_submit == None:
            with self.var_lock.ro:
-                return copy.copy(self.tournament_dataframes[submission_id][category])
+                return copy.copy(self.tournament_dataframes[kind_of_p_value][submission_id][category])
        else:
-            return self._get_model_tournament_table(submission_id, category, pre_submit=pre_submit)
+            return self._get_model_tournament_table(submission_id, category, pre_submit=pre_submit, kind_of_p_value=kind_of_p_value)

-    def _get_model_tournament_table(self, submission_id, category, pre_submit=None, to_csv=False):
+    def _get_model_tournament_table(self, submission_id, category, pre_submit=None, to_csv=False, kind_of_p_value=None):
+        kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
+
        model_tournament_table = []

        with self.var_lock.ro:
            tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results

-        for competitor_id in tournament_results[submission_id].keys() - {submission_id}:
+        for competitor_id in tournament_results[submission_id].keys() - {submission_id}:  # without self
            if competitor_id not in self.submission_id_to_data:
                if pre_submit and competitor_id == pre_submit.submission_id:
                    data = pre_submit.data

@@ -590,13 +598,14 @@ class LeaderboardServer:
            for task in self.TASKS_METADATA:
                task_category = self.TASKS_METADATA[task]["category"]
                if category in (task_category, self.TASKS_CATEGORY_OVERALL, self.TASKS_CATEGORY_OVERALL_DETAILS):
+                    match_task_result_details = dict.fromkeys(["significant", "corrected_p_value", "p_value"])  # order affects DataFrame sorting
+                    match_task_result_details.update(copy.deepcopy(tournament_results[submission_id][competitor_id][task]))
+                    match_task_result_significant = self._is_task_pval_significant(match_task_result_details[kind_of_p_value])
+
                    if to_csv:
-                        match_results[task] =
+                        match_results[task] = match_task_result_significant
                    else:
-                        match_task_result_details
-                        match_task_result_details.update(copy.deepcopy(tournament_results[submission_id][competitor_id][task]))
-                        match_task_result_details["significant"] = str(match_task_result_details["significant"]).lower()  # originally bool
-                        match_task_result_significant = match_task_result_details["significant"]
+                        match_task_result_details["significant"] = str(match_task_result_significant).lower()  # originally bool
                        match_task_result_details = "\n".join(f"{k}: {v}" for k, v in match_task_result_details.items())
                        match_results[task] = f'<abbr title={xmlQuoteAttr(match_task_result_details)}>{match_task_result_significant}</abbr>'

@@ -654,7 +663,10 @@ class LeaderboardServer:

        return True

-    def _correct_significance_in_tournament_results(self, tournament_results):
+    def _is_task_pval_significant(self, task_pval, alpha=0.05):
+        return bool(task_pval < alpha)
+
+    def _correct_significance_in_tournament_results(self, tournament_results):
        tournament_results = copy.deepcopy(tournament_results)

        if not self._is_correct_significance_in_tournament_results(tournament_results):

@@ -665,7 +677,7 @@ class LeaderboardServer:
                corrected_model_task_pvals = correct_pvals_for_fdr(model_task_pvals)
                for competitor_id, task_pval in zip(competitors, corrected_model_task_pvals):
                    tournament_results[submission_id][competitor_id][task]["corrected_p_value"] = task_pval
-                    tournament_results[submission_id][competitor_id][task]["significant"] =
+                    tournament_results[submission_id][competitor_id][task]["significant"] = self._is_task_pval_significant(task_pval)

        return tournament_results

@@ -680,17 +692,19 @@ class LeaderboardServer:
        dataframe.to_csv(filepath, index=False)
        return filepath

-    def get_leaderboard_scatter_plot(self, pre_submit=None, category=None):
+    def get_leaderboard_scatter_plot(self, pre_submit=None, category=None, kind_of_p_value=None):
        import numpy as np
        from analyze_winscore import get_ldb_records, create_scatter_plot_with_curve_with_variances_named

+        kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
+
        #m = self.TASKS_METADATA
        #tournament = self.tournament_results
        name_map = self.submission_id_to_model_title

        category = category if category else self.TASKS_CATEGORY_OVERALL

-        csv_file_path = self.leaderboard_dataframes_csv[self.TASKS_CATEGORY_OVERALL]
+        csv_file_path = self.leaderboard_dataframes_csv[kind_of_p_value][self.TASKS_CATEGORY_OVERALL]
        ldb_records = get_ldb_records(name_map, csv_file_path)
        categories = self.TASKS_CATEGORIES
        model_names = list(ldb_records.keys())

@@ -725,29 +739,32 @@ class LeaderboardServer:

        return fig

-    def get_leaderboard_csv(self, pre_submit=None, category=None):
+    def get_leaderboard_csv(self, pre_submit=None, category=None, kind_of_p_value=None):
        if pre_submit == None:
            category = category if category else self.TASKS_CATEGORY_OVERALL
+            kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
            with self.var_lock.ro:
-                return self.leaderboard_dataframes_csv[category]
+                return self.leaderboard_dataframes_csv[kind_of_p_value][category]
        else:
            return self._dataframe_to_csv(
-                self._get_leaderboard(pre_submit=pre_submit, category=category, to_csv=True),
+                self._get_leaderboard(pre_submit=pre_submit, category=category, to_csv=True, kind_of_p_value=kind_of_p_value),
                f"Leaderboard - pre-submit - {category}.csv",
            )

-    def get_leaderboard(self, pre_submit=None, category=None):
+    def get_leaderboard(self, pre_submit=None, category=None, kind_of_p_value=None):
        if pre_submit == None:
            category = category if category else self.TASKS_CATEGORY_OVERALL
+            kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE
            with self.var_lock.ro:
-                return copy.copy(self.leaderboard_dataframes[category])
+                return copy.copy(self.leaderboard_dataframes[kind_of_p_value][category])
        else:
-            return self._get_leaderboard(pre_submit=pre_submit, category=category)
+            return self._get_leaderboard(pre_submit=pre_submit, category=category, kind_of_p_value=kind_of_p_value)

-    def _get_leaderboard(self, pre_submit=None, category=None, to_csv=False):
+    def _get_leaderboard(self, pre_submit=None, category=None, to_csv=False, kind_of_p_value=None):
        with self.var_lock.ro:
            tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
            category = category if category else self.TASKS_CATEGORY_OVERALL
+            kind_of_p_value = kind_of_p_value if kind_of_p_value else self.DEFAULT_KIND_OF_P_VALUE

            if len(tournament_results) == 0:
                return pd.DataFrame(columns=['No submissions yet'])

@@ -779,8 +796,13 @@ class LeaderboardServer:
                    num_of_wins = 0
                    for competitor_id in tournament_results[submission_id].keys() - {submission_id}:  # without self
                        num_of_competitors += 1
-                        if tournament_results[submission_id][competitor_id][task]["significant"]:
+
+                        match_task_result_details = tournament_results[submission_id][competitor_id][task]
+                        match_task_result_significant = self._is_task_pval_significant(match_task_result_details[kind_of_p_value])
+
+                        if match_task_result_significant:
                            num_of_wins += 1
+
                    task_score = num_of_wins / num_of_competitors * 100 if num_of_competitors > 0 else 100
                    win_score.setdefault(task_category, []).append(task_score)

@@ -1061,7 +1083,7 @@ class LeaderboardServer:
        print(f"Locked `submit_lock` for {submission_id = }")
        print(info_msg)

-        self.update_leaderboard()
+        self.update_leaderboard()  # TODO: Add a comment explaining why this is here. Shouldn't it run only when `do_submit == True`?

        if HF_FAKE_TOURNAMENT:
            tournament_results = self.fake_tournament(submission_id, file)
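
On the server side, `_is_task_pval_significant` compares whichever p-value the caller selected against a fixed alpha of 0.05, and `correct_pvals_for_fdr` (imported elsewhere, not shown in this diff) supplies the `corrected_p_value` entries. Assuming that helper applies the Benjamini-Hochberg procedure, the standard way to obtain the False Discovery Rate guarantees the new checkbox advertises, an equivalent stand-alone sketch would look like this (illustrative only, not the project's actual implementation):

def benjamini_hochberg(p_values):
    """Return BH-adjusted p-values; an adjusted value below alpha keeps the FDR at or below alpha."""
    m = len(p_values)
    order = sorted(range(m), key=lambda i: p_values[i])  # indices sorted by ascending p-value
    adjusted = [0.0] * m
    running_min = 1.0
    # Walk from the largest p-value down, keeping the running minimum of m * p / rank.
    for offset, idx in enumerate(reversed(order)):
        rank = m - offset  # 1-based rank in ascending order
        running_min = min(running_min, p_values[idx] * m / rank)
        adjusted[idx] = running_min
    return adjusted

print(benjamini_hochberg([0.001, 0.02, 0.03, 0.8]))  # approximately [0.004, 0.04, 0.04, 0.8]

With alpha = 0.05, the first three comparisons in this example remain significant after correction; unchecking the box tests the raw p-values instead, which is the leaderboard's default behaviour (`DEFAULT_KIND_OF_P_VALUE = "p_value"`).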
|