Spaces:
Running
Running
Optimization of the significance correction
Browse files
server.py
CHANGED
|
@@ -314,6 +314,7 @@ class LeaderboardServer:
|
|
| 314 |
self.fetch_existing_models()
|
| 315 |
|
| 316 |
tournament_results = self.load_tournament_results()
|
|
|
|
| 317 |
with self.var_lock.rw:
|
| 318 |
self.tournament_results = tournament_results
|
| 319 |
|
|
@@ -519,6 +520,9 @@ class LeaderboardServer:
|
|
| 519 |
renew_tournament_ended_time_elapsed = renew_tournament_ended_datetime - renew_tournament_began_datetime
|
| 520 |
print(f"Time elapsed: {renew_tournament_ended_time_elapsed}")
|
| 521 |
|
|
|
|
|
|
|
|
|
|
| 522 |
gr.Info('Uploading tournament results...', duration=5)
|
| 523 |
if self.tournament_results:
|
| 524 |
self._upload_tournament_results(self.tournament_results)
|
|
@@ -569,7 +573,6 @@ class LeaderboardServer:
|
|
| 569 |
|
| 570 |
with self.var_lock.ro:
|
| 571 |
tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
|
| 572 |
-
tournament_results = self._correct_significance_in_tournament_results(tournament_results)
|
| 573 |
|
| 574 |
for competitor_id in tournament_results[submission_id].keys() - {submission_id}: # without self
|
| 575 |
if competitor_id not in self.submission_id_to_data:
|
|
@@ -638,17 +641,28 @@ class LeaderboardServer:
|
|
| 638 |
dataframe = dataframe.style.apply(self._model_tournament_table_highlight_true_and_false, axis=None)
|
| 639 |
return dataframe
|
| 640 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 641 |
def _correct_significance_in_tournament_results(self, tournament_results, alpha=0.05):
|
| 642 |
tournament_results = copy.deepcopy(tournament_results)
|
| 643 |
|
| 644 |
-
|
| 645 |
-
for
|
| 646 |
competitors = [competitor_id for competitor_id in tournament_results[submission_id].keys() - {submission_id}] # without self
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
|
|
|
| 652 |
|
| 653 |
return tournament_results
|
| 654 |
|
|
@@ -730,7 +744,6 @@ class LeaderboardServer:
|
|
| 730 |
def _get_leaderboard(self, pre_submit=None, category=None, to_csv=False):
|
| 731 |
with self.var_lock.ro:
|
| 732 |
tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
|
| 733 |
-
tournament_results = self._correct_significance_in_tournament_results(tournament_results)
|
| 734 |
category = category if category else self.TASKS_CATEGORY_OVERALL
|
| 735 |
|
| 736 |
if len(tournament_results) == 0:
|
|
@@ -1051,6 +1064,7 @@ class LeaderboardServer:
|
|
| 1051 |
tournament_results = self.fake_tournament(submission_id, file)
|
| 1052 |
else:
|
| 1053 |
tournament_results = self.start_tournament(submission_id, file)
|
|
|
|
| 1054 |
|
| 1055 |
pre_submit = self.PreSubmit(
|
| 1056 |
tournament_results,
|
|
|
|
| 314 |
self.fetch_existing_models()
|
| 315 |
|
| 316 |
tournament_results = self.load_tournament_results()
|
| 317 |
+
tournament_results = self._correct_significance_in_tournament_results(tournament_results)
|
| 318 |
with self.var_lock.rw:
|
| 319 |
self.tournament_results = tournament_results
|
| 320 |
|
|
|
|
| 520 |
renew_tournament_ended_time_elapsed = renew_tournament_ended_datetime - renew_tournament_began_datetime
|
| 521 |
print(f"Time elapsed: {renew_tournament_ended_time_elapsed}")
|
| 522 |
|
| 523 |
+
gr.Info('Correcting significance in tournament results...', duration=5)
|
| 524 |
+
tournament_results = self._correct_significance_in_tournament_results(tournament_results)
|
| 525 |
+
|
| 526 |
gr.Info('Uploading tournament results...', duration=5)
|
| 527 |
if self.tournament_results:
|
| 528 |
self._upload_tournament_results(self.tournament_results)
|
|
|
|
| 573 |
|
| 574 |
with self.var_lock.ro:
|
| 575 |
tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
|
|
|
|
| 576 |
|
| 577 |
for competitor_id in tournament_results[submission_id].keys() - {submission_id}: # without self
|
| 578 |
if competitor_id not in self.submission_id_to_data:
|
|
|
|
| 641 |
dataframe = dataframe.style.apply(self._model_tournament_table_highlight_true_and_false, axis=None)
|
| 642 |
return dataframe
|
| 643 |
|
| 644 |
+
def _is_correct_significance_in_tournament_results(self, tournament_results):
|
| 645 |
+
for submission_id in tournament_results:
|
| 646 |
+
competitors = [competitor_id for competitor_id in tournament_results[submission_id].keys() - {submission_id}] # without self
|
| 647 |
+
for task in self.TASKS_METADATA:
|
| 648 |
+
for competitor_id in competitors:
|
| 649 |
+
if "corrected_p_value" not in tournament_results[submission_id][competitor_id][task]:
|
| 650 |
+
return False
|
| 651 |
+
|
| 652 |
+
return True
|
| 653 |
+
|
| 654 |
def _correct_significance_in_tournament_results(self, tournament_results, alpha=0.05):
|
| 655 |
tournament_results = copy.deepcopy(tournament_results)
|
| 656 |
|
| 657 |
+
if not self._is_correct_significance_in_tournament_results(tournament_results):
|
| 658 |
+
for submission_id in tqdm.tqdm(tournament_results):
|
| 659 |
competitors = [competitor_id for competitor_id in tournament_results[submission_id].keys() - {submission_id}] # without self
|
| 660 |
+
for task in self.TASKS_METADATA:
|
| 661 |
+
model_task_pvals = [tournament_results[submission_id][competitor_id][task]["p_value"] for competitor_id in competitors]
|
| 662 |
+
corrected_model_task_pvals = correct_pvals_for_fdr(model_task_pvals)
|
| 663 |
+
for competitor_id, task_pval in zip(competitors, corrected_model_task_pvals):
|
| 664 |
+
tournament_results[submission_id][competitor_id][task]["corrected_p_value"] = task_pval
|
| 665 |
+
tournament_results[submission_id][competitor_id][task]["significant"] = bool(task_pval < alpha)
|
| 666 |
|
| 667 |
return tournament_results
|
| 668 |
|
|
|
|
| 744 |
def _get_leaderboard(self, pre_submit=None, category=None, to_csv=False):
|
| 745 |
with self.var_lock.ro:
|
| 746 |
tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
|
|
|
|
| 747 |
category = category if category else self.TASKS_CATEGORY_OVERALL
|
| 748 |
|
| 749 |
if len(tournament_results) == 0:
|
|
|
|
| 1064 |
tournament_results = self.fake_tournament(submission_id, file)
|
| 1065 |
else:
|
| 1066 |
tournament_results = self.start_tournament(submission_id, file)
|
| 1067 |
+
tournament_results = self._correct_significance_in_tournament_results(tournament_results)
|
| 1068 |
|
| 1069 |
pre_submit = self.PreSubmit(
|
| 1070 |
tournament_results,
|