File size: 6,528 Bytes
24d6e19 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from string import ascii_lowercase, ascii_uppercase
import tqdm
import pandas as pd
# Labels available for CLD columns, in hand-out order: 'a'..'z' then 'A'..'Z'.
CLD_ALPHABET = [*ascii_lowercase, *ascii_uppercase]
def asserts_non_significance(col: list[bool], i: int, j: int) -> bool:
    """Tell whether a column groups treatments i and j together.

    A column that has both entries set claims that the two treatments are
    *not* significantly different from each other.

    Parameters
    ----------
    col : list[bool]
        Column under inspection
    i : int
        Position of the first treatment
    j : int
        Position of the second treatment

    Returns
    -------
    bool
        Truthy when both ``col[i]`` and ``col[j]`` are set
    """
    first_entry = col[i]
    if not first_entry:
        # Same short-circuit as ``col[i] and col[j]``: col[j] is never read.
        return first_entry
    return col[j]
def insert(column: list[bool], i: int, j: int):
    """Split a column into two copies, each dropping one of the two groups.

    The input column is left untouched.

    Parameters
    ----------
    column : list[bool]
        Column to split
    i : int
        Position cleared in the first copy
    j : int
        Position cleared in the second copy

    Returns
    -------
    list[bool], list[bool]
        The copy with entry i set to False, then the copy with entry j set
        to False
    """
    without_i = column[:]
    without_j = column[:]
    without_i[i] = False
    without_j[j] = False
    return without_i, without_j
def can_be_absorbed(new_col: list[bool], ref_col: list[bool]) -> bool:
    """Check whether an existing column already covers a candidate column.

    The existing column absorbs the candidate when it is set in every row
    where the candidate is set.

    Parameters
    ----------
    new_col : list[bool]
        Candidate column
    ref_col : list[bool]
        Existing column tested as the absorber

    Returns
    -------
    bool
        True if ref_col can absorb new_col
    """
    for row, is_set in enumerate(new_col):
        if is_set and not ref_col[row]:
            return False
    return True
def absorb(new_column: list[bool], columns: list[list[bool]]) -> list[list[bool]]:
    """Add a column to the solution unless an existing column covers it.

    Parameters
    ----------
    new_column : list[bool]
        Candidate column
    columns : list[list[bool]]
        Columns currently in the solution

    Returns
    -------
    list[list[bool]]
        ``columns`` itself when the candidate is absorbed, otherwise a new
        list with the candidate appended at the end
    """
    for existing in columns:
        if can_be_absorbed(new_column, existing):
            return columns
    return columns + [new_column]
def cld(comparisons: pd.DataFrame) -> dict[str, list[str]]:
    """
    Compact Letter Display

    Compute the compact letter display using the insert-absorb algorithm.
    See the following papers for more information:
    (1) https://doi.org/10.1016/j.csda.2006.09.035
    (2) https://doi.org/10.1198/1061860043515

    Parameters
    ----------
    comparisons : pd.DataFrame
        A DataFrame containing the pairwise comparisons produced by:
        https://www.statsmodels.org/dev/generated/statsmodels.stats.multicomp.pairwise_tukeyhsd.html
        The columns "group1", "group2" and "reject" are read.

    Returns
    -------
    dict[str, list[str]]
        For every group found in "group1"/"group2", the sorted list of
        letters assigned to it. Groups are keyed in order of first
        appearance (all of "group1", then "group2").

    Raises
    ------
    IndexError
        If the solution needs more columns than there are entries in
        CLD_ALPHABET.
    """
    # dict.fromkeys de-duplicates while keeping first-appearance order, so the
    # index given to each group (and the key order of the result) is the same
    # on every run; iterating a set of strings gives no such guarantee.
    unique_groups = list(
        dict.fromkeys(
            [*comparisons["group1"].unique(), *comparisons["group2"].unique()]
        )
    )
    unique_groups_indices = {g: i for i, g in enumerate(unique_groups)}

    sig_diff = comparisons[comparisons["reject"]]
    print(f"Found {len(sig_diff)} significantly different pairs")

    # Initialize CLD matrix for all unique groups/models, with "columns" as rows.
    # A single all-True column means "nothing differs from anything".
    solution = [[True] * len(unique_groups)]

    significant_pairs = zip(sig_diff["group1"], sig_diff["group2"])
    for group1, group2 in tqdm.tqdm(significant_pairs, total=len(sig_diff)):
        i = unique_groups_indices[group1]
        j = unique_groups_indices[group2]

        # Keep rescanning until no column claims that i and j are alike.
        has_changed = True
        while has_changed:
            has_changed = False
            for idx, column in enumerate(solution):
                if asserts_non_significance(column, i, j):
                    # Duplicate the column, dropping i from one copy and j
                    # from the other
                    col_i, col_j = insert(column, i, j)
                    # Remove the old column
                    solution.pop(idx)
                    # Try to absorb each copy into an old column;
                    # simply add it to the solution otherwise
                    solution = absorb(col_i, solution)
                    solution = absorb(col_j, solution)
                    has_changed = True
                    # The list was modified: restart the scan from the top.
                    break

    if len(solution) > len(CLD_ALPHABET):
        raise IndexError(
            f"CLD needs {len(solution)} letters "
            f"but only {len(CLD_ALPHABET)} are available"
        )

    # Assign letters: one letter per column, given to every group set in it
    letters = [""] * len(unique_groups)
    for letter, col in zip(CLD_ALPHABET, solution):
        for idx, has_letter in enumerate(col):
            if has_letter:
                letters[idx] += letter

    return {group: sorted(letter) for group, letter in zip(unique_groups, letters)}
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import tqdm
def add_cld_to_leaderboard(
    leaderboard: pd.DataFrame,
    scores: pd.DataFrame,
    metric: str,
):
    """Add the compact letter display to the leaderboard.

    Parameters
    ----------
    leaderboard : pd.DataFrame
        The full leaderboard DataFrame. Its "user" column gives the methods
        in display order.
    scores : pd.DataFrame
        The **raw** scores DataFrame, with all replicates from bootstrapping.
        The columns "Sample", "user" and ``metric`` are selected from it.
    metric : str
        The metric label to calculate CLD for.

    Returns
    -------
    pd.DataFrame
        The same ``leaderboard`` object, modified in place with a new "CLD"
        column.
    """
    ordered_methods = leaderboard["user"].values

    # Take an explicit copy of the selection so the dtype conversion below
    # does not trigger pandas' SettingWithCopyWarning.
    scores = scores[["Sample", "user", metric]].copy()
    scores[metric] = scores[metric].astype(float)

    # We compared methods using bootstrapping and the Tukey HSD test, presenting results via Compact Letter Display (CLD).
    # While acknowledging that bootstrapping likely underestimates variance,
    # we are not aware of better sampling techniques that fit the challenge format.
    stats = pairwise_tukeyhsd(endog=scores[metric], groups=scores["user"])

    # stats.summary_frame() would be the direct route, but the version of
    # statsmodels is for some reason not the latest, so the frame is rebuilt
    # from the summary table instead.
    summary_table = stats.summary()
    # data attribute is a list of lists with column names as first element
    data = summary_table.data[1:]
    columns = summary_table.data[0]
    comparisons = pd.DataFrame(data=data, columns=columns)

    letter_code = cld(comparisons)

    # Re-label the letters in order of first appearance down the leaderboard,
    # so the first method listed starts with CLD_ALPHABET[0].
    letter_mapping = {}
    cld_column = [""] * len(leaderboard)
    for idx, method in enumerate(ordered_methods):
        try:
            letters = letter_code[str(method)]
        except KeyError:  # Error with CLD for openadmet-dummy
            # Method absent from the comparisons: mark it instead of failing.
            cld_column[idx] = "None"
            continue
        for letter in letters:
            if letter not in letter_mapping:
                letter_mapping[letter] = CLD_ALPHABET[len(letter_mapping)]
            cld_column[idx] += letter_mapping[letter]

    leaderboard["CLD"] = cld_column
    return leaderboard