# Maria Castellanos - "Add code for CLD" (commit 24d6e19)
# Repository web-view residue kept for provenance: raw / history blame / 6.53 kB
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from string import ascii_lowercase, ascii_uppercase
import tqdm
import pandas as pd
# Letters available for the compact letter display: the 26 lowercase ASCII
# letters followed by the 26 uppercase ones, one single-character label each.
CLD_ALPHABET = list(ascii_lowercase + ascii_uppercase)
def asserts_non_significance(col: list[bool], i: int, j: int) -> bool:
    """Tell whether a column marks treatments i and j as non-significant.

    A column groups the treatments whose entries are set. When both entry
    ``i`` and entry ``j`` are set, the column puts the two treatments in the
    same group, i.e. it states that they are not significantly different.

    Parameters
    ----------
    col : list[bool]
        Column to inspect, one entry per treatment
    i : int
        Index of the first treatment
    j : int
        Index of the second treatment

    Returns
    -------
    bool
        True when both entries are set in the column
    """
    first_entry = col[i]
    if not first_entry:
        # Treatment i is not in this column, so the pair cannot share it
        return first_entry
    return col[j]
def insert(column: list[bool], i: int, j: int) -> tuple[list[bool], list[bool]]:
    """Split a column into two copies that separate treatments i and j.

    The input column is left untouched. The first copy has entry ``i``
    cleared and the second copy has entry ``j`` cleared, so neither copy
    contains both treatments any more.

    Parameters
    ----------
    column : list[bool]
        Column to duplicate
    i : int
        Index of the first group
    j : int
        Index of the second group

    Returns
    -------
    list[bool], list[bool]
        The copy without ``i`` followed by the copy without ``j``
    """
    without_i, without_j = column.copy(), column.copy()
    without_i[i] = False
    without_j[j] = False
    return without_i, without_j
def can_be_absorbed(new_col: list[bool], ref_col: list[bool]) -> bool:
    """Check whether an existing column already covers a new column.

    The existing column absorbs the new one when every row that is set in
    the new column is also set in the existing column.

    Parameters
    ----------
    new_col : list[bool]
        Column that is about to be added
    ref_col : list[bool]
        Existing column that may make ``new_col`` redundant

    Returns
    -------
    bool
        Whether ``ref_col`` can absorb ``new_col``
    """
    for row, is_set in enumerate(new_col):
        # A single row present in the new column but missing from the
        # existing one is enough to rule out absorption.
        if is_set and not ref_col[row]:
            return False
    return True
def absorb(new_column: list[bool], columns: list[list[bool]]) -> list[list[bool]]:
    """Add a column to the solution unless an existing column absorbs it.

    Parameters
    ----------
    new_column : list[bool]
        Column to add
    columns : list[list[bool]]
        Columns already in the solution

    Returns
    -------
    list[list[bool]]
        ``columns`` itself when some existing column absorbs ``new_column``,
        otherwise a new list with ``new_column`` appended at the end
    """
    for existing in columns:
        if can_be_absorbed(new_column, existing):
            # Redundant column: the solution is returned unchanged
            return columns
    return columns + [new_column]
def cld(comparisons: pd.DataFrame) -> dict[str, list[str]]:
    """Compute the Compact Letter Display with the insert-absorb algorithm.

    Every significantly different pair is processed in turn: any column that
    still contains both members of the pair is split in two, so that in the
    final display such a pair never shares a letter.

    See the following papers for more information:
    (1) https://doi.org/10.1016/j.csda.2006.09.035
    (2) https://doi.org/10.1198/1061860043515

    Parameters
    ----------
    comparisons : pd.DataFrame
        A DataFrame with ``group1``, ``group2`` and ``reject`` columns
        containing the pairwise comparisons produced by:
        https://www.statsmodels.org/dev/generated/statsmodels.stats.multicomp.pairwise_tukeyhsd.html

    Returns
    -------
    dict[str, list[str]]
        Maps every group to the sorted list of letters assigned to it.
        Groups are listed in order of first appearance in ``group1`` and
        then ``group2``.

    Raises
    ------
    IndexError
        If the display needs more letters than ``CLD_ALPHABET`` provides.
    """
    # Series.unique() keeps the order of first appearance, so (unlike a set)
    # the group -> index assignment is the same on every run.
    unique_groups = list(
        pd.concat([comparisons["group1"], comparisons["group2"]]).unique()
    )
    unique_groups_indices = {g: i for i, g in enumerate(unique_groups)}

    sig_diff = comparisons[comparisons["reject"]]
    print(f"Found {len(sig_diff)} significantly different pairs")

    # CLD matrix stored column by column: each inner list is one letter and
    # holds one flag per group. Start with a single letter shared by everyone.
    solution = [[True] * len(unique_groups)]

    pairs = zip(sig_diff["group1"], sig_diff["group2"])
    for group_a, group_b in tqdm.tqdm(pairs, total=len(sig_diff)):
        i = unique_groups_indices[group_a]
        j = unique_groups_indices[group_b]

        # Keep splitting until no column contains both i and j. The scan
        # restarts from the first column after every split because the
        # solution list is modified.
        while True:
            offending = next(
                (
                    idx
                    for idx, col in enumerate(solution)
                    if asserts_non_significance(col, i, j)
                ),
                None,
            )
            if offending is None:
                break

            # Remove the offending column and replace it by its two halves,
            # each kept only if no remaining column already covers it.
            col_i, col_j = insert(solution.pop(offending), i, j)
            solution = absorb(col_i, solution)
            solution = absorb(col_j, solution)

    if len(solution) > len(CLD_ALPHABET):
        raise IndexError(
            f"The compact letter display needs {len(solution)} letters "
            f"but only {len(CLD_ALPHABET)} are available"
        )

    # One letter per column; a group receives the letter of every column in
    # which its flag is set.
    return {
        group: sorted(
            CLD_ALPHABET[ci] for ci, col in enumerate(solution) if col[idx]
        )
        for idx, group in enumerate(unique_groups)
    }
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import tqdm
def add_cld_to_leaderboard(
    leaderboard: pd.DataFrame,
    scores: pd.DataFrame,
    metric: str,
) -> pd.DataFrame:
    """Add the compact letter display to the leaderboard.

    Parameters
    ----------
    leaderboard : pd.DataFrame
        The full leaderboard DataFrame. Must contain a ``user`` column.
        It is modified in place: a ``CLD`` column is added.
    scores : pd.DataFrame
        The **raw** scores DataFrame, with all replicates from bootstrapping.
        Must contain the ``Sample`` and ``user`` columns and the ``metric``
        column. It is not modified.
    metric : str
        The metric label to calculate CLD for.

    Returns
    -------
    pd.DataFrame
        The same ``leaderboard`` object, with the ``CLD`` column filled in.
        Methods without a CLD entry get the string ``"None"``.
    """
    ordered_methods = leaderboard["user"].values

    # Work on an explicit copy so the float cast below neither touches the
    # caller's frame nor triggers a chained-assignment warning.
    scores = scores[["Sample", "user", metric]].copy()
    scores[metric] = scores[metric].astype(float)

    # We compared methods using bootstrapping and the Tukey HSD test, presenting results via Compact Letter Display (CLD).
    # While acknowledging that bootstrapping likely underestimates variance,
    # we are not aware of better sampling techniques that fit the challenge format.
    stats = pairwise_tukeyhsd(endog=scores[metric], groups=scores["user"])

    # The installed statsmodels version does not offer ``summary_frame()``,
    # so the comparison frame is rebuilt from the summary table instead.
    # Its data attribute is a list of lists with the column names first.
    table_rows = stats.summary().data
    comparisons = pd.DataFrame(data=table_rows[1:], columns=table_rows[0])

    letter_code = cld(comparisons)

    # Relabel the letters in order of first use while walking the leaderboard
    # from top to bottom, so the first method listed starts with "a".
    letter_mapping: dict[str, str] = {}
    cld_column = []
    for method in ordered_methods:
        letters = letter_code.get(str(method))
        if letters is None:
            # No CLD entry for this method (seen for openadmet-dummy)
            cld_column.append("None")
            continue

        relabelled = []
        for letter in letters:
            if letter not in letter_mapping:
                letter_mapping[letter] = CLD_ALPHABET[len(letter_mapping)]
            relabelled.append(letter_mapping[letter])
        cld_column.append("".join(relabelled))

    leaderboard["CLD"] = cld_column
    return leaderboard