|
|
from statsmodels.stats.multicomp import pairwise_tukeyhsd |
|
|
from string import ascii_lowercase, ascii_uppercase |
|
|
import tqdm |
|
|
import pandas as pd |
|
|
|
|
|
# Letters available for labelling CLD columns: all lowercase letters first,
# followed by all uppercase letters.
CLD_ALPHABET = [*ascii_lowercase, *ascii_uppercase]
|
|
|
|
|
def asserts_non_significance(col: list[bool], i: int, j: int) -> bool:
    """Tell whether a column represents treatments i and j as non-significant

    The column does so when both treatments belong to it, i.e. when the
    entries at positions ``i`` and ``j`` are both set.

    Parameters
    ----------
    col : list[bool]
        current column
    i : int
        index of first treatment
    j : int
        index of second treatment

    Returns
    -------
    bool
        True only if both ``col[i]`` and ``col[j]`` are set
    """
    first = col[i]
    if not first:
        # The first treatment is not in this column, so the pair cannot be
        # represented as non-significant by it.
        return first
    return col[j]
|
|
|
|
|
def insert(column: list[bool], i: int, j: int):
    """Split a column into two copies, each dropping one of the two groups

    The first copy has entry ``i`` cleared, the second copy has entry ``j``
    cleared. The column passed in is left untouched.

    Parameters
    ----------
    column : list[bool]
        Original column
    i : int
        Index of first group
    j : int
        Index of second group

    Returns
    -------
    list[bool], list[bool]
        The copy without group ``i`` followed by the copy without group ``j``
    """
    without_i, without_j = list(column), list(column)
    without_i[i] = False
    without_j[j] = False
    return without_i, without_j
|
|
|
|
|
def can_be_absorbed(new_col: list[bool], ref_col: list[bool]) -> bool:
    """Check whether an existing column already covers a new column

    The existing column absorbs the new one when it is set in every row in
    which the new column is set.

    Parameters
    ----------
    new_col : list[bool]
        Column to add
    ref_col : list[bool]
        Old column that is tested for being able to absorb new_col

    Returns
    -------
    bool
        Whether the old column can absorb new_col
    """
    for row, is_set in enumerate(new_col):
        # A single row set in new_col but missing from ref_col rules
        # absorption out.
        if is_set and not ref_col[row]:
            return False
    return True
|
|
|
|
|
def absorb(new_column: list[bool], columns: list[list[bool]]) -> list[list[bool]]:
    """Add a new column unless one of the existing columns absorbs it

    Parameters
    ----------
    new_column : list[bool]
        Column to add
    columns : list[list[bool]]
        existing columns

    Returns
    -------
    list[list[bool]]
        ``columns`` itself when some existing column absorbs ``new_column``,
        otherwise a new list with ``new_column`` appended at the end
    """
    for existing in columns:
        if can_be_absorbed(new_column, existing):
            # Already covered by an existing column: nothing to add.
            return columns
    return [*columns, new_column]
|
|
|
|
|
def cld(comparisons: pd.DataFrame) -> dict[str, list[str]]:
    """
    Compact Letter Display

    Compute the compact letter display using the insert-absorb algorithm.

    See the following papers for more information:
    (1) https://doi.org/10.1016/j.csda.2006.09.035
    (2) https://doi.org/10.1198/1061860043515

    Parameters
    ----------
    comparisons : pd.DataFrame
        A DataFrame containing the pairwise comparisons produced by:
        https://www.statsmodels.org/dev/generated/statsmodels.stats.multicomp.pairwise_tukeyhsd.html
        The columns ``group1``, ``group2`` and ``reject`` are read.

    Returns
    -------
    dict[str, list[str]]
        Maps every group found in ``group1`` / ``group2`` to the sorted list
        of letters assigned to it. Keys appear in order of first occurrence
        (all of ``group1`` first, then ``group2``).

    Raises
    ------
    IndexError
        If the display needs more letters than ``CLD_ALPHABET`` provides.
    """
    # Collect the groups in order of first appearance so the key order of the
    # result is reproducible; iterating a set gives no such guarantee.
    all_groups = pd.concat(
        [comparisons["group1"], comparisons["group2"]], ignore_index=True
    )
    unique_groups = list(pd.unique(all_groups))
    unique_groups_indices = {g: i for i, g in enumerate(unique_groups)}

    sig_diff = comparisons[comparisons["reject"]]
    print(f"Found {len(sig_diff)} significantly different pairs")

    # Start with a single column that contains every group.
    solution = [[True] * len(unique_groups)]

    for _, row in tqdm.tqdm(sig_diff.iterrows(), total=len(sig_diff)):
        i = unique_groups_indices[row["group1"]]
        j = unique_groups_indices[row["group2"]]

        # Keep rescanning until no column contains both i and j any more.
        has_changed = True
        while has_changed:
            has_changed = False

            for idx, column in enumerate(solution):
                if asserts_non_significance(column, i, j):
                    # Insert: replace the offending column by two copies,
                    # one without i and one without j.
                    col_i, col_j = insert(column, i, j)
                    solution.pop(idx)

                    # Absorb: only keep a copy if no other column covers it.
                    solution = absorb(col_i, solution)
                    solution = absorb(col_j, solution)

                    # The list was modified, so restart the scan instead of
                    # continuing to iterate over it.
                    has_changed = True
                    break

    if len(solution) > len(CLD_ALPHABET):
        raise IndexError(
            f"Compact letter display needs {len(solution)} letters but "
            f"CLD_ALPHABET only provides {len(CLD_ALPHABET)}"
        )

    # One letter per column; every group collects the letters of the columns
    # it belongs to.
    letters = [[] for _ in unique_groups]
    for letter, col in zip(CLD_ALPHABET, solution):
        for idx, has_letter in enumerate(col):
            if has_letter:
                letters[idx].append(letter)

    return {group: sorted(assigned) for group, assigned in zip(unique_groups, letters)}
|
|
|
|
|
from statsmodels.stats.multicomp import pairwise_tukeyhsd |
|
|
import tqdm |
|
|
|
|
|
def add_cld_to_leaderboard(
    leaderboard: pd.DataFrame,
    scores: pd.DataFrame,
    metric: str,
) -> pd.DataFrame:
    """Add the compact letter display to the leaderboard.

    Runs ``pairwise_tukeyhsd`` on the raw scores grouped by ``user``, derives
    the compact letter display from the resulting comparisons and stores it
    in a new ``CLD`` column of the leaderboard. Letters are relabelled so
    that they are handed out in leaderboard order.

    Parameters
    ----------
    leaderboard : pd.DataFrame
        The full leaderboard DataFrame. Its ``user`` column is read and a
        ``CLD`` column is written to it in place.
    scores : pd.DataFrame
        The **raw** scores DataFrame, with all replicates from bootstrapping.
        Must contain the columns ``Sample``, ``user`` and ``metric``.
    metric : str
        The metric label to calculate CLD for.

    Returns
    -------
    pd.DataFrame
        The ``leaderboard`` object that was passed in, with the ``CLD``
        column set. Users without an entry in the letter display get the
        string ``"None"``.
    """
    ordered_methods = leaderboard["user"].values

    # Work on an explicit copy: assigning into a column-subset slice is
    # chained assignment and triggers pandas' SettingWithCopyWarning.
    scores = scores[["Sample", "user", metric]].copy()
    scores[metric] = scores[metric].astype(float)

    stats = pairwise_tukeyhsd(endog=scores[metric], groups=scores["user"])

    summary_table = stats.summary()

    # The first row of the summary data is used as the header row.
    data = summary_table.data[1:]
    columns = summary_table.data[0]
    comparisons = pd.DataFrame(data=data, columns=columns)

    letter_code = cld(comparisons)

    # Maps the letters produced by cld() to new letters, assigned in the
    # order in which they are first met while walking down the leaderboard.
    letter_mapping = {}

    cld_column = []
    for method in ordered_methods:
        try:
            letters = letter_code[str(method)]
        except KeyError:
            # This user has no entry in the letter display.
            cld_column.append("None")
            continue

        relabelled = []
        for letter in letters:
            if letter not in letter_mapping:
                letter_mapping[letter] = CLD_ALPHABET[len(letter_mapping)]
            relabelled.append(letter_mapping[letter])
        cld_column.append("".join(relabelled))

    leaderboard["CLD"] = cld_column

    return leaderboard