Orion Weller committed
Commit · a09b56d · 1 Parent(s): 56649db

saliency maps
Files changed:
- .gitignore +3 -1
- analysis.py +93 -1
- app.py +88 -11
- dataset_loading.py +11 -2
- requirements.txt +3 -1
.gitignore CHANGED
@@ -1,3 +1,5 @@
 datasets/
 __pycache__/
-env/
+env/
+.ipynb_checkpoints/
+*.ipynb
analysis.py CHANGED
@@ -1,8 +1,21 @@
 import pandas as pd
 import numpy as np
+import os
+import torch
+from transformers import pipeline
+import streamlit as st
+
 import plotly.express as px
 import plotly.figure_factory as ff
 
+from captum.attr import LayerIntegratedGradients, TokenReferenceBase, visualization
+from captum.attr import visualization as viz
+from captum import attr
+from captum.attr._utils.visualization import format_word_importances, format_special_tokens, _get_color
+
+
+os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
+
 
 def results_to_df(results: dict, metric_name: str):
     metric_scores = []
@@ -38,4 +51,83 @@ def create_boxplot_diff(results1, results2, metric_name):
 
     x_axis = f"Difference in {metric_name} from 1 to 2"
     fig = px.histogram(pd.DataFrame({x_axis: diff}), x=x_axis, marginal="box")
-    return fig
+    return fig
+
+
+def summarize_attributions(attributions):
+    attributions = attributions.sum(dim=-1).squeeze(0)
+    attributions = attributions / torch.norm(attributions)
+    return attributions
+
+
+def get_words(words, importances):
+    words_colored = []
+    for word, importance in zip(words, importances[: len(words)]):
+        word = format_special_tokens(word)
+        color = _get_color(importance)
+        unwrapped_tag = '<span style="background-color: {color}; opacity:1.0; line-height:1.75">{word}</span>'.format(
+            color=color, word=word
+        )
+        words_colored.append(unwrapped_tag)
+    return words_colored
+
+@st.cache_resource
+def get_model(model_name: str):
+    if model_name == "MonoT5":
+        pipe = pipeline('text2text-generation',
+                        model='castorini/monot5-small-msmarco-10k',
+                        tokenizer='castorini/monot5-small-msmarco-10k',
+                        device='cpu')
+        def formatter(query, doc):
+            return f"Query: {query} Document: {doc} Relevant:"
+
+    return pipe, formatter
+
+def prep_func(pipe, formatter):
+    # variables that only need to be run once
+    decoder_input_ids = pipe.tokenizer(["<pad>"], return_tensors="pt", add_special_tokens=False, truncation=True).input_ids.to('cpu')
+    decoder_embedding_layer = pipe.model.base_model.decoder.embed_tokens
+    decoder_inputs_emb = decoder_embedding_layer(decoder_input_ids)
+
+    token_false_id = pipe.tokenizer.get_vocab()['▁false']
+    token_true_id = pipe.tokenizer.get_vocab()["▁true"]
+
+    # this function needs to be run for each combination
+    @st.cache_data
+    def get_saliency(query, doc):
+        input_ids = pipe.tokenizer(
+            [formatter(query, doc)],
+            padding=False,
+            truncation=True,
+            return_tensors="pt",
+            max_length=pipe.tokenizer.model_max_length,
+        )["input_ids"].to('cpu')
+
+        embedding_layer = pipe.model.base_model.encoder.embed_tokens
+        inputs_emb = embedding_layer(input_ids)
+
+        def forward_from_embeddings(inputs_embeds, decoder_inputs_embeds):
+            logits = pipe.model.forward(inputs_embeds=inputs_embeds, decoder_inputs_embeds=decoder_inputs_embeds)['logits'][:, -1, :]
+            batch_scores = logits[:, [token_false_id, token_true_id]]
+            batch_scores = torch.nn.functional.log_softmax(batch_scores, dim=1)
+            scores = batch_scores[:, 1].exp()  # relevant token
+            return scores
+
+        lig = attr.Saliency(forward_from_embeddings)
+        attributions_ig, delta = lig.attribute(
+            inputs=(inputs_emb, decoder_inputs_emb)
+        )
+        attributions_normed = summarize_attributions(attributions_ig)
+        return "\n".join(get_words(pipe.tokenizer.convert_ids_to_tokens(input_ids.squeeze(0).tolist()), attributions_normed))
+
+    return get_saliency
+
+
+if __name__ == "__main__":
+    query = "how to add dll to visual studio?"
+    doc = "StackOverflow In the days of 16-bit Windows, a WPARAM was a 16-bit word, while LPARAM was a 32-bit long. These distinctions went away in Win32; they both became 32-bit values. ... WPARAM is defined as UINT_PTR , which in 64-bit Windows is an unsigned, 64-bit value."
+    model, formatter = get_model("MonoT5")
+    get_saliency = prep_func(model, formatter)
+    print(get_saliency(query, doc))
+
+
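Taken together, the new analysis.py helpers form a small saliency pipeline: get_model loads the MonoT5 reranker as a Hugging Face text2text pipeline, prep_func binds it into a cached get_saliency(query, doc) closure that attributes the probability of the "true" (relevant) token back to the encoder input embeddings using Captum's Saliency (plain input gradients, despite the lig/attributions_ig variable names), and get_words renders the per-token scores as colored <span> tags. A minimal usage sketch mirroring the __main__ block above (the query and document strings here are placeholders, and outside a running Streamlit app the st.cache decorators simply execute uncached):

# Sketch only: assumes analysis.py from this commit is importable.
from analysis import get_model, prep_func

pipe, formatter = get_model("MonoT5")       # pipeline + "Query: ... Document: ... Relevant:" prompt
get_saliency = prep_func(pipe, formatter)   # closure over the loaded model and token ids

html = get_saliency("what is a wparam?", "WPARAM is defined as UINT_PTR, an unsigned 64-bit value on 64-bit Windows.")
print(html)  # newline-joined <span> tags, one per input token, colored by saliency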
app.py CHANGED
@@ -13,9 +13,10 @@ import plotly.express as px
 
 from constants import ALL_DATASETS, ALL_METRICS
 from dataset_loading import get_dataset, load_run, load_local_qrels, load_local_corpus, load_local_queries
-from analysis import create_boxplot_1df, create_boxplot_2df, create_boxplot_diff
+from analysis import create_boxplot_1df, create_boxplot_2df, create_boxplot_diff, get_model, prep_func
 
 
+os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
 st.set_page_config(layout="wide")
 
 
@@ -41,6 +42,7 @@ def check_valid_args(run1_file, run2_file, dataset_name, qrels, queries, corpus)
         return True
     return False
 
+
 def validate(config_option, file_loaded):
     if config_option != "None" and file_loaded is None:
         st.error("Please upload a file for " + config_option)
@@ -90,6 +92,14 @@ with st.sidebar:
     incorrect_only = st.checkbox("Show only incorrect instances", value=False)
     one_better_than_two = st.checkbox("Show only instances where run 1 is better than run 2", value=False)
     two_better_than_one = st.checkbox("Show only instances where run 2 is better than run 1", value=False)
+    use_model_saliency = st.checkbox("Use model saliency (slow!)", value=False)
+    if use_model_saliency:
+        # choose from a list of models
+        model_name = st.selectbox("Choose from a list of models", ["MonoT5"])
+        model, formatter = get_model("MonoT5")
+        get_saliency = prep_func(model, formatter)
+
+
     advanced_options1 = st.checkbox("Show advanced options for Run 1", value=False)
     doc_expansion1 = doc_expansion2 = None
     query_expansion1 = query_expansion2 = None
@@ -307,9 +317,16 @@ if check_valid_args(run1_file, run2_file, dataset_name, qrels, queries, corpus):
             if doc_expansion1 is not None and run1_uses_doc_expansion != "None" and not show_orig_rel:
                 alt_text = doc_expansion1[docid]["text"]
                 text = combine(text, alt_text, run1_uses_doc_expansion)
-            st.text_area(f"{docid}:", text)
 
-
+            if use_model_saliency:
+                if st.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency", value=False):
+                    st.markdown(get_saliency(query_text, doc_texts),unsafe_allow_html=True)
+                else:
+                    st.text_area(f"{docid}:", text)
+
+            else:
+                st.text_area(f"{docid}:", text)
+
 
             pred_doc = run1_pandas[run1_pandas.doc_id.isin(relevant_docs)]
             rank_pred = pred_doc[pred_doc.qid == str(inst_num)]["rank"].tolist()
@@ -320,6 +337,7 @@ if check_valid_args(run1_file, run2_file, dataset_name, qrels, queries, corpus):
                 ranking_str = "--"
             rank_col.metric(f"Rank of Relevant Doc(s)", ranking_str)
 
+
             st.divider()
 
             # top ranked
@@ -336,10 +354,22 @@ if check_valid_args(run1_file, run2_file, dataset_name, qrels, queries, corpus):
                 for d_idx, doc in enumerate(run1_top_n_docs):
                     alt_text = run1_top_n_docs_alt[d_idx]["text"]
                     doc_text = combine(doc["text"], alt_text, run1_uses_doc_expansion)
-
+                    if use_model_saliency:
+                        if st.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency", value=False):
+                            st.markdown(get_saliency(query_text, doc_text),unsafe_allow_html=True)
+                        else:
+                            st.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc_text, key=f"{inst_num}doc{d_idx}")
+                    else:
+                        st.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc_text, key=f"{inst_num}doc{d_idx}")
             else:
                 for d_idx, doc in enumerate(run1_top_n_docs):
-
+                    if use_model_saliency:
+                        if st.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{d_idx}ranked", value=False):
+                            st.markdown(get_saliency(query_text, doc),unsafe_allow_html=True)
+                        else:
+                            st.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc["text"], key=f"{inst_num}doc{d_idx}")
+                    else:
+                        st.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc["text"], key=f"{inst_num}doc{d_idx}")
             st.divider()
 
             # none checked
@@ -384,20 +414,28 @@ if check_valid_args(run1_file, run2_file, dataset_name, qrels, queries, corpus):
                 combined_text2 = combine(query_text_og, alt_text2, run2_uses_query_expansion)
                 col_run1.markdown(combined_text1)
                 col_run2.markdown(combined_text2)
+                query_text1 = combined_text1
+                query_text2 = combined_text2
             elif run1_uses_query_expansion != "None":
                 alt_text = query_expansion1[str(inst_num)]
                 combined_text1 = combine(query_text_og, alt_text, run1_uses_query_expansion)
                 col_run1.markdown(combined_text1)
                 col_run2.markdown(query_text_og)
+                query_text1 = combined_text1
+                query_text2 = query_text_og
             elif run2_uses_query_expansion != "None":
                 alt_text = query_expansion2[str(inst_num)]
                 combined_text2 = combine(query_text_og, alt_text, run2_uses_query_expansion)
                 col_run1.markdown(query_text_og)
                 col_run2.markdown(combined_text2)
+                query_text1 = query_text_og
+                query_text2 = combined_text2
             else:
                 query_text = query_text_og
                 col_run1.markdown(query_text)
                 col_run2.markdown(query_text)
+                query_text1 = query_text
+                query_text2 = query_text
 
             st.divider()
 
@@ -420,13 +458,27 @@ if check_valid_args(run1_file, run2_file, dataset_name, qrels, queries, corpus):
                 if doc_expansion1 is not None and run1_uses_doc_expansion != "None" and not show_orig_rel1:
                     alt_text = doc_expansion1[docid]["text"]
                     text = combine(text, alt_text, run1_uses_doc_expansion)
-
+
+                if use_model_saliency:
+                    if col_run1.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{docid}relevant", value=False):
+                        col_run1.markdown(get_saliency(query_text1, text),unsafe_allow_html=True)
+                    else:
+                        col_run1.text_area(f"{docid}:", text, key=f"{inst_num}doc{docid}1")
+                else:
+                    col_run1.text_area(f"{docid}:", text, key=f"{inst_num}doc{docid}1")
 
             for (docid, title, text) in doc_texts:
                 if doc_expansion2 is not None and run2_uses_doc_expansion != "None" and not show_orig_rel2:
                     alt_text = doc_expansion2[docid]["text"]
                     text = combine(text, alt_text, run2_uses_doc_expansion)
-
+
+                if use_model_saliency:
+                    if col_run2.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{docid}relevant2", value=False):
+                        col_run2.markdown(get_saliency(query_text2, text),unsafe_allow_html=True)
+                    else:
+                        col_run2.text_area(f"{docid}:", text, key=f"{inst_num}doc{docid}2")
+                else:
+                    col_run2.text_area(f"{docid}:", text, key=f"{inst_num}doc{docid}2")
 
             # top ranked
             # NOTE: BEIR calls trec_eval which ranks by score, then doc_id for ties
@@ -474,10 +526,23 @@ if check_valid_args(run1_file, run2_file, dataset_name, qrels, queries, corpus):
                     for d_idx, doc in enumerate(run1_top_n_docs):
                         alt_text = run1_top_n_docs_alt[d_idx]["text"]
                         doc_text = combine(doc["text"], alt_text, run1_uses_doc_expansion)
-
+                        if use_model_saliency:
+                            if col_run1.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{d_idx}ranked1", value=False):
+                                col_run1.markdown(get_saliency(query_text1, doc_text),unsafe_allow_html=True)
+                            else:
+                                col_run1.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc_text, key=f"{inst_num}doc{d_idx}1")
+                        else:
+                            col_run1.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc_text, key=f"{inst_num}doc{d_idx}1")
                 else:
                     for d_idx, doc in enumerate(run1_top_n_docs):
-
+                        if use_model_saliency:
+                            if col_run1.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{d_idx}ranked1", value=False):
+                                col_run1.markdown(get_saliency(query_text1, doc),unsafe_allow_html=True)
+                            else:
+                                col_run1.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc["text"], key=f"{inst_num}doc{d_idx}1")
+                        else:
+                            col_run1.text_area(f"{run1_top_n['doc_id'].iloc[d_idx]}: ", doc["text"], key=f"{inst_num}doc{d_idx}1")
+
 
             if col_run2.checkbox('Show top ranked documents for Run 2', key=f"{inst_index}top-2run"):
                 col_run2.subheader("Top N Ranked Documents")
@@ -492,10 +557,22 @@ if check_valid_args(run1_file, run2_file, dataset_name, qrels, queries, corpus):
                     for d_idx, doc in enumerate(run2_top_n_docs):
                         alt_text = run2_top_n_docs_alt[d_idx]["text"]
                         doc_text = combine(doc["text"], alt_text, run2_uses_doc_expansion)
-
+                        if use_model_saliency:
+                            if col_run2.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{d_idx}ranked2", value=False):
+                                col_run2.markdown(get_saliency(query_text2, doc_text),unsafe_allow_html=True)
+                            else:
+                                col_run2.text_area(f"{run2_top_n['doc_id'].iloc[d_idx]}: ", doc_text, key=f"{inst_num}doc{d_idx}2")
+                        else:
+                            col_run2.text_area(f"{run2_top_n['doc_id'].iloc[d_idx]}: ", doc_text, key=f"{inst_num}doc{d_idx}2")
                 else:
                     for d_idx, doc in enumerate(run2_top_n_docs):
-
+                        if use_model_saliency:
+                            if col_run2.checkbox("Show Model Saliency", key=f"{inst_index}model_saliency{d_idx}ranked2", value=False):
+                                col_run2.markdown(get_saliency(query_text2, doc),unsafe_allow_html=True)
+                            else:
+                                col_run2.text_area(f"{run2_top_n['doc_id'].iloc[d_idx]}: ", doc["text"], key=f"{inst_num}doc{d_idx}2")
+                        else:
+                            col_run2.text_area(f"{run2_top_n['doc_id'].iloc[d_idx]}: ", doc["text"], key=f"{inst_num}doc{d_idx}2")
 
             st.divider()
 
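Throughout app.py the same rendering pattern is repeated: when the sidebar's "Use model saliency (slow!)" option is enabled, each document panel gets its own "Show Model Saliency" checkbox that swaps the plain st.text_area for st.markdown with unsafe_allow_html=True, so the <span> tags returned by get_saliency render as colored tokens. A condensed sketch of that branch (the show_doc helper is hypothetical and only factors out the repeated logic; container is st itself or a column like col_run1):

import streamlit as st

def show_doc(container, query_text, doc_id, doc_text, key, use_model_saliency, get_saliency=None):
    # Render one document either as saliency-colored HTML or as a plain text area.
    if use_model_saliency and container.checkbox("Show Model Saliency", key=f"{key}-saliency", value=False):
        # get_saliency returns newline-joined <span> tags, so HTML rendering must be allowed
        container.markdown(get_saliency(query_text, doc_text), unsafe_allow_html=True)
    else:
        container.text_area(f"{doc_id}:", doc_text, key=f"{key}-text")

# e.g. show_doc(st, query_text, docid, text, f"{inst_num}doc{docid}1", use_model_saliency, get_saliency)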
dataset_loading.py CHANGED
@@ -14,6 +14,8 @@ import ir_datasets
 
 from constants import BEIR, IR_DATASETS, LOCAL_DATASETS
 
+
+@st.cache_data
 def load_local_corpus(corpus_file, columns_to_combine=["title", "text"]):
     if corpus_file is None:
         return None
@@ -39,6 +41,8 @@ def load_local_corpus(corpus_file, columns_to_combine=["title", "text"]):
     }
     return did2text
 
+
+@st.cache_data
 def load_local_queries(queries_file):
     if queries_file is None:
         return None
@@ -60,6 +64,8 @@ def load_local_queries(queries_file):
         qid2text[inst[id_key]] = inst["text"]
     return qid2text
 
+
+@st.cache_data
 def load_local_qrels(qrels_file):
     if qrels_file is None:
         return None
@@ -84,6 +90,7 @@ def load_local_qrels(qrels_file):
     return qid2did2label
 
 
+@st.cache_data
 def load_run(f_run):
     run = pytrec_eval.parse_run(copy.deepcopy(f_run))
     # convert bytes to strings for keys
@@ -102,7 +109,7 @@ def load_run(f_run):
     return new_run, run_pandas
 
 
-
+@st.cache_data
 def load_jsonl(f):
     did2text = defaultdict(list)
     sub_did2text = {}
@@ -126,7 +133,7 @@ def load_jsonl(f):
     return did2text, sub_did2text
 
 
-
+@st.cache_data
 def get_beir(dataset: str):
     url = "https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/{}.zip".format(dataset)
     out_dir = os.path.join(pathlib.Path(__file__).parent.absolute(), "datasets")
@@ -134,6 +141,7 @@ def get_beir(dataset: str):
     return GenericDataLoader(data_folder=data_path).load(split="test")
 
 
+@st.cache_data
 def get_ir_datasets(dataset_name: str):
     dataset = ir_datasets.load(dataset_name)
     queries = {}
@@ -145,6 +153,7 @@ def get_ir_datasets(dataset_name: str):
     return dataset.doc_store(), queries, dataset.qrels_dict()
 
 
+@st.cache_data
 def get_dataset(dataset_name: str):
    if dataset_name == "":
        return {}, {}, {}
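The only substantive change to dataset_loading.py is decorating each loader with Streamlit's @st.cache_data. Because Streamlit re-executes the whole script on every widget interaction, this caches the parsed corpora, queries, qrels, and runs across reruns instead of reloading them each time. A toy illustration of the decorator's effect (not part of the commit):

import streamlit as st

@st.cache_data
def load_numbers(path: str) -> list[int]:
    # Executed once per distinct `path`; later calls in any rerun
    # return a cached copy instead of re-reading the file.
    with open(path) as f:
        return [int(line) for line in f]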
requirements.txt CHANGED
@@ -5,4 +5,6 @@ streamlit==1.24.1
 ir_datasets==0.5.5
 pyserini==0.21.0
 torch==2.0.1
-plotly==5.15.0
+plotly==5.15.0
+captum==0.6.0
+protobuf==4.21.11