More efficient NLL implementation
app.py
CHANGED
@@ -17,7 +17,7 @@ def get_windows_batched(examples: BatchEncoding, window_len: int, stride: int =
     return BatchEncoding({
         k: [
             t[i][j : j + window_len] + [
-                pad_id if k
+                pad_id if k in ["input_ids", "labels"] else 0
             ] * (j + window_len - len(t[i]))
             for i in range(len(examples["input_ids"]))
             for j in range(0, len(examples["input_ids"][i]) - 1, stride)
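The change above pads every windowed field consistently: once the `labels` key is added further down in this diff, it is padded with `pad_id` just like `input_ids`, while other keys (e.g. `attention_mask`) keep 0. Below is a minimal, self-contained sketch of that windowing-and-padding behaviour using plain dicts and toy token ids instead of the app's real `BatchEncoding` inputs; the outer `for k, t in examples.items()` loop is assumed from context and not shown in the hunk.

    # Simplified sketch (plain dict instead of BatchEncoding, toy values).
    def get_windows_sketch(examples: dict, window_len: int, stride: int = 1, pad_id: int = 0) -> dict:
        return {
            k: [
                t[i][j : j + window_len] + [
                    # After this commit: labels are padded with pad_id too, not just input_ids.
                    pad_id if k in ["input_ids", "labels"] else 0
                ] * (j + window_len - len(t[i]))
                for i in range(len(examples["input_ids"]))
                for j in range(0, len(examples["input_ids"][i]) - 1, stride)
            ]
            for k, t in examples.items()
        }

    examples = {
        "input_ids": [[10, 11, 12, 13]],
        "labels": [[11, 12, 13, 50256]],      # next-token targets, EOS at the end
        "attention_mask": [[1, 1, 1, 1]],
    }
    windows = get_windows_sketch(examples, window_len=3, pad_id=50256)
    print(windows["labels"])          # [[11, 12, 13], [12, 13, 50256], [13, 50256, 50256]]
    print(windows["attention_mask"])  # [[1, 1, 1], [1, 1, 1], [1, 1, 0]]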
@@ -43,7 +43,10 @@ def ids_to_readable_tokens(tokenizer, ids, strip_whitespace=False):
     return result
 
 def nll_score(logprobs, labels):
-
+    if logprobs.shape[-1] == 1:
+        return -logprobs.squeeze(-1)
+    else:
+        return -logprobs[:, torch.arange(len(labels)), labels]
 
 def kl_div_score(logprobs):
     log_p = logprobs[
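The reworked `nll_score` accepts either full-vocabulary log-probabilities (the KL-divergence path) or log-probabilities already reduced to a single column per position (the cheap NLL path added near the bottom of this diff). A small self-contained check with dummy tensors and toy shapes, just to illustrate that the two branches agree:

    import torch

    def nll_score(logprobs, labels):
        # Last dim of size 1 means the label log-probs were already picked out upstream.
        if logprobs.shape[-1] == 1:
            return -logprobs.squeeze(-1)
        else:
            return -logprobs[:, torch.arange(len(labels)), labels]

    labels = torch.randint(0, 11, (5,))                       # toy labels: 5 positions, vocab of 11
    full = torch.randn(2, 5, 11).log_softmax(dim=-1)          # 2 windows, full distributions
    gathered = full[:, torch.arange(5), labels][..., None]    # same values, one column per position
    assert torch.allclose(nll_score(full, labels), nll_score(gathered, labels))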
@@ -75,8 +78,18 @@ if not compact_layout:
     """
     )
 
+generation_mode = False
+# st.radio("Mode", ["Standard", "Generation"], horizontal=True) == "Generation"
+# st.caption(
+#     "In standard mode, we analyze the model's predictions on the input text. "
+#     "In generation mode, we generate a continuation of the input text "
+#     "and visualize the contributions of different contexts to each generated token."
+# )
+
 model_name = st.selectbox("Model", ["distilgpt2", "gpt2", "EleutherAI/gpt-neo-125m"])
-metric_name = st.
+metric_name = st.radio(
+    "Metric", (["KL divergence"] if not generation_mode else []) + ["NLL loss"], index=0, horizontal=True
+)
 
 tokenizer = st.cache_resource(AutoTokenizer.from_pretrained, show_spinner=False)(model_name, use_fast=False)
 
@@ -84,9 +97,10 @@ tokenizer = st.cache_resource(AutoTokenizer.from_pretrained, show_spinner=False)
 MAX_MEM = 4e9 / (torch.finfo(torch.float16).bits / 8)
 # Select window lengths such that we are allowed to fill the whole window without running out of memory
 # (otherwise the window length is irrelevant)
+logprobs_dim = tokenizer.vocab_size if metric_name == "KL divergence" else 1
 window_len_options = [
     w for w in [8, 16, 32, 64, 128, 256, 512, 1024]
-    if w == 8 or w * (2 * w) *
+    if w == 8 or w * (2 * w) * logprobs_dim <= MAX_MEM
 ]
 window_len = st.select_slider(
     r"Window size ($c_\text{max}$)",
@@ -95,7 +109,8 @@ window_len = st.select_slider(
 )
 # Now figure out how many tokens we are allowed to use:
 # window_len * (num_tokens + window_len) * vocab_size <= MAX_MEM
-max_tokens = int(MAX_MEM / (
+max_tokens = int(MAX_MEM / (logprobs_dim * window_len) - window_len)
+max_tokens = min(max_tokens, 2048)
 
 DEFAULT_TEXT = """
 We present context length probing, a novel explanation technique for causal
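The new `logprobs_dim` is what makes the NLL mode cheap: the budget `window_len * (num_tokens + window_len) * logprobs_dim <= MAX_MEM` no longer has to reserve the full vocabulary dimension when only NLL is requested (the inline comment still says `vocab_size`, but the code now budgets with `logprobs_dim`). A back-of-the-envelope check, assuming a GPT-2-sized vocabulary of 50257:

    MAX_MEM = 4e9 / 2            # ≈ 2e9 fp16 elements, i.e. about 4 GB
    VOCAB_SIZE = 50257           # assumed GPT-2 vocabulary size

    def allowed_max_tokens(window_len: int, logprobs_dim: int) -> int:
        # Solve window_len * (num_tokens + window_len) * logprobs_dim <= MAX_MEM for num_tokens.
        return int(MAX_MEM / (logprobs_dim * window_len) - window_len)

    # The same budget gates the window-size slider: w * (2 * w) * logprobs_dim <= MAX_MEM.
    print([w for w in [8, 16, 32, 64, 128, 256, 512, 1024]
           if w == 8 or w * (2 * w) * VOCAB_SIZE <= MAX_MEM])  # [8, 16, 32, 64, 128] for KL divergence
    print(allowed_max_tokens(128, VOCAB_SIZE))                 # 182 tokens with KL divergence
    print(allowed_max_tokens(128, 1))                          # ~15.6M with NLL, clamped to 2048 by the app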
@@ -117,6 +132,7 @@ if tokenizer.eos_token:
     text += tokenizer.eos_token
 inputs = tokenizer([text])
 [input_ids] = inputs["input_ids"]
+inputs["labels"] = [[*input_ids[1:], tokenizer.eos_token_id]]
 num_user_tokens = len(input_ids) - (1 if tokenizer.eos_token else 0)
 
 if num_user_tokens < 1:
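The new `labels` entry is simply `input_ids` shifted left by one position, with `eos_token_id` appended so the final position still has a target. A toy illustration with hypothetical token ids (assuming GPT-2's EOS id of 50256):

    eos_token_id = 50256                     # GPT-2's EOS id, used as a stand-in here
    input_ids = [464, 3290, 318, 257]        # hypothetical token ids for the user text
    labels = [*input_ids[1:], eos_token_id]  # next-token target for every position
    print(labels)                            # [3290, 318, 257, 50256]

In the app the list is additionally wrapped in a batch dimension (`[[...]]`), since `inputs` holds a single sequence.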
@@ -160,13 +176,17 @@ def run_context_length_probing(_model, _tokenizer, _inputs, window_len, metric,
     for i in range(0, num_items, batch_size):
         pbar.progress(i / num_items, f"{i}/{num_items}")
         batch = {k: v[i:i + batch_size] for k, v in inputs_sliding.items()}
-
-
-
-
-            cache_key=(model_name, batch["input_ids"].cpu().numpy().tobytes())
-        )
+        batch_logprobs = get_logprobs(
+            _model,
+            batch,
+            cache_key=(model_name, batch["input_ids"].cpu().numpy().tobytes())
         )
+        batch_labels = batch["labels"]
+        if metric != "KL divergence":
+            batch_logprobs = torch.gather(
+                batch_logprobs, dim=-1, index=batch_labels[..., None]
+            )
+        logprobs.append(batch_logprobs)
     logprobs = torch.cat(logprobs, dim=0)
     pbar.empty()
 
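For the NLL metric, the loop now keeps only the log-probability of the gold label at each position instead of the whole distribution, which is where the memory saving (and the `logprobs_dim = 1` budget above) comes from. A standalone sketch of that `torch.gather` step, with dummy shapes rather than the app's real batches:

    import torch

    batch_size, window_len, vocab_size = 4, 8, 11       # toy dimensions
    batch_logprobs = torch.randn(batch_size, window_len, vocab_size).log_softmax(dim=-1)
    batch_labels = torch.randint(0, vocab_size, (batch_size, window_len))

    # Keep only the log-prob of the gold label at every position: (B, W, V) -> (B, W, 1).
    picked = torch.gather(batch_logprobs, dim=-1, index=batch_labels[..., None])
    print(picked.shape)                                  # torch.Size([4, 8, 1])

    # Same values as indexing the full distribution, but stored at 1/vocab_size the size.
    assert torch.allclose(
        picked.squeeze(-1),
        batch_logprobs[torch.arange(batch_size)[:, None], torch.arange(window_len), batch_labels],
    )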