Adjust limit, turn off caching for logprobs
app.py
CHANGED
@@ -96,11 +96,12 @@ tokenizer = st.cache_resource(AutoTokenizer.from_pretrained, show_spinner=False)
 # Make sure the logprobs do not use up more than ~4 GB of memory
 MAX_MEM = 4e9 / (torch.finfo(torch.float16).bits / 8)
 # Select window lengths such that we are allowed to fill the whole window without running out of memory
-# (otherwise the window length is irrelevant)
-
+# (otherwise the window length is irrelevant); if using NLL, memory is not a consideration, but we want
+# to limit runtime
+multiplier = tokenizer.vocab_size if metric_name == "KL divergence" else 16384  # arbitrary number
 window_len_options = [
     w for w in [8, 16, 32, 64, 128, 256, 512, 1024]
-    if w == 8 or w * (2 * w) * tokenizer.vocab_size <= MAX_MEM
+    if w == 8 or w * (2 * w) * multiplier <= MAX_MEM
 ]
 window_len = st.select_slider(
     r"Window size ($c_\text{max}$)",
@@ -109,8 +110,7 @@ window_len = st.select_slider(
 )
 # Now figure out how many tokens we are allowed to use:
 # window_len * (num_tokens + window_len) * vocab_size <= MAX_MEM
-max_tokens = int(MAX_MEM / (tokenizer.vocab_size * window_len) - window_len)
-max_tokens = min(max_tokens, 2048)
+max_tokens = int(MAX_MEM / (multiplier * window_len) - window_len)

 DEFAULT_TEXT = """
 We present context length probing, a novel explanation technique for causal
@@ -151,10 +151,8 @@ with st.spinner("Loading model…"):

 window_len = min(window_len, len(input_ids))

-@st.cache_data(show_spinner=False)
 @torch.inference_mode()
-def get_logprobs(_model, _inputs, cache_key):
-    del cache_key
+def get_logprobs(_model, _inputs):
     return _model(**_inputs).logits.log_softmax(dim=-1).to(torch.float16)

 @st.cache_data(show_spinner=False)
@@ -179,7 +177,7 @@ def run_context_length_probing(_model, _tokenizer, _inputs, window_len, metric,
         batch_logprobs = get_logprobs(
             _model,
             batch,
-            cache_key=(model_name, batch["input_ids"].cpu().numpy().tobytes())
+            #cache_key=(model_name, batch["input_ids"].cpu().numpy().tobytes())
         )
         batch_labels = batch["labels"]
         if metric != "KL divergence":
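
Note (illustrative, not part of the commit): the memory budget above counts float16 elements, so MAX_MEM works out to 2e9 entries. A standalone sketch of the same arithmetic, assuming a GPT-2-sized vocabulary of 50257 (the actual model and vocabulary size depend on how the Space is configured):

import torch

# ~4 GB expressed as a count of float16 elements (2 bytes each) -> 2e9 entries
MAX_MEM = 4e9 / (torch.finfo(torch.float16).bits / 8)

vocab_size = 50257           # assumption: GPT-2-sized vocabulary
multiplier = vocab_size      # KL divergence case; the commit uses 16384 for NLL

# A fully filled window of length w needs about w * (2 * w) * multiplier logprob entries
window_len_options = [
    w for w in [8, 16, 32, 64, 128, 256, 512, 1024]
    if w == 8 or w * (2 * w) * multiplier <= MAX_MEM
]
print(window_len_options)    # [8, 16, 32, 64, 128] with the numbers above

# Invert window_len * (num_tokens + window_len) * multiplier <= MAX_MEM for num_tokens
window_len = window_len_options[-1]
max_tokens = int(MAX_MEM / (multiplier * window_len) - window_len)
print(max_tokens)            # ~182 input tokens for a 128-token window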
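
Relatedly, get_logprobs casts the log-softmax output to float16 before returning it, which is why MAX_MEM is counted in float16 entries. A rough sketch of the footprint of one window (the shapes here are illustrative, not taken from app.py):

import torch

logits = torch.randn(1, 128, 50257)                       # (batch, window, vocab), float32
logprobs = logits.log_softmax(dim=-1).to(torch.float16)   # halves the storage per entry
print(logprobs.element_size() * logprobs.nelement())      # ~12.9 MB instead of ~25.7 MB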
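
Finally, the cache_key parameter removed by this commit was a workaround for st.cache_data, which skips hashing any argument whose name starts with an underscore; the hashable identity then has to be passed separately. A minimal sketch of that pattern (slow_sum and its arguments are made-up names, not part of app.py):

import streamlit as st

@st.cache_data(show_spinner=False)
def slow_sum(_values, cache_key):
    # `_values` is excluded from Streamlit's cache hashing because of the leading
    # underscore, so `cache_key` must uniquely identify the input on its own.
    del cache_key
    return sum(_values)

values = [1, 2, 3]
result = slow_sum(values, cache_key=tuple(values))

With caching turned off for get_logprobs, no such key is needed, which is why both the parameter and the cache_key= argument at the call site are dropped (the latter left behind as a comment).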