Spaces:

Narsil
/

graph_spectrum

Sleeping

App Files Files Community

Narsil committed on Apr 28, 2023

Commit

cfc1bbd

1 Parent(s): 4c415fe

Adding directions exploration.

Browse files

Files changed (1) hide show

app.py +64 -7

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ alt.data_transformers.disable_max_rows()
 number_re = re.compile(r"\.[0-9]*\.")
 STATE_DICT = {}
 DATA = pd.DataFrame()
@@ -30,15 +31,20 @@ def scatter_plot_fn(group_name):
 def find_choices(state_dict):
     if not state_dict:
-        return []
     global DATA
-    layered_tensors = [k for k, v in state_dict.items() if number_re.findall(k) and len(v.shape) == 2]
     choices = set()
     data = []
     for name in layered_tensors:
         group_name = number_re.sub(".{N}.", name)
         choices.add(group_name)
         layer = int(number_re.search(name).group()[1:-1])
         svdvals = torch.linalg.svdvals(state_dict[name])
         svdvals /= svdvals.sum()
@@ -49,19 +55,64 @@ def find_choices(state_dict):
     DATA["val"] = DATA["val"].astype("float")
     DATA["layer"] = DATA["layer"].astype("category")
     DATA["rank"] = DATA["rank"].astype("int32")
-    return choices
 def weights_fn(model_id):
-    global STATE_DICT
     try:
         pipe = pipeline(model=model_id)
         STATE_DICT = pipe.model.state_dict()
     except Exception as e:
         print(e)
         STATE_DICT = {}
-    choices = find_choices(STATE_DICT)
-    return gr.Dropdown.update(choices=choices)
 with gr.Blocks() as scatter_plot:
@@ -69,10 +120,16 @@ with gr.Blocks() as scatter_plot:
         with gr.Column():
             model_id = gr.Textbox(label="model_id")
             weights = gr.Dropdown(label="weights")
         with gr.Column():
             plot = gr.LinePlot(show_label=False).style(container=True)
-    model_id.change(weights_fn, inputs=model_id, outputs=weights)
     weights.change(fn=scatter_plot_fn, inputs=weights, outputs=plot)
 if __name__ == "__main__":
     scatter_plot.launch()

 number_re = re.compile(r"\.[0-9]*\.")
 STATE_DICT = {}
+PIPE = None
 DATA = pd.DataFrame()
 def find_choices(state_dict):
     if not state_dict:
+        return [], []
     global DATA
+    layered_tensors = [
+        k for k, v in state_dict.items() if number_re.findall(k) and len(v.shape) == 2
+    ]
     choices = set()
     data = []
+    max_layer = 0
     for name in layered_tensors:
         group_name = number_re.sub(".{N}.", name)
         choices.add(group_name)
         layer = int(number_re.search(name).group()[1:-1])
+        if layer > max_layer:
+            max_layer = layer
         svdvals = torch.linalg.svdvals(state_dict[name])
         svdvals /= svdvals.sum()
     DATA["val"] = DATA["val"].astype("float")
     DATA["layer"] = DATA["layer"].astype("category")
     DATA["rank"] = DATA["rank"].astype("int32")
+    return choices, list(range(max_layer + 1))
 def weights_fn(model_id):
+    global STATE_DICT, PIPE
     try:
         pipe = pipeline(model=model_id)
+        PIPE = pipe
         STATE_DICT = pipe.model.state_dict()
     except Exception as e:
         print(e)
         STATE_DICT = {}
+    choices, layers = find_choices(STATE_DICT)
+    return [gr.Dropdown.update(choices=choices), gr.Dropdown.update(choices=layers)]
+def layer_fn(weights, layer):
+    k = 5
+    directions = 10
+    embeddings = PIPE.model.get_input_embeddings().weight
+    weight_name = weights.replace("{N}", str(layer))
+    weight = STATE_DICT[weight_name]
+    U, S, Vh = torch.linalg.svd(weight)
+    D = U if U.shape[0] == embeddings.shape[0] else Vh
+    # words = D[:directions].matmul(embeddings.T).topk(k=k)
+    # words_t = D[:, :directions].T.matmul(embeddings.T).topk(k=k)
+    # Cosine similarity
+    words = (
+        (D[:directions] / D[:directions].norm(dim=0))
+        .matmul(embeddings.T / embeddings.T.norm(dim=0))
+        .topk(k=k)
+    )
+    words_t = (
+        (D[:, :directions].T / D[:, :directions].norm(dim=1))
+        .matmul(embeddings.T / embeddings.T.norm(dim=0))
+        .topk(k=k)
+    )
+    data = [[PIPE.tokenizer.decode(w) for w in indices] for indices in words.indices]
+    data = np.array(data)
+    data = pd.DataFrame(data)
+    data_t = [
+        [PIPE.tokenizer.decode(w) for w in indices] for indices in words_t.indices
+    ]
+    data_t = np.array(data_t)
+    data_t = pd.DataFrame(data_t)
+    return (
+        gr.Dataframe.update(value=data, interactive=False),
+        gr.Dataframe.update(value=data_t, interactive=False),
+    )
 with gr.Blocks() as scatter_plot:
         with gr.Column():
             model_id = gr.Textbox(label="model_id")
             weights = gr.Dropdown(label="weights")
+            layer = gr.Dropdown(label="layer")
         with gr.Column():
             plot = gr.LinePlot(show_label=False).style(container=True)
+            directions = gr.Dataframe(interactive=False)
+            directions_t = gr.Dataframe(interactive=False)
+    model_id.change(weights_fn, inputs=model_id, outputs=[weights, layer])
     weights.change(fn=scatter_plot_fn, inputs=weights, outputs=plot)
+    layer.change(
+        fn=layer_fn, inputs=[weights, layer], outputs=[directions, directions_t]
+    )
 if __name__ == "__main__":
     scatter_plot.launch()