Spaces:

LeMaterial
/

materials_explorer

Runtime error

App Files Community

Ramlaoui committed on Dec 5, 2024

Commit

ddb4a97

1 Parent(s): 2dd66b7

Fix search bias + Layout

Browse files

Files changed (2) hide show

app.py +52 -53
create_index.py +6 -1

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ from pymatgen.core import Structure
 from pymatgen.ext.matproj import MPRester
 HF_TOKEN = os.environ.get("HF_TOKEN")
-top_k = 100
 # Load only the train split of the dataset
 dataset = load_dataset(
@@ -61,20 +61,8 @@ import periodictable
 map_periodic_table = {v.symbol: k for k, v in enumerate(periodictable.elements)}
-# import re
-#
-# dataset_index = np.zeros((len(dataset), 118))
-# import tqdm
-#
-# for i, row in tqdm.tqdm(enumerate(dataset), total=len(dataset)):
-#     for el in row["chemical_formula_descriptive"].split(" "):
-#         matches = re.findall(r"([a-zA-Z]+)([0-9]*)", el)
-#         el = matches[0][0]
-#         numb = int(matches[0][1]) if matches[0][1] else 1
-#         dataset_index[i][map_periodic_table[el]] = numb
 dataset_index = np.load("dataset_index.npy")
 # Initialize the Dash app
 app = dash.Dash(__name__, assets_folder=SETTINGS.ASSETS_PATH)
@@ -83,16 +71,42 @@ server = app.server  # Expose the server for deployment
 # Define the app layout
 layout = html.Div(
     [
-        html.H1("Interactive Crystal Viewer"),
         html.Div(
             [
                 html.Div(
                     [
-                        html.H3("Search for materials by elements (eg. 'Ac,Cd,Ge')"),
                         dmp.MaterialsInput(
                             allowedInputTypes=["elements", "formula"],
                             hidePeriodicTable=False,
                             periodicTableMode="toggle",
                             showSubmitButton=True,
                             submitButtonText="Search",
                             type="elements",
@@ -106,11 +120,11 @@ layout = html.Div(
                     },
                 ),
             ],
-            style={"margin-bottom": "20px"},
         ),
         html.Div(
             [
-                html.Label("Select Material"),
                 # dcc.Dropdown(
                 #     id="material-dropdown",
                 #     options=[],  # Empty options initially
@@ -119,43 +133,32 @@ layout = html.Div(
                 dash.dash_table.DataTable(
                     id="table",
                     columns=[
-                        {"name": display_names[col], "id": col}
                         for col in display_columns
                     ],
                     data=[{}],
                     style_table={
                         "overflowX": "auto",
-                        "height": "400px",
                         "overflowY": "auto",
                     },
-                    style_cell={"textAlign": "left"},
-                ),
-            ],
-            style={"margin-bottom": "20px"},
-        ),
-        html.Button("Display Material", id="display-button", n_clicks=0),
-        html.Div(
-            [
-                html.Div(
-                    id="structure-container",
-                    style={
-                        "width": "48%",
-                        "display": "inline-block",
-                        "verticalAlign": "top",
-                    },
-                ),
-                html.Div(
-                    id="properties-container",
-                    style={
-                        "width": "48%",
-                        "display": "inline-block",
-                        "paddingLeft": "4%",
-                        "verticalAlign": "top",
-                    },
                 ),
             ],
-            style={"margin-top": "20px"},
         ),
     ],
     style={
         "margin-left": "10px",
@@ -180,10 +183,7 @@ def search_materials(query):
             numb = int(numb) if numb else 1
             query_vector[map_periodic_table[el]] = numb
-    similarity = np.dot(dataset_index, query_vector) / (
-        np.linalg.norm(dataset_index) * np.linalg.norm(query_vector)
-    )
-    print(similarity[::-1][:top_k])
     indices = np.argsort(similarity)[::-1][:top_k]
     options = [dataset[int(i)] for i in indices]
@@ -206,7 +206,6 @@ def on_submit_materials_input(n_clicks, query):
         return []
     entries = search_materials(query)
-    print(len(entries))
     return [{col: entry[col] for col in display_columns} for entry in entries]
@@ -217,11 +216,11 @@ def on_submit_materials_input(n_clicks, query):
         Output("structure-container", "children"),
         Output("properties-container", "children"),
     ],
-    Input("display-button", "n_clicks"),
     Input("table", "active_cell"),
 )
-def display_material(n_clicks, active_cell):
-    if n_clicks is None or not active_cell:
         return "", ""
     idx_active = active_cell["row"]

 from pymatgen.ext.matproj import MPRester
 HF_TOKEN = os.environ.get("HF_TOKEN")
+top_k = 500
 # Load only the train split of the dataset
 dataset = load_dataset(
 map_periodic_table = {v.symbol: k for k, v in enumerate(periodictable.elements)}
 dataset_index = np.load("dataset_index.npy")
+dataset_index = dataset_index
 # Initialize the Dash app
 app = dash.Dash(__name__, assets_folder=SETTINGS.ASSETS_PATH)
 # Define the app layout
 layout = html.Div(
     [
+        html.H1(
+            html.B("Interactive Crystal Viewer"),
+            style={"textAlign": "center", "margin-top": "20px"},
+        ),
+        html.Div(
+            [
+                html.Div(
+                    id="structure-container",
+                    style={
+                        "width": "48%",
+                        "display": "inline-block",
+                        "verticalAlign": "top",
+                    },
+                ),
+                html.Div(
+                    id="properties-container",
+                    style={
+                        "width": "48%",
+                        "display": "inline-block",
+                        "paddingLeft": "4%",
+                        "verticalAlign": "top",
+                    },
+                ),
+            ],
+            style={"margin-top": "20px"},
+        ),
         html.Div(
             [
                 html.Div(
                     [
+                        html.H3("Search Materials (eg. 'Ac,Cd,Ge' or 'Ac2CdGe3')"),
                         dmp.MaterialsInput(
                             allowedInputTypes=["elements", "formula"],
                             hidePeriodicTable=False,
                             periodicTableMode="toggle",
+                            hideWildcardButton=True,
                             showSubmitButton=True,
                             submitButtonText="Search",
                             type="elements",
                     },
                 ),
             ],
+            style={"margin-top": "20px", "margin-bottom": "20px"},
         ),
         html.Div(
             [
+                html.Label("Select Material to Display"),
                 # dcc.Dropdown(
                 #     id="material-dropdown",
                 #     options=[],  # Empty options initially
                 dash.dash_table.DataTable(
                     id="table",
                     columns=[
+                        (
+                            {"name": display_names[col], "id": col}
+                            if col != "energy"
+                            else {
+                                "name": display_names[col],
+                                "id": col,
+                                "type": "numeric",
+                                "format": {"specifier": ".2f"},
+                            }
+                        )
                         for col in display_columns
                     ],
                     data=[{}],
                     style_table={
                         "overflowX": "auto",
+                        "height": "220px",
                         "overflowY": "auto",
                     },
+                    style_header={"fontWeight": "bold", "backgroundColor": "lightgrey"},
+                    style_cell={"textAlign": "center"},
+                    style_as_list_view=True,
                 ),
             ],
+            style={"margin-top": "30px"},
         ),
+        # html.Button("Display Material", id="display-button", n_clicks=0),
     ],
     style={
         "margin-left": "10px",
             numb = int(numb) if numb else 1
             query_vector[map_periodic_table[el]] = numb
+    similarity = np.dot(dataset_index, query_vector) / (np.linalg.norm(query_vector))
     indices = np.argsort(similarity)[::-1][:top_k]
     options = [dataset[int(i)] for i in indices]
         return []
     entries = search_materials(query)
     return [{col: entry[col] for col in display_columns} for entry in entries]
         Output("structure-container", "children"),
         Output("properties-container", "children"),
     ],
+    # Input("display-button", "n_clicks"),
     Input("table", "active_cell"),
 )
+def display_material(active_cell):
+    if not active_cell:
         return "", ""
     idx_active = active_cell["row"]

create_index.py CHANGED Viewed

@@ -3,6 +3,7 @@ import re
 import numpy as np
 import periodictable
 from datasets import load_dataset
 HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -40,7 +41,6 @@ map_periodic_table = {v.symbol: k for k, v in enumerate(periodictable.elements)}
 dataset_index = np.zeros((len(dataset), 118))
-import tqdm
 for i, row in tqdm.tqdm(enumerate(dataset), total=len(dataset)):
     for el in row["chemical_formula_descriptive"].split(" "):
@@ -48,5 +48,10 @@ for i, row in tqdm.tqdm(enumerate(dataset), total=len(dataset)):
         el = matches[0][0]
         numb = int(matches[0][1]) if matches[0][1] else 1
         dataset_index[i][map_periodic_table[el]] = numb
 np.save("dataset_index.npy", dataset_index)

 import numpy as np
 import periodictable
+import tqdm
 from datasets import load_dataset
 HF_TOKEN = os.environ.get("HF_TOKEN")
 dataset_index = np.zeros((len(dataset), 118))
 for i, row in tqdm.tqdm(enumerate(dataset), total=len(dataset)):
     for el in row["chemical_formula_descriptive"].split(" "):
         el = matches[0][0]
         numb = int(matches[0][1]) if matches[0][1] else 1
         dataset_index[i][map_periodic_table[el]] = numb
+    dataset_index[i] = dataset_index[i] / np.sum(dataset_index[i])
+dataset_index = (
+    dataset_index / np.linalg.norm(dataset_index, axis=1)[:, None]
+)  # Normalize vectors
 np.save("dataset_index.npy", dataset_index)