Spaces:

MISATO-dataset
/

esm3-conformational-sampling

Running on Zero

App Files Files Community

asarigun committed on Oct 19, 2024

Commit

afd257c

verified ·

1 Parent(s): 86c05eb

Add Steric Clash (#1)

Browse files

- add steric clash (d7c9da88a98782a4a93fe74deddd36a4766d81bf)

Files changed (1) hide show

app.py +48 -1

app.py CHANGED Viewed

@@ -16,6 +16,8 @@ from dotenv import load_dotenv
 import torch
 import json
 import time
 load_dotenv()
@@ -39,6 +41,49 @@ amino3to1 = {
 }
 def read_pdb_io(pdb_file):
     if isinstance(pdb_file, io.StringIO):
         pdb_content = pdb_file.getvalue()
@@ -242,6 +287,7 @@ def run_prediction(pdb_file, num_runs, noise_level, num_frames, progress=gr.Prog
         progress(0, desc="Starting prediction")
         view_data, crmsd_text = prediction_visualization(pdb_file, num_runs, noise_level, num_frames, progress)
         if view_data is None:
             return "No successful predictions were made. Try adjusting the parameters or check the PDB file.", crmsd_text
@@ -260,7 +306,7 @@ def run_prediction(pdb_file, num_runs, noise_level, num_frames, progress=gr.Prog
         </div>
         """
         progress(1.0, desc="Completed")
-        return html_content, crmsd_text
     except Exception as e:
         error_message = str(e)
         stack_trace = traceback.format_exc()
@@ -304,6 +350,7 @@ def create_demo():
         5. Click the "Run Prediction" button to start the process.
         6. The 3D visualization will show the original structure (grey) and the best predicted structure (green).
         7. The alignment result will display the best cRMSD (lower is better).
         ## About
         This demo uses the ESM3 model to predict protein structures from PDB files.

 import torch
 import json
 import time
+from Bio.PDB import PDBParser
+import itertools
 load_dotenv()
 }
+# Covalent radii lookup table, keyed by upper-case element symbol.
+# Used by get_covalent_radius(), which upper-cases atom.element before the
+# lookup and falls back to 2.0 for symbols that are missing here.
+# Values are presumably in Å (the fallback in get_covalent_radius is
+# commented as "2.0 Å") -- TODO confirm the source table.
+# NOTE(review): every entry from "BK" onward is exactly 2.0, which looks like
+# a placeholder rather than tabulated data -- confirm.
+# NOTE(review): "UUT" ... "UUO" look like legacy systematic symbols; atoms
+# carrying the current symbols for those elements would hit the 2.0 fallback
+# anyway -- verify if this ever matters.
+COVALENT_RADIUS = {
+    "H": 0.31, "HE": 0.28, "LI": 1.28, "BE": 0.96, "B": 0.84, "C": 0.76, "N": 0.71, "O": 0.66, "F": 0.57, "NE": 0.58,
+    "NA": 1.66, "MG": 1.41, "AL": 1.21, "SI": 1.11, "P": 1.07, "S": 1.05, "CL": 1.02, "AR": 1.06, "K": 2.03,
+    "CA": 1.76, "SC": 1.7, "TI": 1.6, "V": 1.53, "CR": 1.39, "MN": 1.5, "FE": 1.42, "CO": 1.38, "NI": 1.24,
+    "CU": 1.32, "ZN": 1.22, "GA": 1.22, "GE": 1.2, "AS": 1.19, "SE": 1.2, "BR": 1.2, "KR": 1.16, "RB": 2.2,
+    "SR": 1.95, "Y": 1.9, "ZR": 1.75, "NB": 1.64, "MO": 1.54, "TC": 1.47, "RU": 1.46, "RH": 1.42, "PD": 1.39,
+    "AG": 1.45, "CD": 1.44, "IN": 1.42, "SN": 1.39, "SB": 1.39, "TE": 1.38, "I": 1.39, "XE": 1.4, "CS": 2.44,
+    "BA": 2.15, "LA": 2.07, "CE": 2.04, "PR": 2.03, "ND": 2.01, "PM": 1.99, "SM": 1.98, "EU": 1.98, "GD": 1.96,
+    "TB": 1.94, "DY": 1.92, "HO": 1.92, "ER": 1.89, "TM": 1.9, "YB": 1.87, "LU": 1.87, "HF": 1.75, "TA": 1.7,
+    "W": 1.62, "RE": 1.51, "OS": 1.44, "IR": 1.41, "PT": 1.36, "AU": 1.36, "HG": 1.32, "TL": 1.45, "PB": 1.46,
+    "BI": 1.48, "PO": 1.4, "AT": 1.5, "RN": 1.5, "FR": 2.6, "RA": 2.21, "AC": 2.15, "TH": 2.06, "PA": 2.0,
+    "U": 1.96, "NP": 1.9, "PU": 1.87, "AM": 1.8, "CM": 1.69, "BK": 2.0, "CF": 2.0, "ES": 2.0, "FM": 2.0,
+    "MD": 2.0, "NO": 2.0, "LR": 2.0, "RF": 2.0, "DB": 2.0, "SG": 2.0, "BH": 2.0, "HS": 2.0, "MT": 2.0,
+    "DS": 2.0, "RG": 2.0, "CN": 2.0, "UUT": 2.0, "UUQ": 2.0, "UUP": 2.0, "UUH": 2.0, "UUS": 2.0, "UUO": 2.0
+}
+# Covalent-radius lookup for a single atom
+def get_covalent_radius(atom):
+    """Return the covalent radius associated with ``atom``'s element.
+
+    The atom's ``element`` attribute is upper-cased and looked up in
+    ``COVALENT_RADIUS``. Symbols that are not in the table get 2.0 Å.
+    """
+    symbol = atom.element.upper()
+    if symbol in COVALENT_RADIUS:
+        return COVALENT_RADIUS[symbol]
+    # Unknown element: fall back to the same default the table uses for
+    # its placeholder entries.
+    return 2.0
+def calculate_clashes_for_pdb(pdb_file):
+    """Count steric clashes between the atoms of a PDB structure.
+
+    Two atoms are counted as clashing when their distance plus a 0.5
+    tolerance is still smaller than the sum of their covalent radii (as
+    returned by ``get_covalent_radius``).
+
+    Args:
+        pdb_file: Passed straight to ``PDBParser.get_structure`` and also
+            interpolated into the returned messages.
+
+    Returns:
+        A tuple of two strings: the total clash count, and the clash count
+        divided by the number of atoms. A structure without atoms reports
+        0 clashes and a normalized value of 0.0 instead of raising
+        ``ZeroDivisionError``.
+    """
+    # Local import so this block is self-contained; Bio.PDB is already a
+    # dependency of the file (PDBParser).
+    from Bio.PDB import NeighborSearch
+
+    tolerance = 0.5
+    parser = PDBParser(QUIET=True)
+    structure = parser.get_structure("protein", pdb_file)
+    atoms = list(structure.get_atoms())
+    num_atoms = len(atoms)
+    steric_clash_count = 0
+
+    if num_atoms > 1:
+        # No pair can clash beyond (2 * largest radius - tolerance), so only
+        # pairs within that distance need the exact test. This avoids the
+        # all-pairs O(n^2) Python loop on full-size proteins.
+        max_radius = max(get_covalent_radius(atom) for atom in atoms)
+        cutoff = 2 * max_radius - tolerance
+        if cutoff > 0:
+            # Small padding so rounding in the spatial index never drops a
+            # pair that the exact test below would accept.
+            candidate_pairs = NeighborSearch(atoms).search_all(cutoff + 1e-3)
+            for atom1, atom2 in candidate_pairs:
+                covalent_radius_sum = get_covalent_radius(atom1) + get_covalent_radius(atom2)
+                distance = atom1 - atom2  # Bio.PDB atoms subtract to a distance
+                if distance + tolerance < covalent_radius_sum:
+                    steric_clash_count += 1
+
+    # Normalize steric clashes per number of atoms (guarding the empty case)
+    norm_ster_clash_count = steric_clash_count / num_atoms if num_atoms else 0.0
+    return f"Total steric clashes in {pdb_file}: {steric_clash_count}", f"Normalized steric clashes per atom in {pdb_file}: {norm_ster_clash_count}"
 def read_pdb_io(pdb_file):
     if isinstance(pdb_file, io.StringIO):
         pdb_content = pdb_file.getvalue()
         progress(0, desc="Starting prediction")
         view_data, crmsd_text = prediction_visualization(pdb_file, num_runs, noise_level, num_frames, progress)
+        steric_clash_text, norm_steric_clas_text = calculate_clashes_for_pdb(pdb_file)
         if view_data is None:
             return "No successful predictions were made. Try adjusting the parameters or check the PDB file.", crmsd_text
         </div>
         """
         progress(1.0, desc="Completed")
+        return html_content, crmsd_text, steric_clash_text, norm_steric_clas_text
     except Exception as e:
         error_message = str(e)
         stack_trace = traceback.format_exc()
         5. Click the "Run Prediction" button to start the process.
         6. The 3D visualization will show the original structure (grey) and the best predicted structure (green).
         7. The alignment result will display the best cRMSD (lower is better).
+        8. Total and Normalized (per atom) steric clashes (lower is better)
         ## About
         This demo uses the ESM3 model to predict protein structures from PDB files.