Spaces:
Running
on
Zero
Running
on
Zero
Add Steric Clash (#1)
Browse files- add steric clash (d7c9da88a98782a4a93fe74deddd36a4766d81bf)
app.py
CHANGED
|
@@ -16,6 +16,8 @@ from dotenv import load_dotenv
|
|
| 16 |
import torch
|
| 17 |
import json
|
| 18 |
import time
|
|
|
|
|
|
|
| 19 |
|
| 20 |
load_dotenv()
|
| 21 |
|
|
@@ -39,6 +41,49 @@ amino3to1 = {
|
|
| 39 |
}
|
| 40 |
|
| 41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
def read_pdb_io(pdb_file):
|
| 43 |
if isinstance(pdb_file, io.StringIO):
|
| 44 |
pdb_content = pdb_file.getvalue()
|
|
@@ -242,6 +287,7 @@ def run_prediction(pdb_file, num_runs, noise_level, num_frames, progress=gr.Prog
|
|
| 242 |
|
| 243 |
progress(0, desc="Starting prediction")
|
| 244 |
view_data, crmsd_text = prediction_visualization(pdb_file, num_runs, noise_level, num_frames, progress)
|
|
|
|
| 245 |
if view_data is None:
|
| 246 |
return "No successful predictions were made. Try adjusting the parameters or check the PDB file.", crmsd_text
|
| 247 |
|
|
@@ -260,7 +306,7 @@ def run_prediction(pdb_file, num_runs, noise_level, num_frames, progress=gr.Prog
|
|
| 260 |
</div>
|
| 261 |
"""
|
| 262 |
progress(1.0, desc="Completed")
|
| 263 |
-
return html_content, crmsd_text
|
| 264 |
except Exception as e:
|
| 265 |
error_message = str(e)
|
| 266 |
stack_trace = traceback.format_exc()
|
|
@@ -304,6 +350,7 @@ def create_demo():
|
|
| 304 |
5. Click the "Run Prediction" button to start the process.
|
| 305 |
6. The 3D visualization will show the original structure (grey) and the best predicted structure (green).
|
| 306 |
7. The alignment result will display the best cRMSD (lower is better).
|
|
|
|
| 307 |
|
| 308 |
## About
|
| 309 |
This demo uses the ESM3 model to predict protein structures from PDB files.
|
|
|
|
| 16 |
import torch
|
| 17 |
import json
|
| 18 |
import time
|
| 19 |
+
from Bio.PDB import PDBParser
|
| 20 |
+
import itertools
|
| 21 |
|
| 22 |
load_dotenv()
|
| 23 |
|
|
|
|
| 41 |
}
|
| 42 |
|
| 43 |
|
| 44 |
+
# Covalent radii dictionary
|
| 45 |
+
# Lookup table mapping an upper-case element symbol to its covalent radius.
# Values are in Å (the fallback used by get_covalent_radius is documented in Å).
# NOTE(review): the figures look like a standard published covalent-radius
# table, and the trailing run of 2.0 entries looks like placeholders for
# heavy/synthetic elements — confirm the source before relying on them.
COVALENT_RADIUS = {
    "H": 0.31, "HE": 0.28, "LI": 1.28, "BE": 0.96, "B": 0.84, "C": 0.76, "N": 0.71, "O": 0.66, "F": 0.57, "NE": 0.58,
    "NA": 1.66, "MG": 1.41, "AL": 1.21, "SI": 1.11, "P": 1.07, "S": 1.05, "CL": 1.02, "AR": 1.06, "K": 2.03,
    "CA": 1.76, "SC": 1.7, "TI": 1.6, "V": 1.53, "CR": 1.39, "MN": 1.5, "FE": 1.42, "CO": 1.38, "NI": 1.24,
    "CU": 1.32, "ZN": 1.22, "GA": 1.22, "GE": 1.2, "AS": 1.19, "SE": 1.2, "BR": 1.2, "KR": 1.16, "RB": 2.2,
    "SR": 1.95, "Y": 1.9, "ZR": 1.75, "NB": 1.64, "MO": 1.54, "TC": 1.47, "RU": 1.46, "RH": 1.42, "PD": 1.39,
    "AG": 1.45, "CD": 1.44, "IN": 1.42, "SN": 1.39, "SB": 1.39, "TE": 1.38, "I": 1.39, "XE": 1.4, "CS": 2.44,
    "BA": 2.15, "LA": 2.07, "CE": 2.04, "PR": 2.03, "ND": 2.01, "PM": 1.99, "SM": 1.98, "EU": 1.98, "GD": 1.96,
    "TB": 1.94, "DY": 1.92, "HO": 1.92, "ER": 1.89, "TM": 1.9, "YB": 1.87, "LU": 1.87, "HF": 1.75, "TA": 1.7,
    "W": 1.62, "RE": 1.51, "OS": 1.44, "IR": 1.41, "PT": 1.36, "AU": 1.36, "HG": 1.32, "TL": 1.45, "PB": 1.46,
    "BI": 1.48, "PO": 1.4, "AT": 1.5, "RN": 1.5, "FR": 2.6, "RA": 2.21, "AC": 2.15, "TH": 2.06, "PA": 2.0,
    "U": 1.96, "NP": 1.9, "PU": 1.87, "AM": 1.8, "CM": 1.69, "BK": 2.0, "CF": 2.0, "ES": 2.0, "FM": 2.0,
    "MD": 2.0, "NO": 2.0, "LR": 2.0, "RF": 2.0, "DB": 2.0, "SG": 2.0, "BH": 2.0, "HS": 2.0, "MT": 2.0,
    "DS": 2.0, "RG": 2.0, "CN": 2.0, "UUT": 2.0, "UUQ": 2.0, "UUP": 2.0, "UUH": 2.0, "UUS": 2.0, "UUO": 2.0
}
|
| 60 |
+
|
| 61 |
+
# Function to get the covalent radius of an atom
|
| 62 |
+
def get_covalent_radius(atom):
    """Return the covalent radius (Å) for *atom*, based on its element symbol.

    Args:
        atom: Object exposing an ``element`` string attribute
            (presumably a ``Bio.PDB`` atom — verify against callers).

    Returns:
        The ``COVALENT_RADIUS`` entry for the upper-cased element symbol,
        or 2.0 when the symbol is not in the table.
    """
    symbol = atom.element.upper()
    if symbol in COVALENT_RADIUS:
        return COVALENT_RADIUS[symbol]
    # Unknown element: fall back to a fixed 2.0 Å radius.
    return 2.0
|
| 65 |
+
|
| 66 |
+
def calculate_clashes_for_pdb(pdb_file, tolerance=0.5):
    """Count steric clashes between every pair of atoms in a PDB structure.

    A pair of atoms is counted as a clash when their distance plus
    ``tolerance`` is strictly smaller than the sum of their covalent radii
    (as returned by ``get_covalent_radius``).

    Args:
        pdb_file: Whatever ``PDBParser.get_structure`` accepts as its file
            argument. The value is also interpolated into the returned
            messages.
        tolerance: Slack added to the inter-atomic distance before it is
            compared with the radius sum. Defaults to 0.5, the value that
            was previously hard-coded.

    Returns:
        A 2-tuple of human-readable strings: the total clash count, and the
        clash count divided by the number of atoms (0.0 when the structure
        contains no atoms).
    """
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("protein", pdb_file)
    atoms = list(structure.get_atoms())
    num_atoms = len(atoms)

    # Resolve each atom's radius once instead of once per pair.
    radii = [get_covalent_radius(atom) for atom in atoms]

    # Subtracting two atoms yields the distance between them.
    steric_clash_count = sum(
        1
        for (atom1, radius1), (atom2, radius2) in itertools.combinations(
            zip(atoms, radii), 2
        )
        if (atom1 - atom2) + tolerance < radius1 + radius2
    )

    # Normalize per atom; guard against an empty structure so a file with
    # no parsable atoms reports 0.0 instead of raising ZeroDivisionError.
    norm_ster_clash_count = steric_clash_count / num_atoms if num_atoms else 0.0

    return (
        f"Total steric clashes in {pdb_file}: {steric_clash_count}",
        f"Normalized steric clashes per atom in {pdb_file}: {norm_ster_clash_count}",
    )
|
| 86 |
+
|
| 87 |
def read_pdb_io(pdb_file):
|
| 88 |
if isinstance(pdb_file, io.StringIO):
|
| 89 |
pdb_content = pdb_file.getvalue()
|
|
|
|
| 287 |
|
| 288 |
progress(0, desc="Starting prediction")
|
| 289 |
view_data, crmsd_text = prediction_visualization(pdb_file, num_runs, noise_level, num_frames, progress)
|
| 290 |
+
steric_clash_text, norm_steric_clas_text = calculate_clashes_for_pdb(pdb_file)
|
| 291 |
if view_data is None:
|
| 292 |
return "No successful predictions were made. Try adjusting the parameters or check the PDB file.", crmsd_text
|
| 293 |
|
|
|
|
| 306 |
</div>
|
| 307 |
"""
|
| 308 |
progress(1.0, desc="Completed")
|
| 309 |
+
return html_content, crmsd_text, steric_clash_text, norm_steric_clas_text
|
| 310 |
except Exception as e:
|
| 311 |
error_message = str(e)
|
| 312 |
stack_trace = traceback.format_exc()
|
|
|
|
| 350 |
5. Click the "Run Prediction" button to start the process.
|
| 351 |
6. The 3D visualization will show the original structure (grey) and the best predicted structure (green).
|
| 352 |
7. The alignment result will display the best cRMSD (lower is better).
|
| 353 |
+
8. Total and Normalized (per atom) steric clashes (lower is better)
|
| 354 |
|
| 355 |
## About
|
| 356 |
This demo uses the ESM3 model to predict protein structures from PDB files.
|