as-cle-bert committed on
Commit
ba0514b
·
verified ·
1 Parent(s): eae90c3

Delete proteins_viz.py

Browse files
Files changed (1) hide show
  1. proteins_viz.py +0 -136
proteins_viz.py DELETED
@@ -1,136 +0,0 @@
1
- import pandas as pd
2
- from biopandas.pdb import PandasPdb
3
- from prody import parsePDBHeader
4
-
5
-
6
-
7
-
8
def read_pdb_to_dataframe(
    pdb_path,
    model_index: int = 1,
    parse_header: bool = True,
) -> tuple:
    """
    Read a PDB file and return its atom records together with the parsed header.

    Args:
        pdb_path (str): Path to a local PDB file to read. Required.
        model_index (int, optional): Index of the model to extract from the PDB file, in case
            it contains multiple models. Defaults to 1.
        parse_header (bool, optional): Whether to parse the PDB header and extract metadata.
            Defaults to True.

    Returns:
        tuple: ``(atoms, header)``. ``atoms`` is a pd.DataFrame with one row per atom,
            made of the ATOM records followed by the HETATM records of the selected
            model. ``header`` is whatever ``parsePDBHeader`` returned, or None when
            ``parse_header`` is False.

    Raises:
        ValueError: If the selected model contains no ATOM records.
    """
    structure = PandasPdb().read_pdb(pdb_path)
    header = parsePDBHeader(pdb_path) if parse_header else None

    model = structure.get_model(model_index)
    atom_records = model.df["ATOM"]
    # An empty ATOM table is treated as "this model index does not exist".
    if atom_records.empty:
        raise ValueError(f"No model found for index: {model_index}")

    return pd.concat([atom_records, model.df["HETATM"]]), header
37
-
38
- from graphein.protein.graphs import label_node_id
39
-
40
def process_dataframe(df: pd.DataFrame, granularity='CA') -> pd.DataFrame:
    """
    Process a DataFrame of protein structure data to reduce ambiguity and simplify analysis.

    This function performs the following steps:
    1. Handles alternate locations for an atom: blank alt_loc values are treated as 'A'
       and only the 'A' location is kept.
    2. Assigns a node_id to the rows using the helper function label_node_id.
    3. Filters the DataFrame down to the atoms whose name equals ``granularity``
       (defaults to 'CA' for alpha carbon).

    The input DataFrame is never modified; all work happens on a copy so that the
    caller can keep using the raw frame (e.g. for traceability).

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame containing protein structure data to process. It must contain
        the column 'atom_name'; the column 'alt_loc' is used when present.

    granularity : str, optional
        Atom name to keep, also forwarded to label_node_id. Defaults to 'CA' (alpha carbon).

    Returns
    -------
    pd.DataFrame
        A new DataFrame containing only the rows selected by the steps above.
    """
    # Work on a private copy: the original code wrote into the caller's frame.
    df = df.copy()

    # handle the case of alternative locations,
    # if so default to the 1st one = A
    if 'alt_loc' in df.columns:
        df['alt_loc'] = df['alt_loc'].replace('', 'A')
        # .copy() so later column writes do not target a slice of another frame.
        df = df.loc[df['alt_loc'] == 'A'].copy()

    df = label_node_id(df, granularity)
    return df.loc[df['atom_name'] == granularity]
65
-
66
-
67
- from graphein.protein.graphs import initialise_graph_with_metadata
68
- from graphein.protein.graphs import add_nodes_to_graph
69
- from graphein.protein.visualisation import plotly_protein_structure_graph
70
- from PIL import Image
71
- import networkx as nx
72
-
73
def take_care(pdb_path, pdb_code='3nir', image_file="protein_graph.png"):
    """
    Build a backbone graph for a PDB file, render it to a PNG and return the image.

    Steps: read the PDB into a DataFrame, reduce it to alpha-carbon rows, create a
    graph with one node per remaining row, connect sequence-adjacent residues of each
    chain with a 'backbone_bond' edge, plot the graph and write it to ``image_file``.

    Args:
        pdb_path: Path to a local PDB file.
        pdb_code (str, optional): Identifier stored in the graph metadata.
            Defaults to '3nir' (the value that used to be hard-coded).
        image_file (str, optional): Path of the PNG file that is written.
            Defaults to "protein_graph.png".

    Returns:
        PIL.Image.Image: The rendered graph, fully loaded into memory.
    """
    df, _header = read_pdb_to_dataframe(pdb_path)
    process_df = process_dataframe(df)

    g = initialise_graph_with_metadata(
        protein_df=process_df,
        raw_pdb_df=df,  # Store this for traceability
        pdb_code=pdb_code,
        granularity='CA',  # Store this so we know what kind of graph we have
    )
    g = add_nodes_to_graph(g)
    g = _add_backbone_edges(g)

    p = plotly_protein_structure_graph(
        g,
        colour_edges_by="kind",
        colour_nodes_by="seq_position",
        label_node_ids=False,
        plot_title="Backbone Protein Graph",
        node_size_multiplier=1,
    )
    p.write_image(image_file, format='png')

    # Load the PNG image into a PIL image. Image.open is lazy, so force the pixel
    # data to be read now: the file on disk is overwritten by the next call.
    image = Image.open(image_file)
    image.load()
    return image


def _add_backbone_edges(G: "nx.Graph") -> "nx.Graph":
    """Add a 'backbone_bond' edge between sequence-adjacent residues of each chain.

    Nodes are taken in graph order per chain; two consecutive nodes are linked only
    when their ``residue_number`` attributes differ by exactly 1. The graph is
    modified in place and also returned.
    """
    for chain_id in G.graph["chain_ids"]:
        chain_residues = [
            (n, v) for n, v in G.nodes(data=True) if v["chain_id"] == chain_id
        ]
        # Pair every residue with its successor; the chain terminus has none.
        for (node, data), (next_node, next_data) in zip(
            chain_residues, chain_residues[1:]
        ):
            if abs(data["residue_number"] - next_data["residue_number"]) != 1:
                continue
            # Look edges up by node id (not by list position) so an existing
            # edge keeps its other kinds instead of being overwritten.
            if G.has_edge(node, next_node):
                G.edges[node, next_node].setdefault("kind", set()).add(
                    'backbone_bond'
                )
            else:
                G.add_edge(node, next_node, kind={'backbone_bond'})
    return G