Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
8afe788
adding support for multiple chains but need to make some more changes…
ftclark3 Apr 15, 2025
b233cd8
stopped resetting resnums for the _cleaned.pdb because we're now tell…
ftclark3 Apr 16, 2025
d02040c
added start_mask to the mask computation for electrostatics and the m…
ftclark3 Apr 16, 2025
aedecc8
fixed multichain sequence separation mask computation
ftclark3 Apr 18, 2025
680ad01
added local density test for multichain structures. need to add frust…
ftclark3 Apr 19, 2025
97805af
think everything is working now, but should still add some tests. Als…
ftclark3 Apr 22, 2025
c60e0f4
fixed variable name
ftclark3 Apr 23, 2025
5fef165
fixed chain parsing--should actually be the same for both prody and b…
ftclark3 Apr 23, 2025
5826d35
allow pdbfixer to renumber residues but keep chain ids
ftclark3 Apr 24, 2025
4dc945f
added comment on testing
ftclark3 Apr 24, 2025
63671f3
improved error handling
ftclark3 Apr 27, 2025
d356890
added tcl script write constraint to nearest neighbors for highly fru…
ftclark3 Apr 29, 2025
ad3ba59
quick and easy fix to allow frustration indices of nearest neighbor p…
ftclark3 Apr 30, 2025
804516d
fixed previous commit
ftclark3 Apr 30, 2025
be72f41
added capability to pass alternative minimum contact distance to writ…
ftclark3 May 1, 2025
270e172
added min_contact_distance attribute to AWSEM class
ftclark3 May 1, 2025
f7eff94
pointless commit
ftclark3 May 1, 2025
03221c7
Merge branch 'amyloid_atlas' of https://github.com/ftclark3/Frustrato…
ftclark3 May 1, 2025
b60a632
fixed simple error
ftclark3 May 1, 2025
2f52c27
Merge remote-tracking branch 'ftclark3/amyloid_atlas' into amyloid_atlas
ftclark3 May 1, 2025
584bc8f
added more helpful error message for something
ftclark3 May 1, 2025
3d2be27
modified AWSEM.compute_configurational_energies to save indices of pa…
ftclark3 May 9, 2025
bdf1c21
also now record 'orphaned' contacts whose pair energy is computed but…
ftclark3 May 9, 2025
ca078d7
logging state of the code that worked correctly (as far as I can tell…
ftclark3 May 16, 2025
8bb0a5c
improved and now it's passing all python frustratometer tests
ftclark3 May 17, 2025
d8c06d7
removed unnecessary prody selection dependency from draw_write_loop; …
ftclark3 May 17, 2025
a9d36df
added coordinates of midpoints between contacts represented by distan…
ftclark3 May 18, 2025
1ef9aa6
removed unnecessary parameters from write_tcl_script_v2
ftclark3 May 18, 2025
97ed1b8
added option to use separate k_burial
ftclark3 May 18, 2025
b87958f
improved handling of user input solid_mask and dashed_mask in write_t…
ftclark3 May 18, 2025
85bdc38
added option to approximate density affects of fibril above and below…
ftclark3 May 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
565 changes: 480 additions & 85 deletions frustratometer/classes/AWSEM.py

Large diffs are not rendered by default.

18 changes: 14 additions & 4 deletions frustratometer/classes/Frustratometer.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,10 @@ def scores(self):
"""
return frustration.compute_scores(self.potts_model)

def frustration(self, sequence:str = None, kind:str = 'singleresidue', mask:np.array = None, aa_freq:np.array = None, correction:int = 0) -> np.array:
def frustration(self, sequence:str = None, kind:str = 'singleresidue',
mask:np.array = None, aa_freq:np.array = None,
correction:int = 0, n_decoys=4000,
) -> np.array:
"""
Calculates frustration index values.

Expand All @@ -225,6 +228,8 @@ def frustration(self, sequence:str = None, kind:str = 'singleresidue', mask:np.a
A 2D Boolean array that determines which residue pairs should be considered in the energy computation. The mask should have dimensions (L, L), where L is the length of the sequence.
aa_freq: np.array
Array of frequencies of all 21 possible amino acids within sequence
n_decoys: int
Number of decoys

Returns
-------
Expand All @@ -243,8 +248,10 @@ def frustration(self, sequence:str = None, kind:str = 'singleresidue', mask:np.a
return frustration_values
elif kind in ['mutational', 'configurational', 'contact']:
if kind == 'configurational' and 'configurational_frustration' in dir(self):
#TODO: Correct this function for different aa_freq than WT
return self.configurational_frustration(None, correction)
#TODO: Correct this function for different aa_freq than WT --- think this has been done already?
frustration_values = self.configurational_frustration(aa_freq=aa_freq, correction=correction, n_decoys=n_decoys,)
assert np.all(frustration_values==frustration_values.T), f"configurational frustration matrix was not symmetric! Max difference: {np.max(frustration_values-frustration_values.T)}"
return frustration_values
if aa_freq is None:
aa_freq = self.contact_freq
frustration_values=frustration.compute_pair_frustration(decoy_fluctuation, aa_freq, correction)
Expand Down Expand Up @@ -321,7 +328,8 @@ def vmd(self, sequence: str = None, single:Union[str,np.array] = 'singleresidue'
tcl_script = frustration.write_tcl_script(self.pdb_file, self.chain, self.mask, self.distance_matrix, self.distance_cutoff,
-self.frustration(kind=single, sequence=sequence, aa_freq=aa_freq),
-self.frustration(kind=pair, sequence=sequence, aa_freq=aa_freq),
max_connections=max_connections, movie_name=movie_name, still_image_name=still_image_name)
max_connections=max_connections, movie_name=movie_name, still_image_name=still_image_name,
min_contact_distance=self.min_contact_distance)
frustration.call_vmd(self.pdb_file, tcl_script)

def view_pair_frustration(self, sequence:str = None, pair:str = 'mutational', aa_freq:np.array = None):
Expand All @@ -338,6 +346,8 @@ def view_pair_frustration(self, sequence:str = None, pair:str = 'mutational', aa
aa_freq: np.array
Array of frequencies of all 21 possible amino acids within sequence
"""
if type(self.chain) == list:
raise NotImplementedError("This function not supported for multichain systems")
import py3Dmol

if sequence is None:
Expand Down
43 changes: 28 additions & 15 deletions frustratometer/classes/Structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
class Structure:

def __init__(self, pdb_file: Union[Path,str], chain: Union[str,None]=None, seq_selection: str = None, aligned_sequence: str = None, filtered_aligned_sequence: str = None,
distance_matrix_method:str = 'CB', pdb_directory: Path = Path.cwd(), repair_pdb:bool = True)->object:
distance_matrix_method:str = 'CB', pdb_directory: Path = Path.cwd(), repair_pdb:bool = True, return_distance_midpoints:bool = False)->object:

"""
Generates structure object. Both PDB and CIF format files are accepted as input.
Expand Down Expand Up @@ -55,6 +55,11 @@ def __init__(self, pdb_file: Union[Path,str], chain: Union[str,None]=None, seq_s
If True, provided pdb file will be repaired with missing residues inserted and heteroatoms removed.
Note that a pdb file will be produced, regardless of input file format.

return_distance_midpoints: bool
Whether to return a matrix of the same shape as distance_matrix representing the same contacts as distance_matrix
that indicates the absolute coordinates of the midpoint between the pair of atoms. This helps us compute the pair distribution
functions of the different classes of contacts. So this matrix isn't really a matrix because each "element" has 3 channels: x, y, and z

Returns
-------
Structure object
Expand All @@ -77,7 +82,7 @@ def __init__(self, pdb_file: Union[Path,str], chain: Union[str,None]=None, seq_s

self.pdbID=pdb_file.stem
self.pdb_file=pdb_file
self.chain=chain
self.chain=chain # will be None if no chain supplied
self.distance_matrix_method=distance_matrix_method
self.filtered_aligned_sequence=filtered_aligned_sequence
self.aligned_sequence=aligned_sequence
Expand All @@ -88,18 +93,18 @@ def __init__(self, pdb_file: Union[Path,str], chain: Union[str,None]=None, seq_s
self.init_index_shift=0

if repair_pdb:
fixer=pdb.repair_pdb(pdb_file, chain, pdb_directory)
fixer=pdb.repair_pdb(pdb_file, chain, pdb_directory) # for this function, chain can be str or list (or None)
self.pdb_file=str(pdb_directory/f"{self.pdbID}_cleaned.pdb")

if ".pdb" in str(pdb_file) or repair_pdb==True:
self.structure = prody.parsePDB(str(self.pdb_file), chain=self.chain).select(f"protein")
self.structure = prody.parsePDB(str(self.pdb_file), chain=self.chain).select(f"protein") # for this function, chain should be a string containing the chain ids like "AB" or "A B"
else:
self.structure=prody.parseMMCIF(str(self.pdb_file),chain=self.chain).select(f"protein")
self.structure=prody.parseMMCIF(str(self.pdb_file),chain=self.chain).select(f"protein") # for this function, chain should be a string containing the chain ids like "AB" or "A B"
else:
assert len(self.seq_selection.replace("to"," to ").replace(":"," : ").split())>=4, "Please correctly input your residue selection"

if self.chain==None:
raise ValueError("Please provide a chain name")
raise ValueError("self.chain==None. Please provide chain name(s)")

self.init_index=int(self.seq_selection.replace("to"," to ").replace(":"," : ").split()[1].replace("`",""))
self.fin_index=int(self.seq_selection.replace("to"," to ").replace(":"," : ").split()[3].replace("`",""))
Expand All @@ -116,7 +121,7 @@ def __init__(self, pdb_file: Union[Path,str], chain: Union[str,None]=None, seq_s

with open(pdb_file,"r") as f:
for line in f:
if line.split()[0]=="ATOM" and line.split()[4+shift]==self.chain:
if line.split()[0]=="ATOM" and (line.split()[4+shift] in self.chain):
try:
res_index=''.join(i for i in line.split()[5+index_shift] if i.isdigit())
next_res_index=''.join(i for i in next(f).split()[5+index_shift] if i.isdigit())
Expand All @@ -133,27 +138,35 @@ def __init__(self, pdb_file: Union[Path,str], chain: Union[str,None]=None, seq_s
self.init_index_shift=self.init_index-self.pdb_init_index
self.fin_index_shift=self.fin_index-self.pdb_init_index+1
if repair_pdb:
fixer=pdb.repair_pdb(pdb_file, chain, pdb_directory)
fixer=pdb.repair_pdb(pdb_file, chain, pdb_directory) # for this function, chain can be str or list (or None)
self.pdb_file=f"{pdb_directory}/{self.pdbID}_cleaned.pdb"
self.select_gap_indices=[i for i in gap_indices if self.init_index<=i<=self.fin_index]
self.fin_index_shift-=len(self.select_gap_indices)
self.seq_selection=f"resnum `{self.init_index_shift+1}to{self.fin_index_shift}`"
#self.seq_selection=f"resnum `{self.init_index_shift+1}to{self.fin_index_shift}`" # WE'RE KEEPING IDs NOW SO DON'T WANT TO DO THIS!!!!
elif "resindex" in self.seq_selection:
self.init_index_shift=self.init_index
self.fin_index_shift=self.fin_index+1
if repair_pdb:
fixer=pdb.repair_pdb(pdb_file, chain, pdb_directory)
fixer=pdb.repair_pdb(pdb_file, chain, pdb_directory) # for this function, chain can be str or list (or None)
self.pdb_file=f"{pdb_directory}/{self.pdbID}_cleaned.pdb"
self.chain="A"
# self.chain="A" I don't know why we would want to change the chain ID here

if ".pdb" in str(pdb_file) or repair_pdb==True:
self.structure = prody.parsePDB(str(self.pdb_file), chain=self.chain).select(f"protein and {self.seq_selection}")
else:
self.structure=prody.parseMMCIF(str(self.pdb_file),chain=self.chain).select(f"protein and {self.seq_selection}")

self.sequence=pdb.get_sequence(self.pdb_file,self.chain)
self.distance_matrix=pdb.get_distance_matrix(pdb_file=self.pdb_file,chain=self.chain,
method=self.distance_matrix_method)
self.sequence, self.start_mask = pdb.get_sequence(self.pdb_file,self.chain,return_start_mask=True) # this function can now accept chain as list or string
if return_distance_midpoints:
self.distance_matrix, self.midpoint_matrix = pdb.get_distance_matrix(pdb_file=self.pdb_file,chain=self.chain, # for this function, chain should be a string containing the chain ids
method=self.distance_matrix_method, # separated by a space, like "A B"
return_distance_midpoints=True)
else:
self.distance_matrix=pdb.get_distance_matrix(pdb_file=self.pdb_file,chain=self.chain, # for this function, chain should be a string containing the chain ids
method=self.distance_matrix_method, # separated by a space, like "A B"
return_distance_midpoints=False)
self.midpoint_matrix = None

self.full_pdb_distance_matrix=self.distance_matrix

self.z_coordinates=self.structure.select('((name CB) or (resname GLY and name CA))').getCoords()
Expand All @@ -180,7 +193,7 @@ def __init__(self, pdb_file: Union[Path,str], chain: Union[str,None]=None, seq_s
else:
self.full_to_aligned_index_dict=dict(zip(range(self.init_index_shift,self.fin_index_shift+1), range(len(self.sequence))))
self.mapped_distance_matrix=self.distance_matrix

@classmethod
def full_pdb(cls,pdb_file: Union[Path,str], chain: Union[str,None]=None, aligned_sequence: str = None, filtered_aligned_sequence: str = None,
distance_matrix_method:str = 'CB', pdb_directory: Path = Path.cwd(), repair_pdb:bool = True):
Expand Down
Loading
Loading