Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,6 @@ ignore = E203, E266, E501, E711, E741, W503, B902, B009, B010
max-line-length = 80
max-complexity = 18
select = B,C,E,F,W,T4,B9

per-file-ignores =
tmol/io/pose_stack_from_atomworks.py:E201,E231,E241
17 changes: 5 additions & 12 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,26 +1,19 @@
repos:
- repo: local
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v21.1.8
hooks:
- id: clang-format
name: clang-format
args: ['-i']
description: 'Format files with ClangFormat.'
entry: clang-format
language: system
files: \.(c|cc|cpp|h|hh|cu|cuh)$
exclude: >
(?x)^(
tmol/extern/.* |
dev/.*
)$
- repo: https://github.com/psf/black
rev: 24.1.1
hooks:
- id: black
name: black
description: 'Black: The uncompromising Python code formatter'
entry: black
language: python
language_version: python3.11
require_serial: true
types: [python]
exclude: >
(?x)^(
tmol/extern/.* |
Expand Down
1 change: 0 additions & 1 deletion tmol/chemical/restypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from tmol.chemical.ideal_coords import build_coords_from_icoors
from tmol.chemical.all_bonds import bonds_and_bond_ranges


AtomIndex = NewType("AtomIndex", int)
ConnectionIndex = NewType("ConnectionIndex", int)
BondCount = NewType("BondCount", int)
Expand Down
12 changes: 5 additions & 7 deletions tmol/extern/openfold/residue_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,13 +438,11 @@


@functools.lru_cache(maxsize=None)
def load_stereo_chemical_props() -> (
Tuple[
Mapping[str, List[Bond]],
Mapping[str, List[Bond]],
Mapping[str, List[BondAngle]],
]
):
def load_stereo_chemical_props() -> Tuple[
Mapping[str, List[Bond]],
Mapping[str, List[Bond]],
Mapping[str, List[BondAngle]],
]:
"""Load stereo_chemical_props.txt into a nice structure.

Load literature values for bond lengths and bond angles and translate
Expand Down
8 changes: 6 additions & 2 deletions tmol/extern/pysmiles/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,9 @@

from .read_smiles import read_smiles
from .write_smiles import write_smiles
from .smiles_helper import (fill_valence, add_explicit_hydrogens,
remove_explicit_hydrogens, correct_aromatic_rings)
from .smiles_helper import (
fill_valence,
add_explicit_hydrogens,
remove_explicit_hydrogens,
correct_aromatic_rings,
)
130 changes: 78 additions & 52 deletions tmol/extern/pysmiles/read_smiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,22 @@

import networkx as nx

from .smiles_helper import (add_explicit_hydrogens, remove_explicit_hydrogens,
parse_atom, fill_valence, mark_aromatic_edges,
mark_aromatic_atoms)
from .smiles_helper import (
add_explicit_hydrogens,
remove_explicit_hydrogens,
parse_atom,
fill_valence,
mark_aromatic_edges,
mark_aromatic_atoms,
)

LOGGER = logging.getLogger(__name__)


@enum.unique
class TokenType(enum.Enum):
"""Possible SMILES token types"""

ATOM = 1
BOND_TYPE = 2
BRANCH_START = 3
Expand All @@ -53,47 +60,52 @@ def _tokenize(smiles):
tuple(TokenType, str)
A tuple describing the type of token and the associated data
"""
organic_subset = 'B C N O P S F Cl Br I * b c n o s p'.split()
organic_subset = "B C N O P S F Cl Br I * b c n o s p".split()
smiles = iter(smiles)
token = ''
token = ""
peek = None
while True:
char = peek if peek else next(smiles, '')
char = peek if peek else next(smiles, "")
peek = None
if not char:
break
if char == '[':
if char == "[":
token = char
for char in smiles:
token += char
if char == ']':
if char == "]":
break
yield TokenType.ATOM, token
elif char in organic_subset:
peek = next(smiles, '')
peek = next(smiles, "")
if char + peek in organic_subset:
yield TokenType.ATOM, char + peek
peek = None
else:
yield TokenType.ATOM, char
elif char in '-=#$:.':
elif char in "-=#$:.":
yield TokenType.BOND_TYPE, char
elif char == '(':
yield TokenType.BRANCH_START, '('
elif char == ')':
yield TokenType.BRANCH_END, ')'
elif char == '%':
elif char == "(":
yield TokenType.BRANCH_START, "("
elif char == ")":
yield TokenType.BRANCH_END, ")"
elif char == "%":
# If smiles is too short this will raise a ValueError, which is
# (slightly) prettier than a StopIteration.
yield TokenType.RING_NUM, int(next(smiles, '') + next(smiles, ''))
elif char in '/\\':
yield TokenType.RING_NUM, int(next(smiles, "") + next(smiles, ""))
elif char in "/\\":
yield TokenType.EZSTEREO, char
elif char.isdigit():
yield TokenType.RING_NUM, int(char)


def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True,
reinterpret_aromatic=True, do_fill_valence=True):
def read_smiles(
smiles,
explicit_hydrogen=False,
zero_order_bonds=True,
reinterpret_aromatic=True,
do_fill_valence=True,
):
"""
Parses a SMILES string.

Expand All @@ -118,7 +130,7 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True,
information.
Edges will have an 'order'.
"""
bond_to_order = {'-': 1, '=': 2, '#': 3, '$': 4, ':': 1.5, '.': 0}
bond_to_order = {"-": 1, "=": 2, "#": 3, "$": 4, ":": 1.5, ".": 0}
mol = nx.Graph()
anchor = None
idx = 0
Expand All @@ -143,8 +155,10 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True,
anchor = branches.pop()
elif tokentype == TokenType.BOND_TYPE:
if next_bond is not None:
raise ValueError('Previous bond (order {}) not used. '
'Overwritten by "{}"'.format(next_bond, token))
raise ValueError(
"Previous bond (order {}) not used. "
'Overwritten by "{}"'.format(next_bond, token)
)
next_bond = bond_to_order[token]
elif tokentype == TokenType.RING_NUM:
if token in ring_nums:
Expand All @@ -156,30 +170,39 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True,
elif next_bond is None:
next_bond = order
elif next_bond != order: # Both are not None
raise ValueError('Conflicting bond orders for ring '
'between indices {}'.format(token))
raise ValueError(
"Conflicting bond orders for ring "
"between indices {}".format(token)
)
# idx is the index of the *next* atom we're adding. So: -1.
if mol.has_edge(idx-1, jdx):
raise ValueError('Edge specified by marker {} already '
'exists'.format(token))
if idx-1 == jdx:
raise ValueError('Marker {} specifies a bond between an '
'atom and itself'.format(token))
if mol.has_edge(idx - 1, jdx):
raise ValueError(
"Edge specified by marker {} already " "exists".format(token)
)
if idx - 1 == jdx:
raise ValueError(
"Marker {} specifies a bond between an "
"atom and itself".format(token)
)
if next_bond or zero_order_bonds:
mol.add_edge(idx - 1, jdx, order=next_bond)
next_bond = None
del ring_nums[token]
else:
if idx == 0:
raise ValueError("Can't have a marker ({}) before an atom"
"".format(token))
raise ValueError(
"Can't have a marker ({}) before an atom" "".format(token)
)
# idx is the index of the *next* atom we're adding. So: -1.
ring_nums[token] = (idx - 1, next_bond)
next_bond = None
elif tokentype == TokenType.EZSTEREO:
LOGGER.warning('E/Z stereochemical information, which is specified by "%s", will be discarded', token)
LOGGER.warning(
'E/Z stereochemical information, which is specified by "%s", will be discarded',
token,
)
if ring_nums:
raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys())))
raise KeyError("Unmatched ring indices {}".format(list(ring_nums.keys())))

# Time to deal with aromaticity. This is a mess, because it's not super
# clear what aromaticity information has been provided, and what should be
Expand All @@ -191,22 +214,25 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True,
ring_idxs.update(cycle)
non_ring_idxs = set(mol.nodes) - ring_idxs
for n_idx in non_ring_idxs:
if mol.nodes[n_idx].get('aromatic', False):
raise ValueError("You specified an aromatic atom outside of a"
" ring. This is impossible")

if mol.nodes[n_idx].get("aromatic", False):
raise ValueError(
"You specified an aromatic atom outside of a"
" ring. This is impossible"
)

mark_aromatic_edges(mol)
if (do_fill_valence):
if do_fill_valence:
fill_valence(mol)

if reinterpret_aromatic:
mark_aromatic_atoms(mol)
mark_aromatic_edges(mol)
for idx, jdx in mol.edges:
if ((not mol.nodes[idx].get('aromatic', False) or
not mol.nodes[jdx].get('aromatic', False))
and mol.edges[idx, jdx].get('order', 1) == 1.5):
mol.edges[idx, jdx]['order'] = 1
if (
not mol.nodes[idx].get("aromatic", False)
or not mol.nodes[jdx].get("aromatic", False)
) and mol.edges[idx, jdx].get("order", 1) == 1.5:
mol.edges[idx, jdx]["order"] = 1

if explicit_hydrogen:
add_explicit_hydrogens(mol)
Expand All @@ -216,18 +242,18 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True,
# FD: add autogenerated atom name
counts_by_elt = {}
for n_idx in list(mol.nodes):
tag = '*'
if ('element' in mol.nodes[n_idx]):
tag = mol.nodes[n_idx]['element']
if (tag not in counts_by_elt):
if (tag[0] == '{' and tag[-1] == '}'):
counts_by_elt[tag] = ''
tag = "*"
if "element" in mol.nodes[n_idx]:
tag = mol.nodes[n_idx]["element"]
if tag not in counts_by_elt:
if tag[0] == "{" and tag[-1] == "}":
counts_by_elt[tag] = ""
else:
counts_by_elt[tag] = 1
else:
if (counts_by_elt[tag]==''):
raise ValueError('Duplicate connection {} specified!'.format(tag))
if counts_by_elt[tag] == "":
raise ValueError("Duplicate connection {} specified!".format(tag))
counts_by_elt[tag] += 1
mol.nodes[n_idx]['name'] = tag+str(counts_by_elt[tag])
mol.nodes[n_idx]["name"] = tag + str(counts_by_elt[tag])

return mol
Loading