"""Molecular topology: residues, atoms, bonds, and connectivity."""
import uuid
from typing import Optional, Union, List
import contextlib
import numpy as np
from .element import ELEMENTS
from .template import ResidueTemplate
[docs]
class Residue:
"""
A residue (e.g. amino acid) containing atoms and optional link to a :class:`Topology`.
"""
[docs]
def __init__(self, name: str, number: int, chain: str = "A", insertionCode: str = ""):
"""
Parameters
----------
name : str
Residue name (e.g. ``"ALA"``).
number : int
Residue sequence number.
chain : str, optional
Chain identifier.
insertionCode : str, optional
PDB insertion code.
"""
self._atoms = []
self._atoms_by_name = {}
self._name = name
self.number = number
self.chain = chain
self.insertionCode = insertionCode
self._topology = None
self._idx = -1
self._stdname = name
@property
def name(self) -> str:
return self._name
[docs]
def setName(self, newName: str):
self._name = newName
@property
def stdName(self) -> str:
return self._stdname
[docs]
def setStdName(self, newName: str):
self._stdname = newName
@property
def topology(self):
return self._topology
[docs]
def setTopology(self, topology):
self._topology = topology
[docs]
def hasTopology(self):
return self.topology is not None
@property
def idx(self) -> int:
if self.hasTopology():
return self._idx
else:
return -1
[docs]
def setIdx(self, newIdx: int):
self._idx = newIdx
[docs]
def addAtom(self, atom):
atom.setResidue(self)
self._atoms.append(atom)
if self.getAtomWithName(atom.name) is not None:
raise RuntimeError(f"Atom {atom.name} already exists.")
else:
self._atoms_by_name[atom.name] = atom
[docs]
def getAtomWithName(self, name: str):
if name in self._atoms_by_name:
return self._atoms_by_name[name]
else:
return None
[docs]
def removeAtom(self, atom):
atom.setResidue(None)
self._atoms.remove(atom)
del self._atoms_by_name[atom.name]
@property
def numAtoms(self):
return len(self._atoms)
@property
def atoms(self):
return self._atoms
def __repr__(self):
rep = f"<{self.__class__.__name__} {self.name}[{self.number}]; chain={self.chain}"
if self.insertionCode:
rep += f"; insertionCode={self.insertionCode}"
if self.idx != -1:
rep += f"; id={self.idx}"
rep += ">"
return rep
[docs]
def matchTemplate(self, template: ResidueTemplate, stdResidueName: bool = True):
"""
Match this residue to a template: align atom names (including alternates) and add template bonds to topology.
Parameters
----------
template : ResidueTemplate
Template to match (atom names and bonds).
stdResidueName : bool, optional
If True, set residue name to template name.
Returns
-------
bool
True if match succeeded.
"""
# match atoms
if len(self.atoms) != len(template.atoms):
return False
for atomName, atomInfo in template.atoms.items():
if self.getAtomWithName(atomName) is not None:
continue
else:
for altname in atomInfo['altNames']:
atomMatch = self.getAtomWithName(altname)
if atomMatch is not None:
# standardize atom name
del self._atoms_by_name[altname]
self._atoms_by_name[atomName] = atomMatch
atomMatch.setName(atomName)
break
else:
return False
#matched
if stdResidueName:
self.setName(template.name)
self.setStdName(template.name)
# match bonds
for bond in template.bonds:
if bond['atom1'].startswith("-"):
prevRes = self.topology.getResidueWithIdx(self.idx - 1)
atom1 = prevRes.getAtomWithName(bond['atom1'][1:])
atom2 = self.getAtomWithName(bond['atom2'])
elif bond['atom2'].startswith("-"):
prevRes = self.topology.getResidueWithIdx(self.idx - 1)
atom1 = self.getAtomWithName(bond['atom1'])
atom2 = prevRes.getAtomWithName(bond['atom2'][1:])
else:
atom1 = self.getAtomWithName(bond['atom1'])
atom2 = self.getAtomWithName(bond['atom2'])
bondToAdd = Bond(atom1, atom2, bond['order'])
self.topology.addBond(bondToAdd)
return True
[docs]
class Atom:
"""
An atom with element, position, optional residue/topology, bonds, and force-field type/class.
"""
[docs]
def __init__(self, name: str, element: str):
"""
Parameters
----------
name : str
Atom name (e.g. ``"CA"``).
element : str
Element symbol (key in :data:`ELEMENTS`).
"""
self._name = name
self.element = ELEMENTS[element]
self._residue = None
self._bonds = []
self._idx = -1
self._neighbors = []
self._atype = None
self._aclass = None
self._uuid = uuid.uuid4()
self._top_info = None
self._polarization_group = set([self])
self.xx = 0.0
self.xy = 0.0
self.xz = 0.0
[docs]
def setPosition(self, pos):
self.xx = pos[0]
self.xy = pos[1]
self.xz = pos[2]
@property
def polarizationGroup(self):
return self._polarization_group
[docs]
def setPolarizationGroup(self, setOfAtoms):
self._polarization_group = setOfAtoms
[docs]
def addToPolarizationGroup(self, other):
grp = self.polarizationGroup.union(other.polarizationGroup)
self._polarization_group = grp
other._polairzation_group = grp
def __hash__(self):
return hash(self._uuid)
@property
def mass(self) -> float:
return self.element.mass
@property
def symbol(self) -> str:
return self.element.symbol
@property
def atomicNum(self) -> int:
return self.element.atomicNum
@property
def atomType(self):
return self._atype
@property
def atomClass(self):
return self._aclass
[docs]
def setAtomType(self, atype):
self._atype = atype
[docs]
def setAtomClass(self, aclass):
self._aclass = aclass
@property
def parametrized(self):
return self.atomType is not None
@property
def name(self) -> str:
return self._name
[docs]
def setName(self, newname: str):
self._name = newname
def __repr__(self) -> str:
if self.atomType is not None:
typeStr = f" type={self.atomType}"
else:
typeStr = ""
return f"<Atom {self.name} [{self.idx}]{typeStr}>"
@property
def idx(self) -> int:
if self.hasTopology():
return self._idx
else:
return -1
[docs]
def setIdx(self, newIdx: int):
self._idx = newIdx
@property
def residue(self):
return self._residue
[docs]
def hasResidue(self):
return self.residue is not None
[docs]
def setResidue(self, residue: Union[None, Residue]):
self._residue = residue
@property
def topology(self):
if self.hasResidue():
return self.residue.topology
else:
return None
[docs]
def hasTopology(self):
if self.hasResidue():
return self.residue.hasTopology()
else:
return False
@property
def bonds(self):
return self._bonds
[docs]
def generateNeighbors(self):
neighbors = []
for bond in self.bonds:
if bond.atom1 is self:
neighbors.append(bond.atom2)
else:
neighbors.append(bond.atom1)
self._neighbors = neighbors
[docs]
def getNeighbors(self):
return self._neighbors
[docs]
def getHighOrderNeighbors(self, order: int):
assert order > 0
hoNeis = [set(self.getNeighbors())]
for _ in range(order - 1):
nblist = set()
for atom in hoNeis[-1]:
for nei in atom.getNeighbors():
nblist.add(nei)
for prevList in hoNeis:
nblist -= prevList
nblist -= set([self])
hoNeis.append(nblist)
hoNeisResult = []
for i, atoms in enumerate(hoNeis):
for atom in atoms:
hoNeisResult.append((atom, i+1))
return hoNeisResult
[docs]
def generateTopologicalInfo(self, maxConnect: int):
paths = {i+1: set() for i in range(maxConnect)}
paths[1] = set(BondedAtoms([self, nei]) for nei in self.getNeighbors())
for i in range(2, maxConnect + 1):
for path in paths[i-1]:
for atom in path[-1].getNeighbors():
if not (atom is path[-2]):
paths[i].add(path.extend([atom]))
nbList = {}
for numConnect, path in paths.items():
for p in path:
if p[-1] not in nbList:
nbList[p[-1]] = numConnect
self._top_info = {
"bondedAtoms": nbList,
"pathToBondedAtoms": paths
}
@property
def pathsToBondedAtoms(self):
return self.getTopologicalInfo()['pathToBondedAtoms']
@property
def bondedAtoms(self):
return self.getTopologicalInfo()['bondedAtoms']
[docs]
def getTopologicalInfo(self):
if self._top_info is None:
raise RuntimeError("Topological Information is None")
else:
return self._top_info
[docs]
def addBond(self, bond):
if (bond.atom1 is not self) and (bond.atom2 is not self):
raise RuntimeError("The bond to add does not contain this atom")
elif (bond.atom1 is self) and (bond.atom2 is self):
raise RuntimeError("Atoms in a bond are the same")
self._bonds.append(bond)
[docs]
def pruneBonds(self):
rmlist = []
for i, bo in enumerate(self.bonds):
if not bo.hasTopology():
rmlist.append(i)
rmlist.reverse()
for i in rmlist:
del self._bonds[i]
self.generateNeighbors()
[docs]
class Bond:
"""
A bond between two atoms with a bond order.
"""
[docs]
def __init__(self, atom1: Atom, atom2: Atom, order: float):
"""
Parameters
----------
atom1, atom2 : Atom
Bonded atoms.
order : float
Bond order (e.g. 1.0, 2.0).
"""
self.atom1 = atom1
self.atom2 = atom2
self.order = order
self._idx = -1
[docs]
def hasTopology(self) -> bool:
return self.atom1.hasTopology() and self.atom2.hasTopology()
[docs]
def require_editable(func):
"""Decorator: raise if topology is not editable."""
def new_func(self, *args, **kwargs):
if not self.editable:
raise RuntimeError("Topolgy is not editable")
else:
return func(self, *args, **kwargs)
return new_func
[docs]
def require_noneditable(func):
"""Decorator: raise if topology is editable."""
def new_func(self, *args, **kwargs):
if self.editable:
raise RuntimeError("Topology is editable")
else:
return func(self, *args, **kwargs)
return new_func
[docs]
class BondedAtoms:
"""
Ordered list of bonded atoms, with hash/equality invariant under reversal (for undirected paths).
"""
[docs]
def __init__(self, atoms: List[Atom]):
"""
Parameters
----------
atoms : List[Atom]
Ordered sequence of bonded atoms.
"""
self.atoms = atoms if isinstance(atoms, list) else list(atoms)
tuple1 = tuple(atom for atom in self.atoms)
tuple2 = tuple(self.atoms[i-1] for i in range(len(self), 0, -1))
hash1 = hash(tuple1)
hash2 = hash(tuple2)
self._hash = min(hash1, hash2)
def __hash__(self):
return self._hash
def __len__(self):
return len(self.atoms)
def __eq__(self, other):
if self.atoms == other.atoms:
return True
elif self.atoms == [other.atoms[i-1] for i in range(len(other.atoms), 0, -1)]:
return True
else:
return False
def __getitem__(self, idx: int):
return self.atoms[idx]
[docs]
def extend(self, atoms):
return BondedAtoms(self.atoms + atoms)
def __repr__(self):
return repr(self.atoms)
[docs]
class Topology:
"""
Molecular topology: list of residues, bonds, and connectivity up to :attr:`_maxConnect` bonds.
"""
[docs]
def __init__(self, name: str, maxConnect: int = 5):
"""
Parameters
----------
name : str
Topology name (e.g. from PDB).
maxConnect : int, optional
Maximum bond connectivity distance for generating bonded-atom lists.
"""
self._residues = []
self._bonds = []
self.name = name
self._editable = False
assert maxConnect > 0, "maxConnect must be positive"
self._maxConnect = maxConnect
self._topInfo = None
@property
def editable(self):
return self._editable
[docs]
@contextlib.contextmanager
def setEditable(self):
self._topInfo = None
self._editable = True
yield
self.assignIndex()
self.pruneBonds()
self.generateTopologicalInfo()
self._editable = False
[docs]
def assignIndex(self):
atomId = 0
for resId, res in enumerate(self.residues):
res.setIdx(resId)
for atom in res.atoms:
atom.setIdx(atomId)
atomId += 1
[docs]
def pruneBonds(self):
rmlist = []
for i, bo in enumerate(self.bonds):
if not bo.hasTopology():
rmlist.append(i)
rmlist.reverse()
for i in rmlist:
del self._bonds[i]
for atom in self.atoms():
atom.pruneBonds()
@property
def residues(self) -> List[Residue]:
return self._residues
@property
def numResidues(self):
return len(self._residues)
[docs]
def getResidueWithIdx(self, idx: int) -> Residue:
return self.residues[idx]
[docs]
@require_editable
def addResidue(self, residue: Residue):
residue.setIdx(self.numResidues)
residue.setTopology(self)
self._residues.append(residue)
[docs]
@require_editable
def insertResidue(self, residue: Residue, resId: int):
residue.setIdx(resId)
residue.setTopology(self)
for res in self.residues[resId:]:
res.setIdx(res.id + 1)
self._residues.insert(resId, residue)
[docs]
@require_editable
def removeResidue(self, resId: int):
residue = self.getResidueWithIdx(resId)
residue.setTopology(None)
for res in self.residues[resId:]:
res.setIdx(res.id - 1)
self._residues.remove(residue)
@property
def numAtoms(self):
return sum([res.numAtoms for res in self.residues])
@property
def numBonds(self):
return len(self.bonds)
[docs]
def atoms(self) -> Atom:
for res in self.residues:
for atom in res.atoms:
yield atom
[docs]
def getAtomWithIdx(self, idx: int) -> Atom:
if idx >= self.numAtoms:
raise IndexError("Index out of range")
else:
resId = 0
while idx >= self.residues[resId].numAtoms:
resId += 1
idx -= self.residues[resId].numAtoms
return self.residues[resId].atoms[idx]
[docs]
@require_editable
def addAtomToResidue(self, atom: Atom, resId: int):
res = self.getResidueWithIdx(resId)
res.addAtom(atom)
def __repr__(self):
rep = f"<Topology[{self.name}]; {self.numAtoms} atoms; {self.numBonds} bonds>"
return rep
@property
def bonds(self):
return self._bonds
[docs]
@require_editable
def addBond(self, bond: Bond):
# TODO (Eric): Here we need to first check if there has already been a bond
# between these two atoms
bond.atom1.addBond(bond)
bond.atom2.addBond(bond)
self._bonds.append(bond)
@property
def connTable(self):
return self._topInfo['connectivity']
@property
def bondedAtoms(self):
return self._topInfo['bondedAtoms']
[docs]
def generateTopologicalInfo(self):
topInfo = {
"connectivity": {},
"bondedAtoms": {i+1: set() for i in range(self._maxConnect)},
}
for atom in self.atoms():
atom.generateTopologicalInfo(self._maxConnect)
atomTopInfo = atom.getTopologicalInfo()
for numConnect, paths in atomTopInfo['pathToBondedAtoms'].items():
topInfo['bondedAtoms'][numConnect] = topInfo['bondedAtoms'][numConnect].union(paths)
for nei, numConnect in atom.bondedAtoms.items():
atomPair = BondedAtoms([atom, nei])
topInfo['connectivity'][atomPair] = numConnect
conn = []
for p, numConnect in topInfo['connectivity'].items():
if p.atoms[0].idx < p.atoms[1].idx:
conn.append([p.atoms[0].idx, p.atoms[1].idx, numConnect])
else:
conn.append([p.atoms[1].idx, p.atoms[0].idx, numConnect])
conn.sort(key=lambda x: (x[-1], x[0], x[1]))
conn = np.array(conn, dtype=int)
topInfo["connectivity"] = conn
self._topInfo = topInfo
[docs]
def matchTemplates(self):
from .template import TEMPLATES
resCYX = []
for residue in self.residues:
# TODO (Eric): Add N/C terminals in the template.xml file
if residue.name not in TEMPLATES:
raise KeyError(f"ResidueTemplate {residue.name} not defined")
succ = residue.matchTemplate(TEMPLATES[residue.name])
if not succ:
succ = residue.matchTemplate(TEMPLATES[f'N{residue.name}'], stdResidueName=False)
if not succ:
succ = residue.matchTemplate(TEMPLATES[f'C{residue.name}'], stdResidueName=False)
if not succ:
protonatedStates = [
"HIS", "HIP", "HID", "HIE",
"ASP", "ASH",
"GLU", "GLH",
"LYS", "LYD",
"CYS", "CYD",
"TYR", "TYD"
]
for key in protonatedStates:
succ = residue.matchTemplate(TEMPLATES[key])
if succ:
break
if not succ:
raise RuntimeError(f"Fail to match template for residue #{residue.idx} {residue.name}")
if residue.name == "CYX":
resCYX.append(residue.idx)
if len(resCYX) > 0:
self.processDisulfideBond()
[docs]
def processDisulfideBond(self):
raise NotImplementedError()
@property
def coordinates(self) -> np.ndarray:
return np.array([[at.xx, at.xy, at.xz] for at in self.atoms()])