import os
import json
import copy
import shutil
import subprocess
from pathlib import Path
import parmed
import ties.helpers
from ties.generator import get_atoms_bonds_from_mol2
from ties.topology_superimposer import superimpose_topologies
import ties.config
import ties.ligand
[docs]class Pair():
"""
Facilitates the creation of morphs.
It offers functionality related to a pair of ligands (a transformation).
:param ligA: The ligand to be used as the starting state for the transformation.
:type ligA: :class:`Ligand` or string
:param ligZ: The ligand to be used as the ending point of the transformation.
:type ligZ: :class:`Ligand` or string
:param config: The configuration object holding all settings.
:type config: :class:`Config`
"""
[docs] def __init__(self, ligA, ligZ, config=None, **kwargs):
"""
Please use the Config class for the documentation of the possible kwargs.
Each kwarg is passed to the config class.
fixme - list all relevant kwargs here
param ligand_net_charge: integer, net charge of each ligand (has to be the same)
"""
# create a new config if it is not provided
self.config = ties.config.Config() if config is None else config
# channel all config variables to the config class
self.config.set_configs(**kwargs)
# tell Config about the ligands if necessary
if self.config.ligands is None:
self.config.ligands = [ligA, ligZ]
# create ligands if they're just paths
if isinstance(ligA, ties.ligand.Ligand):
self.ligA = ligA
else:
self.ligA = ties.ligand.Ligand(ligA, self.config)
if isinstance(ligZ, ties.ligand.Ligand):
self.ligZ = ligZ
else:
self.ligZ = ties.ligand.Ligand(ligZ, self.config)
# initialise the handles to the molecules that morph
self.current_ligA = self.ligA.current
self.current_ligZ = self.ligZ.current
self.internal_name = f'{self.ligA.internal_name}_{self.ligZ.internal_name}'
self.mol2 = None
self.pdb = None
self.summary = None
self.suptop = None
self.mda_l1 = None
self.mda_l2 = None
self.distance = None
def __repr__(self):
if self.distance is None:
return self.internal_name
return f'{self.internal_name} ({self.distance:.2f})'
def set_distance(self, value):
self.distance = value
[docs] def superimpose(self, **kwargs):
"""
Please see :class:`Config` class for the documentation of kwargs. The passed kwargs overwrite the config
object passed in the constructor.
fixme - list all relevant kwargs here
:param use_element_in_superimposition: bool whether the superimposition should rely on the element initially,
before refining the results with a more specific check of the atom type.
:param manually_matched_atom_pairs:
:param manually_mismatched_pairs:
:param redistribute_q_over_unmatched:
"""
self.config.set_configs(**kwargs)
# use ParmEd to load the files
# fixme - move this to the Morph class instead of this place,
# fixme - should not squash all messsages. For example, wrong type file should not be squashed
leftlig_atoms, leftlig_bonds, rightlig_atoms, rightlig_bonds, parmed_ligA, parmed_ligZ = \
get_atoms_bonds_from_mol2(self.current_ligA, self.current_ligZ,
use_general_type=self.config.use_element_in_superimposition)
# fixme - manual match should be improved here and allow for a sensible format.
# in case the atoms were renamed, pass the names via the map renaming map
# TODO
# ligZ_old_new_atomname_map
new_mismatch_names = []
for a, z in self.config.manually_mismatched_pairs:
new_names = (self.ligA.rev_renaming_map[a], self.ligZ.rev_renaming_map[z])
print(f'Selecting mismatching atoms. The mismatch {(a, z)}) was renamed to {new_names}')
new_mismatch_names.append(new_names)
# assign
# fixme - Ideally I would reuse the ParmEd data for this,
# ParmEd can use bonds if they are present - fixme
# map atom IDs to their objects
ligand1_nodes = {}
for atomNode in leftlig_atoms:
ligand1_nodes[atomNode.id] = atomNode
# link them together
for nfrom, nto, btype in leftlig_bonds:
ligand1_nodes[nfrom].bind_to(ligand1_nodes[nto], btype)
ligand2_nodes = {}
for atomNode in rightlig_atoms:
ligand2_nodes[atomNode.id] = atomNode
for nfrom, nto, btype in rightlig_bonds:
ligand2_nodes[nfrom].bind_to(ligand2_nodes[nto], btype)
# fixme - this should be moved out of here,
# ideally there would be a function in the main interface for this
manual_match = [] if self.config.manually_matched_atom_pairs is None else self.config.manually_matched_atom_pairs
starting_node_pairs = []
for l_aname, r_aname in manual_match:
# find the starting node pairs, ie the manually matched pair(s)
found_left_node = None
for id, ln in ligand1_nodes.items():
if l_aname == ln.name:
found_left_node = ln
if found_left_node is None:
raise ValueError(f'Manual Matching: could not find an atom name: "{l_aname}" in the left molecule')
found_right_node = None
for id, ln in ligand2_nodes.items():
if r_aname == ln.name:
found_right_node = ln
if found_right_node is None:
raise ValueError(f'Manual Matching: could not find an atom name: "{r_aname}" in the right molecule')
starting_node_pairs.append([found_left_node, found_right_node])
if starting_node_pairs:
print('Starting nodes will be used:', starting_node_pairs)
# fixme - simplify to only take the ParmEd as input
suptops = superimpose_topologies(ligand1_nodes.values(), ligand2_nodes.values(),
disjoint_components=self.config.allow_disjoint_components,
net_charge_filter=True,
pair_charge_atol=self.config.atom_pair_q_atol,
net_charge_threshold=self.config.net_charge_threshold,
redistribute_charges_over_unmatched=self.config.redistribute_q_over_unmatched,
ignore_charges_completely=self.config.ignore_charges_completely,
ignore_bond_types=True,
ignore_coords=False,
align_molecules=self.config.align_molecules_using_mcs,
use_general_type=self.config.use_element_in_superimposition,
# fixme - not the same ... use_element_in_superimposition,
use_only_element=False,
check_atom_names_unique=True, # fixme - remove?
starting_pairs_heuristics=True, # fixme - add to config
force_mismatch=new_mismatch_names,
starting_node_pairs=starting_node_pairs,
parmed_ligA=parmed_ligA, parmed_ligZ=parmed_ligZ,
starting_pair_seed=self.config.superimposition_starting_pair)
assert len(suptops) == 1
self.set_suptop(suptops[0], parmed_ligA, parmed_ligZ)
# attach the used config to the suptop
suptops[0].config = self.config
# attach the morph to the suptop
suptops[0].morph = self
return suptops[0]
[docs] def set_suptop(self, suptop, parmed_ligA, parmed_ligZ):
"""
Attach a SuperimposedTopology object along with the ParmEd objects for the ligA and ligZ.
:param suptop: :class:`SuperimposedTopology`
:param parmed_ligA: An ParmEd for the ligA
:param parmed_ligZ: An ParmEd for the ligZ
"""
self.suptop = suptop
self.parmed_ligA = parmed_ligA
self.parmed_ligZ = parmed_ligZ
[docs] def make_atom_names_unique(self, out_ligA_filename=None, out_ligZ_filename=None, save=True):
"""
Ensure that each that atoms across the two ligands have unique names.
While renaming atoms, start with the element (C, N, ..) followed by
the count so far (e.g. C1, C2, N1).
Resnames are set to "INI" and "FIN", this is useful for the hybrid dual topology.
:param out_ligA_filename: The new filenames for the ligands with renamed atoms. If None, the default
naming convention is used.
:type out_ligA_filename: string or bool
:param out_ligZ_filename: The new filenames for the ligands with renamed atoms. If None, the default
naming convention is used.
:type out_ligZ_filename: string or bool
:param save: Whether to save to the disk the ligands after renaming the atoms
:type save: bool
"""
# The A ligand is a template for the renaming
self.ligA.correct_atom_names()
# load both ligands
left = parmed.load_file(str(self.ligA.current), structure=True)
right = parmed.load_file(str(self.ligZ.current), structure=True)
common_atom_names = {a.name for a in right.atoms}.intersection({a.name for a in left.atoms})
atom_names_overlap = len(common_atom_names) > 0
if atom_names_overlap or not self.ligZ.are_atom_names_correct():
print(f'Renaming right molecule ({self.ligZ.internal_name}) atom names are either reused or do not follow the correct format. ')
if atom_names_overlap:
print(f'Common atom names: {common_atom_names}')
name_counter_L_nodes = ties.helpers.get_atom_names_counter(left.atoms)
_, renaming_map = ties.helpers.get_new_atom_names(right.atoms, name_counter=name_counter_L_nodes)
self.ligZ.renaming_map = renaming_map
# rename the residue names to INI and FIN
for atom in left.atoms:
atom.residue = 'INI'
for atom in right.atoms:
atom.residue = 'FIN'
# fixme - instead of using the save parameter, have a method pair.save(filename1, filename2) and
# call it when necessary.
# prepare the destination directory
if not save:
return
if out_ligA_filename is None:
cwd = self.config.pair_unique_atom_names_dir / f'{self.ligA.internal_name}_{self.ligZ.internal_name}'
cwd.mkdir(parents=True, exist_ok=True)
self.current_ligA = cwd / (self.ligA.internal_name + '.mol2')
self.current_ligZ = cwd / (self.ligZ.internal_name + '.mol2')
else:
self.current_ligA = out_ligA_filename
self.current_ligZ = out_ligZ_filename
# save the updated atom names
left.save(str(self.current_ligA))
right.save(str(self.current_ligZ))
[docs] def check_json_file(self):
"""
Performance optimisation in case TIES is rerun again. Return the first matched atoms which
can be used as a seed for the superimposition.
:return: If the superimposition was computed before, and the .json file is available,
gets one of the matched atoms.
:rtype: [(ligA_atom, ligZ_atom)]
"""
matching_json = self.config.workdir / f'fep_{self.ligA.internal_name}_{self.ligZ.internal_name}.json'
if not matching_json.is_file():
return None
return [list(json.load(matching_json.open())['matched'].items())[0]]
[docs] def merge_frcmod_files(self, ligcom=None):
"""
Merges the .frcmod files generated for each ligand separately, simply by adding them together.
The duplication has no effect on the final generated topology parm7 top file.
We are also testing the .frcmod here with the user's force field in order to check if
the merge works correctly.
:param ligcom: Either "lig" if only ligands are present, or "com" if the complex is present.
Helps with the directory structure.
:type ligcom: string "lig" or "com"
"""
ambertools_tleap = self.config.ambertools_tleap
ambertools_script_dir = self.config.ambertools_script_dir
if self.config.protein is None:
protein_ff = None
else:
protein_ff = self.config.protein_ff
ligand_ff = self.config.ligand_ff
frcmod_info1 = ties.helpers.parse_frcmod_sections(self.ligA.frcmod)
frcmod_info2 = ties.helpers.parse_frcmod_sections(self.ligZ.frcmod)
cwd = self.config.workdir
# fixme: use the provided cwd here, otherwise this will not work if the wrong cwd is used
# have some conf module instead of this
if ligcom:
morph_frcmod = cwd / f'ties-{self.ligA.internal_name}-{self.ligZ.internal_name}' / ligcom / 'build' / 'hybrid.frcmod'
else:
# fixme - clean up
morph_frcmod = cwd / f'ties-{self.ligA.internal_name}-{self.ligZ.internal_name}' / 'build' / 'hybrid.frcmod'
morph_frcmod.parent.mkdir(parents=True, exist_ok=True)
with open(morph_frcmod, 'w') as FOUT:
FOUT.write('merged frcmod\n')
for section in ['MASS', 'BOND', 'ANGLE',
'DIHE', 'IMPROPER', 'NONBON']:
section_lines = frcmod_info1[section] + frcmod_info2[section]
FOUT.write('{0:s}\n'.format(section))
for line in section_lines:
FOUT.write('{0:s}'.format(line))
FOUT.write('\n')
FOUT.write('\n\n')
# this is our current frcmod file
self.frcmod = morph_frcmod
# as part of the .frcmod writing
# insert dummy angles/dihedrals if a morph .frcmod requires
# new terms between the appearing/disappearing atoms
# this is a trick to make sure tleap has everything it needs to generate the .top file
correction_introduced = self._check_hybrid_frcmod(ambertools_tleap, ambertools_script_dir, protein_ff, ligand_ff)
if correction_introduced:
# move the .frcmod which turned out to be insufficient according to the test
shutil.move(morph_frcmod, str(self.frcmod) + '.uncorrected' )
# now copy in place the corrected version
shutil.copy(self.frcmod, morph_frcmod)
def _check_hybrid_frcmod(self, ambertools_tleap, ambertools_script_dir, protein_ff, ligand_ff):
"""
Previous code: https://github.com/UCL-CCS/BacScratch/blob/master/agastya/ties_hybrid_topology_creator/output.py
Check that the output library can be used to create a valid amber topology.
Add missing terms with no force to pass the topology creation.
Returns the corrected .frcmod content, otherwise throws an exception.
"""
# prepare the working directory
cwd = self.config.pair_morphfrmocs_tests_dir / self.internal_name
if not cwd.is_dir():
cwd.mkdir(parents=True, exist_ok=True)
if protein_ff is None:
protein_ff = '# no protein ff needed'
else:
protein_ff = 'source ' + protein_ff
# prepare the superimposed .mol2 file if needed
if not hasattr(self.suptop, 'mol2'):
self.suptop.write_mol2()
# prepare tleap input
leap_in_test = 'leap_test_morph.in'
leap_in_conf = open(ambertools_script_dir / leap_in_test).read()
open(cwd / leap_in_test, 'w').write(leap_in_conf.format(
mol2=os.path.relpath(self.suptop.mol2, cwd),
frcmod=os.path.relpath(self.frcmod, cwd),
protein_ff=protein_ff, ligand_ff=ligand_ff))
# attempt generating the .top
print('Create amber7 topology .top')
try:
tleap_process = subprocess.run([ambertools_tleap, '-s', '-f', leap_in_test],
cwd=cwd, text=True, timeout=20,
capture_output=True, check=True)
except subprocess.CalledProcessError as err:
raise Exception(
f'ERROR: Testing the topology with tleap broke. Return code: {err.returncode} '
f'ERROR: Ambertools output: {err.stdout}') from err
# save stdout and stderr
open(cwd / 'tleap_scan_check.log', 'w').write(tleap_process.stdout + tleap_process.stderr)
if 'Errors = 0' in tleap_process.stdout:
print('Test hybrid .frcmod: OK, no dummy angle/dihedrals inserted.')
return False
# extract the missing angles/dihedrals
missing_bonds = set()
missing_angles = []
missing_dihedrals = []
for line in tleap_process.stdout.splitlines():
if "Could not find bond parameter for:" in line:
bond = line.split(':')[-1].strip()
missing_bonds.add(bond)
elif "Could not find angle parameter:" in line:
cols = line.split(':')
angle = cols[-1].strip()
if angle not in missing_angles:
missing_angles.append(angle)
elif "No torsion terms for" in line:
cols = line.split()
torsion = cols[-1].strip()
if torsion not in missing_dihedrals:
missing_dihedrals.append(torsion)
modified_hybrid_frcmod = cwd / f'{self.internal_name}_corrected.frcmod'
if missing_angles or missing_dihedrals:
print('WARNING: Adding dummy dihedrals/angles to frcmod to generate .top')
# read the original frcmod
frcmod_lines = open(self.frcmod).readlines()
# overwriting the .frcmod with dummy angles/dihedrals
with open(modified_hybrid_frcmod, 'w') as NEW_FRCMOD:
for line in frcmod_lines:
NEW_FRCMOD.write(line)
if 'BOND' in line:
for bond in missing_bonds:
dummy_bond = f'{bond:<14}0 180 \t\t# Dummy bond\n'
NEW_FRCMOD.write(dummy_bond)
print(f'Added dummy bond: "{dummy_bond}"')
if 'ANGLE' in line:
for angle in missing_angles:
dummy_angle = f'{angle:<14}0 120.010 \t\t# Dummy angle\n'
NEW_FRCMOD.write(dummy_angle)
print(f'Added dummy angle: "{dummy_angle}"')
if 'DIHE' in line:
for dihedral in missing_dihedrals:
dummy_dihedral = f'{dihedral:<14}1 0.00 180.000 2.000 \t\t# Dummy dihedrals\n'
NEW_FRCMOD.write(dummy_dihedral)
print(f'Added dummy dihedral: "{dummy_dihedral}"')
# update our tleap input test to use the corrected file
leap_in_test_corrected = cwd / 'leap_test_morph_corrected.in'
open(leap_in_test_corrected, 'w').write(leap_in_conf.format(
mol2=os.path.relpath(self.suptop.mol2, cwd),
frcmod=os.path.relpath(modified_hybrid_frcmod, cwd),
protein_ff=protein_ff, ligand_ff=ligand_ff))
# verify that adding the dummy angles/dihedrals worked
tleap_process = subprocess.run([ambertools_tleap, '-s', '-f', leap_in_test_corrected],
cwd=cwd, text=True, timeout=60 * 10, capture_output=True, check=True)
if not "Errors = 0" in tleap_process.stdout:
raise Exception('ERROR: Could not generate the .top file after adding dummy angles/dihedrals')
print('Morph .frcmod after the insertion of dummy angle/dihedrals: OK')
# set this .frcmod as the correct one now,
self.frcmod_before_correction = self.frcmod
self.frcmod = modified_hybrid_frcmod
return True
[docs] def overlap_fractions(self):
"""
Calculate the size of the common area.
:return: Four decimals capturing: 1) the fraction of the common size with respect to the ligA topology,
2) the fraction of the common size with respect to the ligZ topology,
3) the percentage of the disappearing atoms in the disappearing molecule
4) the percentage of the appearing atoms in the appearing molecule
:rtype: [float, float, float, float]
"""
matched_fraction_left = len(self.suptop.matched_pairs) / float(len(self.suptop.top1))
matched_fraction_right = len(self.suptop.matched_pairs) / float(len(self.suptop.top2))
disappearing_atoms_fraction = (len(self.suptop.top1) - len(self.suptop.matched_pairs)) \
/ float(len(self.suptop.top1)) * 100
appearing_atoms_fraction = (len(self.suptop.top2) - len(self.suptop.matched_pairs)) \
/ float(len(self.suptop.top2)) * 100
return matched_fraction_left, matched_fraction_right, disappearing_atoms_fraction, appearing_atoms_fraction