import sys
import os
import subprocess
import shutil
from pathlib import Path
import numpy as np
import parmed
import ties.helpers
from ties.config import Config
[docs]class Ligand:
"""
The ligand helper class. Helps to load and manage the different copies of the ligand file.
Specifically, it tracks the different copies of the original input files as it is transformed (e.g. charge assignment).
:param ligand: ligand filepath
:type ligand: string
:param config: Optional configuration from which the relevant ligand settings can be used
:type config: :class:`Config`
:param save: write a file with unique atom names for further inspection
:type save: bool
"""
LIG_COUNTER = 0
_USED_FILENAMES = set()
[docs] def __init__(self, ligand, config=None, save=True):
"""Constructor method
"""
self.save = save
# save workplace root
self.config = Config() if config is None else config
self.config.ligand_files = ligand
self.original_input = Path(ligand).absolute()
# internal name without an extension
self.internal_name = self.original_input.stem
# ligand names have to be unique
if self.internal_name in Ligand._USED_FILENAMES and self.config.uses_cmd:
print(f'ERROR: the ligand filename {self.internal_name} is not unique in the list of ligands. ')
sys.exit(1)
else:
Ligand._USED_FILENAMES.add(self.internal_name)
# last used representative Path file
self.current = self.original_input
# internal index
# TODO - move to config
self.index = Ligand.LIG_COUNTER
Ligand.LIG_COUNTER += 1
self._renaming_map = None
self.ligand_with_uniq_atom_names = None
# If .ac format (ambertools, similar to .pdb), convert it to .mol2 using antechamber
self.convert_acprep_to_mol2()
def __repr__(self):
# return self.original_input.stem
return self.internal_name
[docs] def convert_acprep_to_mol2(self):
"""
If the file is not a prep/ac file, this function does not do anything.
Antechamber is called to convert the .prepi/.prep/.ac file into a .mol2 file.
Returns: the name of the original file, or of it was .prepi, a new filename with .mol2
"""
if self.current.suffix.lower() not in ('.ac', '.prep'):
return
filetype = {'.ac': 'ac', '.prep': 'prepi'}[self.current.suffix.lower()]
cwd = self.config.lig_acprep_dir / self.internal_name
if not cwd.is_dir():
cwd.mkdir(parents=True, exist_ok=True)
# prepare the .mol2 files with antechamber (ambertools), assign BCC charges if necessary
print(f'Antechamber: converting {filetype} to mol2')
new_current = cwd / (self.internal_name + '.mol2')
log_filename = cwd / "antechamber_conversion.log"
with open(log_filename, 'w') as LOG:
try:
subprocess.run([self.config.ambertools_antechamber,
'-i', self.current, '-fi', filetype,
'-o', new_current, '-fo', 'mol2',
'-dr', self.config.antechamber_dr],
stdout=LOG, stderr=LOG,
check=True, text=True,
cwd=cwd, timeout=30)
except subprocess.CalledProcessError as E:
print('ERROR: An error occurred during the antechamber conversion from .ac to .mol2 data type. ')
print(f'ERROR: The output was saved in the directory: {cwd}')
print(f'ERROR: Please see the log file for the exact error information: {log_filename}')
raise E
# update
self.original_ac = self.current
self.current = new_current
print(f'Converted .ac file to .mol2. The location of the new file: {self.current}')
[docs] def are_atom_names_correct(self):
"""
Checks if atom names:
- are unique
- have a correct format "LettersNumbers" e.g. C17
"""
ligand = parmed.load_file(str(self.current), structure=True)
atom_names = [a.name for a in ligand.atoms]
are_uniqe = len(set(atom_names)) == len(atom_names)
return are_uniqe and self._do_atom_names_have_correct_format(atom_names)
@staticmethod
def _do_atom_names_have_correct_format(names):
"""
Check if the atom name is followed by a number, e.g. "C15"
Note that the full atom name cannot be more than 4 characters.
This is because the PDB format does not allow for more
characters which can lead to inconsistencies.
:param names: a list of atom names
:type names: list[str]
:return True if they all follow the correct format.
"""
for name in names:
# cannot exceed 4 characters
if len(name) > 4:
return False
# count letters before any digit
letter_count = 0
for letter in name:
if not letter.isalpha():
break
letter_count += 1
# at least one character
if letter_count == 0:
return False
# extrac the number suffix
atom_number = name[letter_count:]
try:
int(atom_number)
except:
return False
return True
[docs] def correct_atom_names(self):
"""
Ensure that each atom name:
- is unique
- has letter followed by digits
- has max 4 characters
E.g. C17, NX23
:param self.save: if the path is provided, the updated file
will be saved with the unique names and a handle to the new file (ParmEd) will be returned.
"""
if self.are_atom_names_correct():
return
print(f'Ligand {self.internal_name} will have its atom names renamed. ')
ligand = parmed.load_file(str(self.current), structure=True)
print(f'Atom names in the molecule ({self.original_input}/{self.internal_name}) are either not unique '
f'or do not follow NameDigit format (e.g. C15). Renaming')
_, renaming_map = ties.helpers.get_new_atom_names(ligand.atoms)
self._renaming_map = renaming_map
print(f'Rename map: {renaming_map}')
# save the output here
os.makedirs(self.config.lig_unique_atom_names_dir, exist_ok=True)
ligand_with_uniq_atom_names = self.config.lig_unique_atom_names_dir / (self.internal_name + self.current.suffix)
if self.save:
ligand.save(str(ligand_with_uniq_atom_names))
self.ligand_with_uniq_atom_names = ligand_with_uniq_atom_names
self.parmed = ligand
# this object is now represented by the updated ligand
self.current = ligand_with_uniq_atom_names
@property
def renaming_map(self):
"""
Otherwise, key: newName, value: oldName.
If None, means no renaming took place.
"""
return self._renaming_map
@property
def rev_renaming_map(self):
return {b: c for c, b in self._renaming_map.items()}
@renaming_map.setter
def renaming_map(self, dict):
if self._renaming_map is None:
self._renaming_map = dict
else:
# this ligand was already renamed before.
# so B -> A, where A is the original value,
# and B is the new value (guarantted to be unique, therefore the key)
# Now B is renamed to C, so we need to have C -> A
# For each renamed B value here, we have to find the A value
# fixme: this works only if Bs are unique
# dict: C -> B. We know that C and B are unique. Therefore, reverse for convenience
rev_dict = {b: c for c, b in dict.items()}
self._renaming_map = {rev_dict[b]: a for b, a in self._renaming_map.items()}
# make this into a python property
def suffix(self):
return self.current.suffix.lower()
[docs] def antechamber_prepare_mol2(self, **kwargs):
"""
Converts the ligand into a .mol2 format.
BCC charges are generated if missing or requested.
It calls antechamber (the charge type -c is not used if user prefers to use their charges).
Any DU atoms created in the antechamber call are removed.
:param atom_type: Atom type bla bla
:type atom_type:
:param net_charge:
:type net_charge: int
"""
self.config.set_configs(**kwargs)
print('Antechamber: converting to .mol2 and generating charges if necessary')
if self.config.ligands_contain_q:
print('Antechamber: Generating .mol2 file with BCC charges')
if not self.config.antechamber_charge_type:
print('Antechamber: Ignoring atom charges. The user-provided atom charges will be used. ')
mol2_cwd = self.config.lig_dir / self.internal_name
# prepare the directory
mol2_cwd.mkdir(parents=True, exist_ok=True)
mol2_target = mol2_cwd / f'{self.internal_name}.mol2'
# copy the existing file if the file is already .mol2
if self.current.suffix == '.mol2' and self.config.ligands_contain_q:
print(f'Already .mol2 used. Copying {self.current} to {mol2_cwd}. ')
shutil.copy(self.current, mol2_target)
# do not redo if the target file exists
if not (mol2_target).is_file():
log_filename = mol2_cwd / "antechamber.log"
with open(log_filename, 'w') as LOG:
try:
subprocess.run([self.config.ambertools_antechamber,
'-i', self.current, '-fi', self.current.suffix[1:],
'-o', mol2_target, '-fo', 'mol2',
'-at', self.config.ligand_ff_name, '-nc', str(self.config.ligand_net_charge),
'-dr', str(self.config.antechamber_dr)] + self.config.antechamber_charge_type,
cwd=mol2_cwd,
stdout=LOG, stderr=LOG,
check=True, text=True,
timeout=60 * 30 # 30 minutes
)
except subprocess.CalledProcessError as E:
print('ERROR: occured when creating the input .mol2 file with antechamber. ')
print(f'ERROR: The output was saved in the directory: {mol2_cwd}')
print(f'ERROR: can be found in the file: {log_filename}')
raise E
print(f'Converted {self.original_input} into .mol2, Log: {log_filename}')
else:
print(f'File {mol2_target} already exists. Skipping. ')
self.antechamber_mol2 = mol2_target
self.current = mol2_target
# remove any DUMMY DU atoms in the .mol2 atoms
self.removeDU_atoms()
[docs] def removeDU_atoms(self):
"""
Ambertools antechamber creates sometimes DU dummy atoms.
These are not created when BCC charges are computed from scratch.
They are only created if you reuse existing charges.
They appear to be a side effect. We remove the dummy atoms therefore.
"""
mol2 = parmed.load_file(str(self.current), structure=True)
# check if there are any DU atoms
has_DU = any(a.type == 'DU' for a in mol2.atoms)
if not has_DU:
return
# make a backup copy before (to simplify naming)
shutil.move(self.current, self.current.parent / ('lig.beforeRemovingDU' + self.current.suffix))
# remove DU type atoms and save the file
for atom in mol2.atoms:
if atom.name != 'DU':
continue
atom.residue.delete_atom(atom)
# save the updated molecule
mol2.save(str(self.current))
print('Removed dummy atoms with type "DU"')
[docs] def generate_frcmod(self, **kwargs):
"""
params
- parmchk2
- atom_type
"""
self.config.set_configs(**kwargs)
print(f'INFO: frcmod for {self} was computed before. Not repeating.')
if hasattr(self, 'frcmod'):
return
# fixme - work on the file handles instaed of the constant stitching
print(f'Parmchk2: generate the .frcmod for {self.internal_name}.mol2')
# prepare cwd
cwd = self.config.lig_frcmod_dir / self.internal_name
if not cwd.is_dir():
cwd.mkdir(parents=True, exist_ok=True)
target_frcmod = f'{self.internal_name}.frcmod'
log_filename = cwd / "parmchk2.log"
with open(log_filename, 'w') as LOG:
try:
subprocess.run([self.config.ambertools_parmchk2,
'-i', self.current,
'-o', target_frcmod,
'-f', 'mol2',
'-s', self.config.ligand_ff_name],
stdout=LOG, stderr=LOG,
check= True, text=True,
cwd= cwd, timeout=20, # 20 seconds
)
except subprocess.CalledProcessError as E:
print('ERROR file content: ', open(log_filename).read())
print('ERROR: An error occured during the antechamber conversion from .ac to .mol2 data type. ')
print(f'ERROR: The output was saved in the directory: {cwd}')
print(f'ERROR: Please see the log file for the exact error information: {log_filename}')
raise E
print(f'Parmchk2: created .frcmod: {target_frcmod}')
self.frcmod = cwd / target_frcmod
[docs] def overwrite_coordinates_with(self, file, output_file):
"""
Load coordinates from another file and overwrite the coordinates in the current file.
"""
# load the current atoms with ParmEd
template = parmed.load_file(str(self.current), structure=True)
# load the file with the coordinates we want to use
coords = parmed.load_file(str(file), structure=True)
# fixme: use the atom names
by_atom_name = True
by_index = False
by_general_atom_type = False
# mol2_filename will be overwritten!
print(f'Writing to {self.current} the coordinates from {file}. ')
coords_sum = np.sum(coords.atoms.positions)
if by_atom_name and by_index:
raise ValueError('Cannot have both. They are exclusive')
elif not by_atom_name and not by_index:
raise ValueError('Either option has to be selected.')
if by_general_atom_type:
for mol2_atom in template.atoms:
found_match = False
for ref_atom in coords.atoms:
if element_from_type[mol2_atom.type.upper()] == element_from_type[ref_atom.type.upper()]:
found_match = True
mol2_atom.position = ref_atom.position
break
assert found_match, "Could not find the following atom in the original file: " + mol2_atom.name
if by_atom_name:
for mol2_atom in template.atoms:
found_match = False
for ref_atom in coords.atoms:
if mol2_atom.name.upper() == ref_atom.name.upper():
found_match = True
mol2_atom.position = ref_atom.position
break
assert found_match, "Could not find the following atom name across the two files: " + mol2_atom.name
elif by_index:
for mol2_atom, ref_atom in zip(template.atoms, coords.atoms):
atype = element_from_type[mol2_atom.type.upper()]
reftype = element_from_type[ref_atom.type.upper()]
if atype != reftype:
raise Exception(
f"The found general type {atype} does not equal to the reference type {reftype} ")
mol2_atom.position = ref_atom.position
if np.testing.assert_almost_equal(coords_sum, np.sum(mda_template.atoms.positions), decimal=2):
print('Different positions sums:', coords_sum, np.sum(mda_template.atoms.positions))
raise Exception('Copying of the coordinates did not work correctly')
# save the output file
mda_template.atoms.write(output_file)