Source code for ties.ligand

import sys
import os
import subprocess
import shutil
from pathlib import Path

import numpy as np
import parmed

import ties.helpers
from ties.config import Config


[docs]class Ligand:
    """
    The ligand helper class. Helps to load and manage the different copies of the ligand file.
    Specifically, it tracks the different copies of the original input files as it is transformed (e.g. charge assignment).

    :param ligand: ligand filepath
    :type ligand: string
    :param config: Optional configuration from which the relevant ligand settings can be used
    :type config: :class:`Config`
    :param save: write a file with unique atom names for further inspection
    :type save: bool
    """
    LIG_COUNTER = 0

    _USED_FILENAMES = set()

[docs]    def __init__(self, ligand, config=None, save=True):
        """Constructor method
        """

        self.save = save
        # save workplace root
        self.config = Config() if config is None else config
        self.config.ligand_files = ligand

        self.original_input = Path(ligand).absolute()

        # internal name without an extension
        self.internal_name = self.original_input.stem

        # ligand names have to be unique
        if self.internal_name in Ligand._USED_FILENAMES and self.config.uses_cmd:
            print(f'ERROR: the ligand filename {self.internal_name} is not unique in the list of ligands. ')
            sys.exit(1)
        else:
            Ligand._USED_FILENAMES.add(self.internal_name)

        # last used representative Path file
        self.current = self.original_input

        # internal index
        # TODO - move to config
        self.index = Ligand.LIG_COUNTER
        Ligand.LIG_COUNTER += 1

        self._renaming_map = None
        self.ligand_with_uniq_atom_names = None

        # If .ac format (ambertools, similar to .pdb), convert it to .mol2 using antechamber
        self.convert_acprep_to_mol2()

    def __repr__(self):
        # return self.original_input.stem
        return self.internal_name

[docs]    def convert_acprep_to_mol2(self):
        """
        If the file is not a prep/ac file, this function does not do anything.
        Antechamber is called to convert the .prepi/.prep/.ac file into a .mol2 file.

        Returns: the name of the original file, or of it was .prepi, a new filename with .mol2
        """

        if self.current.suffix.lower() not in ('.ac', '.prep'):
            return

        filetype = {'.ac': 'ac', '.prep': 'prepi'}[self.current.suffix.lower()]

        cwd = self.config.lig_acprep_dir / self.internal_name
        if not cwd.is_dir():
            cwd.mkdir(parents=True, exist_ok=True)

        # prepare the .mol2 files with antechamber (ambertools), assign BCC charges if necessary
        print(f'Antechamber: converting {filetype} to mol2')
        new_current = cwd / (self.internal_name + '.mol2')

        log_filename = cwd / "antechamber_conversion.log"
        with open(log_filename, 'w') as LOG:
            try:
                subprocess.run([self.config.ambertools_antechamber,
                                '-i', self.current, '-fi', filetype,
                                '-o', new_current, '-fo', 'mol2',
                                '-dr', self.config.antechamber_dr],
                               stdout=LOG, stderr=LOG,
                               check=True, text=True,
                               cwd=cwd, timeout=30)
            except subprocess.CalledProcessError as E:
                print('ERROR: An error occurred during the antechamber conversion from .ac to .mol2 data type. ')
                print(f'ERROR: The output was saved in the directory: {cwd}')
                print(f'ERROR: Please see the log file for the exact error information: {log_filename}')
                raise E

        # update
        self.original_ac = self.current
        self.current = new_current
        print(f'Converted .ac file to .mol2. The location of the new file: {self.current}')

[docs]    def are_atom_names_correct(self):
        """
        Checks if atom names:
         - are unique
         - have a correct format "LettersNumbers" e.g. C17
        """
        ligand = parmed.load_file(str(self.current), structure=True)
        atom_names = [a.name for a in ligand.atoms]

        are_uniqe = len(set(atom_names)) == len(atom_names)

        return are_uniqe and self._do_atom_names_have_correct_format(atom_names)

    @staticmethod
    def _do_atom_names_have_correct_format(names):
        """
        Check if the atom name is followed by a number, e.g. "C15"
        Note that the full atom name cannot be more than 4 characters.
        This is because the PDB format does not allow for more
        characters which can lead to inconsistencies.

        :param names: a list of atom names
        :type names: list[str]
        :return True if they all follow the correct format.
        """
        for name in names:
            # cannot exceed 4 characters
            if len(name) > 4:
                return False

            # count letters before any digit
            letter_count = 0
            for letter in name:
                if not letter.isalpha():
                    break

                letter_count += 1

            # at least one character
            if letter_count == 0:
                return False

            # extrac the number suffix
            atom_number = name[letter_count:]
            try:
                int(atom_number)
            except:
                return False

        return True

[docs]    def correct_atom_names(self):
        """
        Ensure that each atom name:
         - is unique
         - has letter followed by digits
         - has max 4 characters
        E.g. C17, NX23

        :param self.save: if the path is provided, the updated file
            will be saved with the unique names and a handle to the new file (ParmEd) will be returned.
        """
        if self.are_atom_names_correct():
            return

        print(f'Ligand {self.internal_name} will have its atom names renamed. ')

        ligand = parmed.load_file(str(self.current), structure=True)

        print(f'Atom names in the molecule ({self.original_input}/{self.internal_name}) are either not unique '
              f'or do not follow NameDigit format (e.g. C15). Renaming')
        _, renaming_map = ties.helpers.get_new_atom_names(ligand.atoms)
        self._renaming_map = renaming_map
        print(f'Rename map: {renaming_map}')

        # save the output here
        os.makedirs(self.config.lig_unique_atom_names_dir, exist_ok=True)

        ligand_with_uniq_atom_names = self.config.lig_unique_atom_names_dir / (self.internal_name + self.current.suffix)
        if self.save:
            ligand.save(str(ligand_with_uniq_atom_names))

        self.ligand_with_uniq_atom_names = ligand_with_uniq_atom_names
        self.parmed = ligand
        # this object is now represented by the updated ligand
        self.current = ligand_with_uniq_atom_names

    @property
    def renaming_map(self):
        """
        Otherwise, key: newName, value: oldName.

        If None, means no renaming took place.
        """
        return self._renaming_map

    @property
    def rev_renaming_map(self):
        return {b: c for c, b in self._renaming_map.items()}

    @renaming_map.setter
    def renaming_map(self, dict):
        if self._renaming_map is None:
            self._renaming_map = dict
        else:
            # this ligand was already renamed before.
            # so B -> A, where A is the original value,
            # and B is the new value (guarantted to be unique, therefore the key)
            # Now B is renamed to C, so we need to have C -> A
            # For each renamed B value here, we have to find the A value
            # fixme: this works only if Bs are unique

            # dict: C -> B. We know that C and B are unique. Therefore, reverse for convenience
            rev_dict = {b: c for c, b in dict.items()}
            self._renaming_map = {rev_dict[b]: a for b, a in self._renaming_map.items()}

    # make this into a python property
    def suffix(self):
        return self.current.suffix.lower()

[docs]    def antechamber_prepare_mol2(self, **kwargs):
        """
        Converts the ligand into a .mol2 format.

        BCC charges are generated if missing or requested.
        It calls antechamber (the charge type -c is not used if user prefers to use their charges).
        Any DU atoms created in the antechamber call are removed.

        :param atom_type: Atom type bla bla
        :type atom_type:
        :param net_charge:
        :type net_charge: int
        """
        self.config.set_configs(**kwargs)

        print('Antechamber: converting to .mol2 and generating charges if necessary')
        if self.config.ligands_contain_q:
            print('Antechamber: Generating .mol2 file with BCC charges')
        if not self.config.antechamber_charge_type:
            print('Antechamber: Ignoring atom charges. The user-provided atom charges will be used. ')
        
        mol2_cwd = self.config.lig_dir / self.internal_name

        # prepare the directory
        mol2_cwd.mkdir(parents=True, exist_ok=True)
        mol2_target = mol2_cwd / f'{self.internal_name}.mol2'

        # copy the existing file if the file is already .mol2
        if self.current.suffix == '.mol2' and self.config.ligands_contain_q:
            print(f'Already .mol2 used. Copying {self.current} to {mol2_cwd}. ')
            shutil.copy(self.current, mol2_target)

        # do not redo if the target file exists
        if not (mol2_target).is_file():
            log_filename = mol2_cwd / "antechamber.log"
            with open(log_filename, 'w') as LOG:
                try:
                    subprocess.run([self.config.ambertools_antechamber,
                                    '-i', self.current, '-fi', self.current.suffix[1:],
                                    '-o', mol2_target, '-fo', 'mol2',
                                    '-at', self.config.ligand_ff_name, '-nc', str(self.config.ligand_net_charge),
                                    '-dr', str(self.config.antechamber_dr)] + self.config.antechamber_charge_type,
                                   cwd=mol2_cwd,
                                   stdout=LOG, stderr=LOG,
                                   check=True, text=True,
                                   timeout=60 * 30  # 30 minutes
                                   )
                except subprocess.CalledProcessError as E:
                    print('ERROR: occured when creating the input .mol2 file with antechamber. ')
                    print(f'ERROR: The output was saved in the directory: {mol2_cwd}')
                    print(f'ERROR: can be found in the file: {log_filename}')
                    raise E
            print(f'Converted {self.original_input} into .mol2, Log: {log_filename}')
        else:
            print(f'File {mol2_target} already exists. Skipping. ')

        self.antechamber_mol2 = mol2_target
        self.current = mol2_target

        # remove any DUMMY DU atoms in the .mol2 atoms
        self.removeDU_atoms()

[docs]    def removeDU_atoms(self):
        """
        Ambertools antechamber creates sometimes DU dummy atoms.
        These are not created when BCC charges are computed from scratch.
        They are only created if you reuse existing charges.
        They appear to be a side effect. We remove the dummy atoms therefore.
        """
        mol2 = parmed.load_file(str(self.current), structure=True)
        # check if there are any DU atoms
        has_DU = any(a.type == 'DU' for a in mol2.atoms)
        if not has_DU:
            return

        # make a backup copy before (to simplify naming)
        shutil.move(self.current, self.current.parent / ('lig.beforeRemovingDU' + self.current.suffix))

        # remove DU type atoms and save the file
        for atom in mol2.atoms:
            if atom.name != 'DU':
                continue

            atom.residue.delete_atom(atom)
        # save the updated molecule
        mol2.save(str(self.current))
        print('Removed dummy atoms with type "DU"')

[docs]    def generate_frcmod(self, **kwargs):
        """
            params
             - parmchk2
             - atom_type
        """
        self.config.set_configs(**kwargs)

        print(f'INFO: frcmod for {self} was computed before. Not repeating.')
        if hasattr(self, 'frcmod'):
            return

        # fixme - work on the file handles instaed of the constant stitching
        print(f'Parmchk2: generate the .frcmod for {self.internal_name}.mol2')

        # prepare cwd
        cwd = self.config.lig_frcmod_dir / self.internal_name
        if not cwd.is_dir():
            cwd.mkdir(parents=True, exist_ok=True)

        target_frcmod = f'{self.internal_name}.frcmod'
        log_filename = cwd / "parmchk2.log"
        with open(log_filename, 'w') as LOG:
            try:
                subprocess.run([self.config.ambertools_parmchk2,
                                '-i', self.current,
                                '-o', target_frcmod,
                                '-f', 'mol2',
                                '-s', self.config.ligand_ff_name],
                               stdout=LOG, stderr=LOG,
                               check= True, text=True,
                               cwd= cwd, timeout=20,  # 20 seconds
                                )
            except subprocess.CalledProcessError as E:
                print('ERROR file content: ', open(log_filename).read())
                print('ERROR: An error occured during the antechamber conversion from .ac to .mol2 data type. ')
                print(f'ERROR: The output was saved in the directory: {cwd}')
                print(f'ERROR: Please see the log file for the exact error information: {log_filename}')
                raise E

        print(f'Parmchk2: created .frcmod: {target_frcmod}')
        self.frcmod = cwd / target_frcmod

[docs]    def overwrite_coordinates_with(self, file, output_file):
        """
        Load coordinates from another file and overwrite the coordinates in the current file.
        """

        # load the current atoms with ParmEd
        template = parmed.load_file(str(self.current), structure=True)

        # load the file with the coordinates we want to use
        coords = parmed.load_file(str(file), structure=True)

        # fixme: use the atom names
        by_atom_name = True
        by_index = False
        by_general_atom_type = False

        # mol2_filename will be overwritten!
        print(f'Writing to {self.current} the coordinates from {file}. ')

        coords_sum = np.sum(coords.atoms.positions)

        if by_atom_name and by_index:
            raise ValueError('Cannot have both. They are exclusive')
        elif not by_atom_name and not by_index:
            raise ValueError('Either option has to be selected.')

        if by_general_atom_type:
            for mol2_atom in template.atoms:
                found_match = False
                for ref_atom in coords.atoms:
                    if element_from_type[mol2_atom.type.upper()] == element_from_type[ref_atom.type.upper()]:
                        found_match = True
                        mol2_atom.position = ref_atom.position
                        break
                assert found_match, "Could not find the following atom in the original file: " + mol2_atom.name
        if by_atom_name:
            for mol2_atom in template.atoms:
                found_match = False
                for ref_atom in coords.atoms:
                    if mol2_atom.name.upper() == ref_atom.name.upper():
                        found_match = True
                        mol2_atom.position = ref_atom.position
                        break
                assert found_match, "Could not find the following atom name across the two files: " + mol2_atom.name
        elif by_index:
            for mol2_atom, ref_atom in zip(template.atoms, coords.atoms):
                atype = element_from_type[mol2_atom.type.upper()]
                reftype = element_from_type[ref_atom.type.upper()]
                if atype != reftype:
                    raise Exception(
                        f"The found general type {atype} does not equal to the reference type {reftype} ")

                mol2_atom.position = ref_atom.position

        if np.testing.assert_almost_equal(coords_sum, np.sum(mda_template.atoms.positions), decimal=2):
            print('Different positions sums:', coords_sum, np.sum(mda_template.atoms.positions))
            raise Exception('Copying of the coordinates did not work correctly')

        # save the output file
        mda_template.atoms.write(output_file)
Source code for ties.ligand

TIES

Navigation

Related Topics