Source code for ties.config

import os
import sys
import pathlib
import csv
import subprocess
import tempfile
from collections.abc import Iterable

import parmed

from ties.helpers import ArgparseChecker


[docs]class Config: """ The configuration with parameters that can be used to define the entire protocol. The settings can be overridden later in the actual classes. The settings are stored as properties in the object and can be overwritten. """
[docs] def __init__(self, **kwargs): # set the path to the scripts self.code_root = pathlib.Path(os.path.dirname(__file__)) # scripts/input files, # these are specific to the host self.script_dir = self.code_root / 'scripts' self.namd_script_dir = self.script_dir / 'namd' self.ambertools_script_dir = self.script_dir / 'ambertools' self.tleap_check_protein = self.ambertools_script_dir / 'check_prot.in' self.vmd_vis_script = self.script_dir / 'vmd' / 'vis_morph.vmd' self.vmd_vis_script_sh = self.script_dir / 'vmd' / 'vis_morph.sh' self._workdir = None self._antechamber_dr = False self._ambertools_home = None self._protein = None self._ligand_net_charge = None self._atom_pair_q_atol = 0.1 self._net_charge_threshold = 0.1 self._redistribute_q_over_unmatched = True self._allow_disjoint_components = False # use only the element in the superimposition rather than the specific atom type self._use_element = False self._use_element_in_superimposition = True # coordinates self._align_molecules_using_mcs = False self._use_original_coor = False self._coordinates_file = None self._ligand_files = set() self._manually_matched_atom_pairs = None self._manually_mismatched_pairs = None self._ligands_contain_q = None self._ligand_tleap_in = None self._complex_tleap_in = None self._superimposition_starting_pair = None self._protein_ff = None self._ligand_ff = 'leaprc.gaff' self._ligand_ff_name = 'gaff' # MD/NAMD production input file self._md_engine = 'namd' #default to modern CPU version self.namd_version = '2.14' self._lambda_rep_dir_tree = False # experimental self._use_hybrid_single_dual_top = False self._ignore_charges_completely = False self.ligands = None # if True, do not allow ligands with the same ligand name self.uses_cmd = False # assign all the initial configuration values for key, value in kwargs.items(): setattr(self, key, value)
@property def workdir(self): """ Working directory for antechamber calls. If None, a temporary directory in /tmp/ will be used. :return: Work dir :rtype: str """ if self._workdir is None: # this is API, so avoid making a directory # keep the 'ties' in case the output directory structured needs to be generated/harvested self._workdir = pathlib.Path(tempfile.TemporaryDirectory().name) / 'ties' return self._workdir @workdir.setter def workdir(self, cwd): if cwd is not None: if type(cwd) is str: cwd = pathlib.Path(cwd) # user provided self._workdir = cwd.absolute() else: # current directory self._workdir = pathlib.Path(os.getcwd()) / 'ties' self._workdir.mkdir(exist_ok=True) print(f'Working Directory: {self._workdir}') # --------------- general @property def protein(self): """ Path to the protein :return: Protein filename :rtype: str """ return self._protein @protein.setter def protein(self, path): if path is None: print('No protein was select. Skipping protein dG.') return # can be loaded by parmed? print(f'Trying to open the protein file {path} with ParmEd..') parmed.load_file(str(path), structure=True) self._protein = pathlib.Path(path) @property def ligand_files(self): """ A list of ligand filenames. :return: """ return self._ligand_files @ligand_files.setter def ligand_files(self, files): """ str or Path, a Ligand object will converted into a Path """ if isinstance(files, str) or isinstance(files, pathlib.Path): files = [pathlib.Path(files)] if len(files) < 1: print('Please supply at least one ligand file with -l (--ligands). E.g. -l file1.pdb file2.pdb') sys.exit(1) # check if the ligands have the same extension if len({l.suffix for l in files}) != 1: # fixme - does it actually matter? you parse it anyway? print('Error: ligands (-l) have different extensions. ' 'Please ensure all ligands have the same extension') sys.exit() # files must have unique names filenames = [l.stem for l in files] if len(filenames) != len(set(filenames)): print('Error: some ligand (-l) names are the same. ' 'Error: ensure your ligands have unique names. ') sys.exit() # verify the files with ParmEd if possible for ligand in files: if ligand.suffix.lower() in ('.ac', '.prep'): print(f'Cannot verify .ac/.prep {ligand} with ParmEd. Skipping. ') else: print(f'Trying to open the ligand {ligand} with ParmEd..') lig = parmed.load_file(str(ligand), structure=True) # there should be one residue if len({a.residue.name for a in lig.atoms}) > 1: print(f'Warning: more than one residue name detected in the ligand {ligand}') # TODO - ensure that it is a connected component and there is no extra atoms self._ligand_files = self._ligand_files.union(set(files)) # --------------- ambertools @property def ambertools_home(self): """ Ambertools HOME path. If not configured, the env variable AMBERHOME as AMBER_PREFIX will be checked. :return: ambertools path """ # ambertools path given? path = None if self._ambertools_home is None: # otherwise check env paths if os.getenv('AMBERHOME'): path = pathlib.Path(os.getenv('AMBERHOME')) elif os.getenv('AMBER_PREFIX'): path = pathlib.Path(os.getenv('AMBER_PREFIX')) else: # try to deduce the env from the location of antechamber binary print('WARNING: $AMBERHOME not found. Guessing the location by looking up antechamber. ') proc = subprocess.run(['which', 'antechamber'], capture_output=True) decode = proc.stdout.decode('utf-8').strip() if "not found" not in decode: ant_path = pathlib.Path(decode) if ant_path.is_file(): path = ant_path.parent.parent if path is None: print('Error: Cannot find ambertools. $AMBERHOME and $AMBER_PREFIX are empty') print('Option 1: source your ambertools script amber.sh') print('Option 2: specify manually the path to amberhome with -ambertools option') raise Exception('No ambertools') assert path.exists() self._ambertools_home = path return self._ambertools_home @property def ambertools_antechamber(self): """ Antechamber path based on the .ambertools_home :return: """ return self.ambertools_home / 'bin' / 'antechamber' @property def ambertools_parmchk2(self): """ Parmchk2 path based on the .ambertools_home :return: """ return self.ambertools_home / 'bin' / 'parmchk2' @property def ambertools_tleap(self): """ Tleap path based on the .ambertools_home :return: """ return self.ambertools_home / 'bin' / 'tleap' @ambertools_home.setter def ambertools_home(self, path): if path is None: self._ambertools_home = None return path = pathlib.Path(path) if not path.exists(): print('Error: The provided ambertools home path does not point towards the directory.' f'{path}') self._ambertools_home = path @property def antechamber_dr(self): """ Whether to use -dr setting when calling antechamber. :return: """ return self._antechamber_dr @antechamber_dr.setter def antechamber_dr(self, bool): # using ambertools antechamber dr mode? if bool is True: self._antechamber_dr = 'yes' else: self._antechamber_dr = 'no' print(f'Antechamber dr: {self._antechamber_dr}') @property def ligand_net_charge(self): """ The ligand charge. If not provided, neutral charge is assumed. The charge is necessary for calling antechamber (-nc). :return: """ if self._ligand_net_charge is None: print(f'WARNING: Ligand net charge not provided (-nc) by the user. Assuming 0. ') self._ligand_net_charge = 0 return self._ligand_net_charge @ligand_net_charge.setter def ligand_net_charge(self, net_charge): if net_charge is None: print('Ligand net charge was not supplied (-nc). Only limited functionalities will be available. ') self._ligand_net_charge = net_charge print(f'Ligand net charge (-nc): {net_charge}') @property def coordinates_file(self): """ A file from which coordinate can be taken. :return: """ return self._coordinates_file @coordinates_file.setter def coordinates_file(self, file): if file is None: return print(f'ParmEd: verifying the coordinate file {file}') parmed.load_file(file, structure=True) # fixme - warn if the atom names are not uniq, warn if there is more than one residue, no water, etc self._coordinates_file = file @property def atom_pair_q_atol(self): """ It defines the maximum difference in charge between any two superimposed atoms a1 and a2. If the two atoms differ in charge more than this value, they will be unmatched and added to the alchemical regions. :return: default (0.1e) :rtype: float """ return self._atom_pair_q_atol @atom_pair_q_atol.setter def atom_pair_q_atol(self, atol): self._atom_pair_q_atol = atol print(f'The maximum acceptable difference in charge between two paired atoms: {atol:.2f}') @property def net_charge_threshold(self): """ Defines how much the superimposed regions can, in total, differ in charge. If the total exceeds the thresholds, atom pairs will be unmatched until the threshold is met. :return: default (0.1e) :rtype: float """ return self._net_charge_threshold @net_charge_threshold.setter def net_charge_threshold(self, threshold): self._net_charge_threshold = threshold print(f'Using MCS net charge difference threshold of {threshold}') @property def ignore_charges_completely(self): """ Ignore the charges during the superimposition. Useful for debugging. :return: default (False) :rtype: bool """ return self._ignore_charges_completely @ignore_charges_completely.setter def ignore_charges_completely(self, bool): self._ignore_charges_completely = bool if bool: print('Ignoring the charges. ') self.redistribute_q_over_unmatched = False @property def allow_disjoint_components(self): """ Defines whether there might be multiple superimposed areas that are separated by alchemical region. :return: default (False) :rtype: bool """ return self._allow_disjoint_components @allow_disjoint_components.setter def allow_disjoint_components(self, boolean): self._allow_disjoint_components = boolean print(f'Allowing disjoint components: {self._allow_disjoint_components}') @property def use_element_in_superimposition(self): """ Use element rather than the actual atom type for the superimposition during the joint-traversal of the two molecules. :return: default (False) :rtype: bool """ return self._use_element_in_superimposition @use_element_in_superimposition.setter def use_element_in_superimposition(self, bool): self._use_element_in_superimposition = bool if bool: print('Warning. Ignoring the specific atom types during superimposition. ' 'The results should not be used in production simulations.') @property def align_molecules_using_mcs(self): """ After determining the maximum common substructure (MCS), use it to align the coordinates of the second molecule to the first. :return: default (False) :rtype: bool """ return self._align_molecules_using_mcs @align_molecules_using_mcs.setter def align_molecules_using_mcs(self, boolean): # align the coordinates in ligZ to the ligA using the MCS self._align_molecules_using_mcs = boolean # fixme - should be using the MCS before charges change print(f'Will align the coordinates using the final MCS: {boolean}') @property def use_original_coor(self): """ Antechamber when assigning charges can modify the charges slightly. If that's the case, use the original charges in order to correct this slight divergence in coordinates. :return: default (?) :rtype: bool """ return self._use_original_coor @use_original_coor.setter def use_original_coor(self, boolean): # use the original coords because antechamber can change them slightly self._use_original_coor = boolean def _guess_ligands_contain_q(self): # if all ligands are .mol2, then charges are provided if all(l.suffix.lower() == '.mol2' for l in self.ligand_files): # if all atoms have q = 0 that means they're a placeholder u = parmed.load_file(str(list(self.ligand_files)[0]), structure=True) all_q_0 = all(a.charge == 0 for a in u.atoms) if all_q_0: return False return True return False @property def ligands_contain_q(self): """ If not provided, it tries to deduce whether charges are provided. If all charges are set to 0, then it assumes that charges are not provided. If set to False explicitly, charges are ignored and computed again. :return: default (None) :rtype: bool """ if self._ligand_files is None: print('The fact whether the ligands contain charges has not been configured. Guessing. ') self._ligand_files = self._guess_ligands_contain_q() # does the file type contain charges? ligand_ext = set(l.suffix.lower() for l in self.ligand_files).pop() if self._ligands_contain_q is True: if ligand_ext in {'.mol2', '.ac'}: self._ligands_contain_q = True else: print('ERROR: If charges are provided with the ligands, ' 'the filetypes .mol2 or .ac have to be used.') sys.exit(1) elif self._ligands_contain_q is False: self._ligands_contain_q = False elif self._ligands_contain_q is None: # determine whether charges are provided using the file extensions if ligand_ext in {'.mol2', '.ac', '.prep'}: # if all charges are 0, then recompute self._ligands_contain_q = self._guess_ligands_contain_q() print('Assuming that charges are provided based on the filetype .ac/.mol2') elif ligand_ext == '.pdb': self._ligands_contain_q = False print('Assuming that charges are not provided in the given .pdb ligand files. ') else: print(f'Error: unrecognized file type {ligand_ext}. ') sys.exit(1) # fixme? if self._ligands_contain_q: # leave charges the way they are in the files # TODO - ensure that when antechamber_charge_type is accessed ,this function is used? implement in another self.antechamber_charge_type = [] print(f'Ligand files already contain charges: {self._ligands_contain_q}') return self._ligands_contain_q @ligands_contain_q.setter def ligands_contain_q(self, boolean): self._ligands_contain_q = boolean @property def superimposition_starting_pair(self): """ Set a starting pair for the superimposition to narrow down the MCS search. E.g. "C2-C12" :rtype: str """ return self._superimposition_starting_pair @superimposition_starting_pair.setter def superimposition_starting_pair(self, value): if value == None: self._superimposition_starting_pair = None else: atom_name_disappearing, atom_name_appearing = value.split('-') self. _superimposition_starting_pair = (atom_name_disappearing, atom_name_appearing) @property def manually_matched_atom_pairs(self): """ Either a list of pairs or a file with a list of pairs of atoms that should be superimposed/matched. :return: """ return self._manually_matched_atom_pairs @manually_matched_atom_pairs.setter def manually_matched_atom_pairs(self, file_or_pairs): # A user-provided list of pairs that should be matched # This functionality works only if there are two ligands if file_or_pairs is None: self._manually_matched_atom_pairs = [] return if self._ligand_files is None: raise ValueError('Wrong use of Config class. Please set the ._ligand_files attribute first. ') elif len(self._ligand_files) != 2: raise ValueError('Error: Too many ligands. ' 'Manually matching atom pairs works only with 2 ligands. ') # TODO # pair_format: C1-C7 or C1-C7,C2-C8 # if different format, it is likely a file, check file manually_matched = [] if file_or_pairs is not None and pathlib.Path(file_or_pairs).is_file(): with open(file_or_pairs) as IN: for left_atom, right_atom in csv.reader(IN, delimiter='-'): manually_matched.append((left_atom.strip(), right_atom.strip())) if len(manually_matched) > 1: raise NotImplementedError('Currently only one atom pair can be matched - others were not tested') self._manually_matched_atom_pairs = manually_matched @property def manually_mismatched_pairs(self): """ A path to a file with a list of a pairs that should be mismatched. """ if self._manually_mismatched_pairs is None: return [] return self._manually_mismatched_pairs @manually_mismatched_pairs.setter def manually_mismatched_pairs(self, value): mismatch = [] # only allow the file for now if value is not None: path = pathlib.Path(value) if not path.is_file(): print(f'Exception: the provided file for mismatching pairs cannot be found: {value}') raise Exception('Could not find the file. ') with open(path) as IN: for left_atom, right_atom in csv.reader(IN, delimiter='-'): mismatch.append((left_atom.strip(), right_atom.strip())) self._manually_mismatched_pairs = mismatch return self._manually_mismatched_pairs @property def protein_ff(self): """ The protein forcefield to be used by ambertools for the protein parameterisation. :return: default (leaprc.ff19SB) :rtype: string """ if self.protein is None: print('INFO: Protein FF was requested even though protein was not provided. ' 'Ignoring the protein ff request.') return None if self._protein_ff is None: print('WARNING: Protein FF is not configured in the config.protein_ff. ' 'Setting the default leaprc.ff19SB') # fixme - update to a later ff self._protein_ff = 'leaprc.protein.ff19SB' return self._protein_ff @protein_ff.setter def protein_ff(self, ff): self._protein_ff = ff print(f'Protein force field name: {self._protein_ff}') @property def md_engine(self): """ The MD engine, with the supported values NAMD2.13, NAMD2.14, NAMD3 and OpenMM :return: NAMD2.13, NAMD2.14, NAMD3 and OpenMM :rtype: string """ return self._md_engine @md_engine.setter def md_engine(self, value): supported = ['NAMD2.13', 'NAMD2.14', 'NAMD3', 'OpenMM'] if type(value) == str and value.lower() == 'namd2.14': self._md_engine = 'namd' self.namd_version = '2.14' elif type(value) == str and value.lower() == 'namd2.13': self._md_engine = 'namd' self.namd_version = '2.13' elif type(value) == str and value.lower() == 'namd3': self._md_engine = 'namd3' self.namd_version = '3' elif type(value) == str and value.lower() == 'openmm': self._md_engine = 'openmm' self.namd_version = '' else: raise ValueError('Unknown engine {}. Supported engines {}'.format(value, supported)) print(f'MD Engine: {value}') @property def lambda_rep_dir_tree(self): return self._lambda_rep_dir_tree @lambda_rep_dir_tree.setter def lambda_rep_dir_tree(self, value): self._lambda_rep_dir_tree = value @property def ligand_ff(self): """ The forcefield for the ligand. """ return self._ligand_ff @property def ligand_ff_name(self): """ Either GAFF or GAFF2 :return: """ return self._ligand_ff_name @ligand_ff_name.setter def ligand_ff_name(self, atom_type): # save also the atom type if atom_type == 'gaff': # they both use the same ff self._ligand_ff = 'leaprc.gaff' elif atom_type == 'gaff2': self._ligand_ff = 'leaprc.gaff2' else: raise ValueError('Argument -lff cannot be anything else but "gaff" or "gaff2". ') self._ligand_ff_name = atom_type @property def redistribute_q_over_unmatched(self): """ The superimposed and matched atoms have every slightly different charges. Taking an average charge between any two atoms introduces imbalances in the net charge of the alchemical regions, due to the different charge distribution. :return: default(True) """ return self._redistribute_q_over_unmatched @redistribute_q_over_unmatched.setter def redistribute_q_over_unmatched(self, boolean): # Redistribute charge imbalances created due to atom-pair averaging self._redistribute_q_over_unmatched = boolean print(f'Distribute the introduced charge disparity in the alchemical region: ' f'{self._redistribute_q_over_unmatched}') @property def use_hybrid_single_dual_top(self): """ Hybrid single dual topology (experimental). Currently not implemented. :return: default(False). """ # hybrid single dual topology return self._use_hybrid_single_dual_top @use_hybrid_single_dual_top.setter def use_hybrid_single_dual_top(self, boolean): if boolean: self._use_hybrid_single_dual_top = True self._complex_tleap_in = 'leap_complex_sdtop.in' if self._ignore_charges_completely != True: raise Exception('Charges have to be ignored completely when using hybrid single-dual topology.') else: self._complex_tleap_in = 'leap_complex.in' self._use_hybrid_single_dual_top = boolean @property def ligand_tleap_in(self): """ The name of the tleap input file for ambertools for the ligand. :return: Default ('leap_ligand.in') :rtype: string """ if self._ligand_tleap_in is not None: # return the user provided filename return self._ligand_tleap_in if self.use_hybrid_single_dual_top: # return the default option for the hybrid return 'leap_ligand_sdtop.in' # return the default return 'leap_ligand.in' @property def complex_tleap_in(self): """ The tleap input file for the complex. :return: Default 'leap_complex.in' :type: string """ if self._complex_tleap_in is None: # assume that hybrid single-dual topology is not used # this will initiate the standard leap_ligand.in # self._complex_tleap_in = 'leap_complex_sdtop.in' self.use_hybrid_single_dual_top = False self._complex_tleap_in = 'leap_complex.in' return self._complex_tleap_in # PAIR constants configuration @property def prep_dir(self): """ Path to the `prep` directory. Currently in the `workdir` :return: Default (workdir/prep) """ return self.workdir / pathlib.Path('prep') @property def pair_morphfrcmods_dir(self): """ Path to the .frcmod files for the morph. :return: Default (workdir/prep/morph_frcmods) """ return self.prep_dir / 'morph_frcmods' @property def pair_morphfrmocs_tests_dir(self): """ Path to the location where a test is carried out with .frcmod :return: Default (workdir/prep/morph_frcmods/tests) """ return self.pair_morphfrcmods_dir / 'tests' @property def pair_unique_atom_names_dir(self): """ Location of the morph files with unique filenames. :return: Default (workdir/prep/morph_unique_atom_names) """ return self.prep_dir / 'morph_unique_atom_names' @property def lig_unique_atom_names_dir(self): """ Directory location for files with unique atom names. :return: Default (workdir/prep/unique_atom_names) """ return self.prep_dir / 'unique_atom_names' @property def lig_frcmod_dir(self): """ Directory location with the .frcmod created for each ligand. :return: Default (workdir/prep/ligand_frcmods) """ return self.prep_dir / 'ligand_frcmods' @property def lig_acprep_dir(self): """ Directory location where the .ac charges are converted into the .mol2 format. :return: Default (workdir/prep/acprep_to_mol2) """ return self.prep_dir / 'acprep_to_mol2' @property def lig_dir(self): """ Directory location with the .mol2 files. :return: Default (workdir/mol2) """ return self.workdir / 'mol2'
[docs] @staticmethod def get_element_map(): """ :return: """ # Get the mapping of atom types to elements element_map_filename = pathlib.Path(os.path.dirname(__file__)) / 'data' / 'element_atom_type_map.txt' # remove the comments lines with # lines = filter(lambda l: not l.strip().startswith('#') and not l.strip() == '', open(element_map_filename).readlines()) # convert into a dictionary element_map = {} for line in lines: element, atom_types = line.split('=') for atom_type in atom_types.split(): element_map[atom_type.strip()] = element.strip() return element_map
# fixme - this should be determined at the location where it is relevant rather than here in the conf # antechamber parameters, by default compute AM1-BCC charges antechamber_charge_type = ['-c', 'bcc']
[docs] def get_serializable(self): """ Get a JSON serializable structure of the config. pathlib.Path is not JSON serializable, so replace it with str todo - consider capturing all information about the system here, including each suptop.get_serializable() so that you can record specific information such as the charge changes etc. :return: Dictionary {key:value} with the settings :rtype: Dictionary """ host_specific = ['code_root', 'script_dir0', 'namd_script_dir', 'ambertools_script_dir', 'tleap_check_protein', 'vmd_vis_script'] ser = {} for k, v in self.__dict__.items(): if k in host_specific: continue if type(v) is pathlib.PosixPath: v = str(v) # account for the ligands being pathlib objects if k == 'ligands' and v is not None: # a list of ligands, convert to strings v = [str(l) for l in v] if k == '_ligand_files': continue ser[k] = v return ser
def set_configs(self, **kwargs): # set all the configs one by one for k,v in kwargs.items(): setattr(self, k, v)