Skip to content

generator

Functions:

join_frcmod_files

join_frcmod_files(f1, f2, output_filepath)

This implementation should be used. Switch to join_frcmod_files2. This version might be removed if the simple approach is fine.

Source code in ties/generator.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
def join_frcmod_files(f1, f2, output_filepath):
    """
    This implementation should be used. Switch to join_frcmod_files2.
    This version might be removed if the simple approach is fine.
    """
    # fixme - load f1 and f2

    def get_section(name, rlines):
        """
        Chips away from the lines until the section is ready

        fixme is there a .frcmod reader in ambertools?
        http://ambermd.org/FileFormats.php#frcmod
        """
        section_names = ["MASS", "BOND", "ANGLE", "DIHE", "IMPROPER", "NONBON"]
        assert name in rlines.pop().strip()

        section = []
        while not (
            len(rlines) == 0
            or any(rlines[-1].startswith(sname) for sname in section_names)
        ):
            nextl = rlines.pop().strip()
            if nextl == "":
                continue
            # depending on the column name, parse differently
            if name == "ANGLE":
                # e.g.
                # c -cc-na   86.700     123.270   same as c2-cc-na, penalty score=  2.6
                atom_types = nextl[:8]
                other = nextl[9:].split()[::-1]
                # The harmonic force constants for the angle "ITT"-"JTT"-
                #                     "KTT" in units of kcal/mol/(rad**2) (radians are the
                #                     traditional unit for angle parameters in force fields).
                harmonicForceConstant = float(other.pop())
                # TEQ        The equilibrium bond angle for the above angle in degrees.
                eq_bond_angle = float(other.pop())
                # the overall angle
                section.append([atom_types, harmonicForceConstant, eq_bond_angle])
            elif name == "DIHE":
                # e.g.
                # ca-ca-cd-cc   1    0.505       180.000           2.000      same as c2-ce-ca-ca, penalty score=229.0
                atom_types = nextl[:11]
                other = nextl[11:].split()[::-1]
                """
                IDIVF      The factor by which the torsional barrier is divided.
                    Consult Weiner, et al., JACS 106:765 (1984) p. 769 for
                    details. Basically, the actual torsional potential is

                           (PK/IDIVF) * (1 + cos(PN*phi - PHASE))

                 PK         The barrier height divided by a factor of 2.

                 PHASE      The phase shift angle in the torsional function.

                            The unit is degrees.

                 PN         The periodicity of the torsional barrier.
                            NOTE: If PN .lt. 0.0 then the torsional potential
                                  is assumed to have more than one term, and the
                                  values of the rest of the terms are read from the
                                  next cards until a positive PN is encountered.  The
                                  negative value of pn is used only for identifying
                                  the existence of the next term and only the
                                  absolute value of PN is kept.
                """
                IDIVF = float(other.pop())
                PK = float(other.pop())
                PHASE = float(other.pop())
                PN = float(other.pop())
                section.append([atom_types, IDIVF, PK, PHASE, PN])
            elif name == "IMPROPER":
                # e.g.
                # cc-o -c -o          1.1          180.0         2.0          Using general improper torsional angle  X- o- c- o, penalty score=  3.0)
                # ...  IDIVF , PK , PHASE , PN
                atom_types = nextl[:11]
                other = nextl[11:].split()[::-1]
                # fixme - what is going on here? why is not generated this number?
                # IDIVF = float(other.pop())
                PK = float(other.pop())
                PHASE = float(other.pop())
                PN = float(other.pop())
                if PN < 0:
                    raise Exception("Unimplemented - ordering using with negative 0")
                section.append([atom_types, PK, PHASE, PN])
            else:
                section.append(nextl.split())
        return {name: section}

    def load_frcmod(filepath):
        # remark line
        rlines = open(filepath).readlines()[::-1]
        assert "Remark" in rlines.pop()

        parsed = OrderedDict()
        for section_name in ["MASS", "BOND", "ANGLE", "DIHE", "IMPROPER", "NONBON"]:
            parsed.update(get_section(section_name, rlines))

        return parsed

    def join_frcmod(left_frc, right_frc):
        joined = OrderedDict()
        for left, right in zip(left_frc.items(), right_frc.items()):
            lname, litems = left
            rname, ritems = right
            assert lname == rname

            joined[lname] = copy.copy(litems)

            if lname == "MASS":
                if len(litems) > 0 or len(ritems) > 0:
                    raise Exception("Unimplemented")
            elif lname == "BOND":
                for ritem in ritems:
                    if len(litems) > 0 or len(ritems) > 0:
                        if ritem not in joined[lname]:
                            raise Exception("Unimplemented")
            # ANGLE, e.g.
            # c -cc-na   86.700     123.270   same as c2-cc-na, penalty score=  2.6
            elif lname == "ANGLE":
                for ritem in ritems:
                    # if the item is not in the litems, add it there
                    # extra the first three terms to determine if it is present
                    # fixme - note we are ignoring the "same as" note
                    if ritem not in joined[lname]:
                        joined[lname].append(ritem)
            elif lname == "DIHE":
                for ritem in ritems:
                    if ritem not in joined[lname]:
                        joined[lname].append(ritem)
            elif lname == "IMPROPER":
                for ritem in ritems:
                    if ritem not in joined[lname]:
                        joined[lname].append(ritem)
            elif lname == "NONBON":
                # if they're empty
                if not litems and not ritems:
                    continue

                raise Exception("Unimplemented")
            else:
                raise Exception("Unimplemented")
        return joined

    def write_frcmod(frcmod, filename):
        with open(filename, "w") as FOUT:
            FOUT.write("GENERATED .frcmod by joining two .frcmod files" + os.linesep)
            for sname, items in frcmod.items():
                FOUT.write(f"{sname}" + os.linesep)
                for item in items:
                    atom_types = item[0]
                    FOUT.write(atom_types)
                    numbers = " \t".join([str(n) for n in item[1:]])
                    FOUT.write(" \t" + numbers)
                    FOUT.write(os.linesep)
                # the ending line
                FOUT.write(os.linesep)

    left_frc = load_frcmod(f1)
    right_frc = load_frcmod(f2)
    joined_frc = join_frcmod(left_frc, right_frc)
    write_frcmod(joined_frc, output_filepath)

correct_fep_tempfactor

correct_fep_tempfactor(suptop, source_pdb_filename, new_pdb_filename, hybrid_topology=False)

fixme - this function does not need to use the file? we have the json information available here.

Sets the temperature column in the PDB file So that the number reflects the alchemical information Requires by NAMD in order to know which atoms appear (1) and which disappear (-1).

Source code in ties/generator.py
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
def correct_fep_tempfactor(
    suptop, source_pdb_filename, new_pdb_filename, hybrid_topology=False
):
    """
    fixme - this function does not need to use the file?
    we have the json information available here.

    Sets the temperature column in the PDB file
    So that the number reflects the alchemical information
    Requires by NAMD in order to know which atoms
    appear (1) and which disappear (-1).
    """

    pmdpdb = parmed.load_file(str(source_pdb_filename), structure=True)
    if "HYB" not in {a.residue.name for a in pmdpdb.atoms}:
        raise Exception('Missing the resname "HYB" in the pdb file prepared for fep')

    # the IDs (0-based) in this PDB correspond to the rank in the MOL2 (1-based)
    id_to_tom = {idx - 1: atom for atom, idx in suptop.internal_ids.items()}

    # dual-topology info
    appearing_atoms = suptop.get_appearing_atoms()
    disappearing_atoms = suptop.get_disappearing_atoms()

    # update the Temp column
    for atom in pmdpdb.atoms:
        # ignore water and ions and non-ligand resname
        # we only modify the protein, so ignore the ligand resnames
        # fixme .. why is it called mer, is it tleap?
        if atom.residue.name != "HYB":
            continue

        # recover the atom (or pair) via the ID
        internal_atom = id_to_tom[atom.idx]

        # if the atom was "matched", meaning present in both ligands (left and right)
        # then ignore
        # note: we only use the left ligand
        if internal_atom in suptop.matched_pairs:
            continue
        elif internal_atom in appearing_atoms:
            # appearing atoms should
            atom.bfactor = 1
        elif internal_atom in disappearing_atoms:
            atom.bfactor = -1
        else:
            raise Exception("This should never happen. It has to be one of the cases")

    pmdpdb.save(
        str(new_pdb_filename), use_hetatoms=False, overwrite=True
    )  # , file_format='PDB') - fixme?

extract_PBC_from_tleap_log

extract_PBC_from_tleap_log(leap_log)

http://ambermd.org/namd/namd_amber.html Return the 9 numbers for the truncated octahedron unit cell in namd cellBasisVector1 d 0.0 0.0 cellBasisVector2 (-1/3)d (2/3)sqrt(2)d 0.0 cellBasisVector3 (-1/3)d (-1/3)sqrt(2)d (-1/3)sqrt(6)*d

Source code in ties/generator.py
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
def extract_PBC_from_tleap_log(leap_log):
    """
    http://ambermd.org/namd/namd_amber.html
    Return the 9 numbers for the truncated octahedron unit cell in namd
    cellBasisVector1  d         0.0            0.0
    cellBasisVector2  (-1/3)*d (2/3)sqrt(2)*d  0.0
    cellBasisVector3  (-1/3)*d (-1/3)sqrt(2)*d (-1/3)sqrt(6)*d
    """

    leap_log = open(leap_log).read()

    # octahedral box
    if "> solvateoct" in leap_log:
        leapl_log_lines = leap_log.split(os.linesep)
        line_to_extract = "Total bounding box for atom centers:"
        line_of_interest = list(
            filter(lambda line: line_to_extract in line, leapl_log_lines)
        )
        d1, d2, d3 = line_of_interest[-1].split(line_to_extract)[1].split()
        d1, d2, d3 = float(d1), float(d2), float(d3)
        assert d1 == d2 == d3
        # scale the d since after minimisation the system turns out to be much smaller?
        d = d1 * 0.8
        return {
            "cbv1": d,
            "cbv2": 0,
            "cbv3": 0,
            "cbv4": (1 / 3.0) * d,
            "cbv5": (2 / 3.0) * np.sqrt(2) * d,
            "cbv6": 0,
            "cbv7": (-1 / 3.0) * d,
            "cbv8": (1 / 3.0) * np.sqrt(2) * d,
            "cbv9": (1 / 3) * np.sqrt(6) * d,
        }

    # rectangular cuboid
    elif re.search(r"> solvateBox", leap_log, flags=re.RegexFlag.IGNORECASE):
        dims_str = re.search(
            "Total vdw box size:\s+(\d+.\d+)\s+(\d+.\d+)\s+(\d+.\d+)", leap_log
        ).groups()
        x, y, z = [float(dim) for dim in dims_str]
        return {
            "cbv1": x,
            "cbv2": 0,
            "cbv3": 0,
            "cbv4": 0,
            "cbv5": y,
            "cbv6": 0,
            "cbv7": 0,
            "cbv8": 0,
            "cbv9": z,
        }
    else:
        raise NotImplementedError(
            "Only octahedral and regular boxes are implemented atm. "
        )

prepare_antechamber_parmchk2

prepare_antechamber_parmchk2(source_script, target_script, net_charge)

Prepare the ambertools scripts. Particularly, create the scritp so that it has the net charge

fixme - run antechamber directly with the right settings from here?

fixme - check if antechamber has a python interface?

Source code in ties/generator.py
431
432
433
434
435
436
437
438
439
def prepare_antechamber_parmchk2(source_script, target_script, net_charge):
    """
    Prepare the ambertools scripts.
    Particularly, create the scritp so that it has the net charge
    # fixme - run antechamber directly with the right settings from here?
    # fixme - check if antechamber has a python interface?
    """
    net_charge_set = open(source_script).read().format(net_charge=net_charge)
    open(target_script, "w").write(net_charge_set)

get_protein_net_charge

get_protein_net_charge(working_dir, protein_file, ambertools_tleap, leap_input_file, prot_ff)

Use automatic ambertools solvation of a single component to determine what is the next charge of the system. This should be replaced with pka/propka or something akin. Note that this is unsuitable for the hybrid ligand: ambertools does not understand a hybrid ligand and might assign the wront net charge.

Source code in ties/generator.py
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
def get_protein_net_charge(
    working_dir, protein_file, ambertools_tleap, leap_input_file, prot_ff
):
    """
    Use automatic ambertools solvation of a single component to determine what is the next charge of the system.
    This should be replaced with pka/propka or something akin.
    Note that this is unsuitable for the hybrid ligand: ambertools does not understand a hybrid ligand
    and might assign the wront net charge.
    """
    cwd = working_dir / "prep" / "prep_protein_to_find_net_charge"
    if not cwd.is_dir():
        cwd.mkdir()

    # copy the protein
    shutil.copy(working_dir / protein_file, cwd)

    # use ambertools to solvate the protein: set ion numbers to 0 so that they are determined automatically
    # fixme - consider moving out of the complex
    leap_in_conf = open(leap_input_file).read()
    ligand_ff = "leaprc.gaff"  # ignored but must be provided
    open(cwd / "solv_prot.in", "w").write(
        leap_in_conf.format(
            protein_ff=prot_ff, ligand_ff=ligand_ff, protein_file=protein_file
        )
    )

    log_filename = cwd / "ties_tleap.log"
    with open(log_filename, "w") as LOG:
        try:
            subprocess.run(
                [ambertools_tleap, "-s", "-f", "solv_prot.in"],
                cwd=cwd,
                stdout=LOG,
                stderr=LOG,
                check=True,
                text=True,
                timeout=60 * 2,  # 2 minutes
            )
        except subprocess.CalledProcessError as E:
            print(
                "ERROR: tleap could generate a simple topology for the protein to check the number of ions. "
            )
            print(f"ERROR: The output was saved in the directory: {cwd}")
            print(f"ERROR: can be found in the file: {log_filename}")
            raise E

    # read the file to see how many ions were added
    newsys = parmed.load_file(str(cwd / "prot_solv.pdb"), structure=True)
    names = [a.name for a in newsys.atoms]
    cl = names.count("Cl-")
    na = names.count("Na+")

    if cl > na:
        return cl - na
    elif cl < na:
        return -(na - cl)

    return 0

prepareFile

prepareFile(src, dst, symbolic=True)

Either copies or sets up a relative link between the files. This allows for a quick switch in how the directory structure is organised. Using relative links means that the entire TIES ligand or TIES complex has to be moved together. However, one might want to be able to send a single replica anywhere and execute it independantly (suitable for BOINC).

@type: 'sym' or 'copy'

Source code in ties/generator.py
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
def prepareFile(src, dst, symbolic=True):
    """
    Either copies or sets up a relative link between the files.
    This allows for a quick switch in how the directory structure is organised.
    Using relative links means that the entire TIES ligand or TIES complex
    has to be moved together.
    However, one might want to be able to send a single replica anywhere and
    execute it independantly (suitable for BOINC).

    @type: 'sym' or 'copy'
    """
    if symbolic:
        # note that deleting all the files is intrusive, todo
        if os.path.isfile(dst):
            os.remove(dst)
        os.symlink(src, dst)
    else:
        if os.path.isfile(dst):
            os.remove(dst)
        shutil.copy(src, dst)

update_PBC_in_namd_input

update_PBC_in_namd_input(namd_filename, new_pbc_box, structure_filename, constraint_lines='')

fixme - rename this file since it generates the .eq files These are the lines we modify: cellBasisVector1 {cell_x} 0.000 0.000 cellBasisVector2 0.000 {cell_y} 0.000 cellBasisVector3 0.000 0.000 {cell_z}

With x/y/z replacing the 3 values

Source code in ties/generator.py
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
def update_PBC_in_namd_input(
    namd_filename, new_pbc_box, structure_filename, constraint_lines=""
):
    """
    fixme - rename this file since it generates the .eq files
    These are the lines we modify:
    cellBasisVector1	{cell_x}  0.000  0.000
    cellBasisVector2	 0.000  {cell_y}  0.000
    cellBasisVector3	 0.000  0.000 {cell_z}

    With x/y/z replacing the 3 values
    """
    assert len(new_pbc_box) == 3

    reformatted_namd_in = (
        open(namd_filename)
        .read()
        .format(
            cell_x=new_pbc_box[0],
            cell_y=new_pbc_box[1],
            cell_z=new_pbc_box[2],
            constraints=constraint_lines,
            output="test_output",
            structure=structure_filename,
        )
    )

    # write to the file
    open(namd_filename, "w").write(reformatted_namd_in)

create_constraint_files

create_constraint_files(original_pdb, output)

:param original_pdb: :param output: :return:

Source code in ties/generator.py
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
def create_constraint_files(original_pdb, output):
    """

    :param original_pdb:
    :param output:
    :return:
    """
    sys = parmed.load_file(str(original_pdb), structure=True)
    # for each atom, give the B column the right value
    for atom in sys.atoms:
        # ignore water
        if atom.residue.name in ["WAT", "Na+", "TIP3W", "TIP3", "HOH", "SPC", "TIP4P"]:
            continue

        # set each atom depending on whether it is a H or not
        if atom.name.upper().startswith("H"):
            atom.bfactor = 0
        else:
            # restrain the heavy atom
            atom.bfactor = 4

    sys.save(output, use_hetatoms=False, overwrite=True)

init_namd_file_min

init_namd_file_min(from_dir, to_dir, filename, structure_name, pbc_box, protein)

:param from_dir: :param to_dir: :param filename: :param structure_name: :param pbc_box: :param protein: :return:

Source code in ties/generator.py
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
def init_namd_file_min(from_dir, to_dir, filename, structure_name, pbc_box, protein):
    """

    :param from_dir:
    :param to_dir:
    :param filename:
    :param structure_name:
    :param pbc_box:
    :param protein:
    :return:
    """
    if protein is not None:
        cons = f"""
constraints  on
consexp  2
# use the same file for the position reference and the B column
consref  ../build/{structure_name}.pdb ;#need all positions
conskfile  ../build/cons.pdb
conskcol  B
        """
    else:
        cons = "constraints  off"

    min_namd_initialised = (
        open(os.path.join(from_dir, filename))
        .read()
        .format(structure_name=structure_name, constraints=cons, **pbc_box)
    )
    out_name = "eq0.conf"
    open(os.path.join(to_dir, out_name), "w").write(min_namd_initialised)

generate_namd_prod

generate_namd_prod(namd_prod, dst_dir, structure_name)

:param namd_prod: :param dst_dir: :param structure_name: :return:

Source code in ties/generator.py
634
635
636
637
638
639
640
641
642
643
644
645
646
def generate_namd_prod(namd_prod, dst_dir, structure_name):
    """

    :param namd_prod:
    :param dst_dir:
    :param structure_name:
    :return:
    """
    input_data = open(namd_prod).read()
    reformatted_namd_in = input_data.format(
        output="sim1", structure_name=structure_name
    )
    open(dst_dir, "w").write(reformatted_namd_in)

generate_namd_eq

generate_namd_eq(namd_eq, dst_dir, structure_name, engine, protein)

:param namd_eq: :param dst_dir: :param structure_name: :param engine: :param protein: :return:

Source code in ties/generator.py
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
def generate_namd_eq(namd_eq, dst_dir, structure_name, engine, protein):
    """

    :param namd_eq:
    :param dst_dir:
    :param structure_name:
    :param engine:
    :param protein:
    :return:
    """
    input_data = open(namd_eq).read()
    for i in range(1, 3):
        if i == 1:
            run = """
constraintScaling 1
run 10000
            """
            pressure = ""
        else:
            run = """
# protocol - minimization
set factor 1
set nall 10
set n 1

while {$n <= $nall} {
   constraintScaling $factor
   run 40000
   set n [expr $n + 1]
   set factor [expr $factor * 0.5]
}

constraintScaling 0
run 600000
            """
            if engine.lower() == "namd" or engine.lower() == "namd2":
                pressure = """
useGroupPressure      yes ;# needed for 2fs steps
useFlexibleCell       no  ;# no for water box, yes for membrane
useConstantArea       no  ;# no for water box, yes for membrane
BerendsenPressure                       on
BerendsenPressureTarget                 1.0
BerendsenPressureCompressibility        4.57e-5
BerendsenPressureRelaxationTime         100
BerendsenPressureFreq                   2
                """
            else:
                pressure = """
useGroupPressure      yes ;# needed for 2fs steps
useFlexibleCell       no  ;# no for water box, yes for membrane
useConstantArea       no  ;# no for water box, yes for membrane
langevinPiston          on             # Nose-Hoover Langevin piston pressure control
langevinPistonTarget  1.01325          # target pressure in bar 1atm = 1.01325bar
langevinPistonPeriod  50.0             # oscillation period in fs. correspond to pgamma T=50fs=0.05ps
langevinPistonTemp    300              # f=1/T=20.0(pgamma)
langevinPistonDecay   25.0             # oscillation decay time. smaller value correspons to larger random
                                       # forces and increased coupling to the Langevin temp bath.
                                       # Equall or smaller than piston period
                """

        if protein is not None:
            cons = f"""
        constraints  on
        consexp  2
        # use the same file for the position reference and the B column
        consref  ../build/{structure_name}.pdb ;#need all positions
        conskfile  ../build/cons.pdb
        conskcol  B
                """
        else:
            cons = "constraints  off"

        prev_output = "eq{}".format(i - 1)

        reformatted_namd_in = input_data.format(
            constraints=cons,
            output="eq%d" % (i),
            prev_output=prev_output,
            structure_name=structure_name,
            pressure=pressure,
            run=run,
        )

        next_eq_step_filename = dst_dir / ("eq%d.conf" % (i))
        open(next_eq_step_filename, "w").write(reformatted_namd_in)

redistribute_charges

redistribute_charges(mda)

Calculate the original charges in the matched component.

Source code in ties/generator.py
736
737
738
739
740
741
def redistribute_charges(mda):
    """
    Calculate the original charges in the matched component.
    """

    return