Skip to content

overlay

Functions:

merge_compatible_suptops_faster

merge_compatible_suptops_faster(pairing_suptop: dict, min_bonds: int)

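:param pairing_suptop: dictionary whose values are the suptops to combine; combinations of its keys are tried for merging. :param min_bonds: number of keys in each combination that is tried; if the End molecule at this point has only two bonds, they can be mapped to two other bonds in the start molecule. :return: list containing the original suptops followed by any merged suptops.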

Source code in ties/overlay.py
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
def merge_compatible_suptops_faster(pairing_suptop: dict, min_bonds: int):
    """
    Combine mutually consistent suptops into larger ones.

    Every combination of ``min_bonds`` keys of ``pairing_suptop`` is tried
    (plus every pair of keys when ``min_bonds`` is 3). When
    ``are_consistent_topologies`` accepts the suptops of a combination, they
    are merged into a shallow copy of the first one and the result is added
    to the output.

    The input dictionary is never modified.

    :param pairing_suptop: dictionary whose values are the suptops to combine.
    :param min_bonds: if the End molecule at this point has only two bonds, they can be mapped to two other bonds
        in the start molecule. Used as the size of the key combinations.
    :return: list with the original suptops followed by the merged ones
    """

    # start with all the suptops as starting points
    # this is because it might be impossible to merge
    # any of the paths
    # in which case the default paths will be the best
    built_topologies = list(pairing_suptop.values())

    # a single suptop has nothing to be merged with; read it without
    # popping so that the caller's dictionary stays intact
    if len(built_topologies) == 1:
        return built_topologies

    # any to any (no filtering of the combinations is applied)
    keys = list(pairing_suptop)
    selected_pairings = list(itertools.combinations(keys, r=min_bonds))
    if min_bonds == 3:
        selected_pairings.extend(itertools.combinations(keys, r=2))

    # attempt to combine the different traversals
    for mapping in selected_pairings:
        # mapping the different bonds to different bonds
        candidates = [pairing_suptop[key] for key in mapping]

        # check if the suptops are consistent with each other
        if not are_consistent_topologies(candidates):
            continue

        # merge them! work on a copy so the original suptop stays available
        large_suptop = copy.copy(candidates[0])
        for next_suptop in candidates[1:]:
            # add both the pairs and the bonds that are not present in st1
            large_suptop.merge(next_suptop)

        built_topologies.append(large_suptop)

    return built_topologies

extract_best_suptop

extract_best_suptop(suptops, use_rmsd=True, weights=None, get_list=False)

Assumes that any merging possible already took place. We now have a set of solutions and have to select the best ones.

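:param suptops: list of candidate suptops; it is sorted in place from largest to smallest. :param use_rmsd: use RMSD to decide which SupTop is better when they are the same length. If False, only the MCS size is used. :param weights: Weights (a pair: MCS score weight, RMSD weight) in a combined score (MCS size with RMSD) for an automated selection. Note this feature was causing tricky issues. None means it is not used. :param get_list: if True, return the whole ordered list of suptops instead of only the best one. :return: the best suptop (or the ordered list when get_list is True); None if suptops is empty.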

Source code in ties/overlay.py
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
def extract_best_suptop(suptops, use_rmsd=True, weights=None, get_list=False):
    """
    Rank the candidate suptops and return the preferred one.

    Assumes that any merging possible already took place, so the only job
    left is to order the available solutions.

    Note: ``suptops`` is sorted in place (largest first) whenever it holds
    more than one element.

    :param suptops: list of candidate suptops
    :param use_rmsd: when True, suptops of the same length are ordered by the
        value of ``align_ligands_using_mcs()`` (smallest first). When False,
        only the length is used.
    :param weights: pair of weights (MCS score weight, RMSD weight) for a
        combined score used as the final ordering. Note this feature was
        causing tricky issues. None means it is not used
    :param get_list: when True, return the whole ordered list rather than
        only its first element
    :return: the best suptop, or the ordered list if ``get_list`` is set;
        None (with a warning) when ``suptops`` is empty
    """

    # fixme - uses coordinates to decide which mapping is better.
    #  - Improve: use dihedral angles to decide which mapping is better too
    def pick(ranked):
        # either the whole ranking or only its head, as requested
        return ranked if get_list else ranked[0]

    if len(suptops) == 0:
        warnings.warn("Cannot decide on the best mapping without any suptops...")
        return None

    if len(suptops) == 1:
        return pick(suptops)

    # largest suptops go first
    suptops.sort(key=len, reverse=True)

    if not use_rmsd:
        return pick(suptops)

    # within one length the smaller RMSD wins
    # (most likely this is about hydrogens)
    ranked = []
    for _size, group in itertools.groupby(suptops, key=len):
        by_rmsd = sorted(group, key=lambda st: st.align_ligands_using_mcs())
        best, others = by_rmsd[0], by_rmsd[1:]

        # the lowest RMSD solution leads its group
        ranked.append(best)

        for candidate in others:
            if not candidate.is_mirror_of(best):
                # a genuinely different, alternative solution
                ranked.append(candidate)
                continue
            # mirrors are attached to the best suptop instead of being listed
            best.add_mirror_suptop(candidate)

    if weights:
        # combined score, 0 being optimal for both components
        def combined_score(st):
            size_term = (1 - st.mcs_score()) * weights[0]
            rmsd_term = st.align_ligands_using_mcs() * weights[1]
            return (size_term + rmsd_term) / len(weights)

        ranked.sort(key=combined_score)

    return pick(ranked)