Example 5.1 GBCA Benchmark

The Average Steric Occupancy (ASO) descriptor was originally developed in the Denmark lab to capture the dynamic nature of sterics in a molecule. This measures whether an indiviudal conformer is occupying a grid point or not and assigns it a value of 0 if unoccupied and a value of 1 if occupied. This then averages the amount a grid-point was occupied over the number of conformers calculated, giving a value between 0 and 1. More information can be found at DOI:10.1126/science.aau5631

We have significantly accelerated this descriptor calculation such that it can operate on massive ConformerLibraries. This has also been made availble to parallelize for further acceleration. An example of an ASO calculation run through the command line is shown below

Hardware Specification for Rerun

Desktop workstation with 2x (AMD EPYC 7702 64-Core) with total of 128 physical and 256 logical cores, 1024 GB DDR4 with Ubuntu 22.04 LTS operating system.

[1]:
import molli as ml
import numpy as np
import h5py
import timeit
import molli_xt
from scipy.spatial.distance import cdist


# function definitions for benchmarking comparison
def aso_naive(ens: ml.ConformerEnsemble, grid: np.ndarray):
    from math import dist

    aso = np.zeros(grid.shape[0])
    for i, gp in enumerate(grid):
        N = 0
        for conformer in ens:
            if any(
                dist(conformer.coords[j], gp) <= a.vdw_radius
                for j, a in enumerate(ens.atoms)
            ):
                N += 1

        aso[i] = N / ens.n_conformers

    return aso


def aso_numpy(ens: ml.ConformerEnsemble, grid: np.ndarray):
    aso_full = np.empty((ens.n_conformers, grid.shape[0]))

    # Iterate over conformers in the ensemble
    # Complexity (O(n_confs * n_gpts))
    for i, c in enumerate(ens):
        # Iterate over atoms
        for j, a in enumerate(c.atoms):
            where = np.sum((grid - c.coords[j]) ** 2, axis=-1) <= a.vdw_radius**2
            aso_full[i, where] = 1

    return np.average(
        aso_full,
        axis=0,
    )


def aso_scipy_cdist(ens: ml.ConformerEnsemble, grid: np.ndarray):
    aso_full = np.empty((ens.n_conformers, grid.shape[0]))

    # Iterate over conformers in the ensemble
    # Complexity (O(n_confs * n_gpts))
    vdwr = np.array([a.vdw_radius for a in ens.atoms])
    for i, c in enumerate(ens):
        alldist = cdist(grid, c.coords, "euclidean")
        where = np.any(alldist >= vdwr[np.newaxis, :], axis=-1)
        aso_full[i, where] = 1

    return np.average(
        aso_full,
        axis=0,
    )


def aso_molli_cdist(ens: ml.ConformerEnsemble, grid: np.ndarray):
    alldist = molli_xt.cdist32f_eu2(ens._coords, grid)
    vdwr2s = np.array([a.vdw_radius for a in ens.atoms]) ** 2
    diff = alldist <= vdwr2s[:, None]
    np.average(np.any(diff, axis=1), axis=0)


def aso_molli_kdtree_cdist(ens: ml.ConformerEnsemble, grid: np.ndarray):
    pruned = ml.descriptor.prune(grid, ens, max_dist=1.8)
    aso = np.zeros(grid.shape[0])
    aso[pruned] = aso_molli_cdist(ens, grid[pruned])
    return aso
[4]:
if __name__ == "__main__":
    ens = ml.ConformerEnsemble.load_mol2("65_vi.mol2")
    print(f"{ens}, {ens.n_atoms=}")

    calculators = (
        aso_naive,
        aso_numpy,
        aso_scipy_cdist,
        aso_molli_cdist,
        aso_molli_kdtree_cdist,
    )

    grids = {}
    for gn in ("grid15", "grid10", "grid07"):
        with h5py.File(f"bpa_aligned_{gn}.hdf5", "r") as f:
            grids[gn] = np.asarray(f["grid"])

    for gn, grid in grids.items():
        for calc in calculators:
            if calc is aso_naive:
                times = timeit.Timer(
                    """calc(ens, grid)""",
                    globals=globals(),
                ).repeat(1, 1)
            else:
                times = timeit.Timer(
                    """calc(ens, grid)""",
                    globals=globals(),
                ).repeat(5, 3)

            print(f"{gn}: {calc.__name__:<30}: {min(times):0.6f}")

ConformerEnsemble(name='65_vi', formula='C44 H37 O6 P1', n_conformers=215), ens.n_atoms=88
grid15: aso_naive                     : 202.560668
grid15: aso_numpy                     : 5.981086
grid15: aso_scipy_cdist               : 0.770540
grid15: aso_molli_cdist               : 0.688863
grid15: aso_molli_kdtree_cdist        : 0.163643
grid10: aso_naive                     : 656.652245
grid10: aso_numpy                     : 18.337421
grid10: aso_scipy_cdist               : 2.529017
grid10: aso_molli_cdist               : 2.139469
grid10: aso_molli_kdtree_cdist        : 0.516551
grid07: aso_naive                     : 1908.888225
grid07: aso_numpy                     : 50.978774
grid07: aso_scipy_cdist               : 7.755869
grid07: aso_molli_cdist               : 5.844610
grid07: aso_molli_kdtree_cdist        : 1.472854