Source code for idpconfgen.components.sidechain_packing.mcsce

"""
Implement MC-SCE sidechain packing algorithm logic.

MC-SCE repository at: https://github.com/THGLab/MCSCE
"""
from functools import partial

import numpy as np


mcsce_defaults = {
    'efunc_terms': ('lj', 'clash'),
    'mode': 'simple',
    'n_trials': 10,
    'batch_size': 16,
    'temperature': 300,
    'parallel_worker': 1,
    }

need_H_mask = True
only_H_mask = None


[docs]def add_mcsce_subparser(ap): """Add MC-SCE related parameters to client.""" group = ap.add_argument_group( title="MC-SCE related parameters", description=( "Parameters configuring MC-SCE sidechain sampling. " "Used only if `-scm mcsce` is selected." ), ) group.add_argument( '--mcsce-mode', help=f'Sidechain sampling method. Defaults to {mcsce_defaults["mode"]!r}.', # noqa: E501 choices=('simple', 'exhaustive'), default=mcsce_defaults['mode'], ) group.add_argument( '--mcsce-n_trials', help=f'Sampling trials in exhaustive mode. Defaults to {mcsce_defaults["n_trials"]}.', # noqa: E501 type=int, default=mcsce_defaults['n_trials'], ) group.add_argument( '--mcsce-batch_size', help=f'The MC-SCE batch size. Defaults to {mcsce_defaults["batch_size"]}.', # noqa: E501 type=int, default=mcsce_defaults['batch_size'], ) group.add_argument( '--mcsce-temperature', help=f'Sampling temperature. Defaults to {mcsce_defaults["temperature"]}.', # noqa: E501 type=int, default=mcsce_defaults['temperature'], ) return
[docs]def init_mcsce_sidechains( input_seq, template_masks, all_atom_masks, user_parameters=None, **ignore, ): """ Instantiate dedicated function environment for MC-SCE sidechain building. Examples -------- >>> calc_mcsce = init_mcsce_sidechains('MASFRTPKKLCVAGG', ...) >>> # a (N, 3) array with the N,CA,C,O coordinates >>> coords = np.array( ... ) >>> calc_mcsce(coords) Parameters ---------- input_seq : str The FASTA sequence of the protein for which this function will be used. template_masks : `libbuil.ConfMasks` object Related to the building template in `cli_build`. all_atom_masks : `libbuil.ConfMasks` object Related to the all atoom coordinate system in `cli_build`. user_parameters : dict Dictionary with additional parameters for the MC-SCE package. Returns ------- np.ndarray, dtype=bool, (M, 3) Array mask for the builder. np.ndarray (M, 3) Heavy atom coordinates of the protein sequence. """ from mcsce.core import build_definitions from mcsce.core.side_chain_builder import ( create_side_chain, initialize_func_calc, ) from mcsce.libs.libenergy import prepare_energy_function from mcsce.libs.libstructure import Structure _up = user_parameters or {} params = {**mcsce_defaults, **_up} # initiates only the backbone atoms s = Structure(fasta=input_seq) s.build() ff = build_definitions.forcefields["Amberff14SB"] ff_obj = ff(Cterminal='OXT', Nterminal='HN') _mode = params.pop('mode') mcsce_sampling_options = {'simple': True, 'exhaustive': False} try: return_first_valid = mcsce_sampling_options[_mode] except KeyError as err: _msg = "Mode has to be either 'simple' or 'exhaustive'" raise ValueError(_msg) from err initialize_func_calc( partial( prepare_energy_function, batch_size=params.pop('batch_size'), forcefield=ff_obj, terms=params.pop("efunc_terms"), ), structure=s, ) params['return_first_valid'] = return_first_valid def calc(coords): # yes, I could have made a class, store these attributes, bla bla # ... but I didn't want to :-) global need_H_mask global only_H_mask s.coords = coords[template_masks.non_sidechains] final_structure = create_side_chain(s, **params) if final_structure is None: return None, None if need_H_mask: # the atoms in final_structure are not ordered the same as the # atoms in cli_build. Instead of reordering the atoms in # final_structure, we extract a mask to take all sidechain atoms # and pass those to cli_build. The globals are used to perform # this operation only once. need_H_mask = False only_H_mask = np.where( np.in1d( final_structure.data_array[:, 2], ('N', 'CA', 'C', 'O', 'H1', 'H2', 'H3', 'OXT', 'H'), invert=True, ), ) return all_atom_masks.all_sidechains, final_structure.coords[only_H_mask] # noqa: E501 return calc