BALL
1.4.79
|
Computation of clusters of docking poses. More...
#include <BALL/DOCKING/COMMON/poseClustering.h>
Classes | |
class | ClusterProperties |
class | ClusterTreeNodeComparator |
class | ClusterTreeWriter_ |
struct | Default |
Default values for options. More... | |
struct | Option |
Option names. More... | |
class | PosePointer |
class | RigidTransformation |
Public Member Functions | |
void | printClusters (std::ostream &out=std::cout) const |
void | printClusterScores (std::ostream &out=std::cout) |
operation methods | |
bool | compute () |
Access methods | |
void | setConformationSet (ConformationSet *new_set, bool precompute_atombijection=false) |
sets the poses to be clustered; the conformation set's reference system will be the base system More... |
void | setBaseSystemAndPoses (System const &base_system, std::vector< PosePointer > const &poses) |
void | setBaseSystemAndTransformations (System const &base_system, String transformation_file_name) |
const ConformationSet * | getConformationSet () const |
returns the poses to be clustered as ConformationSet More... | |
ConformationSet * | getConformationSet () |
returns the poses to be clustered as ConformationSet More... | |
const std::vector < RigidTransformation > & | getRigidTransformations () const |
returns the poses as rigid transformations More... | |
std::vector< Vector3 > & | getCentersOfMass () |
returns the centers of mass-vector (non-empty only for CENTER_OF_MASS_DISTANCE) More... | |
std::vector< Vector3 > const & | getCentersOfMass () const |
returns the centers of mass-vector, const version (non-empty only for CENTER_OF_MASS_DISTANCE) More... | |
const System & | getSystem () const |
returns the reference pose More... | |
System & | getSystem () |
returns the reference pose More... | |
Size | getNumberOfPoses () const |
returns the number of poses More... | |
Size | getNumberOfClusters () const |
returns the number of clusters found More... | |
const std::set< Index > & | getCluster (Index i) const |
std::set< Index > & | getCluster (Index i) |
Size | getClusterSize (Index i) const |
returns the size of cluster i More... | |
float | getClusterScore (Index i) const |
returns the score of cluster i More... | |
float | getScore (const System sys_a, const System sys_b, Options options) const |
returns the score between two poses given as systems More... | |
AtomBijection & | getAtomBijection () |
returns a reference to the cached AtomBijection More... | |
AtomBijection const & | getAtomBijection () const |
returns a const reference to the cached AtomBijection More... | |
void | applyTransformation2System (Index i, System &target_system) |
apply a transformation to a given system More... | |
void | convertTransformations2Snaphots () |
convert the poses to SnapShots More... | |
void | convertSnaphots2Transformations () |
convert the poses to rigid transformations More... | |
float | computeCompleteLinkageRMSD (Index i, Options options, bool initialize=true) |
returns the complete linkage RMSD of cluster i More... | |
boost::shared_ptr< System > | getPose (Index i) const |
returns the pose i as system More... |
std::vector< PosePointer > const & | getPoses () const |
returns poses as PosePointer More... | |
boost::shared_ptr< System > | getClusterRepresentative (Index i) |
returns the "central cluster" conformation of cluster i as system More... | |
Index | findClusterRepresentative (Index i) |
returns the index of the cluster representative More... | |
boost::shared_ptr < ConformationSet > | getClusterConformationSet (Index i) |
returns cluster i as ConformationSet More... | |
boost::shared_ptr < ConformationSet > | getReducedConformationSet () |
returns a ConformationSet containing one structure per cluster More... | |
bool | refineClustering (Options const &refined_options) |
methods given a full clustering | |
std::vector< std::set< Index > > | extractClustersForThreshold (float threshold, Size min_size=0) |
std::vector< std::set< Index > > | extractNBestClusters (Size n) |
std::vector< std::set< Index > > | filterClusters (Size min_size=1) |
void | serializeWardClusterTree (std::ostream &out, bool binary=false) |
void | deserializeWardClusterTree (std::istream &in, bool binary=false) |
void | exportWardClusterTreeToGraphViz (std::ostream &out) |
Static Public Member Functions | |
rigid transformation methods | |
static float | getRigidRMSD (Eigen::Vector3f const &t_ab, Eigen::Matrix3f const &M_ab, Eigen::Matrix3f const &covariance_matrix) |
static float | getSquaredRigidRMSD (Eigen::Vector3f const &t_ab, Eigen::Matrix3f const &M_ab, Eigen::Matrix3f const &covariance_matrix) |
static Eigen::Matrix3f | computeCovarianceMatrix (System const &system, Index rmsd_level_of_detail=C_ALPHA) |
Protected Member Functions | |
bool | trivialCompute_ () |
bool | linearSpaceCompute_ () |
bool | althausCompute_ () |
void | slinkInner_ (int current_level) |
void | clinkInner_ (int current_level) |
bool | nearestNeighborChainCompute_ () |
void | initWardDistance_ (Index rmsd_type) |
void | updateWardDistance_ (ClusterTreeNode parent, ClusterTreeNode i, ClusterTreeNode j, Index rmsd_type) |
float | computeWardDistance_ (ClusterTreeNode i, ClusterTreeNode j, Index rmsd_type) |
std::set< Index > | collectClusterBelow_ (ClusterTreeNode const &v) |
void | computeCenterOfMasses_ () |
void | precomputeAtomBijection_ () |
float | getClusterRMSD_ (Index i, Index j, Index rmsd_type) |
bool | readTransformationsFromFile_ (String filename) |
float | getRMSD_ (Index i, Index j, Index rmsd_type) |
void | storeSnapShotReferences_ () |
void | printCluster_ (Index i, std::ostream &out=std::cout) const |
void | printVariables_ (int a, int b, double c, int d, double e, int current_level) |
void | clear_ () |
Static Protected Member Functions | |
static bool | isExcludedByLevelOfDetail_ (Atom const *atom, Index rmsd_level_of_detail) |
Protected Attributes | |
Eigen::MatrixXd | pairwise_scores_ |
ConformationSet * | current_set_ |
the ConformationSet we wish to cluster More... | |
std::vector< std::set< Index > > | clusters_ |
the clusters: sets of pose indices More... | |
std::vector< Index > | cluster_representatives_ |
std::vector< float > | cluster_scores_ |
the scores of the clusters More... | |
Index | rmsd_level_of_detail_ |
the RMSD definition used for clustering More... | |
std::vector< PosePointer > | poses_ |
std::vector< RigidTransformation > | transformations_ |
Eigen::Matrix3f | covariance_matrix_ |
System | base_system_ |
SnapShot | base_conformation_ |
bool | has_rigid_transformations_ |
bool | delete_conformation_set_ |
std::vector< double > | lambda_ |
std::vector< int > | pi_ |
std::vector< double > | mu_ |
Size | number_of_selected_atoms_ |
std::vector< Vector3 > | com_ |
AtomBijection | atom_bijection_ |
System | system_i_ |
System | system_j_ |
ClusterTree | cluster_tree_ |
The tree built during hierarchical clustering. More... | |
Constant Definitions | |
typedef boost::adjacency_list < boost::vecS, boost::vecS, boost::directedS, ClusterProperties, boost::no_property, unsigned int > | ClusterTree |
typedef ClusterTree::vertex_descriptor | ClusterTreeNode |
BALL_CREATE (PoseClustering) | |
PoseClustering () | |
Default constructor. More... | |
PoseClustering (ConformationSet *poses, float rmsd) | |
PoseClustering (System const &base_system, String transformation_file_name) | |
PoseClustering for a given set of rigid transformations of a base structure. More... | |
virtual | ~PoseClustering () |
Public Attributes | |
Options | options |
options More... | |
void | setDefaultOptions () |
Computation of clusters of docking poses.
Pose Clustering: This class computes clusters of docking poses given as a conformation set using a complete linkage algorithm.
The class assumes the following setup
We offer several algorithms via the option CLUSTER_METHOD:
The scope of the scoring (the atoms to be considered) can be defined via the option RMSD_LEVEL_OF_DETAIL. If the option is set to PROPERTY_BASED_ATOM_BIJECTION, arbitrary sets of atoms, e.g. binding pockets, can be used by assigning a property named "ATOMBIJECTION_RMSD_SELECTION" to the respective atoms in the reference system. See also BALL::Expression.
The minimal RMSD or Ward distance between the final clusters can be defined via the option DISTANCE_THRESHOLD. In order to relate RMSD and Ward distance, we use sqrt(ward_dist / number_of_selected_atoms) for threshold extraction.
The nearest neighbor chain Ward clustering in principle computes a full clustering. The option DISTANCE_THRESHOLD gives a Ward distance that is automatically used to extract clusters. Further extractions with different thresholds are possible.
The complete linkage algorithms guarantee a minimal cluster distance (max RMSD between all pairs of two clusters), specified with option DISTANCE_THRESHOLD.
The initial poses can be given as ConformationSet or as transformation file, i.e. translation and rotation of each pose. Depending on this choice, the option RMSD_TYPE has to be set to SNAPSHOT_RMSD or RIGID_RMSD. If RMSD_TYPE is set to CENTER_OF_MASS_DISTANCE, the option RMSD_LEVEL_OF_DETAIL will be ignored.
By setting the option RUN_PARALLEL to true, the user can request parallel execution. This will be performed if the execution environment is enabled (BALL_HAS_TBB), and if the algorithm supports it.
Definition at line 116 of file poseClustering.h.
typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::directedS, ClusterProperties, boost::no_property, unsigned int> BALL::PoseClustering::ClusterTree |
Definition at line 259 of file poseClustering.h.
typedef ClusterTree::vertex_descriptor BALL::PoseClustering::ClusterTreeNode |
Definition at line 261 of file poseClustering.h.
BALL::PoseClustering::PoseClustering | ( | ) |
Default constructor.
Constructors and Destructor
BALL::PoseClustering::PoseClustering | ( | ConformationSet * | poses, |
float | rmsd | ||
) |
Detailed constructor. (TODO: really pass a pointer here?)
BALL::PoseClustering::PoseClustering | ( | System const & | base_system, |
String | transformation_file_name | ||
) |
PoseClustering for a given set of rigid transformations of a base structure.
|
virtual |
|
protected |
apply a transformation to a given system
BALL::PoseClustering::BALL_CREATE | ( | PoseClustering | ) |
|
protected |
|
protected |
|
protected |
bool BALL::PoseClustering::compute | ( | ) |
start method.
|
protected |
float BALL::PoseClustering::computeCompleteLinkageRMSD | ( | Index | i, |
Options | options, | ||
bool | initialize = true |
||
) |
returns the complete linkage RMSD of cluster i
|
static |
Compute the covariance matrix for the given system
|
protected |
void BALL::PoseClustering::convertSnaphots2Transformations | ( | ) |
convert the poses to rigid transformations
void BALL::PoseClustering::convertTransformations2Snaphots | ( | ) |
convert the poses to SnapShots
void BALL::PoseClustering::deserializeWardClusterTree | ( | std::istream & | in, |
bool | binary = false |
||
) |
Import the cluster tree from boost::serialize format.
void BALL::PoseClustering::exportWardClusterTreeToGraphViz | ( | std::ostream & | out | ) |
Export the cluster tree in graphviz format.
std::vector<std::set<Index> > BALL::PoseClustering::extractClustersForThreshold | ( | float | threshold, |
Size | min_size = 0 |
||
) |
Extracts clusters with respect to a threshold if a complete clustering was performed. Note: the Ward distance does not equal the RMSD; we use threshold = sqrt(ward_dist / number_of_selected_atoms). See NEAREST_NEIGHBOR_CHAIN_WARD.
Returns up to the first n clusters if a complete clustering was previously performed. See NEAREST_NEIGHBOR_CHAIN_WARD.
Filters the current cluster set with respect to a minimal cluster size. See NEAREST_NEIGHBOR_CHAIN_WARD.
returns the index of the cluster representative
|
inline |
returns a reference to the cached AtomBijection
Definition at line 355 of file poseClustering.h.
|
inline |
returns a const reference to the cached AtomBijection
Definition at line 358 of file poseClustering.h.
|
inline |
returns the centers of mass-vector (non-empty only for CENTER_OF_MASS_DISTANCE)
Definition at line 320 of file poseClustering.h.
|
inline |
returns the centers of mass-vector, const version (non-empty only for CENTER_OF_MASS_DISTANCE)
Definition at line 323 of file poseClustering.h.
returns the indices of all poses assigned to cluster i. Note: enumeration starts with 0.
returns the indices of all poses assigned to cluster i. Note: enumeration starts with 0.
boost::shared_ptr<ConformationSet> BALL::PoseClustering::getClusterConformationSet | ( | Index | i | ) |
returns cluster i as ConformationSet
returns the "central cluster" conformation of cluster i as system
|
inline |
returns the poses to be clustered as ConformationSet
Definition at line 311 of file poseClustering.h.
|
inline |
returns the poses to be clustered as ConformationSet
Definition at line 314 of file poseClustering.h.
|
inline |
returns the number of clusters found
Definition at line 335 of file poseClustering.h.
|
inline |
returns the number of poses
Definition at line 332 of file poseClustering.h.
returns the complete linkage RMSD of a pose set
returns the pose i as system
|
inline |
returns poses as PosePointer
Definition at line 379 of file poseClustering.h.
boost::shared_ptr<ConformationSet> BALL::PoseClustering::getReducedConformationSet | ( | ) |
returns a ConformationSet containing one structure per cluster
|
static |
Compute the root mean square deviation due to a rigid transformation of a point cloud (here, atoms)
t_ab | difference vector between the transformations to be compared |
M_ab | difference of the rotation matrices between the transformations to be compared |
covariance_matrix | the covariance matrix of the atom positions |
|
inline |
returns the poses as rigid transformations
Definition at line 317 of file poseClustering.h.
float BALL::PoseClustering::getScore | ( | const System | sys_a, |
const System | sys_b, | ||
Options | options | ||
) | const |
returns the score between two poses given as systems
|
static |
Compute the mean square deviation due to a rigid transformation of a point cloud (here, atoms)
t_ab | difference vector between the transformations to be compared |
M_ab | difference of the rotation matrices between the transformations to be compared |
covariance_matrix | the covariance matrix of the atom positions |
const System& BALL::PoseClustering::getSystem | ( | ) | const |
returns the reference pose
System& BALL::PoseClustering::getSystem | ( | ) |
returns the reference pose
|
protected |
|
staticprotected |
|
protected |
|
protected |
|
protected |
|
protected |
void BALL::PoseClustering::printClusters | ( | std::ostream & | out = std::cout | ) | const |
prints the clusters as sets of pose indices. Note: counting starts with 0.
void BALL::PoseClustering::printClusterScores | ( | std::ostream & | out = std::cout | ) |
prints the clusters of pose indices with the RMSD between clusters. Note: counting starts with 0.
|
protected |
Refine a given clustering. This function can be used to refine a precomputed clustering further. An important use case would be to pre-cluster using an efficient rmsd implementation (e.g., center of mass or rigid rmsd), and then refine the resulting clusters with the general (i.e., snapshot based) rmsd.
NOTE: This function requires that clusters have already been computed. In the case of a full hierarchical clustering, extractClustersForThreshold or extractNBestClusters must have been called previously.
refined_options | The parameters for the refinement step. |
void BALL::PoseClustering::serializeWardClusterTree | ( | std::ostream & | out, |
bool | binary = false |
||
) |
Export the cluster tree to boost::serialize format.
void BALL::PoseClustering::setBaseSystemAndPoses | ( | System const & | base_system, |
std::vector< PosePointer > const & | poses | ||
) |
Sets a vector of PosePointers to be clustered. Poses (RigidTransformations or SnapShots) can live outside of this class and will not be destroyed.
void BALL::PoseClustering::setBaseSystemAndTransformations | ( | System const & | base_system, |
String | transformation_file_name | ||
) |
reads the poses given as transformations from a file and updates the covariance matrix. Note: the given system will be taken as reference, e.g. all transformations
void BALL::PoseClustering::setConformationSet | ( | ConformationSet * | new_set, |
bool | precompute_atombijection = false |
||
) |
sets the poses to be clustered; the conformation set's reference system will be the base system
void BALL::PoseClustering::setDefaultOptions | ( | ) |
reset the options to default values
|
protected |
|
protected |
|
protected |
|
protected |
|
protected |
Definition at line 708 of file poseClustering.h.
|
protected |
Definition at line 673 of file poseClustering.h.
|
protected |
Definition at line 670 of file poseClustering.h.
|
protected |
Definition at line 651 of file poseClustering.h.
|
protected |
the scores of the clusters
Definition at line 654 of file poseClustering.h.
|
protected |
The tree built during hierarchical clustering.
Definition at line 715 of file poseClustering.h.
|
protected |
the clusters: sets of pose indices
Definition at line 649 of file poseClustering.h.
|
protected |
Definition at line 701 of file poseClustering.h.
|
protected |
Definition at line 667 of file poseClustering.h.
|
protected |
the ConformationSet we wish to cluster
Definition at line 646 of file poseClustering.h.
|
protected |
Definition at line 680 of file poseClustering.h.
|
protected |
Definition at line 676 of file poseClustering.h.
|
protected |
Definition at line 686 of file poseClustering.h.
|
protected |
Definition at line 692 of file poseClustering.h.
|
protected |
Definition at line 696 of file poseClustering.h.
Options BALL::PoseClustering::options |
options
Definition at line 414 of file poseClustering.h.
|
protected |
Definition at line 643 of file poseClustering.h.
|
protected |
Definition at line 690 of file poseClustering.h.
|
protected |
Definition at line 662 of file poseClustering.h.
|
protected |
the RMSD definition used for clustering
Definition at line 657 of file poseClustering.h.
|
protected |
Definition at line 711 of file poseClustering.h.
|
protected |
Definition at line 712 of file poseClustering.h.
|
protected |
Definition at line 665 of file poseClustering.h.