BALL  1.4.79
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
Classes | Public Member Functions | Protected Member Functions | Static Protected Member Functions | Protected Attributes | List of all members
BALL::PoseClustering Class Reference

Computation of clusters of docking poses. More...

#include <BALL/DOCKING/COMMON/poseClustering.h>

Classes

class  ClusterProperties
 
class  ClusterTreeNodeComparator
 
class  ClusterTreeWriter_
 
struct  Default
 Default values for options. More...
 
struct  Option
 Option names. More...
 
class  PosePointer
 
class  RigidTransformation
 

Public Member Functions

void printClusters (std::ostream &out=std::cout) const
 
void printClusterScores (std::ostream &out=std::cout)
 
operation methods
bool compute ()
 
Access methods
void setConformationSet (ConformationSet *new_set, bool precompute_atombijection=false)
 sets the poses to be clustered; the conformation set's reference system will be the base system More...
 
void setBaseSystemAndPoses (System const &base_system, std::vector< PosePointer > const &poses)
 
void setBaseSystemAndTransformations (System const &base_system, String transformation_file_name)
 
const ConformationSet * getConformationSet () const
 returns the poses to be clustered as ConformationSet More...
 
ConformationSet * getConformationSet ()
 returns the poses to be clustered as ConformationSet More...
 
const std::vector
< RigidTransformation > & 
getRigidTransformations () const
 returns the poses as rigid transformations More...
 
std::vector< Vector3 > & getCentersOfMass ()
 returns the centers of mass-vector (non-empty only for CENTER_OF_MASS_DISTANCE) More...
 
std::vector< Vector3 > const & getCentersOfMass () const
 returns the centers of mass-vector, const version (non-empty only for CENTER_OF_MASS_DISTANCE) More...
 
const System & getSystem () const
 returns the reference pose More...
 
System & getSystem ()
 returns the reference pose More...
 
Size getNumberOfPoses () const
 returns the number of poses More...
 
Size getNumberOfClusters () const
 returns the number of clusters found More...
 
const std::set< Index > & getCluster (Index i) const
 
std::set< Index > & getCluster (Index i)
 
Size getClusterSize (Index i) const
 returns the size of cluster i More...
 
float getClusterScore (Index i) const
 returns the score of cluster i More...
 
float getScore (const System sys_a, const System sys_b, Options options) const
 returns the score between two poses given as systems More...
 
AtomBijection & getAtomBijection ()
 returns a reference to the cached AtomBijection More...
 
AtomBijection const & getAtomBijection () const
 returns a const reference to the cached AtomBijection More...
 
void applyTransformation2System (Index i, System &target_system)
 apply a transformation to a given system More...
 
void convertTransformations2Snaphots ()
 convert the poses to SnapShots More...
 
void convertSnaphots2Transformations ()
 convert the poses to rigid transformations More...
 
float computeCompleteLinkageRMSD (Index i, Options options, bool initialize=true)
 returns the complete linkage RMSD of cluster i More...
 
boost::shared_ptr< System > getPose (Index i) const
 returns the pose i as system More...
 
std::vector< PosePointer > const & getPoses () const
 returns poses as PosePointer More...
 
boost::shared_ptr< System > getClusterRepresentative (Index i)
 returns the "central cluster" conformation of cluster i as system More...
 
Index findClusterRepresentative (Index i)
 returns the index of the cluster representative More...
 
boost::shared_ptr
< ConformationSet > 
getClusterConformationSet (Index i)
 returns cluster i as ConformationSet More...
 
boost::shared_ptr
< ConformationSet > 
getReducedConformationSet ()
 returns a ConformationSet containing one structure per cluster More...
 
bool refineClustering (Options const &refined_options)
 
methods given a full clustering
std::vector< std::set< Index > > extractClustersForThreshold (float threshold, Size min_size=0)
 
std::vector< std::set< Index > > extractNBestClusters (Size n)
 
std::vector< std::set< Index > > filterClusters (Size min_size=1)
 
void serializeWardClusterTree (std::ostream &out, bool binary=false)
 
void deserializeWardClusterTree (std::istream &in, bool binary=false)
 
void exportWardClusterTreeToGraphViz (std::ostream &out)
 

Static Public Member Functions

rigid transformation methods
static float getRigidRMSD (Eigen::Vector3f const &t_ab, Eigen::Matrix3f const &M_ab, Eigen::Matrix3f const &covariance_matrix)
 
static float getSquaredRigidRMSD (Eigen::Vector3f const &t_ab, Eigen::Matrix3f const &M_ab, Eigen::Matrix3f const &covariance_matrix)
 
static Eigen::Matrix3f computeCovarianceMatrix (System const &system, Index rmsd_level_of_detail=C_ALPHA)
 

Protected Member Functions

bool trivialCompute_ ()
 
bool linearSpaceCompute_ ()
 
bool althausCompute_ ()
 
void slinkInner_ (int current_level)
 
void clinkInner_ (int current_level)
 
bool nearestNeighborChainCompute_ ()
 
void initWardDistance_ (Index rmsd_type)
 
void updateWardDistance_ (ClusterTreeNode parent, ClusterTreeNode i, ClusterTreeNode j, Index rmsd_type)
 
float computeWardDistance_ (ClusterTreeNode i, ClusterTreeNode j, Index rmsd_type)
 
std::set< Index > collectClusterBelow_ (ClusterTreeNode const &v)
 
void computeCenterOfMasses_ ()
 
void precomputeAtomBijection_ ()
 
float getClusterRMSD_ (Index i, Index j, Index rmsd_type)
 
bool readTransformationsFromFile_ (String filename)
 
float getRMSD_ (Index i, Index j, Index rmsd_type)
 
void storeSnapShotReferences_ ()
 
void printCluster_ (Index i, std::ostream &out=std::cout) const
 
void printVariables_ (int a, int b, double c, int d, double e, int current_level)
 
void clear_ ()
 

Static Protected Member Functions

static bool isExcludedByLevelOfDetail_ (Atom const *atom, Index rmsd_level_of_detail)
 

Protected Attributes

Eigen::MatrixXd pairwise_scores_
 
ConformationSet * current_set_
 the ConformationSet we wish to cluster More...
 
std::vector< std::set< Index > > clusters_
 the clusters: sets of pose indices More...
 
std::vector< Index > cluster_representatives_
 
std::vector< float > cluster_scores_
 the scores of the clusters More...
 
Index rmsd_level_of_detail_
 the RMSD definition used for clustering More...
 
std::vector< PosePointer > poses_
 
std::vector< RigidTransformation > transformations_
 
Eigen::Matrix3f covariance_matrix_
 
System base_system_
 
SnapShot base_conformation_
 
bool has_rigid_transformations_
 
bool delete_conformation_set_
 
std::vector< double > lambda_
 
std::vector< int > pi_
 
std::vector< double > mu_
 
Size number_of_selected_atoms_
 
std::vector< Vector3 > com_
 
AtomBijection atom_bijection_
 
System system_i_
 
System system_j_
 
ClusterTree cluster_tree_
 The tree built during hierarchical clustering. More...
 

Constant Definitions

typedef boost::adjacency_list
< boost::vecS, boost::vecS,
boost::directedS,
ClusterProperties,
boost::no_property, unsigned
int > 
ClusterTree
 
typedef
ClusterTree::vertex_descriptor 
ClusterTreeNode
 
 BALL_CREATE (PoseClustering)
 
 PoseClustering ()
 Default constructor. More...
 
 PoseClustering (ConformationSet *poses, float rmsd)
 
 PoseClustering (System const &base_system, String transformation_file_name)
 PoseClustering for a given set of rigid transformations of a base structure. More...
 
virtual ~PoseClustering ()
 

Public Attributes

Options options
 options More...
 
void setDefaultOptions ()
 

Detailed Description

Computation of clusters of docking poses.

Pose Clustering: This class computes clusters of docking poses given as a conformation set using a complete linkage algorithm.

The class assumes the following setup

We offer several algorithms via the option CLUSTER_METHOD:

The scope of the scoring (the atoms to be considered) can be defined via the option RMSD_LEVEL_OF_DETAIL. If the option is set to PROPERTY_BASED_ATOM_BIJECTION, arbitrary sets of atoms, e.g. binding pockets, can be used by assigning the property named "ATOMBIJECTION_RMSD_SELECTION" to the respective atoms in the reference system. See also BALL::Expression.

The minimal rmsd or ward distance between the final clusters can be defined via option DISTANCE_THRESHOLD. In order to relate RMSD and ward distance, we use sqrt(ward_dist / number_of_selected_atoms) for threshold extraction.

The nearest neighbor chain ward clustering in principle computes a full clustering. Option DISTANCE_THRESHOLD gives a ward distance that is automatically used to extract clusters. Further extractions with different thresholds are possible.

The complete linkage algorithms guarantee a minimal cluster distance (max RMSD between all pairs of two clusters), specified with option DISTANCE_THRESHOLD.

The initial poses can be given as ConformationSet or as transformation file, i.e. translation and rotation of each pose. Depending on this choice, the option RMSD_TYPE has to be set to SNAPSHOT_RMSD or RIGID_RMSD. If RMSD_TYPE is set to CENTER_OF_MASS_DISTANCE, the option RMSD_LEVEL_OF_DETAIL will be ignored.

By setting the option RUN_PARALLEL to true, the user can request parallel execution. This will be performed if the execution environment is enabled (BALL_HAS_TBB), and if the algorithm supports it.

Definition at line 116 of file poseClustering.h.

Member Typedef Documentation

typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::directedS, ClusterProperties, boost::no_property, unsigned int> BALL::PoseClustering::ClusterTree

Definition at line 259 of file poseClustering.h.

typedef ClusterTree::vertex_descriptor BALL::PoseClustering::ClusterTreeNode

Definition at line 261 of file poseClustering.h.

Constructor & Destructor Documentation

BALL::PoseClustering::PoseClustering ( )

Default constructor.

Constructors and Destructor

BALL::PoseClustering::PoseClustering ( ConformationSet *  poses,
float  rmsd 
)

Detailed constructor. (TODO: really pass a pointer here?)

BALL::PoseClustering::PoseClustering ( System const &  base_system,
String  transformation_file_name 
)

PoseClustering for a given set of rigid transformations of a base structure.

virtual BALL::PoseClustering::~PoseClustering ( )
virtual

Member Function Documentation

bool BALL::PoseClustering::althausCompute_ ( )
protected
void BALL::PoseClustering::applyTransformation2System ( Index  i,
System &  target_system 
)

apply a transformation to a given system

BALL::PoseClustering::BALL_CREATE ( PoseClustering  )
void BALL::PoseClustering::clear_ ( )
protected
void BALL::PoseClustering::clinkInner_ ( int  current_level)
protected
std::set<Index> BALL::PoseClustering::collectClusterBelow_ ( ClusterTreeNode const &  v)
protected
bool BALL::PoseClustering::compute ( )

start method.

void BALL::PoseClustering::computeCenterOfMasses_ ( )
protected
float BALL::PoseClustering::computeCompleteLinkageRMSD ( Index  i,
Options  options,
bool  initialize = true 
)

returns the complete linkage RMSD of cluster i

static Eigen::Matrix3f BALL::PoseClustering::computeCovarianceMatrix ( System const &  system,
Index  rmsd_level_of_detail = C_ALPHA 
)
static

Compute the covariance matrix for the given system

float BALL::PoseClustering::computeWardDistance_ ( ClusterTreeNode  i,
ClusterTreeNode  j,
Index  rmsd_type 
)
protected
void BALL::PoseClustering::convertSnaphots2Transformations ( )

convert the poses to rigid transformations

void BALL::PoseClustering::convertTransformations2Snaphots ( )

convert the poses to SnapShots

void BALL::PoseClustering::deserializeWardClusterTree ( std::istream &  in,
bool  binary = false 
)

Import the cluster tree from boost::serialize format.

void BALL::PoseClustering::exportWardClusterTreeToGraphViz ( std::ostream &  out)

Export the cluster tree in graphviz format.

std::vector<std::set<Index> > BALL::PoseClustering::extractClustersForThreshold ( float  threshold,
Size  min_size = 0 
)

Extract clusters with respect to a threshold if a complete clustering was performed. Note: the Ward distance does not equal the RMSD. We use threshold = sqrt(ward_dist / number_of_selected_atoms). See NEAREST_NEIGHBOR_CHAIN_WARD.

std::vector<std::set<Index> > BALL::PoseClustering::extractNBestClusters ( Size  n)

returns up to the first n clusters if a complete clustering was previously performed. See NEAREST_NEIGHBOR_CHAIN_WARD.

std::vector<std::set<Index> > BALL::PoseClustering::filterClusters ( Size  min_size = 1)

filters the current cluster set with respect to a minimal cluster size. See NEAREST_NEIGHBOR_CHAIN_WARD.

Index BALL::PoseClustering::findClusterRepresentative ( Index  i)

returns the index of the cluster representative

AtomBijection& BALL::PoseClustering::getAtomBijection ( )
inline

returns a reference to the cached AtomBijection

Definition at line 355 of file poseClustering.h.

AtomBijection const& BALL::PoseClustering::getAtomBijection ( ) const
inline

returns a const reference to the cached AtomBijection

Definition at line 358 of file poseClustering.h.

std::vector<Vector3>& BALL::PoseClustering::getCentersOfMass ( )
inline

returns the centers of mass-vector (non-empty only for CENTER_OF_MASS_DISTANCE)

Definition at line 320 of file poseClustering.h.

std::vector<Vector3> const& BALL::PoseClustering::getCentersOfMass ( ) const
inline

returns the centers of mass-vector, const version (non-empty only for CENTER_OF_MASS_DISTANCE)

Definition at line 323 of file poseClustering.h.

const std::set<Index>& BALL::PoseClustering::getCluster ( Index  i) const

returns indices of all poses assigned to cluster i. Note: enumeration starts with 0.

std::set<Index>& BALL::PoseClustering::getCluster ( Index  i)

returns indices of all poses assigned to cluster i. Note: enumeration starts with 0.

boost::shared_ptr<ConformationSet> BALL::PoseClustering::getClusterConformationSet ( Index  i)

returns cluster i as ConformationSet

boost::shared_ptr<System> BALL::PoseClustering::getClusterRepresentative ( Index  i)

returns the "central cluster" conformation of cluster i as system

float BALL::PoseClustering::getClusterRMSD_ ( Index  i,
Index  j,
Index  rmsd_type 
)
protected
float BALL::PoseClustering::getClusterScore ( Index  i) const

returns the score of cluster i

Size BALL::PoseClustering::getClusterSize ( Index  i) const

returns the size of cluster i

const ConformationSet* BALL::PoseClustering::getConformationSet ( ) const
inline

returns the poses to be clustered as ConformationSet

Definition at line 311 of file poseClustering.h.

ConformationSet* BALL::PoseClustering::getConformationSet ( )
inline

returns the poses to be clustered as ConformationSet

Definition at line 314 of file poseClustering.h.

Size BALL::PoseClustering::getNumberOfClusters ( ) const
inline

returns the number of clusters found

Definition at line 335 of file poseClustering.h.

Size BALL::PoseClustering::getNumberOfPoses ( ) const
inline

returns the number of poses

Definition at line 332 of file poseClustering.h.

boost::shared_ptr<System> BALL::PoseClustering::getPose ( Index  i) const

returns the complete linkage RMSD of a pose set

returns the pose i as system

std::vector<PosePointer> const& BALL::PoseClustering::getPoses ( ) const
inline

returns poses as PosePointer

Definition at line 379 of file poseClustering.h.

boost::shared_ptr<ConformationSet> BALL::PoseClustering::getReducedConformationSet ( )

returns a ConformationSet containing one structure per cluster

static float BALL::PoseClustering::getRigidRMSD ( Eigen::Vector3f const &  t_ab,
Eigen::Matrix3f const &  M_ab,
Eigen::Matrix3f const &  covariance_matrix 
)
static

Compute the root mean square deviation due to a rigid transformation of a point cloud (here, atoms)

Parameters
t_ab: difference vector between the transformations to be compared
M_ab: difference of the rotation matrices between the transformations to be compared
covariance_matrix: the covariance matrix of the atom positions
const std::vector<RigidTransformation>& BALL::PoseClustering::getRigidTransformations ( ) const
inline

returns the poses as rigid transformations

Definition at line 317 of file poseClustering.h.

float BALL::PoseClustering::getRMSD_ ( Index  i,
Index  j,
Index  rmsd_type 
)
protected
float BALL::PoseClustering::getScore ( const System  sys_a,
const System  sys_b,
Options  options 
) const

returns the score between two poses given as systems

static float BALL::PoseClustering::getSquaredRigidRMSD ( Eigen::Vector3f const &  t_ab,
Eigen::Matrix3f const &  M_ab,
Eigen::Matrix3f const &  covariance_matrix 
)
static

Compute the mean square deviation due to a rigid transformation of a point cloud (here, atoms)

Parameters
t_ab: difference vector between the transformations to be compared
M_ab: difference of the rotation matrices between the transformations to be compared
covariance_matrix: the covariance matrix of the atom positions
const System& BALL::PoseClustering::getSystem ( ) const

returns the reference pose

System& BALL::PoseClustering::getSystem ( )

returns the reference pose

void BALL::PoseClustering::initWardDistance_ ( Index  rmsd_type)
protected
static bool BALL::PoseClustering::isExcludedByLevelOfDetail_ ( Atom const *  atom,
Index  rmsd_level_of_detail 
)
static protected
bool BALL::PoseClustering::linearSpaceCompute_ ( )
protected
bool BALL::PoseClustering::nearestNeighborChainCompute_ ( )
protected
void BALL::PoseClustering::precomputeAtomBijection_ ( )
protected
void BALL::PoseClustering::printCluster_ ( Index  i,
std::ostream &  out = std::cout 
) const
protected
void BALL::PoseClustering::printClusters ( std::ostream &  out = std::cout) const

print the clusters as sets of pose indices. Note: counting starts with 0.

void BALL::PoseClustering::printClusterScores ( std::ostream &  out = std::cout)

print clusters of pose indices with RMSD between clusters. Note: counting starts with 0.

void BALL::PoseClustering::printVariables_ ( int  a,
int  b,
double  c,
int  d,
double  e,
int  current_level 
)
protected
bool BALL::PoseClustering::readTransformationsFromFile_ ( String  filename)
protected
bool BALL::PoseClustering::refineClustering ( Options const &  refined_options)

Refine a given clustering. This function can be used to refine a precomputed clustering further. An important use case would be to pre-cluster using an efficient rmsd implementation (e.g., center of mass or rigid rmsd), and then refine the resulting clusters with the general (i.e., snapshot based) rmsd.

NOTE: This function requires that clusters have already been computed. In the case of a full hierarchical clustering, extractClustersForThreshold or extractNBestClusters must have been called previously.

Parameters
refined_options: The parameters for the refinement step.
void BALL::PoseClustering::serializeWardClusterTree ( std::ostream &  out,
bool  binary = false 
)

Export the cluster tree to boost::serialize format.

void BALL::PoseClustering::setBaseSystemAndPoses ( System const &  base_system,
std::vector< PosePointer > const &  poses 
)

Set a vector of PosePointers to be clustered. Poses (RigidTransformations or SnapShots) can live outside of this class and will not be destroyed.

void BALL::PoseClustering::setBaseSystemAndTransformations ( System const &  base_system,
String  transformation_file_name 
)

reads the poses given as transformations from a file and updates the covariance matrix. Note: the given system will be taken as reference, e.g. all transformations

void BALL::PoseClustering::setConformationSet ( ConformationSet *  new_set,
bool  precompute_atombijection = false 
)

sets the poses to be clustered; the conformation set's reference system will be the base system

void BALL::PoseClustering::setDefaultOptions ( )

reset the options to default values

void BALL::PoseClustering::slinkInner_ ( int  current_level)
protected
void BALL::PoseClustering::storeSnapShotReferences_ ( )
protected
bool BALL::PoseClustering::trivialCompute_ ( )
protected
void BALL::PoseClustering::updateWardDistance_ ( ClusterTreeNode  parent,
ClusterTreeNode  i,
ClusterTreeNode  j,
Index  rmsd_type 
)
protected

Member Data Documentation

AtomBijection BALL::PoseClustering::atom_bijection_
protected

Definition at line 708 of file poseClustering.h.

SnapShot BALL::PoseClustering::base_conformation_
protected

Definition at line 673 of file poseClustering.h.

System BALL::PoseClustering::base_system_
protected

Definition at line 670 of file poseClustering.h.

std::vector< Index > BALL::PoseClustering::cluster_representatives_
protected

Definition at line 651 of file poseClustering.h.

std::vector< float > BALL::PoseClustering::cluster_scores_
protected

the scores of the clusters

Definition at line 654 of file poseClustering.h.

ClusterTree BALL::PoseClustering::cluster_tree_
protected

The tree built during hierarchical clustering.

Definition at line 715 of file poseClustering.h.

std::vector< std::set<Index> > BALL::PoseClustering::clusters_
protected

the clusters: sets of pose indices

Definition at line 649 of file poseClustering.h.

std::vector<Vector3> BALL::PoseClustering::com_
protected

Definition at line 701 of file poseClustering.h.

Eigen::Matrix3f BALL::PoseClustering::covariance_matrix_
protected

Definition at line 667 of file poseClustering.h.

ConformationSet* BALL::PoseClustering::current_set_
protected

the ConformationSet we wish to cluster

Definition at line 646 of file poseClustering.h.

bool BALL::PoseClustering::delete_conformation_set_
protected

Definition at line 680 of file poseClustering.h.

bool BALL::PoseClustering::has_rigid_transformations_
protected

Definition at line 676 of file poseClustering.h.

std::vector<double> BALL::PoseClustering::lambda_
protected

Definition at line 686 of file poseClustering.h.

std::vector<double> BALL::PoseClustering::mu_
protected

Definition at line 692 of file poseClustering.h.

Size BALL::PoseClustering::number_of_selected_atoms_
protected

Definition at line 696 of file poseClustering.h.

Options BALL::PoseClustering::options

options

Definition at line 414 of file poseClustering.h.

Eigen::MatrixXd BALL::PoseClustering::pairwise_scores_
protected

Definition at line 643 of file poseClustering.h.

std::vector<int> BALL::PoseClustering::pi_
protected

Definition at line 690 of file poseClustering.h.

std::vector<PosePointer> BALL::PoseClustering::poses_
protected

Definition at line 662 of file poseClustering.h.

Index BALL::PoseClustering::rmsd_level_of_detail_
protected

the RMSD definition used for clustering

Definition at line 657 of file poseClustering.h.

System BALL::PoseClustering::system_i_
protected

Definition at line 711 of file poseClustering.h.

System BALL::PoseClustering::system_j_
protected

Definition at line 712 of file poseClustering.h.

std::vector<RigidTransformation> BALL::PoseClustering::transformations_
protected

Definition at line 665 of file poseClustering.h.