BALL  1.4.79
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
poseClustering.h
Go to the documentation of this file.
1 // -*- Mode: C++; tab-width: 2; -*-
2 // vi: set ts=2:
3 //
4 
5 #ifndef BALL_DOCKING_POSECLUSTERING_H
6 #define BALL_DOCKING_POSECLUSTERING_H
7 
8 #ifndef BALL_DATATYPE_OPTIONS_H
9 # include <BALL/DATATYPE/options.h>
10 #endif
11 
12 #ifndef BALL_DOCKING_COMMON_CONFORMATIONSET_H
14 #endif
15 
16 #ifndef BALL_MOLMEC_COMMON_SNAPSHOT_H
18 #endif
19 
20 #ifndef BALL_STRUCTURE_ATOMBIJECTION_H
22 #endif
23 
24 #ifndef BALL_KERNEL_SYSTEM_H
25 # include <BALL/KERNEL/system.h>
26 #endif
27 
28 #ifndef BALL_DATATYPE_STRING_H
29 # include <BALL/DATATYPE/string.h>
30 #endif
31 
32 #ifndef BALL_MATHS_VECTOR3_H
33 # include <BALL/MATHS/vector3.h>
34 #endif
35 
36 #ifndef BALL_MATHS_MATRIX44_H
37 # include <BALL/MATHS/matrix44.h>
38 #endif
39 
40 #include <Eigen/Core>
41 
42 #include <boost/shared_ptr.hpp>
43 #include <boost/variant.hpp>
44 #include <boost/graph/adjacency_list.hpp>
45 
46 #include <set>
47 #include <iostream>
48 
49 #ifdef BALL_HAS_TBB
50 # include <tbb/parallel_reduce.h>
51 # include <tbb/blocked_range.h>
52 #endif
53 
54 //#define POSECLUSTERING_DEBUG 1
55 #undef POSECLUSTERING_DEBUG
56 
57 namespace BALL
58 {
117  {
118  public:
122  struct BALL_EXPORT Option
124  {
127  static const String CLUSTER_METHOD;
128 
132 
136 
139  static const String RMSD_TYPE;
140 
143  static const String RUN_PARALLEL;
144 
145  };
146 
149  {
150  static const Index CLUSTER_METHOD;
151  static const float DISTANCE_THRESHOLD;
153  static const Index RMSD_TYPE;
154  static const bool RUN_PARALLEL;
155  static const bool USE_CENTER_OF_MASS_PRECLINK;
156  };
157 
158  enum BALL_EXPORT RMSDType
159  {
161  RIGID_RMSD,
162  CENTER_OF_MASS_DISTANCE
163  };
164 
165  enum BALL_EXPORT RMSDLevelOfDetail
166  {
167  C_ALPHA, //=0
168  HEAVY_ATOMS,
169  BACKBONE,
170  ALL_ATOMS,
171  PROPERTY_BASED_ATOM_BIJECTION
172  };
173 
174  enum BALL_EXPORT ClusterMethod
175  {
177  SLINK_SIBSON,
178  CLINK_DEFAYS,
180  CLINK_ALTHAUS
181  };
182 
184  {
185  public:
187 
188  RigidTransformation(Eigen::Vector3f const& new_trans, Eigen::Matrix3f const& new_rot)
189  : translation(new_trans),
190  rotation(new_rot)
191  {}
192 
193  Eigen::Vector3f translation;
194  Eigen::Matrix3f rotation;
195  };
196 
202  {
203  public:
204  PosePointer(RigidTransformation const* new_trafo, SnapShot const* new_snap = 0)
205  : trafo(new_trafo),
206  snap(new_snap)
207  { }
208 
209  PosePointer(SnapShot const* new_snap)
210  : trafo(0),
211  snap(new_snap)
212  { }
213 
215  SnapShot const* snap;
216  };
217 
219  {
220  public:
223  template <class Archive>
224  void serialize(Archive& ar, const unsigned int version);
225 
228  std::set<Index> poses;
229 
233 
242  boost::variant<Eigen::VectorXf, RigidTransformation> center;
243 
246  float merged_at;
247 #ifdef POSECLUSTERING_DEBUG
248 
250  float current_cluster_id;
251 #endif
252  };
253 
254  typedef boost::adjacency_list<boost::vecS,
255  boost::vecS,
256  boost::directedS,
258  boost::no_property,
259  unsigned int> ClusterTree;
260 
261  typedef ClusterTree::vertex_descriptor ClusterTreeNode;
262 
264 
267 
269  PoseClustering();
270 
273  PoseClustering(ConformationSet* poses, float rmsd);
274 
276  PoseClustering(System const& base_system, String transformation_file_name);
277 
279  virtual ~PoseClustering();
281 
282 
285 
288  bool compute();
289 
291 
295 
297  void setConformationSet(ConformationSet* new_set, bool precompute_atombijection = false);
298 
303  void setBaseSystemAndPoses(System const& base_system, std::vector<PosePointer> const& poses);
304 
307  // will be applied upon the current conformation
308  void setBaseSystemAndTransformations(System const& base_system, String transformation_file_name);
309 
311  const ConformationSet* getConformationSet() const {return current_set_;}
312 
314  ConformationSet* getConformationSet() {return current_set_;}
315 
317  const std::vector<RigidTransformation> & getRigidTransformations() const {return transformations_;}
318 
320  std::vector<Vector3> & getCentersOfMass() {return com_;}
321 
323  std::vector<Vector3> const & getCentersOfMass() const {return com_;}
324 
326  const System& getSystem() const;
327 
329  System& getSystem();
330 
332  Size getNumberOfPoses() const {return poses_.size();}
333 
335  Size getNumberOfClusters() const {return clusters_.size();}
336 
339  const std::set<Index>& getCluster(Index i) const;
340 
343  std::set<Index>& getCluster(Index i);
344 
346  Size getClusterSize(Index i) const;
347 
349  float getClusterScore(Index i) const;
350 
352  float getScore(const System sys_a, const System sys_b, Options options) const;
353 
355  AtomBijection& getAtomBijection() {return atom_bijection_;}
356 
358  AtomBijection const& getAtomBijection() const {return atom_bijection_;}
359 
361  void applyTransformation2System(Index i, System& target_system);
362 
364  void convertTransformations2Snaphots();
365 
367  void convertSnaphots2Transformations();
368 
370  float computeCompleteLinkageRMSD(Index i, Options options, bool initialize = true);
371 
373  //float computeCompleteLinkageRMSD(boost::shared_ptr<ConformationSet> cluster, Option options) const;
374 
376  boost::shared_ptr<System> getPose(Index i) const;
377 
379  std::vector<PosePointer> const& getPoses() const {return poses_;}
380 
382  boost::shared_ptr<System> getClusterRepresentative(Index i);
383 
385  Index findClusterRepresentative(Index i);
386 
388  boost::shared_ptr<ConformationSet> getClusterConformationSet(Index i);
389 
391  boost::shared_ptr<ConformationSet> getReducedConformationSet();
392 
406  bool refineClustering(Options const& refined_options);
407 
409 
413  Options options;
415 
418  void setDefaultOptions();
419 
421 
424 
430  static float getRigidRMSD(Eigen::Vector3f const& t_ab, Eigen::Matrix3f const& M_ab, Eigen::Matrix3f const& covariance_matrix);
431 
437  static float getSquaredRigidRMSD(Eigen::Vector3f const& t_ab, Eigen::Matrix3f const& M_ab, Eigen::Matrix3f const& covariance_matrix);
438 
441  static Eigen::Matrix3f computeCovarianceMatrix(System const& system, Index rmsd_level_of_detail = C_ALPHA);
442 
444 
447 
453  std::vector<std::set<Index> > extractClustersForThreshold(float threshold, Size min_size = 0);
454 
458  std::vector<std::set<Index> > extractNBestClusters(Size n);
459 
463  std::vector<std::set<Index> > filterClusters(Size min_size = 1);
464 
467  void serializeWardClusterTree(std::ostream& out, bool binary = false);
468 
471  void deserializeWardClusterTree(std::istream& in, bool binary = false);
472 
475  void exportWardClusterTreeToGraphViz(std::ostream& out);
476 
478 
479 
482  void printClusters(std::ostream& out = std::cout) const;
483 
486  void printClusterScores(std::ostream& out = std::cout);
487 
488 
489  protected:
490 
491 #ifdef BALL_HAS_TBB
492 
494  class ComputeNearestClusterTask_
495  {
496  public:
498  ComputeNearestClusterTask_(PoseClustering* parent,
499  const std::vector<ClusterTreeNode>& active_clusters,
500  Position current_cluster,
501  Index rmsd_type);
502 
504  ComputeNearestClusterTask_(ComputeNearestClusterTask_& cnct, tbb::split);
505 
507  void join(ComputeNearestClusterTask_ const& cnct);
508 
510  void operator() (const tbb::blocked_range<size_t>& r);
511 
513  Position getMinIndex() {return my_min_index_;}
514 
516  float getMinValue() {return my_min_value_;}
517 
518  protected:
519  // the PoseClustering instance that called us
520  PoseClustering* parent_;
521 
522  // the array we work on
523  const std::vector<ClusterTreeNode>& active_clusters_;
524 
525  // the cluster to compare to everything else
526  Position current_cluster_;
527 
528  // the kind of rmsd computation desired
529  Index rmsd_type_;
530 
531  // the minimum index in our own block
532  Position my_min_index_;
533 
534  // the minimum value in our own block
535  float my_min_value_;
536  };
537 #endif
538 
542  {
543  public:
544  ClusterTreeWriter_(ClusterTree const* cluster_tree)
545  : cluster_tree_(cluster_tree)
546  { }
547 
548  void operator() (std::ostream& out, const ClusterTreeNode& v) const;
549 
550  protected:
552  };
553 
557  {
558  public:
560  : cluster_tree_(&cluster_tree)
561  {}
562 
563  bool operator() (ClusterTreeNode const first, ClusterTreeNode const second) const
564  {
565  float first_value = (*cluster_tree_)[ first].merged_at;
566  float second_value = (*cluster_tree_)[second].merged_at;
567 
568  return first_value < second_value;
569  }
570 
571  protected:
573  };
574 
575  // trivial complete linkage implementation
576  // with O(n^2) space request
577  bool trivialCompute_();
578 
579  // space efficient (SLINK or CLINK) clustering
580  bool linearSpaceCompute_();
581 
582  //
583  bool althausCompute_();
584 
585  // implementation of a single linkage clustering as described in
586  // R. Sibson: SLINK: an optimally efficient algorithm for the single-link cluster method.
587  // The Computer Journal. 16, 1, British Computer Society, 1973, p. 30-34
588  void slinkInner_(int current_level);
589 
590  // implementation of a complete linkage clustering as described in
591  // D. Defays: An efficient algorithm for a complete link method.
592  // The Computer Journal. 20, 4, British Computer Society, 1977, p. 364-366.
593  void clinkInner_(int current_level);
594 
595  // implememtation of the nearest neighbor chain clustering algorithm
596  // as described in:
597  // Murtagh, Fionn (1983): "A survey of recent advances in hierarchical clustering algorithms",
598  // The Computer Journal 26 (4): 354–359
599  bool nearestNeighborChainCompute_();
600 
601  void initWardDistance_(Index rmsd_type);
602 
603  void updateWardDistance_(ClusterTreeNode parent, ClusterTreeNode i, ClusterTreeNode j, Index rmsd_type);
604 
605  float computeWardDistance_(ClusterTreeNode i, ClusterTreeNode j, Index rmsd_type);
606 
607  std::set<Index> collectClusterBelow_(ClusterTreeNode const& v);
608 
609  // compute the center of masses
610  void computeCenterOfMasses_();
611 
612  // precompute an atom bijection for faster access
613  void precomputeAtomBijection_();
614 
615  // check the given atom wrt choice of option RMSD_LEVEL_OF_DETAIL
616  bool static isExcludedByLevelOfDetail_(Atom const* atom, Index rmsd_level_of_detail);
617 
618  // distance between cluster i and cluster j
619  float getClusterRMSD_(Index i, Index j, Index rmsd_type);
620 
621  // reads the poses given as transformations from a file
622  // Note: the previously given system will be taken
623  // as untransformed reference, e.g. all transformations
624  // will be applied upon the current conformation
625  bool readTransformationsFromFile_(String filename);
626 
627  // compute the RMSD between two "poses"
628  float getRMSD_(Index i, Index j, Index rmsd_type);
629 
630  // store pointers to the snapshots in the poses vector
631  void storeSnapShotReferences_();
632 
633  //
634  void printCluster_(Index i, std::ostream& out = std::cout) const;
635 
636  //
637  void printVariables_(int a, int b, double c, int d, double e, int current_level);
638 
639  //
640  void clear_();
641 
642  // only used by trivial clustering
643  Eigen::MatrixXd pairwise_scores_;
644 
647 
649  std::vector< std::set<Index> > clusters_;
650 
651  std::vector< Index > cluster_representatives_;
652 
654  std::vector< float > cluster_scores_;
655 
658 
659 
660  // ----- unified access to the poses, independent of their type
661  // (the poses are either stored in transformations_, or in current_set_)
662  std::vector<PosePointer> poses_;
663 
664  // ----- data structure for transformation input (instead of snapshots)
665  std::vector<RigidTransformation> transformations_;
666 
667  Eigen::Matrix3f covariance_matrix_;
668 
669  // TODO: maybe use a const - ptr instead?
671 
672  // the reference state
674 
675  // flag indicating the use of transformation as input
677 
678  // do we need to delete the conformation set, that was
679  // created by converting transformations to snapshots
681 
682  // ------ data structures for slink and clink
683 
684  // stores the distance at which this indexed element has longer
685  // the largest index of its cluster
686  std::vector<double> lambda_;
687 
688  // the index of the cluster representative at merge-time
689  // (element with largest index)
690  std::vector<int> pi_;
691 
692  std::vector<double> mu_;
693 
694 
695  // ----- data structures for nearest neighbor chain ward
697 
698  // ----- data structure for CENTER_OF_GRAVITY_CLINK
699 
700  // the geometric center of mass
701  std::vector<Vector3> com_;
702 
703  // ----- general data structures
704 
705  // We cache the atom bijection for faster
706  // RMSD computation; this is possible, since the system topology does
707  // not change
709 
710  // helper dummies to speed up snapshot application
713 
716  }; //class PoseClustering
717 } //namesspace BALL
718 
719 #endif // BALL_DOCKING_POSECLUSTERING_H
#define BALL_CREATE(name)
Definition: create.h:62
std::vector< double > mu_
std::vector< double > lambda_
static const String RMSD_TYPE
std::vector< Vector3 > & getCentersOfMass()
returns the centers of mass-vector (non-empty only for CENTER_OF_MASS_DISTANCE)
static const Index RMSD_TYPE
std::vector< PosePointer > const & getPoses() const
returns poses as PosePointer
RIGID_RMSD
static const bool RUN_PARALLEL
const std::vector< RigidTransformation > & getRigidTransformations() const
returns the poses as rigid transformations
CLINK_DEFAYS
std::vector< std::set< Index > > clusters_
the clusters: sets of pose indices
std::vector< Vector3 > com_
static const String CLUSTER_METHOD
RigidTransformation(Eigen::Vector3f const &new_trans, Eigen::Matrix3f const &new_rot)
static const String RUN_PARALLEL
boost::adjacency_list< boost::vecS, boost::vecS, boost::directedS, ClusterProperties, boost::no_property, unsigned int > ClusterTree
TRIVIAL_COMPLETE_LINKAGE
Eigen::Matrix3f covariance_matrix_
static const float DISTANCE_THRESHOLD
std::vector< int > pi_
SLINK_SIBSON
const ConformationSet * getConformationSet() const
returns the poses to be clustered as ConformationSet
ClusterTreeWriter_(ClusterTree const *cluster_tree)
ClusterTreeNodeComparator(ClusterTree &cluster_tree)
Index rmsd_level_of_detail_
the RMSD definition used for clustering
RigidTransformation const * trafo
HEAVY_ATOMS
PosePointer(RigidTransformation const *new_trafo, SnapShot const *new_snap=0)
BALL_EXTERN_VARIABLE const double c
Definition: constants.h:149
static const Index CLUSTER_METHOD
std::vector< Index > cluster_representatives_
BACKBONE
boost::variant< Eigen::VectorXf, RigidTransformation > center
AtomBijection const & getAtomBijection() const
returns a const reference to the cached AtomBijection
Eigen::MatrixXd pairwise_scores_
Computation of clusters of docking poses.
ConformationSet * current_set_
the ConformationSet we wish to cluster
static const String DISTANCE_THRESHOLD
Size getNumberOfClusters() const
returns the number of clusters found
Size getNumberOfPoses() const
returns the number of poses
static const String RMSD_LEVEL_OF_DETAIL
SNAPSHOT_RMSD
PosePointer(SnapShot const *new_snap)
ConformationSet * getConformationSet()
returns the poses to be clustered as ConformationSet
std::vector< Vector3 > const & getCentersOfMass() const
returns the centers of mass-vector, const version (non-empty only for CENTER_OF_MASS_DISTANCE) ...
std::vector< float > cluster_scores_
the scores of the clusters
std::vector< PosePointer > poses_
static const Index RMSD_LEVEL_OF_DETAIL
static const bool USE_CENTER_OF_MASS_PRECLINK
C_ALPHA
ClusterTree cluster_tree_
The tree built during hierarchical clustering.
ALL_ATOMS
std::vector< RigidTransformation > transformations_
AtomBijection & getAtomBijection()
returns a reference to the cached AtomBijection
BALL_INDEX_TYPE Index
Default values for options.
NEAREST_NEIGHBOR_CHAIN_WARD
ClusterTree::vertex_descriptor ClusterTreeNode
#define BALL_EXPORT
Definition: COMMON/global.h:50
AtomBijection atom_bijection_
BALL_SIZE_TYPE Position