OpenMS
QTClusterFinder.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Steffen Sass, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
43 
44 #include <boost/heap/fibonacci_heap.hpp>
45 #include <unordered_map>
46 
47 #include <list>
48 #include <vector>
49 #include <unordered_set>
50 #include <utility> // for pair<>
51 
52 namespace OpenMS
53 {
54 
104  class OPENMS_DLLAPI QTClusterFinder :
105  public BaseGroupFinder
106  {
107  public:
108 
110  typedef std::unordered_map<
111  std::pair<OpenMS::GridFeature*, OpenMS::GridFeature*>,
112  double> PairDistances;
113 
115  typedef std::unordered_map<
116  const OpenMS::GridFeature*, std::unordered_set<Size> > ElementMapping;
117 
119  typedef boost::heap::fibonacci_heap<QTCluster> Heap;
120 
122 
123  private:
126 
128  bool use_IDs_;
129 
130  //TODO we could also bin by equal sized RT bins, or by a fixed RT size
131  //TODO could be made dependent on nr. of maps (e.g. with 5 maps you get [around] 4 diffs per ID already)
134 
136  double min_score_;
137 
142 
144  double max_diff_rt_;
145 
147  double max_diff_mz_;
148 
151 
154 
156  std::unordered_set<const OpenMS::GridFeature*> already_used_;
157 
160  std::map<double, double> bin_tolerances_;
161 
165  double getDistance_(const OpenMS::GridFeature* left, const
166  OpenMS::GridFeature* right);
167 
169  void setParameters_(double max_intensity, double max_mz);
170 
182  bool makeConsensusFeature_(Heap& cluster_heads,
183  ConsensusFeature& feature,
184  ElementMapping& element_mapping,
185  const Grid& grid,
186  const std::vector<Heap::handle_type>& handles);
187 
197  void computeClustering_(const Grid& grid,
198  Heap& cluster_heads,
199  std::vector<QTCluster::BulkData>& cluster_data,
200  std::vector<Heap::handle_type>& handles,
201  ElementMapping& element_mapping);
202 
210  ElementMapping& element_mapping);
211 
221  void createConsensusFeature_(ConsensusFeature& feature, const double quality,
222  const QTCluster::Elements& elements);
223 
242  void updateClustering_(ElementMapping& element_mapping,
243  const Grid& grid,
244  const QTCluster::Elements& elements,
245  Heap& cluster_heads,
246  const std::vector<Heap::handle_type>& handles,
247  Size best_id);
248 
250  template <typename MapType>
251  void run_(const std::vector<MapType>& input_maps, ConsensusMap& result_map);
252 
254  template <typename MapType>
255  void run_internal_(const std::vector<MapType>& input_maps,
256  ConsensusMap& result_map, bool do_progress);
257 
264  void addClusterElements_(const Grid& grid, QTCluster& cluster);
265 
269  bool distIsOutlier_(double dist, double rt);
270 
271 protected:
272 
273  enum
274  {
276  MZ = Peak2D::MZ
277  };
278 
279 public:
280 
283 
285  ~QTClusterFinder() override;
286 
288  static const String getProductName()
289  {
290  return "qt";
291  }
292 
300  void run(const std::vector<ConsensusMap>& input_maps,
301  ConsensusMap& result_map) override;
302 
310  void run(const std::vector<FeatureMap>& input_maps,
311  ConsensusMap& result_map);
312 
315  {
316  return new QTClusterFinder();
317  }
318  };
319 } // namespace OpenMS
320 
The base class of all element group finding algorithms.
Definition: BaseGroupFinder.h:64
A consensus feature spanning multiple LC-MS/MS experiments.
Definition: ConsensusFeature.h:71
A container for consensus elements.
Definition: ConsensusMap.h:92
A functor class for the calculation of distances between features or consensus features.
Definition: FeatureDistance.h:91
Representation of a feature in a hash grid.
Definition: GridFeature.h:53
Container for (2-dimensional coordinate, value) pairs.
Definition: HashGrid.h:61
@ MZ
Mass-to-charge dimension id (1 if used as a const int)
Definition: Peak2D.h:76
@ RT
Retention time dimension id (0 if used as a const int)
Definition: Peak2D.h:75
A variant of QT clustering for the detection of feature groups.
Definition: QTClusterFinder.h:106
HashGrid< OpenMS::GridFeature * > Grid
Definition: QTClusterFinder.h:121
std::unordered_map< std::pair< OpenMS::GridFeature *, OpenMS::GridFeature * >, double > PairDistances
Distances between pairs of grid features.
Definition: QTClusterFinder.h:112
bool makeConsensusFeature_(Heap &cluster_heads, ConsensusFeature &feature, ElementMapping &element_mapping, const Grid &grid, const std::vector< Heap::handle_type > &handles)
Extract the best cluster from cluster_heads and turn it into a consensus feature.
void run_internal_(const std::vector< MapType > &input_maps, ConsensusMap &result_map, bool do_progress)
Runs the algorithm on feature maps or consensus maps (internal)
double max_diff_mz_
Maximum m/z difference.
Definition: QTClusterFinder.h:147
void computeClustering_(const Grid &grid, Heap &cluster_heads, std::vector< QTCluster::BulkData > &cluster_data, std::vector< Heap::handle_type > &handles, ElementMapping &element_mapping)
Computes an initial QT clustering of the points in the hash grid.
~QTClusterFinder() override
Destructor.
void createConsensusFeature_(ConsensusFeature &feature, const double quality, const QTCluster::Elements &elements)
creates a consensus feature from the given elements
int nr_partitions_
Maximum m/z difference.
Definition: QTClusterFinder.h:150
void run(const std::vector< FeatureMap > &input_maps, ConsensusMap &result_map)
Runs the algorithm on feature maps.
double getDistance_(const OpenMS::GridFeature *left, const OpenMS::GridFeature *right)
Calculates the distance between two grid features.
std::map< double, double > bin_tolerances_
Definition: QTClusterFinder.h:160
Size min_nr_diffs_per_bin_
Min. nr. of differences from matched IDs requested to calculate a linking tolerance per RT bin.
Definition: QTClusterFinder.h:133
std::unordered_set< const OpenMS::GridFeature * > already_used_
Set of features already used.
Definition: QTClusterFinder.h:156
double min_score_
Min. score for an ID to be considered for tolerance estimation.
Definition: QTClusterFinder.h:136
void setParameters_(double max_intensity, double max_mz)
Sets algorithm parameters.
bool use_IDs_
Consider peptide identifications for grouping?
Definition: QTClusterFinder.h:128
FeatureDistance feature_distance_
Feature distance functor.
Definition: QTClusterFinder.h:153
double noID_penalty_
Definition: QTClusterFinder.h:141
boost::heap::fibonacci_heap< QTCluster > Heap
Heap to efficiently find the best clusters.
Definition: QTClusterFinder.h:119
void run(const std::vector< ConsensusMap > &input_maps, ConsensusMap &result_map) override
Runs the algorithm on consensus maps.
std::unordered_map< const OpenMS::GridFeature *, std::unordered_set< Size > > ElementMapping
Map to store which grid features are next to which clusters (saves the clusters ids)
Definition: QTClusterFinder.h:116
void updateClustering_(ElementMapping &element_mapping, const Grid &grid, const QTCluster::Elements &elements, Heap &cluster_heads, const std::vector< Heap::handle_type > &handles, Size best_id)
update the clustering:
double max_diff_rt_
Maximum RT difference.
Definition: QTClusterFinder.h:144
QTClusterFinder()
Constructor.
Size num_maps_
Number of input maps.
Definition: QTClusterFinder.h:125
void removeFromElementMapping_(const QTCluster &cluster, ElementMapping &element_mapping)
Removes id of current top cluster in the heap from element mapping.
void addClusterElements_(const Grid &grid, QTCluster &cluster)
Adds elements to the cluster based on the elements hashed in the grid.
static BaseGroupFinder * create()
Returns an instance of this class.
Definition: QTClusterFinder.h:314
bool distIsOutlier_(double dist, double rt)
Looks up the matching bin for rt in bin_tolerances_ and checks if dist is in the allowed range.
void run_(const std::vector< MapType > &input_maps, ConsensusMap &result_map)
Runs the algorithm on feature maps or consensus maps.
static const String getProductName()
Returns the name of the product.
Definition: QTClusterFinder.h:288
A representation of a QT cluster used for feature grouping.
Definition: QTCluster.h:108
std::vector< Element > Elements
Definition: QTCluster.h:129
A more convenient string class.
Definition: String.h:60
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
@ RT
RT in seconds.