OpenMS
QTClusterFinder.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Hendrik Weisser $
6 // $Authors: Steffen Sass, Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
17 
18 #include <boost/heap/fibonacci_heap.hpp>
19 #include <unordered_map>
20 
21 #include <list>
22 #include <vector>
23 #include <unordered_set>
24 #include <utility> // for pair<>
25 
26 namespace OpenMS
27 {
28 
/// A variant of QT clustering for the detection of feature groups.
/// Derives from BaseGroupFinder (the base class of all element group finding algorithms).
// NOTE(review): this listing carries the original file's line numbers as a prefix on every
// line and skips some source lines; gaps in the numbering mark content that is not shown here.
78  class OPENMS_DLLAPI QTClusterFinder :
79  public BaseGroupFinder
80  {
81  public:
82 
/// Distances between pairs of grid features.
84  typedef std::unordered_map<
85  std::pair<OpenMS::GridFeature*, OpenMS::GridFeature*>,
86  double> PairDistances;
87 
/// Map to store which grid features are next to which clusters (stores the cluster ids).
89  typedef std::unordered_map<
90  const OpenMS::GridFeature*, std::unordered_set<Size> > ElementMapping;
91 
/// Heap to efficiently find the best clusters.
93  typedef boost::heap::fibonacci_heap<QTCluster> Heap;
94 
// NOTE(review): original line 95 is not shown; the cross-reference section lists
// `HashGrid<OpenMS::GridFeature*> Grid` as defined there.
96 
97  private:
// NOTE(review): original line 99 is not shown; the cross-reference section lists
// `Size num_maps_` (number of input maps) as defined there.
100 
/// Consider peptide identifications for grouping?
102  bool use_IDs_;
103 
104  //TODO we could also bin by equal sized RT bins, or by a fixed RT size
105  //TODO could be made dependent on nr. of maps (e.g. with 5 maps you get [around] 4 diffs per ID already)
// NOTE(review): original line 107 is not shown; the cross-reference section lists
// `Size min_nr_diffs_per_bin_` there (min. nr. of differences from matched IDs requested
// to calculate a linking tolerance per RT bin). The TODOs above presumably refer to it.
108 
/// Min. score for an ID to be considered for tolerance estimation.
110  double min_score_;
111 
// NOTE(review): original line 115 is not shown; the cross-reference section lists
// `double noID_penalty_` as defined there.
116 
/// Maximum RT difference.
118  double max_diff_rt_;
119 
/// Maximum m/z difference.
121  double max_diff_mz_;
122 
// NOTE(review): original line 124 is not shown; the cross-reference section lists
// `int nr_partitions_` as defined there.
125 
// NOTE(review): original line 127 is not shown; the cross-reference section lists
// `FeatureDistance feature_distance_` (feature distance functor) as defined there.
128 
/// Set of features already used.
130  std::unordered_set<const OpenMS::GridFeature*> already_used_;
131 
/// Tolerances looked up by distIsOutlier_() via the bin matching an RT value.
// NOTE(review): presumably keyed by RT bin with the allowed tolerance as value -- confirm.
134  std::map<double, double> bin_tolerances_;
135 
/// Calculates the distance between two grid features.
139  double getDistance_(const OpenMS::GridFeature* left, const
140  OpenMS::GridFeature* right);
141 
/// Sets algorithm parameters.
143  void setParameters_(double max_intensity, double max_mz);
144 
/// Extract the best cluster from cluster_heads and turn it into a consensus feature.
156  bool makeConsensusFeature_(Heap& cluster_heads,
157  ConsensusFeature& feature,
158  ElementMapping& element_mapping,
159  const Grid& grid,
160  const std::vector<Heap::handle_type>& handles);
161 
/// Computes an initial QT clustering of the points in the hash grid.
171  void computeClustering_(const Grid& grid,
172  Heap& cluster_heads,
173  std::vector<QTCluster::BulkData>& cluster_data,
174  std::vector<Heap::handle_type>& handles,
175  ElementMapping& element_mapping);
176 
// NOTE(review): the next line is the tail of a declaration whose first line (original 183)
// is not shown. It is presumably
// `void removeFromElementMapping_(const QTCluster& cluster, ElementMapping& element_mapping)`,
// which the cross-reference section describes as removing the id of the current top cluster
// in the heap from the element mapping -- confirm against the real header.
184  ElementMapping& element_mapping);
185 
/// Creates a consensus feature from the given elements.
195  void createConsensusFeature_(ConsensusFeature& feature, const double quality,
196  const QTCluster::Elements& elements);
197 
/// Updates the clustering.
216  void updateClustering_(ElementMapping& element_mapping,
217  const Grid& grid,
218  const QTCluster::Elements& elements,
219  Heap& cluster_heads,
220  const std::vector<Heap::handle_type>& handles,
221  Size best_id);
222 
/// Runs the algorithm on feature maps or consensus maps.
224  template <typename MapType>
225  void run_(const std::vector<MapType>& input_maps, ConsensusMap& result_map);
226 
/// Runs the algorithm on feature maps or consensus maps (internal).
228  template <typename MapType>
229  void run_internal_(const std::vector<MapType>& input_maps,
230  ConsensusMap& result_map, bool do_progress);
231 
/// Adds elements to the cluster based on the elements hashed in the grid.
238  void addClusterElements_(const Grid& grid, QTCluster& cluster);
239 
/// Looks up the matching bin for rt in bin_tolerances_ and checks if dist is in the allowed range.
243  bool distIsOutlier_(double dist, double rt);
244 
245 protected:
246 
/// Dimension ids taken from Peak2D.
// NOTE(review): original line 249 is not shown; presumably an RT enumerator precedes MZ -- confirm.
247  enum
248  {
250  MZ = Peak2D::MZ
251  };
252 
253 public:
254 
// NOTE(review): original line 256 is not shown; the cross-reference section lists the
// constructor `QTClusterFinder()`.
257 
/// Destructor.
259  ~QTClusterFinder() override;
260 
/// Returns the name of the product ("qt").
262  static const String getProductName()
263  {
264  return "qt";
265  }
266 
/// Runs the algorithm on consensus maps.
274  void run(const std::vector<ConsensusMap>& input_maps,
275  ConsensusMap& result_map) override;
276 
/// Runs the algorithm on feature maps.
284  void run(const std::vector<FeatureMap>& input_maps,
285  ConsensusMap& result_map);
286 
// NOTE(review): the signature line (original 288) is not shown; the cross-reference section
// lists `static BaseGroupFinder* create()` there, described as returning an instance of this
// class. The body below allocates with `new`.
289  {
290  return new QTClusterFinder();
291  }
292  };
293 } // namespace OpenMS
294 
The base class of all element group finding algorithms.
Definition: BaseGroupFinder.h:38
A consensus feature spanning multiple LC-MS/MS experiments.
Definition: ConsensusFeature.h:45
A container for consensus elements.
Definition: ConsensusMap.h:66
A functor class for the calculation of distances between features or consensus features.
Definition: FeatureDistance.h:65
Representation of a feature in a hash grid.
Definition: GridFeature.h:27
Container for (2-dimensional coordinate, value) pairs.
Definition: HashGrid.h:35
@ MZ
Mass-to-charge dimension id (1 if used as a const int)
Definition: Peak2D.h:50
@ RT
Retention time dimension id (0 if used as a const int)
Definition: Peak2D.h:49
A variant of QT clustering for the detection of feature groups.
Definition: QTClusterFinder.h:80
HashGrid< OpenMS::GridFeature * > Grid
Definition: QTClusterFinder.h:95
std::unordered_map< std::pair< OpenMS::GridFeature *, OpenMS::GridFeature * >, double > PairDistances
Distances between pairs of grid features.
Definition: QTClusterFinder.h:86
bool makeConsensusFeature_(Heap &cluster_heads, ConsensusFeature &feature, ElementMapping &element_mapping, const Grid &grid, const std::vector< Heap::handle_type > &handles)
Extract the best cluster from cluster_heads and turn it into a consensus feature.
void run_internal_(const std::vector< MapType > &input_maps, ConsensusMap &result_map, bool do_progress)
Runs the algorithm on feature maps or consensus maps (internal)
double max_diff_mz_
Maximum m/z difference.
Definition: QTClusterFinder.h:121
void computeClustering_(const Grid &grid, Heap &cluster_heads, std::vector< QTCluster::BulkData > &cluster_data, std::vector< Heap::handle_type > &handles, ElementMapping &element_mapping)
Computes an initial QT clustering of the points in the hash grid.
~QTClusterFinder() override
Destructor.
void createConsensusFeature_(ConsensusFeature &feature, const double quality, const QTCluster::Elements &elements)
Creates a consensus feature from the given elements.
int nr_partitions_
Number of partitions.
Definition: QTClusterFinder.h:124
void run(const std::vector< FeatureMap > &input_maps, ConsensusMap &result_map)
Runs the algorithm on feature maps.
double getDistance_(const OpenMS::GridFeature *left, const OpenMS::GridFeature *right)
Calculates the distance between two grid features.
std::map< double, double > bin_tolerances_
Definition: QTClusterFinder.h:134
Size min_nr_diffs_per_bin_
Min. nr. of differences from matched IDs requested to calculate a linking tolerance per RT bin.
Definition: QTClusterFinder.h:107
std::unordered_set< const OpenMS::GridFeature * > already_used_
Set of features already used.
Definition: QTClusterFinder.h:130
double min_score_
Min. score for an ID to be considered for tolerance estimation.
Definition: QTClusterFinder.h:110
void setParameters_(double max_intensity, double max_mz)
Sets algorithm parameters.
bool use_IDs_
Consider peptide identifications for grouping?
Definition: QTClusterFinder.h:102
FeatureDistance feature_distance_
Feature distance functor.
Definition: QTClusterFinder.h:127
double noID_penalty_
Definition: QTClusterFinder.h:115
boost::heap::fibonacci_heap< QTCluster > Heap
Heap to efficiently find the best clusters.
Definition: QTClusterFinder.h:93
void run(const std::vector< ConsensusMap > &input_maps, ConsensusMap &result_map) override
Runs the algorithm on consensus maps.
std::unordered_map< const OpenMS::GridFeature *, std::unordered_set< Size > > ElementMapping
Map to store which grid features are next to which clusters (stores the cluster IDs).
Definition: QTClusterFinder.h:90
void updateClustering_(ElementMapping &element_mapping, const Grid &grid, const QTCluster::Elements &elements, Heap &cluster_heads, const std::vector< Heap::handle_type > &handles, Size best_id)
Updates the clustering.
double max_diff_rt_
Maximum RT difference.
Definition: QTClusterFinder.h:118
QTClusterFinder()
Constructor.
Size num_maps_
Number of input maps.
Definition: QTClusterFinder.h:99
void removeFromElementMapping_(const QTCluster &cluster, ElementMapping &element_mapping)
Removes id of current top cluster in the heap from element mapping.
void addClusterElements_(const Grid &grid, QTCluster &cluster)
Adds elements to the cluster based on the elements hashed in the grid.
static BaseGroupFinder * create()
Returns an instance of this class.
Definition: QTClusterFinder.h:288
bool distIsOutlier_(double dist, double rt)
Looks up the matching bin for rt in bin_tolerances_ and checks if dist is in the allowed range.
void run_(const std::vector< MapType > &input_maps, ConsensusMap &result_map)
Runs the algorithm on feature maps or consensus maps.
static const String getProductName()
Returns the name of the product.
Definition: QTClusterFinder.h:262
A representation of a QT cluster used for feature grouping.
Definition: QTCluster.h:82
std::vector< Element > Elements
Definition: QTCluster.h:103
A more convenient string class.
Definition: String.h:34
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
@ RT
RT in seconds.