OpenMS  2.7.0
QTCluster.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Steffen Sass, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 
36 #pragma once
37 
38 #include <OpenMS/CONCEPT/Types.h>
40 #include <OpenMS/OpenMSConfig.h>
41 #include <OpenMS/config.h>
42 
43 #include <boost/unordered_map.hpp>
44 
45 #include <map> // for multimap<>
46 #include <vector> // for vector<>
47 #include <set> // for set<>
48 #include <utility> // for pair<>
49 
50 namespace OpenMS
51 {
52  class GridFeature; // forward declaration only: representation of a feature in a hash grid (defined in GridFeature.h)
53 
54  // Boost switch since with 1.47 several classes got moved into a new
55  // boost::unordered namespace (specifically unordered_map).
     // OpenMSBoost pulls in whichever Boost namespace the preprocessor selects, so the
     // typedefs in this header can spell OpenMSBoost::unordered_map in either case.
     // OPENMS_BOOST_VERSION_MINOR is not defined in this file (presumably it comes
     // from one of the OpenMS config headers included above -- confirm).
     // NOTE(review): the comment above says the move happened "with 1.47", but the
     // check is `> 47`, so Boost 1.47 itself takes the #else branch -- confirm that
     // this is intended.
56  namespace OpenMSBoost
57  {
58 #if OPENMS_BOOST_VERSION_MINOR > 47
59  using namespace boost::unordered;
60 #else
61  using namespace boost;
62 #endif
63  }
64 
65 
119  class OPENMS_DLLAPI QTCluster
120  {
     // A representation of a QT cluster used for feature grouping.
     //
     // The object itself only holds a few flags, the cached quality and (per the
     // member index of this listing) a BulkData* to the heavy data, which is why
     // the defaulted copy/move operations below are cheap.
     //
     // NOTE(review): this is a rendered listing; the jumps in the embedded line
     // numbers mark stripped doc comments and some dropped declaration lines.
     // Comments below point out where a declaration appears to be missing.
121 public:
122 
123  // need to store more than one
     /// Ordered multimap from a double key to a feature pointer; several entries
     /// may share a key (presumably the key is the distance passed to add() -- confirm).
124  typedef std::multimap<double, const GridFeature*> NeighborList;
     /// Hash map from a Size key to a NeighborList (presumably keyed by input map index -- confirm).
125  typedef OpenMSBoost::unordered_map<Size, NeighborList> NeighborMapMulti;
126 
     /// A single neighbor entry.
     /// NOTE(review): header line 130 is missing from this listing; the member
     /// index names `const GridFeature* feature` at that line.
127  struct Neighbor
128  {
129  double distance;
131  };
132 
     /// Hash map from a Size key to one Neighbor (used for the "best current
     /// feature for each map" bookkeeping according to the member index).
133  typedef OpenMSBoost::unordered_map<Size, Neighbor> NeighborMap;
134 
     /// One clustered element.
     /// NOTE(review): header lines 137-138 are missing from this listing; the
     /// member index names `Size map_index` at line 137.
135  struct Element
136  {
139  };
140 
     /// Sequence of Element values, as taken by update().
141  typedef std::vector<Element> Elements;
142 
     /// Class to store the bulk internal data (neighbors, annotations, etc.).
     /// QTCluster is a friend, so it can reach the private members directly.
149  class OPENMS_DLLAPI BulkData
150  {
151  friend class QTCluster;
152 
153  public:
154 
     /// Detailed constructor of the cluster body.
     /// Parameter meanings below are taken from the same-named members in the
     /// member index (presumably each argument initialises that member -- confirm):
     ///   center_point  pointer to the cluster center
     ///   num_maps      number of input maps
     ///   max_distance  maximum distance of a point that can still belong to the cluster
     ///   x_coord       x coordinate in the grid cell
     ///   y_coord       y coordinate in the grid cell
     ///   id            unique id of this cluster
162  BulkData(const OpenMS::GridFeature* const center_point,
163  Size num_maps, double max_distance,
164  Int x_coord, Int y_coord, Size id);
165 
166  private:
     // NOTE(review): the data member lines between here and annotations_ are
     // missing from this listing. The member index places them at header lines
     // 169 (center_point_), 172 (id_), 178 (neighbors_), 187 (tmp_neighbors_),
     // 190 (max_distance_), 193 (num_maps_), 196 (x_coord_) and 199 (y_coord_).
167 
170 
173 
179 
188 
191 
194 
197 
200 
     /// Set of annotations of the cluster.
207  std::set<AASequence> annotations_;
208  };
209 
     /// Detailed constructor of the cluster head.
     /// `data` is the BulkData body this head refers to; `use_IDs` says whether
     /// peptide IDs are tracked and used for matching.
216  QTCluster(BulkData* const data, bool use_IDs);
217 
     /// Default constructor not accessible: objects of this class should only
     /// exist together with a BulkData*.
223  QTCluster() = delete;
224 
     /// Cheap copy ctor because most of the data lies outside of this class (BulkData*).
231  QTCluster(const QTCluster& rhs) = default;
232 
     /// Cheap copy assignment, see the copy ctor.
234  QTCluster& operator=(const QTCluster& rhs) = default;
235 
     /// Cheap move ctor because most of the data lies outside of this class (BulkData*).
237  QTCluster(QTCluster&& rhs) = default;
238 
     /// Cheap move assignment because most of the data lies outside of this class (BulkData*).
240  QTCluster& operator=(QTCluster&& rhs) = default;
241 
242  ~QTCluster() = default;
243 
     /// Returns the cluster center.
245  const GridFeature* getCenterPoint() const;
246 
     /// Returns the cluster's id.
248  Size getId() const;
249 
     /// Returns the RT value of the cluster.
251  double getCenterRT() const;
252 
     /// Returns the m/z value of the cluster center.
254  double getCenterMZ() const;
255 
     /// Returns the x coordinate in the grid.
257  Int getXCoord() const;
258 
     /// Returns the y coordinate in the grid.
260  Int getYCoord() const;
261 
     /// Returns the size of the cluster (number of elements, incl. center).
263  Size size() const;
264 
     /// Compare by quality.
     /// NOTE(review): this member overload is not const-qualified, and a free
     /// operator<(const QTCluster&, const QTCluster&) is also declared after the
     /// class -- confirm that both are needed.
266  bool operator<(const QTCluster& cluster);
267 
     /// Adds a new element/neighbor to the cluster.
     /// Per the member index, initializeCluster() has to be called before and
     /// finalizeCluster() after one or more calls to add().
274  void add(const GridFeature* const element, double distance);
275 
     // NOTE(review): the member index lists `Elements getElements() const`
     // (clustered elements = neighbors + cluster center), which does not appear
     // in this listing -- presumably its line was dropped here.
278 
     /// Updates the cluster after the indicated data points are removed.
     /// The meaning of the bool result is not shown in this listing.
286  bool update(const Elements& removed);
287 
     /// Returns the cluster quality and recomputes if necessary (hence non-const).
289  double getQuality();
290 
     /// Returns the cluster quality without recomputing.
292  double getCurrentQuality() const;
293 
     /// Returns the set of peptide sequences annotated to the cluster center.
295  const std::set<AASequence>& getAnnotations();
296 
     /// Sets current cluster as invalid (also frees some memory).
304  void setInvalid();
305 
     /// Whether current cluster is invalid, i.e. the negation of valid_.
307  inline bool isInvalid() const
308  {
309  return !valid_;
310  }
311 
     // NOTE(review): the member index lists getAllNeighbors(), initializeCluster()
     // and finalizeCluster(), none of which appear in this listing -- presumably
     // their lines were dropped between header lines 311 and 320.
314 
317 
320 
321  private:
     // NOTE(review): the member index lists computeQuality_(), optimizeAnnotations_()
     // and recomputeNeighbors_(), which do not appear in this private section.
324 
345 
     /// Computes the sequence table, mapping: peptides -> best distance per input map.
     /// The result is written into `seq_table` (outer key: AASequence; inner map: Size -> double).
347  void makeSeqTable_(std::map<AASequence, std::map<Size,double>>& seq_table) const;
348 
351 
     /// Quality of the cluster.
353  double quality_;
354 
     // NOTE(review): the member index places `BulkData* data_` (pointer to data
     // members) at header line 356, which is missing from this listing.
357 
     /// Whether current cluster is valid.
359  bool valid_;
360 
     /// Has the cluster changed (if yes, quality needs to be recomputed)?
362  bool changed_;
363 
     /// Keep track of peptide IDs and use them for matching?
365  bool use_IDs_;
366 
     // NOTE(review): the member index places `bool collect_annotations_` at header
     // line 376 and `bool finalized_` at line 379; both are missing from this listing.
377 
380  };
381 
382  // needed for the heap
     // Free-function ordering of two clusters taken by const reference; only
     // declared here, so the comparison criterion is not visible in this header
     // (the member overload is documented as "compare by quality" -- presumably
     // this one agrees; confirm).
383  bool operator<(const QTCluster& q1, const QTCluster& q2);
384 } // namespace OpenMS
Representation of a peptide/protein sequence.
Definition: AASequence.h:112
Representation of a feature in a hash grid.
Definition: GridFeature.h:53
Class to store the bulk internal data (neighbors, annotations, etc.)
Definition: QTCluster.h:150
NeighborMapMulti tmp_neighbors_
Temporary map tracking *all* neighbors.
Definition: QTCluster.h:187
BulkData(const OpenMS::GridFeature *const center_point, Size num_maps, double max_distance, Int x_coord, Int y_coord, Size id)
Detailed constructor of the cluster body.
Int x_coord_
x coordinate in the grid cell
Definition: QTCluster.h:196
const GridFeature *const center_point_
Pointer to the cluster center.
Definition: QTCluster.h:169
std::set< AASequence > annotations_
Set of annotations of the cluster.
Definition: QTCluster.h:207
NeighborMap neighbors_
Map that keeps track of the best current feature for each map.
Definition: QTCluster.h:178
Size num_maps_
Number of input maps.
Definition: QTCluster.h:193
Int y_coord_
y coordinate in the grid cell
Definition: QTCluster.h:199
double max_distance_
Maximum distance of a point that can still belong to the cluster.
Definition: QTCluster.h:190
Size id_
Unique id of this cluster.
Definition: QTCluster.h:172
A representation of a QT cluster used for feature grouping.
Definition: QTCluster.h:120
std::vector< Element > Elements
Definition: QTCluster.h:141
const std::set< AASequence > & getAnnotations()
Return the set of peptide sequences annotated to the cluster center.
OpenMSBoost::unordered_map< Size, Neighbor > NeighborMap
Definition: QTCluster.h:133
void setInvalid()
Sets current cluster as invalid (also frees some memory)
Int getXCoord() const
Returns the x coordinate in the grid.
void initializeCluster()
Has to be called before adding elements (calling QTCluster::add)
bool operator<(const QTCluster &cluster)
Compare by quality.
double getCurrentQuality() const
Returns the cluster quality without recomputing.
QTCluster(const QTCluster &rhs)=default
Cheap copy ctor because most of the data lies outside of this class (BulkData*). Be very careful with ...
bool isInvalid() const
Whether current cluster is invalid.
Definition: QTCluster.h:307
bool valid_
Whether current cluster is valid.
Definition: QTCluster.h:359
void add(const GridFeature *const element, double distance)
Adds a new element/neighbor to the cluster.
double quality_
Quality of the cluster.
Definition: QTCluster.h:353
bool collect_annotations_
Whether initial collection of all neighbors is needed.
Definition: QTCluster.h:376
QTCluster & operator=(const QTCluster &rhs)=default
Cheap copy assignment, see copy ctor for details.
OpenMSBoost::unordered_map< Size, NeighborList > NeighborMapMulti
Definition: QTCluster.h:125
void recomputeNeighbors_()
Report elements that are compatible with the optimal annotation.
const GridFeature * getCenterPoint() const
Returns the cluster center.
void makeSeqTable_(std::map< AASequence, std::map< Size, double >> &seq_table) const
Compute the sequence table, mapping: peptides -> best distance per input map.
std::multimap< double, const GridFeature * > NeighborList
Definition: QTCluster.h:124
Size map_index
Definition: QTCluster.h:137
Size getId() const
Returns the cluster's id.
void finalizeCluster()
Has to be called after adding elements (after calling QTCluster::add one or multiple times)
double distance
Definition: QTCluster.h:129
BulkData * data_
Pointer to data members.
Definition: QTCluster.h:356
double optimizeAnnotations_()
Finds the optimal annotation (peptide sequences) for the cluster.
~QTCluster()=default
Elements getAllNeighbors() const
Get all current neighbors.
bool changed_
Has the cluster changed (if yes, quality needs to be recomputed)?
Definition: QTCluster.h:362
bool use_IDs_
Keep track of peptide IDs and use them for matching?
Definition: QTCluster.h:365
bool update(const Elements &removed)
Updates the cluster after the indicated data points are removed.
Size size() const
Returns the size of the cluster (number of elements, incl. center)
QTCluster(BulkData *const data, bool use_IDs)
Detailed constructor of the cluster head.
double getQuality()
Returns the cluster quality and recomputes if necessary.
double getCenterMZ() const
Returns the m/z value of the cluster center.
QTCluster(QTCluster &&rhs)=default
Cheap move ctor because most of the data lies outside of this class (BulkData*).
QTCluster & operator=(QTCluster &&rhs)=default
Cheap move assignment because most of the data lies outside of this class (BulkData*).
Int getYCoord() const
Returns the y coordinate in the grid.
double getCenterRT() const
Returns the RT value of the cluster.
bool finalized_
Whether current cluster is accepting new elements or not (if true, no more new elements allowed)
Definition: QTCluster.h:379
const GridFeature * feature
Definition: QTCluster.h:130
void computeQuality_()
Computes the quality of the cluster.
QTCluster()=delete
Default constructor not accessible. Objects of this class should only exist with a valid BulkData* giv...
Elements getElements() const
Gets the clustered elements meaning neighbors + cluster center.
Definition: QTCluster.h:136
Definition: QTCluster.h:128
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
bool operator<(const QTCluster &q1, const QTCluster &q2)