OpenMS  2.7.0
PeakTypeEstimator.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
38 
39 #include <cmath>
40 #include <numeric>
41 
42 namespace OpenMS
43 {
49  class OPENMS_DLLAPI PeakTypeEstimator
50  {
51 public:
68  template <typename PeakConstIterator>
69  static SpectrumSettings::SpectrumType estimateType(const PeakConstIterator& begin, const PeakConstIterator& end)
70  {
71  typedef typename PeakConstIterator::value_type PeakT;
72  // abort if there are less than 5 peak in the iterator range
73  if (end - begin < 5)
74  {
76  }
77 
78  const int max_peaks = 5; // maximal number of peaks we are looking at
79  int profile_evidence = 0; // number of peaks found to be profile
80  int centroid_evidence = 0; // number of peaks found to be centroided
81 
82  // copy data, since we need to modify
83  std::vector<PeakT> data(begin, end);
84  // total intensity of spectrum
85  double total_int = std::accumulate(begin, end, 0.0, [](double int_, const PeakT& p) { return int_ + p.getIntensity(); } );
86  double explained_int = 0;
87  // get the 5 highest peaks
88  for (int i = 0; i < max_peaks; ++i)
89  {
90  // stop if we explained +50% of all intensity
91  // (due to danger of interpreting noise - usually wrongly classified as centroided data)
92  if (explained_int > 0.5 * total_int) break;
93 
94  double int_max = 0;
95  Size idx = std::numeric_limits<Size>::max();
96  // find highest peak position
97  for (Size i = 0; i < data.size(); ++i)
98  {
99  if (data[i].getIntensity() > int_max)
100  {
101  int_max = data[i].getIntensity();
102  idx = i;
103  }
104  }
105  // no more peaks
106  if (idx == std::numeric_limits<Size>::max()) break;
107 
108  // check left and right peak shoulders and count number of sample points
109  typedef typename std::vector<PeakT>::iterator PeakIterator; // non-const version, since we need to modify the peaks
110  PeakIterator it_max = data.begin() + idx;
111  PeakIterator it = it_max;
112  double int_last = int_max;
113  while (it != data.begin()
114  && it->getIntensity() <= int_last // at most 100% of last sample point
115  && it->getIntensity() > 0
116  && (it->getIntensity() / int_last) > 0.1 // at least 10% of last sample point
117  && it->getMZ() + 1 > it_max->getMZ()) // at most 1 Th away
118  {
119  int_last = it->getIntensity();
120  explained_int += int_last;
121  it->setIntensity(0); // remove peak from future consideration
122  --it;
123  }
124  // if the current point is rising again, restore the intensity of the
125  // previous 'sink' point (because it could belong to a neighbour peak)
126  // e.g. imagine intensities: 1-2-3-2-1-2-4-2-1. We do not want to destroy the middle '1'
127  if (it->getIntensity() > int_last) (it+1)->setIntensity(int_last);
128 
129  //std::cerr << " Peak candidate: " << it_max->getMZ() << " ...";
130  bool break_left = false;
131  if (it_max - it < 2+1) // 'it' does not fulfill the conditions, i.e. does not count
132  { // fewer than two sampling points on left shoulder
133  //std::cerr << " break left " << it_max - it << " points\n";
134  break_left = true;
135  // do not end loop here.. we still need to clean up the right side
136  }
137  it_max->setIntensity(int_max); // restore center intensity
138  explained_int -= int_max;
139  it = it_max;
140  int_last = int_max;
141  while (it != data.end()
142  && it->getIntensity() <= int_last // at most 100% of last sample point
143  && it->getIntensity() > 0
144  && (it->getIntensity() / int_last) > 0.1 // at least 10% of last sample point
145  && it->getMZ() - 1 < it_max->getMZ()) // at most 1 Th away
146  {
147  int_last = it->getIntensity();
148  explained_int += int_last;
149  it->setIntensity(0); // remove peak from future consideration
150  ++it;
151  }
152  // if the current point is rising again, restore the intensity of the
153  // previous 'sink' point (because it could belong to a neighbour peak)
154  // e.g. imagine intensities: 1-2-4-2-1-2-3-2-1. We do not want to destroy the middle '1'
155  // (note: the sequence is not identical to the one of the left shoulder)
156  if (it != data.end() && it->getIntensity() > int_last) (it-1)->setIntensity(int_last);
157 
158  if (break_left || it - it_max < 2+1) // 'it' does not fulfill the conditions, i.e. does not count
159  { // fewer than two sampling points on right shoulder
160  //std::cerr << " break right " << it - it_max << " points\n";
161  ++centroid_evidence;
162  continue;
163  }
164  // peak has at least two sampling points on either side within 1 Th
165  ++profile_evidence;
166  //std::cerr << " PROFILE " << it - it_max << " points right\n";
167 
168  }
169 
170  float evidence_ratio = profile_evidence / float(profile_evidence + centroid_evidence);
171  //std::cerr << "--> Evidence ratio: " << evidence_ratio;
172 
173  if (evidence_ratio > 0.75) // 80% are profile
174  {
175  //std::cerr << " PROFILE\n";
177  }
178  else
179  {
180  //std::cerr << " CENTROID\n";
182  }
183  }
270  };
271 
272 } // namespace OpenMS
273 
Estimates if the data of a spectrum is raw data or peak data.
Definition: PeakTypeEstimator.h:50
static SpectrumSettings::SpectrumType estimateType(const PeakConstIterator &begin, const PeakConstIterator &end)
Estimates the peak type of the peaks in the iterator range based on intensity characteristics of up t...
Definition: PeakTypeEstimator.h:69
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:71
@ UNKNOWN
Unknown spectrum type.
Definition: SpectrumSettings.h:72
@ PROFILE
profile data
Definition: SpectrumSettings.h:74
@ CENTROID
centroid data or stick data
Definition: SpectrumSettings.h:73
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
RawDataVector::iterator PeakIterator
Profile data iterator type.
Definition: OptimizePick.h:52
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47