OpenMS
PeakTypeEstimator.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Chris Bielow $
6 // $Authors: Chris Bielow $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
12 
13 #include <cmath>
14 #include <numeric>
15 
16 namespace OpenMS
17 {
23  class OPENMS_DLLAPI PeakTypeEstimator
24  {
25 public:
42  template <typename PeakConstIterator>
43  static SpectrumSettings::SpectrumType estimateType(const PeakConstIterator& begin, const PeakConstIterator& end)
44  {
45  typedef typename PeakConstIterator::value_type PeakT;
46  // abort if there are less than 5 peak in the iterator range
47  if (end - begin < 5)
48  {
50  }
51 
52  const int max_peaks = 5; // maximal number of peaks we are looking at
53  int profile_evidence = 0; // number of peaks found to be profile
54  int centroid_evidence = 0; // number of peaks found to be centroided
55 
56  // copy data, since we need to modify
57  std::vector<PeakT> data(begin, end);
58  // total intensity of spectrum
59  double total_int = std::accumulate(begin, end, 0.0, [](double int_, const PeakT& p) { return int_ + p.getIntensity(); } );
60  double explained_int = 0;
61  // get the 5 highest peaks
62  for (int i = 0; i < max_peaks; ++i)
63  {
64  // stop if we explained +50% of all intensity
65  // (due to danger of interpreting noise - usually wrongly classified as centroided data)
66  if (explained_int > 0.5 * total_int) break;
67 
68  double int_max = 0;
69  Size idx = std::numeric_limits<Size>::max();
70  // find highest peak position
71  for (Size i = 0; i < data.size(); ++i)
72  {
73  if (data[i].getIntensity() > int_max)
74  {
75  int_max = data[i].getIntensity();
76  idx = i;
77  }
78  }
79  // no more peaks
80  if (idx == std::numeric_limits<Size>::max()) break;
81 
82  // check left and right peak shoulders and count number of sample points
83  typedef typename std::vector<PeakT>::iterator PeakIterator; // non-const version, since we need to modify the peaks
84  PeakIterator it_max = data.begin() + idx;
85  PeakIterator it = it_max;
86  double int_last = int_max;
87  while (it != data.begin()
88  && it->getIntensity() <= int_last // at most 100% of last sample point
89  && it->getIntensity() > 0
90  && (it->getIntensity() / int_last) > 0.1 // at least 10% of last sample point
91  && it->getMZ() + 1 > it_max->getMZ()) // at most 1 Th away
92  {
93  int_last = it->getIntensity();
94  explained_int += int_last;
95  it->setIntensity(0); // remove peak from future consideration
96  --it;
97  }
98  // if the current point is rising again, restore the intensity of the
99  // previous 'sink' point (because it could belong to a neighbour peak)
100  // e.g. imagine intensities: 1-2-3-2-1-2-4-2-1. We do not want to destroy the middle '1'
101  if (it->getIntensity() > int_last) (it+1)->setIntensity(int_last);
102 
103  //std::cerr << " Peak candidate: " << it_max->getMZ() << " ...";
104  bool break_left = false;
105  if (it_max - it < 2+1) // 'it' does not fulfill the conditions, i.e. does not count
106  { // fewer than two sampling points on left shoulder
107  //std::cerr << " break left " << it_max - it << " points\n";
108  break_left = true;
109  // do not end loop here.. we still need to clean up the right side
110  }
111  it_max->setIntensity(int_max); // restore center intensity
112  explained_int -= int_max;
113  it = it_max;
114  int_last = int_max;
115  while (it != data.end()
116  && it->getIntensity() <= int_last // at most 100% of last sample point
117  && it->getIntensity() > 0
118  && (it->getIntensity() / int_last) > 0.1 // at least 10% of last sample point
119  && it->getMZ() - 1 < it_max->getMZ()) // at most 1 Th away
120  {
121  int_last = it->getIntensity();
122  explained_int += int_last;
123  it->setIntensity(0); // remove peak from future consideration
124  ++it;
125  }
126  // if the current point is rising again, restore the intensity of the
127  // previous 'sink' point (because it could belong to a neighbour peak)
128  // e.g. imagine intensities: 1-2-4-2-1-2-3-2-1. We do not want to destroy the middle '1'
129  // (note: the sequence is not identical to the one of the left shoulder)
130  if (it != data.end() && it->getIntensity() > int_last) (it-1)->setIntensity(int_last);
131 
132  if (break_left || it - it_max < 2+1) // 'it' does not fulfill the conditions, i.e. does not count
133  { // fewer than two sampling points on right shoulder
134  //std::cerr << " break right " << it - it_max << " points\n";
135  ++centroid_evidence;
136  continue;
137  }
138  // peak has at least two sampling points on either side within 1 Th
139  ++profile_evidence;
140  //std::cerr << " PROFILE " << it - it_max << " points right\n";
141 
142  }
143 
144  float evidence_ratio = profile_evidence / float(profile_evidence + centroid_evidence);
145  //std::cerr << "--> Evidence ratio: " << evidence_ratio;
146 
147  if (evidence_ratio > 0.75) // 80% are profile
148  {
149  //std::cerr << " PROFILE\n";
151  }
152  else
153  {
154  //std::cerr << " CENTROID\n";
156  }
157  }
244  };
245 
246 } // namespace OpenMS
247 
Estimates if the data of a spectrum is raw data or peak data.
Definition: PeakTypeEstimator.h:24
static SpectrumSettings::SpectrumType estimateType(const PeakConstIterator &begin, const PeakConstIterator &end)
Estimates the peak type of the peaks in the iterator range based on intensity characteristics of up t...
Definition: PeakTypeEstimator.h:43
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:45
@ UNKNOWN
Unknown spectrum type.
Definition: SpectrumSettings.h:46
@ PROFILE
profile data
Definition: SpectrumSettings.h:48
@ CENTROID
centroid data or stick data
Definition: SpectrumSettings.h:47
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
RawDataVector::iterator PeakIterator
Profile data iterator type.
Definition: OptimizePick.h:32
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22