OpenMS  2.7.0
PeakPickerIterative.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 
41 // #define DEBUG_PEAK_PICKING
42 
43 namespace OpenMS
44 {
45 
51  {
52  int index;
54 
56  double leftWidth;
57  double rightWidth;
58  float mz;
59  };
60 
61  bool sort_peaks_by_intensity(const PeakCandidate& a, const PeakCandidate& b); // prototype
63  {
65  }
66 
96  class OPENMS_DLLAPI PeakPickerIterative :
97  public DefaultParamHandler,
98  public ProgressLogger
99  {
100 
101 private:
103  double peak_width_;
107  double sn_win_len_;
109 
110 public:
111 
114  DefaultParamHandler("PeakPickerIterative"),
116  {
117  defaults_.setValue("signal_to_noise_", 1.0, "Signal to noise value, each peak is required to be above this value (turn off by setting it to 0.0)");
118  defaults_.setValue("peak_width", 0.0, "Expected peak width half width in Dalton - peaks will be extended until this half width is reached (even if the intensitity is increasing). In conjunction with check_width_internally it will also be used to remove peaks whose spacing is larger than this value.");
119 
120 
121  defaults_.setValue("spacing_difference", 1.5, "Difference between peaks in multiples of the minimal difference to continue. The higher this value is set, the further apart peaks are allowed to be to still extend a peak. E.g. if the value is set to 1.5 and in a current peak the minimal spacing between peaks is 10 mDa, then only peaks at most 15 mDa apart will be added to the peak.", {"advanced"});
122  defaults_.setValue("sn_bin_count_", 30, "Bin count for the Signal to Noise estimation.", {"advanced"});
123  defaults_.setValue("nr_iterations_", 5, "Nr of iterations to perform (how many times the peaks are re-centered).", {"advanced"});
124  defaults_.setMinInt("nr_iterations_", 1);
125  defaults_.setValue("sn_win_len_", 20.0, "Window length for the Signal to Noise estimation.", {"advanced"});
126 
127  defaults_.setValue("check_width_internally", "false", "Delete peaks where the spacing is larger than the peak width (should be set to true to avoid artefacts)", {"advanced"});
128  defaults_.setValidStrings("check_width_internally", {"true","false"});
129 
130  defaults_.setValue("ms1_only", "false", "Only do MS1");
131  defaults_.setValidStrings("ms1_only", {"true","false"});
132  defaults_.setValue("clear_meta_data", "false", "Delete meta data about peak width");
133  defaults_.setValidStrings("clear_meta_data", {"true","false"});
134 
135  // write defaults into Param object param_
136  defaultsToParam_();
137  }
138 
139  void updateMembers_() override
140  {
141  signal_to_noise_ = (double)param_.getValue("signal_to_noise_");
142  peak_width_ = (double)param_.getValue("peak_width");
143  spacing_difference_ = (double)param_.getValue("spacing_difference");
144  sn_bin_count_ = (double)param_.getValue("sn_bin_count_");
145  nr_iterations_ = (double)param_.getValue("nr_iterations_");
146  sn_win_len_ = (double)param_.getValue("sn_win_len_");
147 
148  check_width_internally_ = param_.getValue("check_width_internally").toBool();
149  }
150 
152  ~PeakPickerIterative() override {}
153 
154 private:
155 
156  /*
157  * This will re-center the peaks by using the seeds (ordered by intensity) to
158  * find raw signals that may belong to this peak. Then the peak is centered
159  * using a weighted average.
160  * Signals are added to the peak as long as they are still inside the
161  * peak_width or as long as the signal intensity keeps falling. Also the
162  * distance to the previous signal and the whether the signal is below the
163  * noise level is taken into account.
164  * This function implements a single iteration of this algorithm.
165  *
166  */
167  void pickRecenterPeaks_(const MSSpectrum& input,
168  std::vector<PeakCandidate>& PeakCandidates,
170  {
171  for (Size peak_it = 0; peak_it < PeakCandidates.size(); peak_it++)
172  {
173  int i = PeakCandidates[peak_it].index;
174  double central_peak_mz = input[i].getMZ(), central_peak_int = input[i].getIntensity();
175  double left_neighbor_mz = input[i - 1].getMZ(), left_neighbor_int = input[i - 1].getIntensity();
176  double right_neighbor_mz = input[i + 1].getMZ(), right_neighbor_int = input[i + 1].getIntensity();
177 
178  // MZ spacing sanity checks
179  double left_to_central = std::fabs(central_peak_mz - left_neighbor_mz);
180  double central_to_right = std::fabs(right_neighbor_mz - central_peak_mz);
181  double min_spacing = (left_to_central < central_to_right) ? left_to_central : central_to_right;
182  double est_peak_width = peak_width_;
183 
184  if (check_width_internally_ && (left_to_central > est_peak_width || central_to_right > est_peak_width))
185  {
186  // something has gone wrong, the points are further away than the peak width -> delete this peak
187  PeakCandidates[peak_it].integrated_intensity = -1;
188  PeakCandidates[peak_it].leftWidth = -1;
189  PeakCandidates[peak_it].rightWidth = -1;
190  PeakCandidates[peak_it].mz = -1;
191  continue;
192  }
193 
194  std::map<double, double> peak_raw_data;
195 
196  peak_raw_data[central_peak_mz] = central_peak_int;
197  peak_raw_data[left_neighbor_mz] = left_neighbor_int;
198  peak_raw_data[right_neighbor_mz] = right_neighbor_int;
199 
200  // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // //
201  // COPY - from PeakPickerHiRes
202  // peak core found, now extend it to the left
203  Size k = 2;
204  while ((i - k + 1) > 0
205  && std::fabs(input[i - k].getMZ() - peak_raw_data.begin()->first) < spacing_difference_ * min_spacing
206  && (input[i - k].getIntensity() < peak_raw_data.begin()->second
207  || std::fabs(input[i - k].getMZ() - central_peak_mz) < est_peak_width)
208  )
209  {
210  if (signal_to_noise_ > 0.0)
211  {
212  if (snt.getSignalToNoise(i - k) < signal_to_noise_)
213  {
214  break;
215  }
216  }
217  peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity();
218  ++k;
219  }
220  double leftborder = input[i - k + 1].getMZ();
221 
222  // to the right
223  k = 2;
224  while ((i + k) < input.size()
225  && std::fabs(input[i + k].getMZ() - peak_raw_data.rbegin()->first) < spacing_difference_ * min_spacing
226  && (input[i + k].getIntensity() < peak_raw_data.rbegin()->second
227  || std::fabs(input[i + k].getMZ() - central_peak_mz) < est_peak_width)
228  )
229  {
230  if (signal_to_noise_ > 0.0)
231  {
232  if (snt.getSignalToNoise(i + k) < signal_to_noise_)
233  {
234  break;
235  }
236  }
237 
238  peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity();
239  ++k;
240  }
241  // END COPY - from PeakPickerHiRes
242  // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // //
243 
244  double rightborder = input[i + k - 1].getMZ();
245 
246  double weighted_mz = 0;
247  double integrated_intensity = 0;
248  for (std::map<double, double>::const_iterator map_it = peak_raw_data.begin(); map_it != peak_raw_data.end(); ++map_it)
249  {
250  weighted_mz += map_it->first * map_it->second;
251  integrated_intensity += map_it->second;
252  }
253  weighted_mz /= integrated_intensity;
254 
255  // store the data
256  PeakCandidates[peak_it].integrated_intensity = integrated_intensity;
257  PeakCandidates[peak_it].leftWidth = leftborder;
258  PeakCandidates[peak_it].rightWidth = rightborder;
259  PeakCandidates[peak_it].mz = weighted_mz;
260 
261  // find the closest raw signal peak to where we just put our peak and store it
262  double min_diff = std::fabs(weighted_mz - input[i].getMZ());
263  int min_i = i;
264 
265  // Search to the left
266  for (int m = 1; i - m > 0 && leftborder < input[i - m].getMZ(); m++)
267  {
268  if (std::fabs(weighted_mz - input[i - m].getMZ()) < min_diff)
269  {
270  min_diff = std::fabs(weighted_mz - input[i - m].getMZ());
271  min_i = i - m;
272  }
273  }
274  // Search to the right
275  for (int m = 1; i - m > 0 && rightborder > input[i + m].getMZ(); m++)
276  {
277  if (std::fabs(weighted_mz - input[i + m].getMZ()) < min_diff)
278  {
279  min_diff = std::fabs(weighted_mz - input[i + m].getMZ());
280  min_i = i + m;
281  }
282  }
283  PeakCandidates[peak_it].index = min_i;
284  }
285  }
286 
287 public:
288 
289  /*
290  * This will pick one single spectrum. The PeakPickerHiRes is used to
291  * generate seeds, these seeds are then used to re-center the mass and
292  * compute peak width and integrated intensity of the peak.
293  *
294  * Finally, other peaks that would fall within the primary peak are
295  * discarded
296  *
297  * The output are the remaining peaks.
298  */
299  void pick(const MSSpectrum& input, MSSpectrum& output)
300  {
301  // don't pick a spectrum with less than 3 data points
302  if (input.size() < 3) return;
303 
304  // copy meta data of the input spectrum
305  output.clear(true);
306  output.SpectrumSettings::operator=(input);
307  output.MetaInfoInterface::operator=(input);
308  output.setRT(input.getRT());
309  output.setMSLevel(input.getMSLevel());
310  output.setName(input.getName());
312  output.getFloatDataArrays().clear();
313 
314  std::vector<PeakCandidate> PeakCandidates;
315  MSSpectrum picked_spectrum;
316 
317  // Use the PeakPickerHiRes to find candidates ...
319  Param pepi_param = OpenMS::PeakPickerHiRes().getDefaults();
320  pepi_param.setValue("signal_to_noise", signal_to_noise_);
321  pepi_param.setValue("spacing_difference", spacing_difference_);
322  pp.setParameters(pepi_param);
323  pp.pick(input, picked_spectrum);
324 
325  // after picking peaks, we store the closest index of the raw spectrum and the picked intensity
326  std::vector<PeakCandidate> newPeakCandidates_;
327  Size j = 0;
328  OPENMS_LOG_DEBUG << "Candidates " << picked_spectrum.size() << std::endl;
329  for (Size k = 0; k < input.size() && j < picked_spectrum.size(); k++)
330  {
331  if (input[k].getMZ() > picked_spectrum[j].getMZ())
332  {
333  OPENMS_LOG_DEBUG << "got a value " << k << " @ " << input[k] << std::endl;
334  PeakCandidate pc = { /*.index=*/ static_cast<int>(k), /*.intensity=*/ picked_spectrum[j].getIntensity(), -1, -1, -1, -1};
335  newPeakCandidates_.push_back(pc);
336  j++;
337  }
338  }
339 
340  PeakCandidates = newPeakCandidates_;
341  std::sort(PeakCandidates.begin(), PeakCandidates.end(), sort_peaks_by_intensity);
342 
343  // signal-to-noise estimation
345  if (signal_to_noise_ > 0.0)
346  {
347  Param snt_parameters = snt.getParameters();
348  snt_parameters.setValue("win_len", sn_win_len_);
349  snt_parameters.setValue("bin_count", sn_bin_count_);
350  snt.setParameters(snt_parameters);
351  snt.init(input);
352  }
353 
354  // The peak candidates are re-centered and the width is computed for each peak
355  for (int i = 0; i < nr_iterations_; i++)
356  {
357  pickRecenterPeaks_(input, PeakCandidates, snt);
358  }
359 
360  output.getFloatDataArrays().resize(3);
361  output.getFloatDataArrays()[0].setName("IntegratedIntensity");
362  output.getFloatDataArrays()[1].setName("leftWidth");
363  output.getFloatDataArrays()[2].setName("rightWidth");
364 
365  // Go through all candidates and exclude all lower-intensity candidates
366  // that are within the borders of another peak
367  OPENMS_LOG_DEBUG << "Will now merge candidates" << std::endl;
368  for (Size peak_it = 0; peak_it < PeakCandidates.size(); peak_it++)
369  {
370  if (PeakCandidates[peak_it].leftWidth < 0) continue;
371 
372  //Remove all peak candidates that are enclosed by this peak
373  for (Size m = peak_it + 1; m < PeakCandidates.size(); m++)
374  {
375  if (PeakCandidates[m].mz >= PeakCandidates[peak_it].leftWidth && PeakCandidates[m].mz <= PeakCandidates[peak_it].rightWidth)
376  {
377  OPENMS_LOG_DEBUG << "Remove peak " << m << " : " << PeakCandidates[m].mz << " " <<
378  PeakCandidates[m].peak_apex_intensity << " (too close to " << PeakCandidates[peak_it].mz <<
379  " " << PeakCandidates[peak_it].peak_apex_intensity << ")" << std::endl;
380  PeakCandidates[m].leftWidth = PeakCandidates[m].rightWidth = -1;
381  }
382  }
383 
384  Peak1D peak;
385  peak.setMZ(PeakCandidates[peak_it].mz);
386  peak.setIntensity(PeakCandidates[peak_it].integrated_intensity);
387  output.push_back(peak);
388 
389  OPENMS_LOG_DEBUG << "Push peak " << peak_it << " " << peak << std::endl;
390  output.getFloatDataArrays()[0].push_back(PeakCandidates[peak_it].integrated_intensity);
391  output.getFloatDataArrays()[1].push_back(PeakCandidates[peak_it].leftWidth);
392  output.getFloatDataArrays()[2].push_back(PeakCandidates[peak_it].rightWidth);
393  }
394 
395  OPENMS_LOG_DEBUG << "Found seeds: " << PeakCandidates.size() << " / Found peaks: " << output.size() << std::endl;
396  output.sortByPosition();
397  }
398 
399  void pickExperiment(const PeakMap& input, PeakMap& output)
400  {
401  // make sure that output is clear
402  output.clear(true);
403 
404  // copy experimental settings
405  static_cast<ExperimentalSettings&>(output) = input;
406 
407  // resize output with respect to input
408  output.resize(input.size());
409 
410  bool ms1_only = param_.getValue("ms1_only").toBool();
411  bool clear_meta_data = param_.getValue("clear_meta_data").toBool();
412 
413  Size progress = 0;
414  startProgress(0, input.size(), "picking peaks");
415  for (Size scan_idx = 0; scan_idx != input.size(); ++scan_idx)
416  {
417  if (ms1_only && (input[scan_idx].getMSLevel() != 1))
418  {
419  output[scan_idx] = input[scan_idx];
420  }
421  else
422  {
423  pick(input[scan_idx], output[scan_idx]);
424  if (clear_meta_data) {output[scan_idx].getFloatDataArrays().clear();}
425  }
426  setProgress(progress++);
427  }
428  endProgress();
429  }
430 
431  };
432 
433 }
434 
#define OPENMS_LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:470
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
const Param & getParameters() const
Non-mutable access to the parameters.
void setParameters(const Param &param)
Sets the parameters.
const Param & getDefaults() const
Non-mutable access to the default parameters.
Description of the experimental settings.
Definition: ExperimentalSettings.h:62
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:80
void resize(Size s)
Definition: MSExperiment.h:132
Size size() const
Definition: MSExperiment.h:127
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:71
double getRT() const
void setMSLevel(UInt ms_level)
Sets the MS level.
void setName(const String &name)
Sets the name.
void sortByPosition()
Lexicographically sorts the peaks by their position.
UInt getMSLevel() const
Returns the MS level.
const String & getName() const
Returns the name.
const FloatDataArrays & getFloatDataArrays() const
Returns a const reference to the float meta data arrays.
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Management and storage of parameters / INI files.
Definition: Param.h:70
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:104
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:113
This class implements a fast peak-picking algorithm best suited for high resolution MS data (FT-ICR-M...
Definition: PeakPickerHiRes.h:76
void pick(const MSSpectrum &input, MSSpectrum &output) const
Applies the peak-picking algorithm to a single spectrum (MSSpectrum). The resulting picked peaks are ...
This class implements a peak-picking algorithm for high-resolution MS data (specifically designed for...
Definition: PeakPickerIterative.h:99
void pickRecenterPeaks_(const MSSpectrum &input, std::vector< PeakCandidate > &PeakCandidates, SignalToNoiseEstimatorMedian< MSSpectrum > &snt)
Definition: PeakPickerIterative.h:167
void pickExperiment(const PeakMap &input, PeakMap &output)
Definition: PeakPickerIterative.h:399
int nr_iterations_
Definition: PeakPickerIterative.h:106
double sn_win_len_
Definition: PeakPickerIterative.h:107
PeakPickerIterative()
Constructor.
Definition: PeakPickerIterative.h:113
~PeakPickerIterative() override
Destructor.
Definition: PeakPickerIterative.h:152
double spacing_difference_
Definition: PeakPickerIterative.h:104
bool check_width_internally_
Definition: PeakPickerIterative.h:108
double signal_to_noise_
Definition: PeakPickerIterative.h:102
int sn_bin_count_
Definition: PeakPickerIterative.h:105
double peak_width_
Definition: PeakPickerIterative.h:103
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition: PeakPickerIterative.h:139
void pick(const MSSpectrum &input, MSSpectrum &output)
Definition: PeakPickerIterative.h:299
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
Estimates the signal/noise (S/N) ratio of each data point in a scan by using the median (histogram ba...
Definition: SignalToNoiseEstimatorMedian.h:83
virtual void init(const Container &c)
Set the start and endpoint of the raw data interval, for which signal to noise ratios will be estimat...
Definition: SignalToNoiseEstimator.h:101
virtual double getSignalToNoise(const Size index) const
Definition: SignalToNoiseEstimator.h:109
void setType(SpectrumType type)
sets the spectrum type
@ CENTROID
centroid data or stick data
Definition: SpectrumSettings.h:73
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
const double k
Definition: Constants.h:153
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
double leftWidth
Definition: PeakPickerIterative.h:56
bool sort_peaks_by_intensity(const PeakCandidate &a, const PeakCandidate &b)
Definition: PeakPickerIterative.h:62
double peak_apex_intensity
Definition: PeakPickerIterative.h:53
int index
Definition: PeakPickerIterative.h:52
double integrated_intensity
Definition: PeakPickerIterative.h:55
float mz
Definition: PeakPickerIterative.h:58
double rightWidth
Definition: PeakPickerIterative.h:57
A small structure to hold peak candidates.
Definition: PeakPickerIterative.h:51