OpenMS
PeakPickerIterative.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Hannes Roest $
6 // $Authors: Hannes Roest $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
15 
16 // #define DEBUG_PEAK_PICKING
17 
18 namespace OpenMS
19 {
20 
26  {
27  int index;
29 
31  double leftWidth;
32  double rightWidth;
33  float mz;
34  };
35 
36  bool sort_peaks_by_intensity(const PeakCandidate& a, const PeakCandidate& b); // prototype
38  {
40  }
41 
71  class OPENMS_DLLAPI PeakPickerIterative :
72  public DefaultParamHandler,
73  public ProgressLogger
74  {
75 
76 private:
78  double peak_width_;
82  double sn_win_len_;
84 
85 public:
86 
89  DefaultParamHandler("PeakPickerIterative"),
91  {
92  defaults_.setValue("signal_to_noise_", 1.0, "Signal to noise value, each peak is required to be above this value (turn off by setting it to 0.0)");
93  defaults_.setValue("peak_width", 0.0, "Expected peak width half width in Dalton - peaks will be extended until this half width is reached (even if the intensitity is increasing). In conjunction with check_width_internally it will also be used to remove peaks whose spacing is larger than this value.");
94 
95 
96  defaults_.setValue("spacing_difference", 1.5, "Difference between peaks in multiples of the minimal difference to continue. The higher this value is set, the further apart peaks are allowed to be to still extend a peak. E.g. if the value is set to 1.5 and in a current peak the minimal spacing between peaks is 10 mDa, then only peaks at most 15 mDa apart will be added to the peak.", {"advanced"});
97  defaults_.setValue("sn_bin_count_", 30, "Bin count for the Signal to Noise estimation.", {"advanced"});
98  defaults_.setValue("nr_iterations_", 5, "Nr of iterations to perform (how many times the peaks are re-centered).", {"advanced"});
99  defaults_.setMinInt("nr_iterations_", 1);
100  defaults_.setValue("sn_win_len_", 20.0, "Window length for the Signal to Noise estimation.", {"advanced"});
101 
102  defaults_.setValue("check_width_internally", "false", "Delete peaks where the spacing is larger than the peak width (should be set to true to avoid artefacts)", {"advanced"});
103  defaults_.setValidStrings("check_width_internally", {"true","false"});
104 
105  defaults_.setValue("ms1_only", "false", "Only do MS1");
106  defaults_.setValidStrings("ms1_only", {"true","false"});
107  defaults_.setValue("clear_meta_data", "false", "Delete meta data about peak width");
108  defaults_.setValidStrings("clear_meta_data", {"true","false"});
109 
110  // write defaults into Param object param_
111  defaultsToParam_();
112  }
113 
114  void updateMembers_() override
115  {
116  signal_to_noise_ = (double)param_.getValue("signal_to_noise_");
117  peak_width_ = (double)param_.getValue("peak_width");
118  spacing_difference_ = (double)param_.getValue("spacing_difference");
119  sn_bin_count_ = (double)param_.getValue("sn_bin_count_");
120  nr_iterations_ = (double)param_.getValue("nr_iterations_");
121  sn_win_len_ = (double)param_.getValue("sn_win_len_");
122 
123  check_width_internally_ = param_.getValue("check_width_internally").toBool();
124  }
125 
127  ~PeakPickerIterative() override {}
128 
129 private:
130 
131  /*
132  * This will re-center the peaks by using the seeds (ordered by intensity) to
133  * find raw signals that may belong to this peak. Then the peak is centered
134  * using a weighted average.
135  * Signals are added to the peak as long as they are still inside the
136  * peak_width or as long as the signal intensity keeps falling. Also the
137  * distance to the previous signal and the whether the signal is below the
138  * noise level is taken into account.
139  * This function implements a single iteration of this algorithm.
140  *
141  */
142  void pickRecenterPeaks_(const MSSpectrum& input,
143  std::vector<PeakCandidate>& PeakCandidates,
145  {
146  for (Size peak_it = 0; peak_it < PeakCandidates.size(); peak_it++)
147  {
148  int i = PeakCandidates[peak_it].index;
149  double central_peak_mz = input[i].getMZ(), central_peak_int = input[i].getIntensity();
150  double left_neighbor_mz = input[i - 1].getMZ(), left_neighbor_int = input[i - 1].getIntensity();
151  double right_neighbor_mz = input[i + 1].getMZ(), right_neighbor_int = input[i + 1].getIntensity();
152 
153  // MZ spacing sanity checks
154  double left_to_central = std::fabs(central_peak_mz - left_neighbor_mz);
155  double central_to_right = std::fabs(right_neighbor_mz - central_peak_mz);
156  double min_spacing = (left_to_central < central_to_right) ? left_to_central : central_to_right;
157  double est_peak_width = peak_width_;
158 
159  if (check_width_internally_ && (left_to_central > est_peak_width || central_to_right > est_peak_width))
160  {
161  // something has gone wrong, the points are further away than the peak width -> delete this peak
162  PeakCandidates[peak_it].integrated_intensity = -1;
163  PeakCandidates[peak_it].leftWidth = -1;
164  PeakCandidates[peak_it].rightWidth = -1;
165  PeakCandidates[peak_it].mz = -1;
166  continue;
167  }
168 
169  std::map<double, double> peak_raw_data;
170 
171  peak_raw_data[central_peak_mz] = central_peak_int;
172  peak_raw_data[left_neighbor_mz] = left_neighbor_int;
173  peak_raw_data[right_neighbor_mz] = right_neighbor_int;
174 
175  // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // //
176  // COPY - from PeakPickerHiRes
177  // peak core found, now extend it to the left
178  Size k = 2;
179  while ((i - k + 1) > 0
180  && std::fabs(input[i - k].getMZ() - peak_raw_data.begin()->first) < spacing_difference_ * min_spacing
181  && (input[i - k].getIntensity() < peak_raw_data.begin()->second
182  || std::fabs(input[i - k].getMZ() - central_peak_mz) < est_peak_width)
183  )
184  {
185  if (signal_to_noise_ > 0.0)
186  {
187  if (snt.getSignalToNoise(i - k) < signal_to_noise_)
188  {
189  break;
190  }
191  }
192  peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity();
193  ++k;
194  }
195  double leftborder = input[i - k + 1].getMZ();
196 
197  // to the right
198  k = 2;
199  while ((i + k) < input.size()
200  && std::fabs(input[i + k].getMZ() - peak_raw_data.rbegin()->first) < spacing_difference_ * min_spacing
201  && (input[i + k].getIntensity() < peak_raw_data.rbegin()->second
202  || std::fabs(input[i + k].getMZ() - central_peak_mz) < est_peak_width)
203  )
204  {
205  if (signal_to_noise_ > 0.0)
206  {
207  if (snt.getSignalToNoise(i + k) < signal_to_noise_)
208  {
209  break;
210  }
211  }
212 
213  peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity();
214  ++k;
215  }
216  // END COPY - from PeakPickerHiRes
217  // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // //
218 
219  double rightborder = input[i + k - 1].getMZ();
220 
221  double weighted_mz = 0;
222  double integrated_intensity = 0;
223  for (std::map<double, double>::const_iterator map_it = peak_raw_data.begin(); map_it != peak_raw_data.end(); ++map_it)
224  {
225  weighted_mz += map_it->first * map_it->second;
226  integrated_intensity += map_it->second;
227  }
228  weighted_mz /= integrated_intensity;
229 
230  // store the data
231  PeakCandidates[peak_it].integrated_intensity = integrated_intensity;
232  PeakCandidates[peak_it].leftWidth = leftborder;
233  PeakCandidates[peak_it].rightWidth = rightborder;
234  PeakCandidates[peak_it].mz = weighted_mz;
235 
236  // find the closest raw signal peak to where we just put our peak and store it
237  double min_diff = std::fabs(weighted_mz - input[i].getMZ());
238  int min_i = i;
239 
240  // Search to the left
241  for (int m = 1; i - m > 0 && leftborder < input[i - m].getMZ(); m++)
242  {
243  if (std::fabs(weighted_mz - input[i - m].getMZ()) < min_diff)
244  {
245  min_diff = std::fabs(weighted_mz - input[i - m].getMZ());
246  min_i = i - m;
247  }
248  }
249  // Search to the right
250  for (int m = 1; i - m > 0 && rightborder > input[i + m].getMZ(); m++)
251  {
252  if (std::fabs(weighted_mz - input[i + m].getMZ()) < min_diff)
253  {
254  min_diff = std::fabs(weighted_mz - input[i + m].getMZ());
255  min_i = i + m;
256  }
257  }
258  PeakCandidates[peak_it].index = min_i;
259  }
260  }
261 
262 public:
263 
264  /*
265  This will pick one single spectrum. The PeakPickerHiRes is used to
266  generate seeds, these seeds are then used to re-center the mass and
267  compute peak width and integrated intensity of the peak.
268 
269  Finally, other peaks that would fall within the primary peak are
270  discarded
271 
272  The output are the remaining peaks.
273  */
274  void pick(const MSSpectrum& input, MSSpectrum& output)
275  {
276  // don't pick a spectrum with less than 3 data points
277  if (input.size() < 3) return;
278 
279  // copy the spectrum meta data
280  copySpectrumMeta(input, output);
282  output.getFloatDataArrays().clear();
283 
284  std::vector<PeakCandidate> PeakCandidates;
285  MSSpectrum picked_spectrum;
286 
287  // Use the PeakPickerHiRes to find candidates ...
289  Param pepi_param = OpenMS::PeakPickerHiRes().getDefaults();
290  pepi_param.setValue("signal_to_noise", signal_to_noise_);
291  pepi_param.setValue("spacing_difference", spacing_difference_);
292  pp.setParameters(pepi_param);
293  pp.pick(input, picked_spectrum);
294 
295  // after picking peaks, we store the closest index of the raw spectrum and the picked intensity
296  std::vector<PeakCandidate> newPeakCandidates_;
297  Size j = 0;
298  OPENMS_LOG_DEBUG << "Candidates " << picked_spectrum.size() << std::endl;
299  for (Size k = 0; k < input.size() && j < picked_spectrum.size(); k++)
300  {
301  if (input[k].getMZ() > picked_spectrum[j].getMZ())
302  {
303  OPENMS_LOG_DEBUG << "got a value " << k << " @ " << input[k] << std::endl;
304  PeakCandidate pc = { /*.index=*/ static_cast<int>(k), /*.intensity=*/ picked_spectrum[j].getIntensity(), -1, -1, -1, -1};
305  newPeakCandidates_.push_back(pc);
306  j++;
307  }
308  }
309 
310  PeakCandidates = newPeakCandidates_;
311  std::sort(PeakCandidates.begin(), PeakCandidates.end(), sort_peaks_by_intensity);
312 
313  // signal-to-noise estimation
315  if (signal_to_noise_ > 0.0)
316  {
317  Param snt_parameters = snt.getParameters();
318  snt_parameters.setValue("win_len", sn_win_len_);
319  snt_parameters.setValue("bin_count", sn_bin_count_);
320  snt.setParameters(snt_parameters);
321  snt.init(input);
322  }
323 
324  // The peak candidates are re-centered and the width is computed for each peak
325  for (int i = 0; i < nr_iterations_; i++)
326  {
327  pickRecenterPeaks_(input, PeakCandidates, snt);
328  }
329 
330  output.getFloatDataArrays().resize(3);
331  output.getFloatDataArrays()[0].setName("IntegratedIntensity");
332  output.getFloatDataArrays()[1].setName("leftWidth");
333  output.getFloatDataArrays()[2].setName("rightWidth");
334 
335  // Go through all candidates and exclude all lower-intensity candidates
336  // that are within the borders of another peak
337  OPENMS_LOG_DEBUG << "Will now merge candidates" << std::endl;
338  for (Size peak_it = 0; peak_it < PeakCandidates.size(); peak_it++)
339  {
340  if (PeakCandidates[peak_it].leftWidth < 0) continue;
341 
342  //Remove all peak candidates that are enclosed by this peak
343  for (Size m = peak_it + 1; m < PeakCandidates.size(); m++)
344  {
345  if (PeakCandidates[m].mz >= PeakCandidates[peak_it].leftWidth && PeakCandidates[m].mz <= PeakCandidates[peak_it].rightWidth)
346  {
347  OPENMS_LOG_DEBUG << "Remove peak " << m << " : " << PeakCandidates[m].mz << " " <<
348  PeakCandidates[m].peak_apex_intensity << " (too close to " << PeakCandidates[peak_it].mz <<
349  " " << PeakCandidates[peak_it].peak_apex_intensity << ")" << std::endl;
350  PeakCandidates[m].leftWidth = PeakCandidates[m].rightWidth = -1;
351  }
352  }
353 
354  Peak1D peak;
355  peak.setMZ(PeakCandidates[peak_it].mz);
356  peak.setIntensity(PeakCandidates[peak_it].integrated_intensity);
357  output.push_back(peak);
358 
359  OPENMS_LOG_DEBUG << "Push peak " << peak_it << " " << peak << std::endl;
360  output.getFloatDataArrays()[0].push_back(PeakCandidates[peak_it].integrated_intensity);
361  output.getFloatDataArrays()[1].push_back(PeakCandidates[peak_it].leftWidth);
362  output.getFloatDataArrays()[2].push_back(PeakCandidates[peak_it].rightWidth);
363  }
364 
365  OPENMS_LOG_DEBUG << "Found seeds: " << PeakCandidates.size() << " / Found peaks: " << output.size() << std::endl;
366  output.sortByPosition();
367  }
368 
369  void pickExperiment(const PeakMap& input, PeakMap& output)
370  {
371  // make sure that output is clear
372  output.clear(true);
373 
374  // copy experimental settings
375  static_cast<ExperimentalSettings&>(output) = input;
376 
377  // resize output with respect to input
378  output.resize(input.size());
379 
380  bool ms1_only = param_.getValue("ms1_only").toBool();
381  bool clear_meta_data = param_.getValue("clear_meta_data").toBool();
382 
383  Size progress = 0;
384  startProgress(0, input.size(), "picking peaks");
385  for (Size scan_idx = 0; scan_idx != input.size(); ++scan_idx)
386  {
387  if (ms1_only && (input[scan_idx].getMSLevel() != 1))
388  {
389  output[scan_idx] = input[scan_idx];
390  }
391  else
392  {
393  pick(input[scan_idx], output[scan_idx]);
394  if (clear_meta_data) {output[scan_idx].getFloatDataArrays().clear();}
395  }
396  setProgress(progress++);
397  }
398  endProgress();
399  }
400 
401  };
402 
403 }
404 
#define OPENMS_LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:454
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
const Param & getParameters() const
Non-mutable access to the parameters.
void setParameters(const Param &param)
Sets the parameters.
const Param & getDefaults() const
Non-mutable access to the default parameters.
Description of the experimental settings.
Definition: ExperimentalSettings.h:36
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:46
void resize(Size n)
Resize to n spectra.
Definition: MSExperiment.h:127
Size size() const
The number of spectra.
Definition: MSExperiment.h:121
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:44
void sortByPosition()
Lexicographically sorts the peaks by their position.
const FloatDataArrays & getFloatDataArrays() const
Returns a const reference to the float meta data arrays.
Management and storage of parameters / INI files.
Definition: Param.h:44
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:28
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:84
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:93
This class implements a fast peak-picking algorithm best suited for high resolution MS data (FT-ICR-M...
Definition: PeakPickerHiRes.h:58
void pick(const MSSpectrum &input, MSSpectrum &output) const
Applies the peak-picking algorithm to a single spectrum (MSSpectrum). The resulting picked peaks are ...
This class implements a peak-picking algorithm for high-resolution MS data (specifically designed for...
Definition: PeakPickerIterative.h:74
void pickExperiment(const PeakMap &input, PeakMap &output)
Definition: PeakPickerIterative.h:369
int nr_iterations_
Definition: PeakPickerIterative.h:81
double sn_win_len_
Definition: PeakPickerIterative.h:82
PeakPickerIterative()
Constructor.
Definition: PeakPickerIterative.h:88
~PeakPickerIterative() override
Destructor.
Definition: PeakPickerIterative.h:127
double spacing_difference_
Definition: PeakPickerIterative.h:79
bool check_width_internally_
Definition: PeakPickerIterative.h:83
double signal_to_noise_
Definition: PeakPickerIterative.h:77
int sn_bin_count_
Definition: PeakPickerIterative.h:80
double peak_width_
Definition: PeakPickerIterative.h:78
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition: PeakPickerIterative.h:114
void pick(const MSSpectrum &input, MSSpectrum &output)
Definition: PeakPickerIterative.h:274
void pickRecenterPeaks_(const MSSpectrum &input, std::vector< PeakCandidate > &PeakCandidates, SignalToNoiseEstimatorMedian< MSSpectrum > &snt) const
Definition: PeakPickerIterative.h:142
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
Estimates the signal/noise (S/N) ratio of each data point in a scan by using the median (histogram ba...
Definition: SignalToNoiseEstimatorMedian.h:57
virtual void init(const Container &c)
Set the start and endpoint of the raw data interval, for which signal to noise ratios will be estimat...
Definition: SignalToNoiseEstimator.h:75
virtual double getSignalToNoise(const Size index) const
Definition: SignalToNoiseEstimator.h:83
void setType(SpectrumType type)
sets the spectrum type
@ CENTROID
centroid data or stick data
Definition: SpectrumSettings.h:47
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
const double k
Definition: Constants.h:132
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
double leftWidth
Definition: PeakPickerIterative.h:31
bool sort_peaks_by_intensity(const PeakCandidate &a, const PeakCandidate &b)
Definition: PeakPickerIterative.h:37
double peak_apex_intensity
Definition: PeakPickerIterative.h:28
int index
Definition: PeakPickerIterative.h:27
double integrated_intensity
Definition: PeakPickerIterative.h:30
void copySpectrumMeta(const MSSpectrum &input, MSSpectrum &output, bool clear_spectrum=true)
Copies only the meta data contained in the input spectrum to the output spectrum.
float mz
Definition: PeakPickerIterative.h:33
double rightWidth
Definition: PeakPickerIterative.h:32
A small structure to hold peak candidates.
Definition: PeakPickerIterative.h:26