OpenMS
Loading...
Searching...
No Matches
PeakPickerIterative.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Hannes Roest $
6// $Authors: Hannes Roest $
7// --------------------------------------------------------------------------
8
9#pragma once
10
15
16// #define DEBUG_PEAK_PICKING
17
18namespace OpenMS
19{
20
26 {
27 int index;
29
31 double leftWidth;
32 double rightWidth;
33 float mz;
34 };
35
36 bool sort_peaks_by_intensity(const PeakCandidate& a, const PeakCandidate& b); // prototype
41
71 class OPENMS_DLLAPI PeakPickerIterative :
73 public ProgressLogger
74 {
75
76private:
84
85public:
86
89 DefaultParamHandler("PeakPickerIterative"),
91 {
92 defaults_.setValue("signal_to_noise_", 1.0, "Signal to noise value, each peak is required to be above this value (turn off by setting it to 0.0)");
93 defaults_.setValue("peak_width", 0.0, "Expected peak width half width in Dalton - peaks will be extended until this half width is reached (even if the intensitity is increasing). In conjunction with check_width_internally it will also be used to remove peaks whose spacing is larger than this value.");
94
95
96 defaults_.setValue("spacing_difference", 1.5, "Difference between peaks in multiples of the minimal difference to continue. The higher this value is set, the further apart peaks are allowed to be to still extend a peak. E.g. if the value is set to 1.5 and in a current peak the minimal spacing between peaks is 10 mDa, then only peaks at most 15 mDa apart will be added to the peak.", {"advanced"});
97 defaults_.setValue("sn_bin_count_", 30, "Bin count for the Signal to Noise estimation.", {"advanced"});
98 defaults_.setValue("nr_iterations_", 5, "Nr of iterations to perform (how many times the peaks are re-centered).", {"advanced"});
99 defaults_.setMinInt("nr_iterations_", 1);
100 defaults_.setValue("sn_win_len_", 20.0, "Window length for the Signal to Noise estimation.", {"advanced"});
101
102 defaults_.setValue("check_width_internally", "false", "Delete peaks where the spacing is larger than the peak width (should be set to true to avoid artefacts)", {"advanced"});
103 defaults_.setValidStrings("check_width_internally", {"true","false"});
104
105 defaults_.setValue("ms1_only", "false", "Only do MS1");
106 defaults_.setValidStrings("ms1_only", {"true","false"});
107 defaults_.setValue("clear_meta_data", "false", "Delete meta data about peak width");
108 defaults_.setValidStrings("clear_meta_data", {"true","false"});
109
110 // write defaults into Param object param_
111 defaultsToParam_();
112 }
113
114 void updateMembers_() override
115 {
116 signal_to_noise_ = (double)param_.getValue("signal_to_noise_");
117 peak_width_ = (double)param_.getValue("peak_width");
118 spacing_difference_ = (double)param_.getValue("spacing_difference");
119 sn_bin_count_ = (double)param_.getValue("sn_bin_count_");
120 nr_iterations_ = (double)param_.getValue("nr_iterations_");
121 sn_win_len_ = (double)param_.getValue("sn_win_len_");
122
123 check_width_internally_ = param_.getValue("check_width_internally").toBool();
124 }
125
128
129private:
130
131 /*
132 * This will re-center the peaks by using the seeds (ordered by intensity) to
133 * find raw signals that may belong to this peak. Then the peak is centered
134 * using a weighted average.
135 * Signals are added to the peak as long as they are still inside the
136 * peak_width or as long as the signal intensity keeps falling. Also the
137 * distance to the previous signal and the whether the signal is below the
138 * noise level is taken into account.
139 * This function implements a single iteration of this algorithm.
140 *
141 */
143 std::vector<PeakCandidate>& PeakCandidates,
145 {
146 for (Size peak_it = 0; peak_it < PeakCandidates.size(); peak_it++)
147 {
148 int i = PeakCandidates[peak_it].index;
149 double central_peak_mz = input[i].getMZ(), central_peak_int = input[i].getIntensity();
150 double left_neighbor_mz = input[i - 1].getMZ(), left_neighbor_int = input[i - 1].getIntensity();
151 double right_neighbor_mz = input[i + 1].getMZ(), right_neighbor_int = input[i + 1].getIntensity();
152
153 // MZ spacing sanity checks
154 double left_to_central = std::fabs(central_peak_mz - left_neighbor_mz);
155 double central_to_right = std::fabs(right_neighbor_mz - central_peak_mz);
156 double min_spacing = (left_to_central < central_to_right) ? left_to_central : central_to_right;
157 double est_peak_width = peak_width_;
158
159 if (check_width_internally_ && (left_to_central > est_peak_width || central_to_right > est_peak_width))
160 {
161 // something has gone wrong, the points are further away than the peak width -> delete this peak
162 PeakCandidates[peak_it].integrated_intensity = -1;
163 PeakCandidates[peak_it].leftWidth = -1;
164 PeakCandidates[peak_it].rightWidth = -1;
165 PeakCandidates[peak_it].mz = -1;
166 continue;
167 }
168
169 std::map<double, double> peak_raw_data;
170
171 peak_raw_data[central_peak_mz] = central_peak_int;
172 peak_raw_data[left_neighbor_mz] = left_neighbor_int;
173 peak_raw_data[right_neighbor_mz] = right_neighbor_int;
174
175 // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // //
176 // COPY - from PeakPickerHiRes
177 // peak core found, now extend it to the left
178 Size k = 2;
179 while ((i - k + 1) > 0
180 && std::fabs(input[i - k].getMZ() - peak_raw_data.begin()->first) < spacing_difference_ * min_spacing
181 && (input[i - k].getIntensity() < peak_raw_data.begin()->second
182 || std::fabs(input[i - k].getMZ() - central_peak_mz) < est_peak_width)
183 )
184 {
185 if (signal_to_noise_ > 0.0)
186 {
187 if (snt.getSignalToNoise(i - k) < signal_to_noise_)
188 {
189 break;
190 }
191 }
192 peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity();
193 ++k;
194 }
195 double leftborder = input[i - k + 1].getMZ();
196
197 // to the right
198 k = 2;
199 while ((i + k) < input.size()
200 && std::fabs(input[i + k].getMZ() - peak_raw_data.rbegin()->first) < spacing_difference_ * min_spacing
201 && (input[i + k].getIntensity() < peak_raw_data.rbegin()->second
202 || std::fabs(input[i + k].getMZ() - central_peak_mz) < est_peak_width)
203 )
204 {
205 if (signal_to_noise_ > 0.0)
206 {
207 if (snt.getSignalToNoise(i + k) < signal_to_noise_)
208 {
209 break;
210 }
211 }
212
213 peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity();
214 ++k;
215 }
216 // END COPY - from PeakPickerHiRes
217 // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // // //
218
219 double rightborder = input[i + k - 1].getMZ();
220
221 double weighted_mz = 0;
222 double integrated_intensity = 0;
223 for (std::map<double, double>::const_iterator map_it = peak_raw_data.begin(); map_it != peak_raw_data.end(); ++map_it)
224 {
225 weighted_mz += map_it->first * map_it->second;
226 integrated_intensity += map_it->second;
227 }
228 weighted_mz /= integrated_intensity;
229
230 // store the data
231 PeakCandidates[peak_it].integrated_intensity = integrated_intensity;
232 PeakCandidates[peak_it].leftWidth = leftborder;
233 PeakCandidates[peak_it].rightWidth = rightborder;
234 PeakCandidates[peak_it].mz = weighted_mz;
235
236 // find the closest raw signal peak to where we just put our peak and store it
237 double min_diff = std::fabs(weighted_mz - input[i].getMZ());
238 int min_i = i;
239
240 // Search to the left
241 for (int m = 1; i - m > 0 && leftborder < input[i - m].getMZ(); m++)
242 {
243 if (std::fabs(weighted_mz - input[i - m].getMZ()) < min_diff)
244 {
245 min_diff = std::fabs(weighted_mz - input[i - m].getMZ());
246 min_i = i - m;
247 }
248 }
249 // Search to the right
250 for (int m = 1; i - m > 0 && rightborder > input[i + m].getMZ(); m++)
251 {
252 if (std::fabs(weighted_mz - input[i + m].getMZ()) < min_diff)
253 {
254 min_diff = std::fabs(weighted_mz - input[i + m].getMZ());
255 min_i = i + m;
256 }
257 }
258 PeakCandidates[peak_it].index = min_i;
259 }
260 }
261
262public:
263
264 /*
265 This will pick one single spectrum. The PeakPickerHiRes is used to
266 generate seeds, these seeds are then used to re-center the mass and
267 compute peak width and integrated intensity of the peak.
268
269 Finally, other peaks that would fall within the primary peak are
270 discarded
271
272 The output are the remaining peaks.
273 */
274 void pick(const MSSpectrum& input, MSSpectrum& output)
275 {
276 // don't pick a spectrum with less than 3 data points
277 if (input.size() < 3) return;
278
279 // copy the spectrum meta data
280 copySpectrumMeta(input, output);
281 output.setType(SpectrumSettings::CENTROID);
282 output.getFloatDataArrays().clear();
283
284 std::vector<PeakCandidate> PeakCandidates;
285 MSSpectrum picked_spectrum;
286
287 // Use the PeakPickerHiRes to find candidates ...
290 pepi_param.setValue("signal_to_noise", signal_to_noise_);
291 pepi_param.setValue("spacing_difference", spacing_difference_);
292 pp.setParameters(pepi_param);
293 pp.pick(input, picked_spectrum);
294
295 // after picking peaks, we store the closest index of the raw spectrum and the picked intensity
296 std::vector<PeakCandidate> newPeakCandidates_;
297 Size j = 0;
298 OPENMS_LOG_DEBUG << "Candidates " << picked_spectrum.size() << std::endl;
299 for (Size k = 0; k < input.size() && j < picked_spectrum.size(); k++)
300 {
301 if (input[k].getMZ() > picked_spectrum[j].getMZ())
302 {
303 OPENMS_LOG_DEBUG << "got a value " << k << " @ " << input[k] << std::endl;
304 PeakCandidate pc = { /*.index=*/ static_cast<int>(k), /*.intensity=*/ picked_spectrum[j].getIntensity(), -1, -1, -1, -1};
305 newPeakCandidates_.push_back(pc);
306 j++;
307 }
308 }
309
310 PeakCandidates = newPeakCandidates_;
311 std::sort(PeakCandidates.begin(), PeakCandidates.end(), sort_peaks_by_intensity);
312
313 // signal-to-noise estimation
315 if (signal_to_noise_ > 0.0)
316 {
317 Param snt_parameters = snt.getParameters();
318 snt_parameters.setValue("win_len", sn_win_len_);
319 snt_parameters.setValue("bin_count", sn_bin_count_);
320 snt.setParameters(snt_parameters);
321 snt.init(input);
322 }
323
324 // The peak candidates are re-centered and the width is computed for each peak
325 for (int i = 0; i < nr_iterations_; i++)
326 {
327 pickRecenterPeaks_(input, PeakCandidates, snt);
328 }
329
330 output.getFloatDataArrays().resize(3);
331 output.getFloatDataArrays()[0].setName("IntegratedIntensity");
332 output.getFloatDataArrays()[1].setName("leftWidth");
333 output.getFloatDataArrays()[2].setName("rightWidth");
334
335 // Go through all candidates and exclude all lower-intensity candidates
336 // that are within the borders of another peak
337 OPENMS_LOG_DEBUG << "Will now merge candidates" << std::endl;
338 for (Size peak_it = 0; peak_it < PeakCandidates.size(); peak_it++)
339 {
340 if (PeakCandidates[peak_it].leftWidth < 0) continue;
341
342 //Remove all peak candidates that are enclosed by this peak
343 for (Size m = peak_it + 1; m < PeakCandidates.size(); m++)
344 {
345 if (PeakCandidates[m].mz >= PeakCandidates[peak_it].leftWidth && PeakCandidates[m].mz <= PeakCandidates[peak_it].rightWidth)
346 {
347 OPENMS_LOG_DEBUG << "Remove peak " << m << " : " << PeakCandidates[m].mz << " " <<
348 PeakCandidates[m].peak_apex_intensity << " (too close to " << PeakCandidates[peak_it].mz <<
349 " " << PeakCandidates[peak_it].peak_apex_intensity << ")" << std::endl;
350 PeakCandidates[m].leftWidth = PeakCandidates[m].rightWidth = -1;
351 }
352 }
353
354 Peak1D peak;
355 peak.setMZ(PeakCandidates[peak_it].mz);
356 peak.setIntensity(PeakCandidates[peak_it].integrated_intensity);
357 output.push_back(peak);
358
359 OPENMS_LOG_DEBUG << "Push peak " << peak_it << " " << peak << std::endl;
360 output.getFloatDataArrays()[0].push_back(PeakCandidates[peak_it].integrated_intensity);
361 output.getFloatDataArrays()[1].push_back(PeakCandidates[peak_it].leftWidth);
362 output.getFloatDataArrays()[2].push_back(PeakCandidates[peak_it].rightWidth);
363 }
364
365 OPENMS_LOG_DEBUG << "Found seeds: " << PeakCandidates.size() << " / Found peaks: " << output.size() << std::endl;
366 output.sortByPosition();
367 }
368
369 void pickExperiment(const PeakMap& input, PeakMap& output)
370 {
371 // make sure that output is clear
372 output.clear(true);
373
374 // copy experimental settings
375 static_cast<ExperimentalSettings&>(output) = input;
376
377 // resize output with respect to input
378 output.resize(input.size());
379
380 bool ms1_only = param_.getValue("ms1_only").toBool();
381 bool clear_meta_data = param_.getValue("clear_meta_data").toBool();
382
383 Size progress = 0;
384 startProgress(0, input.size(), "picking peaks");
385 for (Size scan_idx = 0; scan_idx != input.size(); ++scan_idx)
386 {
387 if (ms1_only && (input[scan_idx].getMSLevel() != 1))
388 {
389 output[scan_idx] = input[scan_idx];
390 }
391 else
392 {
393 pick(input[scan_idx], output[scan_idx]);
394 if (clear_meta_data) {output[scan_idx].getFloatDataArrays().clear();}
395 }
396 setProgress(progress++);
397 }
398 endProgress();
399 }
400
401 };
402
403}
404
#define OPENMS_LOG_DEBUG
Macro for general debugging information.
Definition LogStream.h:457
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
const Param & getParameters() const
Non-mutable access to the parameters.
const Param & getDefaults() const
Non-mutable access to the default parameters.
void setParameters(const Param &param)
Sets the parameters.
Description of the experimental settings.
Definition ExperimentalSettings.h:36
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
Size size() const noexcept
The number of spectra.
void resize(Size n)
Resize to n spectra.
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition MSSpectrum.h:44
void sortByPosition()
Lexicographically sorts the peaks by their position.
const FloatDataArrays & getFloatDataArrays() const
Returns a const reference to the float meta data arrays.
Management and storage of parameters / INI files.
Definition Param.h:46
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition Peak1D.h:30
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition Peak1D.h:86
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition Peak1D.h:95
This class implements a fast peak-picking algorithm best suited for high resolution MS data (FT-ICR-M...
Definition PeakPickerHiRes.h:59
void pick(const MSSpectrum &input, MSSpectrum &output) const
Applies the peak-picking algorithm to a single spectrum (MSSpectrum). The resulting picked peaks are ...
This class implements a peak-picking algorithm for high-resolution MS data (specifically designed for...
Definition PeakPickerIterative.h:74
void pickExperiment(const PeakMap &input, PeakMap &output)
Definition PeakPickerIterative.h:369
int nr_iterations_
Definition PeakPickerIterative.h:81
double sn_win_len_
Definition PeakPickerIterative.h:82
PeakPickerIterative()
Constructor.
Definition PeakPickerIterative.h:88
~PeakPickerIterative() override
Destructor.
Definition PeakPickerIterative.h:127
double spacing_difference_
Definition PeakPickerIterative.h:79
bool check_width_internally_
Definition PeakPickerIterative.h:83
double signal_to_noise_
Definition PeakPickerIterative.h:77
int sn_bin_count_
Definition PeakPickerIterative.h:80
double peak_width_
Definition PeakPickerIterative.h:78
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition PeakPickerIterative.h:114
void pick(const MSSpectrum &input, MSSpectrum &output)
Definition PeakPickerIterative.h:274
void pickRecenterPeaks_(const MSSpectrum &input, std::vector< PeakCandidate > &PeakCandidates, SignalToNoiseEstimatorMedian< MSSpectrum > &snt) const
Definition PeakPickerIterative.h:142
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
Estimates the signal/noise (S/N) ratio of each data point in a scan by using the median (histogram ba...
Definition SignalToNoiseEstimatorMedian.h:57
virtual void init(const Container &c)
Set the start and endpoint of the raw data interval, for which signal to noise ratios will be estimat...
Definition SignalToNoiseEstimator.h:75
virtual double getSignalToNoise(const Size index) const
Definition SignalToNoiseEstimator.h:83
void setType(SpectrumType type)
sets the spectrum type
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
double leftWidth
Definition PeakPickerIterative.h:31
void copySpectrumMeta(const MSSpectrum &input, MSSpectrum &output, bool clear_spectrum=true)
Copies only the meta data contained in the input spectrum to the output spectrum.
bool sort_peaks_by_intensity(const PeakCandidate &a, const PeakCandidate &b)
Definition PeakPickerIterative.h:37
double peak_apex_intensity
Definition PeakPickerIterative.h:28
int index
Definition PeakPickerIterative.h:27
double integrated_intensity
Definition PeakPickerIterative.h:30
float mz
Definition PeakPickerIterative.h:33
double rightWidth
Definition PeakPickerIterative.h:32
A small structure to hold peak candidates.
Definition PeakPickerIterative.h:26