OpenMS
Loading...
Searching...
No Matches
SignalToNoiseEstimatorMedian.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Chris Bielow $
6// $Authors: $
7// --------------------------------------------------------------------------
8//
9
10#pragma once
11
12
17#include <vector>
18#include <algorithm> //for std::max_element
19
20namespace OpenMS
21{
54 template <typename Container = MSSpectrum>
56 public SignalToNoiseEstimator<Container>
57 {
58
59public:
60
63
65 using SignalToNoiseEstimator<Container>::defaults_;
66 using SignalToNoiseEstimator<Container>::param_;
67
70
72
75 {
76 //set the name for DefaultParamHandler error messages
77 this->setName("SignalToNoiseEstimatorMedian");
78
79 defaults_.setValue("max_intensity", -1, "maximal intensity considered for histogram construction. By default, it will be calculated automatically (see auto_mode)." \
80 " Only provide this parameter if you know what you are doing (and change 'auto_mode' to '-1')!" \
81 " All intensities EQUAL/ABOVE 'max_intensity' will be added to the LAST histogram bin." \
82 " If you choose 'max_intensity' too small, the noise estimate might be too small as well. " \
83 " If chosen too big, the bins become quite large (which you could counter by increasing 'bin_count', which increases runtime)." \
84 " In general, the Median-S/N estimator is more robust to a manual max_intensity than the MeanIterative-S/N.", {"advanced"});
85 defaults_.setMinInt("max_intensity", -1);
86
87 defaults_.setValue("auto_max_stdev_factor", 3.0, "parameter for 'max_intensity' estimation (if 'auto_mode' == 0): mean + 'auto_max_stdev_factor' * stdev", {"advanced"});
88 defaults_.setMinFloat("auto_max_stdev_factor", 0.0);
89 defaults_.setMaxFloat("auto_max_stdev_factor", 999.0);
90
91 defaults_.setValue("auto_max_percentile", 95, "parameter for 'max_intensity' estimation (if 'auto_mode' == 1): auto_max_percentile th percentile", {"advanced"});
92 defaults_.setMinInt("auto_max_percentile", 0);
93 defaults_.setMaxInt("auto_max_percentile", 100);
94
95 defaults_.setValue("auto_mode", 0, "method to use to determine maximal intensity: -1 --> use 'max_intensity'; 0 --> 'auto_max_stdev_factor' method (default); 1 --> 'auto_max_percentile' method", {"advanced"});
96 defaults_.setMinInt("auto_mode", -1);
97 defaults_.setMaxInt("auto_mode", 1);
98
99 defaults_.setValue("win_len", 200.0, "window length in Thomson");
100 defaults_.setMinFloat("win_len", 1.0);
101
102 defaults_.setValue("bin_count", 30, "number of bins for intensity values");
103 defaults_.setMinInt("bin_count", 3);
104
105 defaults_.setValue("min_required_elements", 10, "minimum number of elements required in a window (otherwise it is considered sparse)");
106 defaults_.setMinInt("min_required_elements", 1);
107
108 defaults_.setValue("noise_for_empty_window", std::pow(10.0, 20), "noise value used for sparse windows", {"advanced"});
109
110 defaults_.setValue("write_log_messages", "true", "Write out log messages in case of sparse windows or median in rightmost histogram bin");
111 defaults_.setValidStrings("write_log_messages", {"true","false"});
112
114 }
115
118 SignalToNoiseEstimator<Container>(source)
119 {
121 }
122
128 {
129 if (&source == this) return *this;
130
133 return *this;
134 }
135
137
141
144 {
146 }
147
150 {
152 }
153
154protected:
155
156
/// Computes a signal-to-noise estimate for every element of @p c and stores it in stn_estimates_.
///
/// Step 1 picks the upper bound of the intensity histogram (max_intensity_) according to auto_mode_.
/// Step 2 slides a window of width win_len_, centered on each element position, over the data and
/// keeps an intensity histogram with bin_count_ bins up to date incrementally. For windows holding at
/// least min_required_elements_ elements, the noise is the center value of the bin that contains the
/// median (but at least 1.0); the stored estimate is the element intensity divided by that noise.
/// Afterwards the sparse-window and rightmost-bin counters are converted to percentages.
///
/// @param c container whose elements are read through getPos() and getIntensity()
/// @throws Exception::InvalidValue if auto_max_percentile_ is outside [0,100] in AUTOMAXBYPERCENT mode,
///         or if max_intensity_ is not positive in the manual mode
///
/// NOTE(review): several statements of this function are absent from this listing (among them the
/// condition of the first branch, the counter resets and the sparse-window handling); check the
/// header itself before relying on the comments for those parts.
/// NOTE(review): the window borders only ever move forward, so this presumably expects the elements
/// of c to be sorted by position; TODO confirm against callers.
162 void computeSTN_(const Container& c) override
163 {
164 // first element in the scan
165 PeakIterator scan_first_ = c.begin();
166 // one past the last element in the scan (end iterator)
167 PeakIterator scan_last_ = c.end();
168
169 // reset counter for sparse windows
171 // reset counter for histogram overflow
173
174 // reset the results: one (zero-initialized) slot per element of c
175 stn_estimates_.clear();
176 stn_estimates_.resize(c.size());
177
178 // maximal range of histogram needs to be calculated first
180 {
181 // use MEAN + auto_max_stdev_Factor_ * STDEV of all intensities as threshold
182 GaussianEstimate gauss_global = SignalToNoiseEstimator<Container>::estimate_(scan_first_, scan_last_);
183 max_intensity_ = gauss_global.mean + std::sqrt(gauss_global.variance) * auto_max_stdev_Factor_;
184 }
185 else if (auto_mode_ == AUTOMAXBYPERCENT)
186 {
187 // get value at "auto_max_percentile_"th percentile
188 // we use a histogram approach here as well.
189 if ((auto_max_percentile_ < 0) || (auto_max_percentile_ > 100))
190 {
192 throw Exception::InvalidValue(__FILE__,
193 __LINE__,
194 OPENMS_PRETTY_FUNCTION,
195 "auto_mode is on AUTOMAXBYPERCENT! auto_max_percentile is not in [0,100]. Use setAutoMaxPercentile(<value>) to change it!",
196 s);
197 }
198
199 std::vector<int> histogram_auto(100, 0); // fixed 100 bins, valid indices 0..99
200
201 // find maximum of current scan. NOTE(review): the comparator below is a greater-than test; std::max_element treats the comparator as less-than, so this selects the element with the SMALLEST intensity. Verify whether a less-than comparison was intended. NOTE(review): for an empty container the returned iterator equals c.end() and is dereferenced on the next line.
202 auto maxIt = std::max_element(c.begin(), c.end() ,[](const PeakType& a, const PeakType& b){ return a.getIntensity() > b.getIntensity();});
203 typename PeakType::IntensityType maxInt = maxIt->getIntensity();
204
205 double bin_size = maxInt / 100;
206
207 // fill histogram. NOTE(review): the computed index is not clamped to 0..99; an intensity far enough below 1 yields a negative index and an intensity sufficiently above maxInt yields an index of 100 or more, both outside histogram_auto.
208 for(const auto& peak : c)
209 {
210 ++histogram_auto[(int) ((peak.getIntensity() - 1) / bin_size)];
211 }
212
213 // add up element counts in histogram until the auto_max_percentile_-th percentile is reached
214 int elements_below_percentile = (int) (auto_max_percentile_ * c.size() / 100);
215 int elements_seen = 0;
216 int i = -1; // index of the last histogram bin visited; stays -1 if the loop below never runs
217 PeakIterator run = scan_first_; // only used to bound the number of iterations by the element count
218
219 while (run != scan_last_ && elements_seen < elements_below_percentile) // NOTE(review): visits at most c.size() bins, and i itself is not checked against the 100 bins
220 {
221 ++i;
222 elements_seen += histogram_auto[i];
223 ++run;
224 }
225
226 max_intensity_ = (((double)i) + 0.5) * bin_size; // center of the last visited bin; negative when i is still -1
227 }
228 else //if (auto_mode_ == MANUAL)
229 {
230 if (max_intensity_ <= 0)
231 {
233 throw Exception::InvalidValue(__FILE__,
234 __LINE__,
235 OPENMS_PRETTY_FUNCTION,
236 "auto_mode is on MANUAL! max_intensity is <=0. Needs to be positive! Use setMaxIntensity(<value>) or enable auto_mode!",
237 s);
238 }
239 }
240
241 if (max_intensity_ < 0) // can only be reached with a value computed by one of the automatic modes (manual mode throws above)
242 {
243 std::cerr << "TODO SignalToNoiseEstimatorMedian: the max_intensity_ value should be positive! " << max_intensity_ << std::endl;
244 return; // stn_estimates_ keeps the values from the resize above
245 }
246
247 PeakIterator window_pos_center = scan_first_;
248 PeakIterator window_pos_borderleft = scan_first_;
249 PeakIterator window_pos_borderright = scan_first_;
250
251 double window_half_size = win_len_ / 2;
252 double bin_size = std::max(1.0, max_intensity_ / bin_count_); // at least size of 1 for intensity bins
253 int bin_count_minus_1 = bin_count_ - 1; // index of the rightmost bin, used for clamping
254
255 std::vector<int> histogram(bin_count_, 0);
256 std::vector<double> bin_value(bin_count_, 0);
257 // center intensity represented by each bin: (bin + 0.5) * bin_size
258 for (int bin = 0; bin < bin_count_; bin++)
259 {
260 histogram[bin] = 0;
261 bin_value[bin] = (bin + 0.5) * bin_size;
262 }
263 // bin in which a datapoint would fall
264 int to_bin = 0;
265
266 // index of bin where the median is located
267 int median_bin = 0;
268 // additive number of elements from left to x in histogram
269 int element_inc_count = 0;
270
271 // tracks elements in current window, which may vary because of unevenly spaced data
272 int elements_in_window = 0;
273 // number of windows processed so far; also the index into stn_estimates_
274 int window_count = 0;
275
276 // number of elements where we find the median
277 int element_in_window_half = 0;
278
279 double noise; // noise value of a datapoint
280
282 SignalToNoiseEstimator<Container>::startProgress(0, c.size(), "noise estimation of data");
283
284 // MAIN LOOP: one iteration per element, which acts as the window center
285 while (window_pos_center != scan_last_)
286 {
287
288 // erase all elements from histogram that will leave the window on the LEFT side
289 while ((*window_pos_borderleft).getPos() < (*window_pos_center).getPos() - window_half_size)
290 {
291 to_bin = std::max(std::min<int>((int)((*window_pos_borderleft).getIntensity() / bin_size), bin_count_minus_1), 0);
292 --histogram[to_bin];
293 --elements_in_window;
294 ++window_pos_borderleft;
295 }
296
297 // add all elements to histogram that will enter the window on the RIGHT side (bin index clamped to 0..bin_count_ - 1)
298 while ((window_pos_borderright != scan_last_)
299 && ((*window_pos_borderright).getPos() <= (*window_pos_center).getPos() + window_half_size))
300 {
301 //std::cerr << (*window_pos_borderright).getIntensity() << " " << bin_size << " " << bin_count_minus_1 << std::endl;
302 to_bin = std::max(std::min<int>((int)((*window_pos_borderright).getIntensity() / bin_size), bin_count_minus_1), 0);
303 ++histogram[to_bin];
304 ++elements_in_window;
305 ++window_pos_borderright;
306 }
307
308 if (elements_in_window < min_required_elements_) // sparse window; NOTE(review): the statements of this branch are absent from this listing
309 {
312 }
313 else
314 {
315 // find bin i where ceil[elements_in_window/2] <= sum_c(0..i){ histogram[c] }
316 median_bin = -1;
317 element_inc_count = 0;
318 element_in_window_half = (elements_in_window + 1) / 2;
319 while (median_bin < bin_count_minus_1 && element_inc_count < element_in_window_half)
320 {
321 ++median_bin;
322 element_inc_count += histogram[median_bin];
323 }
324
325 // count windows whose median fell into the rightmost bin (turned into a percentage after the loop)
326 if (median_bin == bin_count_minus_1) {++histogram_oob_percent_; }
327
328 // noise is the center value of the median bin, but never less than 1 (avoids division by 0)
329 noise = std::max(1.0, bin_value[median_bin]);
330 }
331
332 // store result: intensity of the center datapoint divided by the noise of its window
333 stn_estimates_[window_count] = (*window_pos_center).getIntensity() / noise;
334
335
336 // advance the window center by one datapoint
337 ++window_pos_center;
338 ++window_count;
339 // update progress
341
342 } // end while
343
345
346 sparse_window_percent_ = sparse_window_percent_ * 100 / window_count; // counter -> percent of all windows; NOTE(review): window_count is 0 for an empty container
347 histogram_oob_percent_ = histogram_oob_percent_ * 100 / window_count;
348
349 // warn if percentage of sparse windows is above 20%
351 {
352 OPENMS_LOG_WARN << "WARNING in SignalToNoiseEstimatorMedian: "
354 << "% of all windows were sparse. You should consider increasing 'win_len' or decreasing 'min_required_elements'"
355 << std::endl;
356 }
357
358 // warn if percentage of possibly wrong median estimates is above 1%
360 {
361 OPENMS_LOG_WARN << "WARNING in SignalToNoiseEstimatorMedian: "
363 << "% of all Signal-to-Noise estimates are too high, because the median was found in the rightmost histogram-bin. "
364 << "You should consider increasing 'max_intensity' (and maybe 'bin_count' with it, to keep bin width reasonable)"
365 << std::endl;
366 }
367
368 } // end of computeSTN_
369
/// Copies the current values of all parameters from param_ into the matching member variables
/// and clears stn_estimates_.
371 void updateMembers_() override
372 {
373 max_intensity_ = (double)param_.getValue("max_intensity");
374 auto_max_stdev_Factor_ = (double)param_.getValue("auto_max_stdev_factor");
375 auto_max_percentile_ = param_.getValue("auto_max_percentile");
376 auto_mode_ = param_.getValue("auto_mode");
377 win_len_ = (double)param_.getValue("win_len");
378 bin_count_ = param_.getValue("bin_count");
379 min_required_elements_ = param_.getValue("min_required_elements");
380 noise_for_empty_window_ = (double)param_.getValue("noise_for_empty_window");
381 write_log_messages_ = (bool)param_.getValue("write_log_messages").toBool(); // toBool() already returns bool, so the cast is redundant
382 stn_estimates_.clear(); // presumably because estimates computed with the previous parameters are no longer valid
383 }
384
394 double win_len_;
402
403 // whether to write out log messages in the case of failure
405
406 // counter for sparse windows
408 // counter for histogram overflow
410
411
412 };
413
414} // namespace OpenMS
415
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition LogStream.h:447
void defaultsToParam_()
Updates the parameters after the defaults have been set in the constructor.
Param param_
Container for current parameters.
Definition DefaultParamHandler.h:139
Param defaults_
Container for default parameters. This member should be filled in the constructor of derived classes!
Definition DefaultParamHandler.h:146
void setName(const String &name)
Mutable access to the name.
Invalid value exception.
Definition Exception.h:306
bool toBool() const
Conversion to bool.
const ParamValue & getValue(const std::string &key) const
Returns a value of a parameter.
void setValidStrings(const std::string &key, const std::vector< std::string > &strings)
Sets the valid strings for the parameter key.
void setMaxFloat(const std::string &key, double max)
Sets the maximum value for the floating point or floating point list parameter key.
void setMaxInt(const std::string &key, int max)
Sets the maximum value for the integer or integer list parameter key.
void setMinInt(const std::string &key, int min)
Sets the minimum value for the integer or integer list parameter key.
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
void setMinFloat(const std::string &key, double min)
Sets the minimum value for the floating point or floating point list parameter key.
float IntensityType
Intensity type.
Definition Peak2D.h:37
void setProgress(SignedSize value) const
Sets the current progress.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress(UInt64 bytes_processed=0) const
Estimates the signal/noise (S/N) ratio of each data point in a scan by using the median (histogram ba...
Definition SignalToNoiseEstimatorMedian.h:57
SignalToNoiseEstimator< Container >::PeakIterator PeakIterator
Definition SignalToNoiseEstimatorMedian.h:68
double win_len_
range of data points which belong to a window in Thomson
Definition SignalToNoiseEstimatorMedian.h:394
SignalToNoiseEstimatorMedian & operator=(const SignalToNoiseEstimatorMedian &source)
Definition SignalToNoiseEstimatorMedian.h:127
double noise_for_empty_window_
Definition SignalToNoiseEstimatorMedian.h:401
~SignalToNoiseEstimatorMedian() override
Destructor.
Definition SignalToNoiseEstimatorMedian.h:139
SignalToNoiseEstimatorMedian()
default constructor
Definition SignalToNoiseEstimatorMedian.h:74
double max_intensity_
maximal intensity considered during binning (values equal to or above it are added to the last histogram bin)
Definition SignalToNoiseEstimatorMedian.h:386
double auto_max_percentile_
parameter for initial automatic estimation of "max_intensity_": a percentile in [0,100]
Definition SignalToNoiseEstimatorMedian.h:390
double histogram_oob_percent_
Definition SignalToNoiseEstimatorMedian.h:409
void computeSTN_(const Container &c) override
Definition SignalToNoiseEstimatorMedian.h:162
bool write_log_messages_
Definition SignalToNoiseEstimatorMedian.h:404
void updateMembers_() override
overridden function from DefaultParamHandler to keep members up to date, when a parameter is changed
Definition SignalToNoiseEstimatorMedian.h:371
int min_required_elements_
minimal number of elements a window needs to cover to be used
Definition SignalToNoiseEstimatorMedian.h:398
SignalToNoiseEstimatorMedian(const SignalToNoiseEstimatorMedian &source)
Copy Constructor.
Definition SignalToNoiseEstimatorMedian.h:117
double getSparseWindowPercent() const
Returns how many percent of the windows were sparse.
Definition SignalToNoiseEstimatorMedian.h:143
double sparse_window_percent_
Definition SignalToNoiseEstimatorMedian.h:407
double getHistogramRightmostPercent() const
Returns the percentage where the median was found in the rightmost bin.
Definition SignalToNoiseEstimatorMedian.h:149
SignalToNoiseEstimator< Container >::PeakType PeakType
Definition SignalToNoiseEstimatorMedian.h:69
SignalToNoiseEstimator< Container >::GaussianEstimate GaussianEstimate
Definition SignalToNoiseEstimatorMedian.h:71
int auto_mode_
determines which method shall be used for estimating "max_intensity_". Valid values are MANUAL=-1, AUTOMAXBYSTDEV=0 and AUTOMAXBYPERCENT=1.
Definition SignalToNoiseEstimatorMedian.h:392
IntensityThresholdCalculation
method to use for estimating the maximal intensity that is used for histogram calculation
Definition SignalToNoiseEstimatorMedian.h:62
@ MANUAL
Definition SignalToNoiseEstimatorMedian.h:62
@ AUTOMAXBYSTDEV
Definition SignalToNoiseEstimatorMedian.h:62
@ AUTOMAXBYPERCENT
Definition SignalToNoiseEstimatorMedian.h:62
int bin_count_
number of bins in the histogram
Definition SignalToNoiseEstimatorMedian.h:396
double auto_max_stdev_Factor_
parameter for initial automatic estimation of "max_intensity_": a stdev multiplier
Definition SignalToNoiseEstimatorMedian.h:388
This class represents the abstract base class of a signal to noise estimator.
Definition SignalToNoiseEstimator.h:33
double variance
variance of estimated Gaussian
Definition SignalToNoiseEstimator.h:108
PeakIterator::value_type PeakType
Definition SignalToNoiseEstimator.h:40
SignalToNoiseEstimator & operator=(const SignalToNoiseEstimator &source)
Assignment operator.
Definition SignalToNoiseEstimator.h:60
GaussianEstimate estimate_(const PeakIterator &scan_first_, const PeakIterator &scan_last_) const
calculate mean & stdev of intensities of a spectrum
Definition SignalToNoiseEstimator.h:113
double mean
mean of estimated Gaussian
Definition SignalToNoiseEstimator.h:107
std::vector< double > stn_estimates_
stores the signal-to-noise estimate (intensity divided by estimated noise) for each peak
Definition SignalToNoiseEstimator.h:146
Container::const_iterator PeakIterator
Definition SignalToNoiseEstimator.h:39
protected struct to store the parameters (mean and variance) of a Gaussian distribution
Definition SignalToNoiseEstimator.h:106
A more convenient string class.
Definition String.h:34
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19