OpenMS
GaussFilterAlgorithm.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Hannes Roest $
6 // $Authors: Eva Lange $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
11 #include <OpenMS/CONCEPT/Types.h>
15 
16 #include <cmath>
17 #include <vector>
18 
19 namespace OpenMS
20 {
43 // #define DEBUG_FILTERING
44 
45  class OPENMS_DLLAPI GaussFilterAlgorithm
46  {
47 public:
50 
53 
58  {
59  // create new arrays for mz / intensity data and set their size
62  mz_array->data.resize(spectrum->getMZArray()->data.size());
63  intensity_array->data.resize(spectrum->getMZArray()->data.size());
64 
65  // apply the filter
66  bool ret_val = filter(
67  spectrum->getMZArray()->data.begin(),
68  spectrum->getMZArray()->data.end(),
69  spectrum->getIntensityArray()->data.begin(),
70  mz_array->data.begin(), intensity_array->data.begin()
71  );
72  // set the data of the spectrum to the new mz / int arrays
73  spectrum->setMZArray(mz_array);
74  spectrum->setIntensityArray(intensity_array);
75  return ret_val;
76  }
77 
82  {
83  // create new arrays for rt / intensity data and set their size
86  rt_array->data.resize(chromatogram->getTimeArray()->data.size());
87  intensity_array->data.resize(chromatogram->getTimeArray()->data.size());
88 
89  // apply the filter
90  bool ret_val = filter(
91  chromatogram->getTimeArray()->data.begin(),
92  chromatogram->getTimeArray()->data.end(),
93  chromatogram->getIntensityArray()->data.begin(),
94  rt_array->data.begin(), intensity_array->data.begin()
95  );
96  // set the data of the chromatogram to the new rt / int arrays
97  chromatogram->setTimeArray(rt_array);
98  chromatogram->setIntensityArray(intensity_array);
99  return ret_val;
100  }
101 
107  template <typename ConstIterT, typename IterT>
108  bool filter(
109  ConstIterT mz_in_start,
110  ConstIterT mz_in_end,
111  ConstIterT int_in_start,
112  IterT mz_out,
113  IterT int_out)
114  {
115  bool found_signal = false;
116 
117  ConstIterT mz_it = mz_in_start;
118  ConstIterT int_it = int_in_start;
119  for (; mz_it != mz_in_end; mz_it++, int_it++)
120  {
121  // if ppm tolerance is used, calculate a reasonable width value for this m/z
122  if (use_ppm_tolerance_)
123  {
124  initialize(Math::ppmToMass(ppm_tolerance_, *mz_it), spacing_, ppm_tolerance_, use_ppm_tolerance_);
125  }
126 
127  double new_int = integrate_(mz_it, int_it, mz_in_start, mz_in_end);
128 
129  // store new intensity and m/z into output iterator
130  *mz_out = *mz_it;
131  *int_out = new_int;
132  ++mz_out;
133  ++int_out;
134 
135  if (fabs(new_int) > 0) found_signal = true;
136  }
137  return found_signal;
138  }
139 
140  void initialize(double gaussian_width, double spacing, double ppm_tolerance, bool use_ppm_tolerance);
141 
142 protected:
143 
145  std::vector<double> coeffs_;
147  double sigma_;
149  double spacing_;
150 
151  // tolerance in ppm
154 
156  template <typename InputPeakIterator>
157  double integrate_(InputPeakIterator x /* mz */, InputPeakIterator y /* int */, InputPeakIterator first, InputPeakIterator last)
158  {
159  double v = 0.;
160  // norm the gaussian kernel area to one
161  double norm = 0.;
162  Size middle = coeffs_.size();
163 
164  double start_pos = (( (*x) - (middle * spacing_)) > (*first)) ? ((*x) - (middle * spacing_)) : (*first);
165  double end_pos = (( (*x) + (middle * spacing_)) < (*(last - 1))) ? ((*x) + (middle * spacing_)) : (*(last - 1));
166 
167  InputPeakIterator help_x = x;
168  InputPeakIterator help_y = y;
169 #ifdef DEBUG_FILTERING
170 
171  std::cout << "integrate from middle to start_pos " << *help_x << " until " << start_pos << std::endl;
172 #endif
173 
174  //integrate from middle to start_pos
175  while ((help_x != first) && (*(help_x - 1) > start_pos))
176  {
177  // search for the corresponding datapoint of help in the gaussian (take the left most adjacent point)
178  double distance_in_gaussian = fabs(*x - *help_x);
179  Size left_position = (Size)floor(distance_in_gaussian / spacing_);
180 
181  // search for the true left adjacent data point (because of rounding errors)
182  for (int j = 0; ((j < 3) && (distance(first, help_x - j) >= 0)); ++j)
183  {
184  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
185  {
186  left_position -= j;
187  break;
188  }
189 
190  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
191  {
192  left_position += j;
193  break;
194  }
195  }
196 
197  // interpolate between the left and right data points in the gaussian to get the true value at position distance_in_gaussian
198  Size right_position = left_position + 1;
199  double d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
200  // check if the right data point in the gaussian exists
201  double coeffs_right = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
202  : coeffs_[left_position];
203 #ifdef DEBUG_FILTERING
204 
205  std::cout << "distance_in_gaussian " << distance_in_gaussian << std::endl;
206  std::cout << " right_position " << right_position << std::endl;
207  std::cout << " left_position " << left_position << std::endl;
208  std::cout << "coeffs_ at left_position " << coeffs_[left_position] << std::endl;
209  std::cout << "coeffs_ at right_position " << coeffs_[right_position] << std::endl;
210  std::cout << "interpolated value left " << coeffs_right << std::endl;
211 #endif
212 
213 
214  // search for the corresponding datapoint for (help-1) in the gaussian (take the left most adjacent point)
215  distance_in_gaussian = fabs((*x) - (*(help_x - 1)));
216  left_position = (Size)floor(distance_in_gaussian / spacing_);
217 
218  // search for the true left adjacent data point (because of rounding errors)
219  for (UInt j = 0; ((j < 3) && (distance(first, help_x - j) >= 0)); ++j)
220  {
221  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
222  {
223  left_position -= j;
224  break;
225  }
226 
227  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
228  {
229  left_position += j;
230  break;
231  }
232  }
233 
234  // start the interpolation for the true value in the gaussian
235  right_position = left_position + 1;
236  d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
237  double coeffs_left = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
238  : coeffs_[left_position];
239 #ifdef DEBUG_FILTERING
240 
241  std::cout << " help_x-1 " << *(help_x - 1) << " distance_in_gaussian " << distance_in_gaussian << std::endl;
242  std::cout << " right_position " << right_position << std::endl;
243  std::cout << " left_position " << left_position << std::endl;
244  std::cout << "coeffs_ at left_position " << coeffs_[left_position] << std::endl;
245  std::cout << "coeffs_ at right_position " << coeffs_[right_position] << std::endl;
246  std::cout << "interpolated value right " << coeffs_left << std::endl;
247 
248  std::cout << " intensity " << fabs(*(help_x - 1) - (*help_x)) / 2. << " * " << *(help_y - 1) << " * " << coeffs_left << " + " << *help_y << "* " << coeffs_right
249  << std::endl;
250 #endif
251 
252 
253  norm += fabs((*(help_x - 1)) - (*help_x)) / 2. * (coeffs_left + coeffs_right);
254 
255  v += fabs((*(help_x - 1)) - (*help_x)) / 2. * (*(help_y - 1) * coeffs_left + (*help_y) * coeffs_right);
256  --help_x;
257  --help_y;
258  }
259 
260 
261  //integrate from middle to end_pos
262  help_x = x;
263  help_y = y;
264 #ifdef DEBUG_FILTERING
265 
266  std::cout << "integrate from middle to endpos " << *help_x << " until " << end_pos << std::endl;
267 #endif
268 
269  while ((help_x != (last - 1)) && (*(help_x + 1) < end_pos))
270  {
271  // search for the corresponding datapoint for help in the gaussian (take the left most adjacent point)
272  double distance_in_gaussian = fabs((*x) - (*help_x));
273  int left_position = (UInt)floor(distance_in_gaussian / spacing_);
274 
275  // search for the true left adjacent data point (because of rounding errors)
276  for (int j = 0; ((j < 3) && (distance(help_x + j, last - 1) >= 0)); ++j)
277  {
278  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
279  {
280  left_position -= j;
281  break;
282  }
283 
284  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
285  {
286  left_position += j;
287  break;
288  }
289  }
290  // start the interpolation for the true value in the gaussian
291  Size right_position = left_position + 1;
292  double d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
293  double coeffs_left = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
294  : coeffs_[left_position];
295 
296 #ifdef DEBUG_FILTERING
297 
298  std::cout << " help " << *help_x << " distance_in_gaussian " << distance_in_gaussian << std::endl;
299  std::cout << " left_position " << left_position << std::endl;
300  std::cout << "coeffs_ at right_position " << coeffs_[left_position] << std::endl;
301  std::cout << "coeffs_ at left_position " << coeffs_[right_position] << std::endl;
302  std::cout << "interpolated value left " << coeffs_left << std::endl;
303 #endif
304 
305  // search for the corresponding datapoint for (help+1) in the gaussian (take the left most adjacent point)
306  distance_in_gaussian = fabs((*x) - (*(help_x + 1)));
307  left_position = (UInt)floor(distance_in_gaussian / spacing_);
308 
309  // search for the true left adjacent data point (because of rounding errors)
310  for (int j = 0; ((j < 3) && (distance(help_x + j, last - 1) >= 0)); ++j)
311  {
312  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
313  {
314  left_position -= j;
315  break;
316  }
317 
318  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
319  {
320  left_position += j;
321  break;
322  }
323  }
324 
325  // start the interpolation for the true value in the gaussian
326  right_position = left_position + 1;
327  d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
328  double coeffs_right = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
329  : coeffs_[left_position];
330 #ifdef DEBUG_FILTERING
331 
332  std::cout << " (help + 1) " << *(help_x + 1) << " distance_in_gaussian " << distance_in_gaussian << std::endl;
333  std::cout << " left_position " << left_position << std::endl;
334  std::cout << "coeffs_ at right_position " << coeffs_[left_position] << std::endl;
335  std::cout << "coeffs_ at left_position " << coeffs_[right_position] << std::endl;
336  std::cout << "interpolated value right " << coeffs_right << std::endl;
337 
338  std::cout << " intensity " << fabs(*help_x - *(help_x + 1)) / 2.
339  << " * " << *help_y << " * " << coeffs_left << " + " << *(help_y + 1)
340  << "* " << coeffs_right
341  << std::endl;
342 #endif
343  norm += fabs((*help_x) - (*(help_x + 1)) ) / 2. * (coeffs_left + coeffs_right);
344 
345  v += fabs((*help_x) - (*(help_x + 1)) ) / 2. * ((*help_y) * coeffs_left + (*(help_y + 1)) * coeffs_right);
346  ++help_x;
347  ++help_y;
348  }
349 
350  if (v > 0)
351  {
352  return v / norm;
353  }
354  else
355  {
356  return 0;
357  }
358  }
359 
360  };
361 
362 } // namespace OpenMS
This class represents a Gaussian lowpass-filter which works on uniform as well as on non-uniform prof...
Definition: GaussFilterAlgorithm.h:46
std::vector< double > coeffs_
Coefficients.
Definition: GaussFilterAlgorithm.h:145
GaussFilterAlgorithm()
Constructor.
void initialize(double gaussian_width, double spacing, double ppm_tolerance, bool use_ppm_tolerance)
double sigma_
The standard derivation .
Definition: GaussFilterAlgorithm.h:147
bool filter(ConstIterT mz_in_start, ConstIterT mz_in_end, ConstIterT int_in_start, IterT mz_out, IterT int_out)
Smoothes two data arrays.
Definition: GaussFilterAlgorithm.h:108
double integrate_(InputPeakIterator x, InputPeakIterator y, InputPeakIterator first, InputPeakIterator last)
Computes the convolution of the raw data at position x and the gaussian kernel.
Definition: GaussFilterAlgorithm.h:157
double spacing_
The spacing of the pre-tabulated kernel coefficients.
Definition: GaussFilterAlgorithm.h:149
double ppm_tolerance_
Definition: GaussFilterAlgorithm.h:153
virtual ~GaussFilterAlgorithm()
Destructor.
bool use_ppm_tolerance_
Definition: GaussFilterAlgorithm.h:152
bool filter(OpenMS::Interfaces::SpectrumPtr spectrum)
Smoothes an Spectrum containing profile data.
Definition: GaussFilterAlgorithm.h:57
bool filter(OpenMS::Interfaces::ChromatogramPtr chromatogram)
Smoothes an Chromatogram containing profile data.
Definition: GaussFilterAlgorithm.h:81
unsigned int UInt
Unsigned integer type.
Definition: Types.h:68
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
boost::shared_ptr< Chromatogram > ChromatogramPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:130
boost::shared_ptr< BinaryDataArray > BinaryDataArrayPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:54
boost::shared_ptr< Spectrum > SpectrumPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:210
The datastructures used by the OpenSwath interfaces.
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:47
T ppmToMass(T ppm, T mz_ref)
Compute the mass diff in [Th], given a ppm value and a reference point.
Definition: MathFunctions.h:309
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
double norm(T beg, T end)
compute the Euclidean norm of the vector
Definition: StatsHelpers.h:31