OpenMS  2.7.0
SpectraMerger.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Chris Bielow, Andreas Bertsch, Lars Nilse $
33 // --------------------------------------------------------------------------
34 //
35 #pragma once
36 
49 #include <vector>
50 
51 namespace OpenMS
52 {
53 
62  class OPENMS_DLLAPI SpectraMerger :
63  public DefaultParamHandler, public ProgressLogger
64  {
65 
66 protected:
67 
68  /* Determine distance between two spectra
69 
70  Distance is determined as
71 
72  (d_rt/rt_max_ + d_mz/mz_max_) / 2
73 
74  */
76  public DefaultParamHandler
77  {
78 public:
80  DefaultParamHandler("SpectraDistance")
81  {
82  defaults_.setValue("rt_tolerance", 10.0, "Maximal RT distance (in [s]) for two spectra's precursors.");
83  defaults_.setValue("mz_tolerance", 1.0, "Maximal m/z distance (in Da) for two spectra's precursors.");
84  defaultsToParam_(); // calls updateMembers_
85  }
86 
87  void updateMembers_() override
88  {
89  rt_max_ = (double) param_.getValue("rt_tolerance");
90  mz_max_ = (double) param_.getValue("mz_tolerance");
91  }
92 
93  double getSimilarity(const double d_rt, const double d_mz) const
94  {
95  // 1 - distance
96  return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
97  }
98 
99  // measure of SIMILARITY (not distance, i.e. 1-distance)!!
100  double operator()(const BaseFeature& first, const BaseFeature& second) const
101  {
102  // get RT distance:
103  double d_rt = fabs(first.getRT() - second.getRT());
104  double d_mz = fabs(first.getMZ() - second.getMZ());
105 
106  if (d_rt > rt_max_ || d_mz > mz_max_)
107  {
108  return 0;
109  }
110 
111  // calculate similarity (0-1):
112  double sim = getSimilarity(d_rt, d_mz);
113 
114  return sim;
115  }
116 
117 protected:
118  double rt_max_;
119  double mz_max_;
120 
121  }; // end of SpectraDistance
122 
123 public:
124 
127 
130 
131  // @name Constructors and Destructors
132  // @{
135 
137  SpectraMerger(const SpectraMerger& source);
138 
140  ~SpectraMerger() override;
141  // @}
142 
143  // @name Operators
144  // @{
147  // @}
148 
149  // @name Merging functions
150  // @{
152  template <typename MapType>
154  {
155  IntList ms_levels = param_.getValue("block_method:ms_levels");
156  Int rt_block_size(param_.getValue("block_method:rt_block_size"));
157  double rt_max_length = (param_.getValue("block_method:rt_max_length"));
158 
159  if (rt_max_length == 0) // no rt restriction set?
160  {
161  rt_max_length = (std::numeric_limits<double>::max)(); // set max rt span to very large value
162  }
163 
164  for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
165  {
166  MergeBlocks spectra_to_merge;
167  Size idx_block(0);
168  SignedSize block_size_count(rt_block_size + 1);
169  Size idx_spectrum(0);
170  for (typename MapType::const_iterator it1 = exp.begin(); it1 != exp.end(); ++it1)
171  {
172  if (Int(it1->getMSLevel()) == *it_mslevel)
173  {
174  // block full if it contains a maximum number of scans or if maximum rt length spanned
175  if (++block_size_count >= rt_block_size ||
176  exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
177  {
178  block_size_count = 0;
179  idx_block = idx_spectrum;
180  }
181  else
182  {
183  spectra_to_merge[idx_block].push_back(idx_spectrum);
184  }
185  }
186 
187  ++idx_spectrum;
188  }
189  // check if last block had sacrifice spectra
190  if (block_size_count == 0) //block just got initialized
191  {
192  spectra_to_merge[idx_block] = std::vector<Size>();
193  }
194 
195  // merge spectra, remove all old MS spectra and add new consensus spectra
196  mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
197  }
198 
199  exp.sortSpectra();
200  }
201 
203  template <typename MapType>
205  {
206 
207  // convert spectra's precursors to clusterizable data
208  Size data_size;
209  std::vector<BinaryTreeNode> tree;
210  Map<Size, Size> index_mapping;
211  // local scope to save memory - we do not need the clustering stuff later
212  {
213  std::vector<BaseFeature> data;
214 
215  for (Size i = 0; i < exp.size(); ++i)
216  {
217  if (exp[i].getMSLevel() != 2)
218  {
219  continue;
220  }
221 
222  // remember which index in distance data ==> experiment index
223  index_mapping[data.size()] = i;
224 
225  // make cluster element
226  BaseFeature bf;
227  bf.setRT(exp[i].getRT());
228  std::vector<Precursor> pcs = exp[i].getPrecursors();
229  if (pcs.empty())
230  {
231  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Scan #") + String(i) + " does not contain any precursor information! Unable to cluster!");
232  }
233  if (pcs.size() > 1)
234  {
235  OPENMS_LOG_WARN << "More than one precursor found. Using first one!" << std::endl;
236  }
237  bf.setMZ(pcs[0].getMZ());
238  data.push_back(bf);
239  }
240  data_size = data.size();
241 
242  SpectraDistance_ llc;
243  llc.setParameters(param_.copy("precursor_method:", true));
244  SingleLinkage sl;
245  DistanceMatrix<float> dist; // will be filled
247 
248  //ch.setThreshold(0.99);
249  // clustering ; threshold is implicitly at 1.0, i.e. distances of 1.0 (== similarity 0) will not be clustered
250  ch.cluster<BaseFeature, SpectraDistance_>(data, llc, sl, tree, dist);
251  }
252 
253  // extract the clusters
254  ClusterAnalyzer ca;
255  std::vector<std::vector<Size> > clusters;
256  // count number of real tree nodes (not the -1 ones):
257  Size node_count = 0;
258  for (Size ii = 0; ii < tree.size(); ++ii)
259  {
260  if (tree[ii].distance >= 1)
261  {
262  tree[ii].distance = -1; // manually set to disconnect, as SingleLinkage does not support it
263  }
264  if (tree[ii].distance != -1)
265  {
266  ++node_count;
267  }
268  }
269  ca.cut(data_size - node_count, tree, clusters);
270 
271  //std::cerr << "Treesize: " << (tree.size()+1) << " #clusters: " << clusters.size() << std::endl;
272  //std::cerr << "tree:\n" << ca.newickTree(tree, true) << "\n";
273 
274  // convert to blocks
275  MergeBlocks spectra_to_merge;
276 
277  for (Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
278  {
279  if (clusters[i_outer].size() <= 1)
280  {
281  continue;
282  }
283  // init block with first cluster element
284  Size cl_index0 = clusters[i_outer][0];
285  spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
286  // add all other elements
287  for (Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
288  {
289  Size cl_index = clusters[i_outer][i_inner];
290  spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[cl_index]);
291  }
292  }
293 
294  // do it
295  mergeSpectra_(exp, spectra_to_merge, 2);
296 
297  exp.sortSpectra();
298  }
299 
306  template <typename MapType>
307  void average(MapType& exp, const String& average_type)
308  {
309  // MS level to be averaged
310  int ms_level = param_.getValue("average_gaussian:ms_level");
311  if (average_type == "tophat")
312  {
313  ms_level = param_.getValue("average_tophat:ms_level");
314  }
315 
316  // spectrum type (profile, centroid or automatic)
317  std::string spectrum_type = param_.getValue("average_gaussian:spectrum_type");
318  if (average_type == "tophat")
319  {
320  spectrum_type = std::string(param_.getValue("average_tophat:spectrum_type"));
321  }
322 
323  // parameters for Gaussian averaging
324  double fwhm(param_.getValue("average_gaussian:rt_FWHM"));
325  double factor = -4 * log(2.0) / (fwhm * fwhm); // numerical factor within Gaussian
326  double cutoff(param_.getValue("average_gaussian:cutoff"));
327 
328  // parameters for Top-Hat averaging
329  bool unit(param_.getValue("average_tophat:rt_unit") == "scans"); // true if RT unit is 'scans', false if RT unit is 'seconds'
330  double range(param_.getValue("average_tophat:rt_range")); // range of spectra to be averaged over
331  double range_seconds = range / 2; // max. +/- <range_seconds> seconds from master spectrum
332  int range_scans = static_cast<int>(range); // in case of unit scans, the param is used as integer
333  if ((range_scans % 2) == 0)
334  {
335  ++range_scans;
336  }
337  range_scans = (range_scans - 1) / 2; // max. +/- <range_scans> scans from master spectrum
338 
339  AverageBlocks spectra_to_average_over;
340 
341  // loop over RT
342  int n(0); // spectrum index
343  for (typename MapType::const_iterator it_rt = exp.begin(); it_rt != exp.end(); ++it_rt)
344  {
345  if (Int(it_rt->getMSLevel()) == ms_level)
346  {
347  int m; // spectrum index
348  int steps;
349  bool terminate_now;
350  typename MapType::const_iterator it_rt_2;
351 
352  // go forward (start at next downstream spectrum; the current spectrum will be covered when looking backwards)
353  steps = 0;
354  m = n + 1;
355  it_rt_2 = it_rt + 1;
356  terminate_now = false;
357  while (it_rt_2 != exp.end() && !terminate_now)
358  {
359  if (Int(it_rt_2->getMSLevel()) == ms_level)
360  {
361  double weight = 1;
362  if (average_type == "gaussian")
363  {
364  weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
365  }
366  std::pair<Size, double> p(m, weight);
367  spectra_to_average_over[n].push_back(p);
368  ++steps;
369  }
370  if (average_type == "gaussian")
371  {
372  // Gaussian
373  terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
374  }
375  else if (unit)
376  {
377  // Top-Hat with RT unit = scans
378  terminate_now = (steps > range_scans);
379  }
380  else
381  {
382  // Top-Hat with RT unit = seconds
383  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
384  }
385  ++m;
386  ++it_rt_2;
387  }
388 
389  // go backward
390  steps = 0;
391  m = n;
392  it_rt_2 = it_rt;
393  terminate_now = false;
394  while (it_rt_2 != exp.begin() && !terminate_now)
395  {
396  if (Int(it_rt_2->getMSLevel()) == ms_level)
397  {
398  double weight = 1;
399  if (average_type == "gaussian")
400  {
401  weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
402  }
403  std::pair<Size, double> p(m, weight);
404  spectra_to_average_over[n].push_back(p);
405  ++steps;
406  }
407  if (average_type == "gaussian")
408  {
409  // Gaussian
410  terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
411  }
412  else if (unit)
413  {
414  // Top-Hat with RT unit = scans
415  terminate_now = (steps > range_scans);
416  }
417  else
418  {
419  // Top-Hat with RT unit = seconds
420  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
421  }
422  --m;
423  --it_rt_2;
424  }
425 
426  }
427  ++n;
428  }
429 
430  // normalize weights
431  for (AverageBlocks::Iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
432  {
433  double sum(0.0);
434  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
435  {
436  sum += it2->second;
437  }
438 
439  for (std::vector<std::pair<Size, double> >::iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
440  {
441  (*it2).second /= sum;
442  }
443  }
444 
445  // determine type of spectral data (profile or centroided)
447  if (spectrum_type == "automatic")
448  {
449  Size idx = spectra_to_average_over.begin()->first; // index of first spectrum to be averaged
450  type = exp[idx].getType(true);
451  }
452  else if (spectrum_type == "profile")
453  {
455  }
456  else if (spectrum_type == "centroid")
457  {
459  }
460  else
461  {
462  throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION, "Spectrum type has to be one of automatic, profile or centroid.");
463  }
464 
465  // generate new spectra
466  if (type == SpectrumSettings::CENTROID)
467  {
468  averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
469  }
470  else
471  {
472  averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
473  }
474 
475  exp.sortSpectra();
476  }
477 
478  // @}
479 
480 protected:
481 
492  template <typename MapType>
493  void mergeSpectra_(MapType& exp, const MergeBlocks& spectra_to_merge, const UInt ms_level)
494  {
495  double mz_binning_width(param_.getValue("mz_binning_width"));
496  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
497 
498  // merge spectra
499  MapType merged_spectra;
500 
501  Map<Size, Size> cluster_sizes;
502  std::set<Size> merged_indices;
503 
504  // set up alignment
505  SpectrumAlignment sas;
506  Param p;
507  p.setValue("tolerance", mz_binning_width);
508  if (!(mz_binning_unit == "Da" || mz_binning_unit == "ppm"))
509  {
510  throw Exception::IllegalSelfOperation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION); // sanity check
511  }
512 
513  p.setValue("is_relative_tolerance", mz_binning_unit == "Da" ? "false" : "true");
514  sas.setParameters(p);
515  std::vector<std::pair<Size, Size> > alignment;
516 
517  Size count_peaks_aligned(0);
518  Size count_peaks_overall(0);
519 
520  // each BLOCK
521  for (auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
522  {
523  ++cluster_sizes[it->second.size() + 1]; // for stats
524 
525  typename MapType::SpectrumType consensus_spec = exp[it->first];
526  consensus_spec.setMSLevel(ms_level);
527 
528  //consensus_spec.unify(exp[it->first]); // append meta info
529  merged_indices.insert(it->first);
530 
531  //typename MapType::SpectrumType all_peaks = exp[it->first];
532  double rt_average = consensus_spec.getRT();
533  double precursor_mz_average = 0.0;
534  Size precursor_count(0);
535  if (!consensus_spec.getPrecursors().empty())
536  {
537  precursor_mz_average = consensus_spec.getPrecursors()[0].getMZ();
538  ++precursor_count;
539  }
540 
541  count_peaks_overall += consensus_spec.size();
542 
543  // block elements
544  for (auto sit = it->second.begin(); sit != it->second.end(); ++sit)
545  {
546  consensus_spec.unify(exp[*sit]); // append meta info
547  merged_indices.insert(*sit);
548 
549  rt_average += exp[*sit].getRT();
550  if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
551  {
552  precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
553  ++precursor_count;
554  }
555 
556  // merge data points
557  sas.getSpectrumAlignment(alignment, consensus_spec, exp[*sit]);
558  //std::cerr << "alignment of " << it->first << " with " << *sit << " yielded " << alignment.size() << " common peaks!\n";
559  count_peaks_aligned += alignment.size();
560  count_peaks_overall += exp[*sit].size();
561 
562  Size align_index(0);
563  Size spec_b_index(0);
564 
565  // sanity check for number of peaks
566  Size spec_a = consensus_spec.size(), spec_b = exp[*sit].size(), align_size = alignment.size();
567  for (auto pit = exp[*sit].begin(); pit != exp[*sit].end(); ++pit)
568  {
569  if (alignment.size() == 0 || alignment[align_index].second != spec_b_index)
570  // ... add unaligned peak
571  {
572  consensus_spec.push_back(*pit);
573  }
574  // or add aligned peak height to ALL corresponding existing peaks
575  else
576  {
577  Size counter(0);
578  Size copy_of_align_index(align_index);
579 
580  while (alignment.size() > 0 &&
581  copy_of_align_index < alignment.size() &&
582  alignment[copy_of_align_index].second == spec_b_index)
583  {
584  ++copy_of_align_index;
585  ++counter;
586  } // Count the number of peaks in a which correspond to a single b peak.
587 
588  while (alignment.size() > 0 &&
589  align_index < alignment.size() &&
590  alignment[align_index].second == spec_b_index)
591  {
592  consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
593  (pit->getIntensity() / (double)counter)); // add the intensity divided by the number of peaks
594  ++align_index; // this aligned peak was explained, wait for next aligned peak ...
595  if (align_index == alignment.size())
596  {
597  alignment.clear(); // end reached -> avoid going into this block again
598  }
599  }
600  align_size = align_size + 1 - counter; //Decrease align_size by number of
601  }
602  ++spec_b_index;
603  }
604  consensus_spec.sortByPosition(); // sort, otherwise next alignment will fail
605  if (spec_a + spec_b - align_size != consensus_spec.size())
606  {
607  OPENMS_LOG_WARN << "wrong number of features after merge. Expected: " << spec_a + spec_b - align_size << " got: " << consensus_spec.size() << "\n";
608  }
609  }
610  rt_average /= it->second.size() + 1;
611  consensus_spec.setRT(rt_average);
612 
613  if (ms_level >= 2)
614  {
615  if (precursor_count)
616  {
617  precursor_mz_average /= precursor_count;
618  }
619  std::vector<Precursor> pcs = consensus_spec.getPrecursors();
620  //if (pcs.size()>1) OPENMS_LOG_WARN << "Removing excessive precursors - leaving only one per MS2 spectrum.\n";
621  pcs.resize(1);
622  pcs[0].setMZ(precursor_mz_average);
623  consensus_spec.setPrecursors(pcs);
624  }
625 
626  if (consensus_spec.empty())
627  {
628  continue;
629  }
630  else
631  {
632  merged_spectra.addSpectrum(consensus_spec);
633  }
634  }
635 
636  OPENMS_LOG_INFO << "Cluster sizes:\n";
637  for (Map<Size, Size>::const_iterator it = cluster_sizes.begin(); it != cluster_sizes.end(); ++it)
638  {
639  OPENMS_LOG_INFO << " size " << it->first << ": " << it->second << "x\n";
640  }
641 
642  char buffer[200];
643  sprintf(buffer, "%d/%d (%.2f %%) of blocked spectra", (int)count_peaks_aligned,
644  (int)count_peaks_overall, float(count_peaks_aligned) / float(count_peaks_overall) * 100.);
645  OPENMS_LOG_INFO << "Number of merged peaks: " << String(buffer) << "\n";
646 
647  // remove all spectra that were within a cluster
648  typename MapType::SpectrumType empty_spec;
649  MapType exp_tmp;
650  for (Size i = 0; i < exp.size(); ++i)
651  {
652  if (merged_indices.count(i) == 0) // save unclustered ones
653  {
654  exp_tmp.addSpectrum(exp[i]);
655  exp[i] = empty_spec;
656  }
657  }
658 
659  //typedef std::vector<typename MapType::SpectrumType> Base;
660  //exp.Base::operator=(exp_tmp);
661  exp.clear(false);
662  exp.getSpectra().insert(exp.end(), exp_tmp.begin(), exp_tmp.end());
663 
664  // exp.erase(remove_if(exp.begin(), exp.end(), InMSLevelRange<typename MapType::SpectrumType>(ListUtils::create<int>(String(ms_level)), false)), exp.end());
665 
666  // ... and add consensus spectra
667  exp.getSpectra().insert(exp.end(), merged_spectra.begin(), merged_spectra.end());
668 
669  }
670 
691  template <typename MapType>
692  void averageProfileSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
693  {
694  MapType exp_tmp; // temporary experiment for averaged spectra
695 
696  double mz_binning_width(param_.getValue("mz_binning_width"));
697  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
698 
699  unsigned progress = 0;
700  std::stringstream progress_message;
701  progress_message << "averaging profile spectra of MS level " << ms_level;
702  startProgress(0, spectra_to_average_over.size(), progress_message.str());
703 
704  // loop over blocks
705  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
706  {
707  setProgress(++progress);
708 
709  // loop over spectra in blocks
710  std::vector<double> mz_positions_all; // m/z positions from all spectra
711  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
712  {
713  // loop over m/z positions
714  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[it2->first].begin(); it_mz < exp[it2->first].end(); ++it_mz)
715  {
716  mz_positions_all.push_back(it_mz->getMZ());
717  }
718  }
719 
720  sort(mz_positions_all.begin(), mz_positions_all.end());
721 
722  std::vector<double> mz_positions; // positions at which the averaged spectrum should be evaluated
723  std::vector<double> intensities;
724  double last_mz = std::numeric_limits<double>::min(); // last m/z position pushed through from mz_position to mz_position_2
725  double delta_mz(mz_binning_width); // for m/z unit Da
726  for (std::vector<double>::iterator it_mz = mz_positions_all.begin(); it_mz < mz_positions_all.end(); ++it_mz)
727  {
728  if (mz_binning_unit == "ppm")
729  {
730  delta_mz = mz_binning_width * (*it_mz) / 1000000;
731  }
732 
733  if (((*it_mz) - last_mz) > delta_mz)
734  {
735  mz_positions.push_back(*it_mz);
736  intensities.push_back(0.0);
737  last_mz = *it_mz;
738  }
739  }
740 
741  // loop over spectra in blocks
742  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
743  {
744  SplineInterpolatedPeaks spline(exp[it2->first]);
746 
747  // loop over m/z positions
748  for (Size i = 0; i < mz_positions.size(); ++i)
749  {
750  if ((spline.getPosMin() < mz_positions[i]) && (mz_positions[i] < spline.getPosMax()))
751  {
752  intensities[i] += nav.eval(mz_positions[i]) * (it2->second); // spline-interpolated intensity * weight
753  }
754  }
755  }
756 
757  // update spectrum
758  typename MapType::SpectrumType average_spec = exp[it->first];
759  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
760  //average_spec.setMSLevel(ms_level);
761 
762  // refill spectrum
763  for (Size i = 0; i < mz_positions.size(); ++i)
764  {
765  typename MapType::PeakType peak;
766  peak.setMZ(mz_positions[i]);
767  peak.setIntensity(intensities[i]);
768  average_spec.push_back(peak);
769  }
770 
771  // store spectrum temporarily
772  exp_tmp.addSpectrum(average_spec);
773  }
774 
775  endProgress();
776 
777  // loop over blocks
778  int n(0);
779  //typename MapType::SpectrumType empty_spec;
780  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
781  {
782  exp[it->first] = exp_tmp[n];
783  //exp_tmp[n] = empty_spec;
784  ++n;
785  }
786 
787  }
788 
804  template <typename MapType>
805  void averageCentroidSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
806  {
807  MapType exp_tmp; // temporary experiment for averaged spectra
808 
809  double mz_binning_width(param_.getValue("mz_binning_width"));
810  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
811 
812  unsigned progress = 0;
813  ProgressLogger logger;
814  std::stringstream progress_message;
815  progress_message << "averaging centroid spectra of MS level " << ms_level;
816  logger.startProgress(0, spectra_to_average_over.size(), progress_message.str());
817 
818  // loop over blocks
819  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
820  {
821  logger.setProgress(++progress);
822 
823  // collect peaks from all spectra
824  // loop over spectra in blocks
825  std::vector<std::pair<double, double> > mz_intensity_all; // m/z positions and peak intensities from all spectra
826  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
827  {
828  // loop over m/z positions
829  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[it2->first].begin(); it_mz < exp[it2->first].end(); ++it_mz)
830  {
831  std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * it2->second)); // m/z, intensity * weight
832  mz_intensity_all.push_back(mz_intensity);
833  }
834  }
835 
836  sort(mz_intensity_all.begin(), mz_intensity_all.end(), SpectraMerger::compareByFirst);
837 
838  // generate new spectrum
839  std::vector<double> mz_new;
840  std::vector<double> intensity_new;
841  double last_mz = std::numeric_limits<double>::min();
842  double delta_mz = mz_binning_width;
843  double sum_mz(0);
844  double sum_intensity(0);
845  Size count(0);
846  for (std::vector<std::pair<double, double> >::const_iterator it_mz = mz_intensity_all.begin(); it_mz != mz_intensity_all.end(); ++it_mz)
847  {
848  if (mz_binning_unit == "ppm")
849  {
850  delta_mz = mz_binning_width * (it_mz->first) / 1000000;
851  }
852 
853  if (((it_mz->first - last_mz) > delta_mz) && (count > 0))
854  {
855  mz_new.push_back(sum_mz / count);
856  intensity_new.push_back(sum_intensity); // intensities already weighted
857 
858  sum_mz = 0;
859  sum_intensity = 0;
860 
861  last_mz = it_mz->first;
862  count = 0;
863  }
864 
865  sum_mz += it_mz->first;
866  sum_intensity += it_mz->second;
867  ++count;
868  }
869  if (count > 0)
870  {
871  mz_new.push_back(sum_mz / count);
872  intensity_new.push_back(sum_intensity); // intensities already weighted
873  }
874 
875  // update spectrum
876  typename MapType::SpectrumType average_spec = exp[it->first];
877  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
878  //average_spec.setMSLevel(ms_level);
879 
880  // refill spectrum
881  for (Size i = 0; i < mz_new.size(); ++i)
882  {
883  typename MapType::PeakType peak;
884  peak.setMZ(mz_new[i]);
885  peak.setIntensity(intensity_new[i]);
886  average_spec.push_back(peak);
887  }
888 
889  // store spectrum temporarily
890  exp_tmp.addSpectrum(average_spec);
891 
892  }
893 
894  logger.endProgress();
895 
896  // loop over blocks
897  int n(0);
898  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
899  {
900  exp[it->first] = exp_tmp[n];
901  ++n;
902  }
903 
904  }
905 
909  bool static compareByFirst(std::pair<double, double> i, std::pair<double, double> j)
910  {
911  return i.first < j.first;
912  }
913 
914  };
915 
916 }
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
#define OPENMS_LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:465
A basic LC-MS feature.
Definition: BaseFeature.h:58
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition: ClusterAnalyzer.h:52
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Method to calculate a partition resulting from a certain step in clustering given by the number of cl...
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:64
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:112
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
void setParameters(const Param &param)
Sets the parameters.
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:68
Illegal self operation exception.
Definition: Exception.h:372
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:341
Not all required information provided.
Definition: Exception.h:189
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:80
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Iterator begin()
Definition: MSExperiment.h:157
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Iterator end()
Definition: MSExperiment.h:167
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Size size() const
Definition: MSExperiment.h:127
Base::const_iterator const_iterator
Definition: MSExperiment.h:125
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:113
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:71
double getRT() const
void setMSLevel(UInt ms_level)
Sets the MS level.
void sortByPosition()
Lexicographically sorts the peaks by their position.
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:52
Base::const_iterator ConstIterator
Definition: Map.h:81
Base::iterator Iterator
Definition: Map.h:80
Management and storage of parameters / INI files.
Definition: Param.h:70
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:104
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:113
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:196
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:202
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:214
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:208
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
void setProgress(SignedSize value) const
Sets the current progress.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress() const
Ends the progress display.
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:59
Definition: SpectraMerger.h:77
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:93
SpectraDistance_()
Definition: SpectraMerger.h:79
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:100
double mz_max_
Definition: SpectraMerger.h:119
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition: SpectraMerger.h:87
double rt_max_
Definition: SpectraMerger.h:118
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:64
static bool compareByFirst(std::pair< double, double > i, std::pair< double, double > j)
comparator for sorting peaks (m/z, intensity)
Definition: SpectraMerger.h:909
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition: SpectraMerger.h:805
SpectraMerger(const SpectraMerger &source)
copy constructor
SpectraMerger()
default constructor
Map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition: SpectraMerger.h:126
~SpectraMerger() override
destructor
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:204
Map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition: SpectraMerger.h:129
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition: SpectraMerger.h:692
SpectraMerger & operator=(const SpectraMerger &source)
assignment operator
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:493
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:153
void average(MapType &exp, const String &average_type)
average over neighbouring spectra
Definition: SpectraMerger.h:307
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition: SpectrumAlignment.h:69
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition: SpectrumAlignment.h:88
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:71
@ PROFILE
profile data
Definition: SpectrumSettings.h:74
@ CENTROID
centroid data or stick data
Definition: SpectrumSettings.h:73
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
iterator class for access of spline packages
Definition: SplineInterpolatedPeaks.h:112
double eval(double pos)
returns spline interpolated intensity at this position (fast access since we can start search from la...
Data structure for spline interpolation of MS1 spectra and chromatograms.
Definition: SplineInterpolatedPeaks.h:62
SplineInterpolatedPeaks::Navigator getNavigator(double scaling=0.7)
returns an iterator for access of spline packages
double getPosMax() const
returns the maximum m/z (or RT) of the spectrum
double getPosMin() const
returns the minimum m/z (or RT) of the spectrum
A more convenient string class.
Definition: String.h:61
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:120
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47