OpenMS
SpectraMerger.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Chris Bielow $
6 // $Authors: Chris Bielow, Andreas Bertsch, Lars Nilse $
7 // --------------------------------------------------------------------------
8 //
9 #pragma once
10 
23 
24 #include <vector>
25 
26 namespace OpenMS
27 {
28 
37  class OPENMS_DLLAPI SpectraMerger :
38  public DefaultParamHandler, public ProgressLogger
39  {
40 
41 protected:
42 
43  /* Determine distance between two spectra
44 
45  Distance is determined as
46 
47  (d_rt/rt_max_ + d_mz/mz_max_) / 2
48 
49  */
51  public DefaultParamHandler
52  {
53 public:
55  DefaultParamHandler("SpectraDistance")
56  {
57  defaults_.setValue("rt_tolerance", 10.0, "Maximal RT distance (in [s]) for two spectra's precursors.");
58  defaults_.setValue("mz_tolerance", 1.0, "Maximal m/z distance (in Da) for two spectra's precursors.");
59  defaultsToParam_(); // calls updateMembers_
60  }
61 
62  void updateMembers_() override
63  {
64  rt_max_ = (double) param_.getValue("rt_tolerance");
65  mz_max_ = (double) param_.getValue("mz_tolerance");
66  }
67 
68  double getSimilarity(const double d_rt, const double d_mz) const
69  {
70  // 1 - distance
71  return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
72  }
73 
74  // measure of SIMILARITY (not distance, i.e. 1-distance)!!
75  double operator()(const BaseFeature& first, const BaseFeature& second) const
76  {
77  // get RT distance:
78  double d_rt = fabs(first.getRT() - second.getRT());
79  double d_mz = fabs(first.getMZ() - second.getMZ());
80 
81  if (d_rt > rt_max_ || d_mz > mz_max_)
82  {
83  return 0;
84  }
85 
86  // calculate similarity (0-1):
87  double sim = getSimilarity(d_rt, d_mz);
88 
89  return sim;
90  }
91 
92 protected:
93  double rt_max_;
94  double mz_max_;
95 
96  }; // end of SpectraDistance
97 
98 public:
99 
101  typedef std::map<Size, std::vector<Size> > MergeBlocks;
102 
104  typedef std::map<Size, std::vector<std::pair<Size, double> > > AverageBlocks;
105 
106  // @name Constructors and Destructors
107  // @{
110 
112  SpectraMerger(const SpectraMerger& source);
113 
115  SpectraMerger(SpectraMerger&& source) = default;
116 
118  ~SpectraMerger() override;
119  // @}
120 
121  // @name Operators
122  // @{
125 
127  SpectraMerger& operator=(SpectraMerger&& source) = default;
128  // @}
129 
130  // @name Merging functions
131  // @{
133  template <typename MapType>
135  {
136  IntList ms_levels = param_.getValue("block_method:ms_levels");
137  Int rt_block_size(param_.getValue("block_method:rt_block_size"));
138  double rt_max_length = (param_.getValue("block_method:rt_max_length"));
139 
140  if (rt_max_length == 0) // no rt restriction set?
141  {
142  rt_max_length = (std::numeric_limits<double>::max)(); // set max rt span to very large value
143  }
144 
145  for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
146  {
147  MergeBlocks spectra_to_merge;
148  Size idx_block(0);
149  SignedSize block_size_count(rt_block_size + 1);
150  Size idx_spectrum(0);
151  for (typename MapType::const_iterator it1 = exp.begin(); it1 != exp.end(); ++it1)
152  {
153  if (Int(it1->getMSLevel()) == *it_mslevel)
154  {
155  // block full if it contains a maximum number of scans or if maximum rt length spanned
156  if (++block_size_count >= rt_block_size ||
157  exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
158  {
159  block_size_count = 0;
160  idx_block = idx_spectrum;
161  }
162  else
163  {
164  spectra_to_merge[idx_block].push_back(idx_spectrum);
165  }
166  }
167 
168  ++idx_spectrum;
169  }
170  // check if last block had sacrifice spectra
171  if (block_size_count == 0) //block just got initialized
172  {
173  spectra_to_merge[idx_block] = std::vector<Size>();
174  }
175 
176  // merge spectra, remove all old MS spectra and add new consensus spectra
177  mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
178  }
179 
180  exp.sortSpectra();
181  }
182 
184  template <typename MapType>
186  {
187 
188  // convert spectra's precursors to clusterizable data
189  Size data_size;
190  std::vector<BinaryTreeNode> tree;
191  std::map<Size, Size> index_mapping;
192  // local scope to save memory - we do not need the clustering stuff later
193  {
194  std::vector<BaseFeature> data;
195 
196  for (Size i = 0; i < exp.size(); ++i)
197  {
198  if (exp[i].getMSLevel() != 2)
199  {
200  continue;
201  }
202 
203  // remember which index in distance data ==> experiment index
204  index_mapping[data.size()] = i;
205 
206  // make cluster element
207  BaseFeature bf;
208  bf.setRT(exp[i].getRT());
209  const auto& pcs = exp[i].getPrecursors();
210  // keep the first Precursor
211  if (pcs.empty())
212  {
213  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Scan #") + String(i) + " does not contain any precursor information! Unable to cluster!");
214  }
215  if (pcs.size() > 1)
216  {
217  OPENMS_LOG_WARN << "More than one precursor found. Using first one!" << std::endl;
218  }
219  bf.setMZ(pcs[0].getMZ());
220  data.push_back(bf);
221  }
222 
223  data_size = data.size();
224  SpectraDistance_ llc;
225  llc.setParameters(param_.copy("precursor_method:", true));
226  SingleLinkage sl;
227  DistanceMatrix<float> dist; // will be filled
229 
230  // clustering ; threshold is implicitly at 1.0, i.e. distances of 1.0 (== similarity 0) will not be clustered
231  ch.cluster<BaseFeature, SpectraDistance_>(data, llc, sl, tree, dist);
232  }
233 
234  // extract the clusters
235  ClusterAnalyzer ca;
236  std::vector<std::vector<Size> > clusters;
237  // count number of real tree nodes (not the -1 ones):
238  Size node_count = 0;
239  for (Size ii = 0; ii < tree.size(); ++ii)
240  {
241  if (tree[ii].distance >= 1)
242  {
243  tree[ii].distance = -1; // manually set to disconnect, as SingleLinkage does not support it
244  }
245  if (tree[ii].distance != -1)
246  {
247  ++node_count;
248  }
249  }
250  ca.cut(data_size - node_count, tree, clusters);
251 
252  // convert to blocks
253  MergeBlocks spectra_to_merge;
254 
255  for (Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
256  {
257  if (clusters[i_outer].size() <= 1)
258  {
259  continue;
260  }
261  // init block with first cluster element
262  Size cl_index0 = clusters[i_outer][0];
263  spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
264  // add all other elements
265  for (Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
266  {
267  spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[clusters[i_outer][i_inner]]);
268  }
269  }
270 
271  // do it
272  mergeSpectra_(exp, spectra_to_merge, 2);
273 
274  exp.sortSpectra();
275  }
276 
277 
285  template <typename MapType>
286  void average(MapType& exp, const String& average_type, int ms_level = -1)
287  {
288  // MS level to be averaged
289  if (ms_level < 0)
290  {
291  ms_level = param_.getValue("average_gaussian:ms_level");
292  if (average_type == "tophat")
293  {
294  ms_level = param_.getValue("average_tophat:ms_level");
295  }
296  }
297 
298  // spectrum type (profile, centroid or automatic)
299  std::string spectrum_type = param_.getValue("average_gaussian:spectrum_type");
300  if (average_type == "tophat")
301  {
302  spectrum_type = std::string(param_.getValue("average_tophat:spectrum_type"));
303  }
304 
305  // parameters for Gaussian averaging
306  double fwhm(param_.getValue("average_gaussian:rt_FWHM"));
307  double factor = -4 * log(2.0) / (fwhm * fwhm); // numerical factor within Gaussian
308  double cutoff(param_.getValue("average_gaussian:cutoff"));
309  //double precursor_mass_ppm = param_.getValue("average_gaussian:precursor_mass_tol");
310  //int precursor_max_charge = param_.getValue("average_gaussian:precursor_max_charge");
311 
312  // parameters for Top-Hat averaging
313  bool unit(param_.getValue("average_tophat:rt_unit") == "scans"); // true if RT unit is 'scans', false if RT unit is 'seconds'
314  double range(param_.getValue("average_tophat:rt_range")); // range of spectra to be averaged over
315  double range_seconds = range / 2; // max. +/- <range_seconds> seconds from master spectrum
316  int range_scans = static_cast<int>(range); // in case of unit scans, the param is used as integer
317  if ((range_scans % 2) == 0)
318  {
319  ++range_scans;
320  }
321  range_scans = (range_scans - 1) / 2; // max. +/- <range_scans> scans from master spectrum
322 
323  AverageBlocks spectra_to_average_over;
324 
325  // loop over RT
326  int n(0); // spectrum index
327  int cntr(0); // spectrum counter
328  for (typename MapType::const_iterator it_rt = exp.begin(); it_rt != exp.end(); ++it_rt)
329  {
330  if (Int(it_rt->getMSLevel()) == ms_level)
331  {
332  int m; // spectrum index
333  int steps;
334  bool terminate_now;
335  typename MapType::const_iterator it_rt_2;
336 
337  // go forward (start at next downstream spectrum; the current spectrum will be covered when looking backwards)
338  steps = 0;
339  m = n + 1;
340  it_rt_2 = it_rt + 1;
341  terminate_now = false;
342  while (it_rt_2 != exp.end() && !terminate_now)
343  {
344  if (Int(it_rt_2->getMSLevel()) == ms_level)
345  {
346  double weight = 1;
347  if (average_type == "gaussian")
348  {
349  //factor * (rt_2 -rt)^2
350  double base = it_rt_2->getRT() - it_rt->getRT();
351  weight = std::exp(factor * base * base);
352  }
353  std::pair<Size, double> p(m, weight);
354  spectra_to_average_over[n].push_back(p);
355 
356  ++steps;
357  }
358  if (average_type == "gaussian")
359  {
360  // Gaussian
361  double base = it_rt_2->getRT() - it_rt->getRT();
362  terminate_now = std::exp(factor * base * base) < cutoff;
363  }
364  else if (unit)
365  {
366  // Top-Hat with RT unit = scans
367  terminate_now = (steps > range_scans);
368  }
369  else
370  {
371  // Top-Hat with RT unit = seconds
372  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
373  }
374  ++m;
375  ++it_rt_2;
376  }
377 
378  // go backward
379  steps = 0;
380  m = n;
381  it_rt_2 = it_rt;
382  terminate_now = false;
383  while (it_rt_2 != exp.begin() && !terminate_now)
384  {
385  if (Int(it_rt_2->getMSLevel()) == ms_level)
386  {
387  double weight = 1;
388  if (average_type == "gaussian")
389  {
390  double base = it_rt_2->getRT() - it_rt->getRT();
391  weight = std::exp(factor * base * base);
392  }
393  std::pair<Size, double> p(m, weight);
394  spectra_to_average_over[n].push_back(p);
395 
396  ++steps;
397  }
398  if (average_type == "gaussian")
399  {
400  // Gaussian
401  double base = it_rt_2->getRT() - it_rt->getRT();
402  terminate_now = std::exp(factor * base * base) < cutoff;
403  }
404  else if (unit)
405  {
406  // Top-Hat with RT unit = scans
407  terminate_now = (steps > range_scans);
408  }
409  else
410  {
411  // Top-Hat with RT unit = seconds
412  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
413  }
414  --m;
415  --it_rt_2;
416  }
417  ++cntr;
418  }
419  ++n;
420  }
421 
422  if (cntr == 0)
423  {
424  //return;
425  throw Exception::InvalidParameter(__FILE__,
426  __LINE__,
427  OPENMS_PRETTY_FUNCTION,
428  "Input mzML does not have any spectra of MS level specified by ms_level.");
429  }
430 
431  // normalize weights
432  for (AverageBlocks::iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
433  {
434  double sum(0.0);
435  for (const auto& weight: it->second)
436  {
437  sum += weight.second;
438  }
439 
440  for (auto& weight: it->second)
441  {
442  weight.second /= sum;
443  }
444  }
445 
446  // determine type of spectral data (profile or centroided)
448  if (spectrum_type == "automatic")
449  {
450  Size idx = spectra_to_average_over.begin()->first; // index of first spectrum to be averaged
451  type = exp[idx].getType(true);
452  }
453  else if (spectrum_type == "profile")
454  {
456  }
457  else if (spectrum_type == "centroid")
458  {
460  }
461  else
462  {
463  throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION, "Spectrum type has to be one of automatic, profile or centroid.");
464  }
465 
466  // generate new spectra
467  if (type == SpectrumSettings::CENTROID)
468  {
469  averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
470  }
471  else
472  {
473  averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
474  }
475 
476  exp.sortSpectra();
477  }
478 
479  // @}
480 
481 protected:
482 
493  template <typename MapType>
494  void mergeSpectra_(MapType& exp, const MergeBlocks& spectra_to_merge, const UInt ms_level)
495  {
496  double mz_binning_width(param_.getValue("mz_binning_width"));
497  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
498 
499  // merge spectra
500  MapType merged_spectra;
501 
502  std::map<Size, Size> cluster_sizes;
503  std::set<Size> merged_indices;
504 
505  // set up alignment
506  SpectrumAlignment sas;
507  Param p;
508  p.setValue("tolerance", mz_binning_width);
509  if (!(mz_binning_unit == "Da" || mz_binning_unit == "ppm"))
510  {
511  throw Exception::IllegalSelfOperation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION); // sanity check
512  }
513 
514  p.setValue("is_relative_tolerance", mz_binning_unit == "Da" ? "false" : "true");
515  sas.setParameters(p);
516  std::vector<std::pair<Size, Size> > alignment;
517 
518  Size count_peaks_aligned(0);
519  Size count_peaks_overall(0);
520 
521  // each BLOCK
522  for (auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
523  {
524  ++cluster_sizes[it->second.size() + 1]; // for stats
525 
526  typename MapType::SpectrumType consensus_spec = exp[it->first];
527  consensus_spec.setMSLevel(ms_level);
528 
529  merged_indices.insert(it->first);
530 
531  double rt_average = consensus_spec.getRT();
532  double precursor_mz_average = 0.0;
533  Size precursor_count(0);
534  if (!consensus_spec.getPrecursors().empty())
535  {
536  precursor_mz_average = consensus_spec.getPrecursors()[0].getMZ();
537  ++precursor_count;
538  }
539 
540  count_peaks_overall += consensus_spec.size();
541 
542  String consensus_native_id = consensus_spec.getNativeID();
543 
544  // block elements
545  for (auto sit = it->second.begin(); sit != it->second.end(); ++sit)
546  {
547  consensus_spec.unify(exp[*sit]); // append meta info
548  merged_indices.insert(*sit);
549 
550  rt_average += exp[*sit].getRT();
551  if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
552  {
553  precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
554  ++precursor_count;
555  }
556 
557  // add native ID to consensus native ID, comma separated
558  consensus_native_id += ",";
559  consensus_native_id += exp[*sit].getNativeID();
560 
561  // merge data points
562  sas.getSpectrumAlignment(alignment, consensus_spec, exp[*sit]);
563  count_peaks_aligned += alignment.size();
564  count_peaks_overall += exp[*sit].size();
565 
566  Size align_index(0);
567  Size spec_b_index(0);
568 
569  // sanity check for number of peaks
570  Size spec_a = consensus_spec.size(), spec_b = exp[*sit].size(), align_size = alignment.size();
571  for (auto pit = exp[*sit].begin(); pit != exp[*sit].end(); ++pit)
572  {
573  if (alignment.empty() || alignment[align_index].second != spec_b_index)
574  // ... add unaligned peak
575  {
576  consensus_spec.push_back(*pit);
577  }
578  // or add aligned peak height to ALL corresponding existing peaks
579  else
580  {
581  Size counter(0);
582  Size copy_of_align_index(align_index);
583 
584  while (!alignment.empty() &&
585  copy_of_align_index < alignment.size() &&
586  alignment[copy_of_align_index].second == spec_b_index)
587  {
588  ++copy_of_align_index;
589  ++counter;
590  } // Count the number of peaks in a which correspond to a single b peak.
591 
592  while (!alignment.empty() &&
593  align_index < alignment.size() &&
594  alignment[align_index].second == spec_b_index)
595  {
596  consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
597  (pit->getIntensity() / (double)counter)); // add the intensity divided by the number of peaks
598  ++align_index; // this aligned peak was explained, wait for next aligned peak ...
599  if (align_index == alignment.size())
600  {
601  alignment.clear(); // end reached -> avoid going into this block again
602  }
603  }
604  align_size = align_size + 1 - counter; //Decrease align_size by number of
605  }
606  ++spec_b_index;
607  }
608  consensus_spec.sortByPosition(); // sort, otherwise next alignment will fail
609  if (spec_a + spec_b - align_size != consensus_spec.size())
610  {
611  OPENMS_LOG_WARN << "wrong number of features after merge. Expected: " << spec_a + spec_b - align_size << " got: " << consensus_spec.size() << "\n";
612  }
613  }
614  rt_average /= it->second.size() + 1;
615  consensus_spec.setRT(rt_average);
616 
617  // set new consensus native ID
618  consensus_spec.setNativeID(consensus_native_id);
619 
620  if (ms_level >= 2)
621  {
622  if (precursor_count)
623  {
624  precursor_mz_average /= precursor_count;
625  }
626  auto& pcs = consensus_spec.getPrecursors();
627  pcs.resize(1);
628  pcs[0].setMZ(precursor_mz_average);
629  consensus_spec.setPrecursors(pcs);
630  }
631 
632  if (consensus_spec.empty())
633  {
634  continue;
635  }
636  else
637  {
638  merged_spectra.addSpectrum(std::move(consensus_spec));
639  }
640  }
641 
642  OPENMS_LOG_INFO << "Cluster sizes:\n";
643  for (const auto& cl_size : cluster_sizes)
644  {
645  OPENMS_LOG_INFO << " size " << cl_size.first << ": " << cl_size.second << "x\n";
646  }
647 
648  char buffer[200];
649  sprintf(buffer, "%d/%d (%.2f %%) of blocked spectra", (int)count_peaks_aligned,
650  (int)count_peaks_overall, float(count_peaks_aligned) / float(count_peaks_overall) * 100.);
651  OPENMS_LOG_INFO << "Number of merged peaks: " << String(buffer) << "\n";
652 
653  // remove all spectra that were within a cluster
654  typename MapType::SpectrumType empty_spec;
655  MapType exp_tmp;
656  for (Size i = 0; i < exp.size(); ++i)
657  {
658  if (merged_indices.count(i) == 0) // save unclustered ones
659  {
660  exp_tmp.addSpectrum(exp[i]);
661  exp[i] = empty_spec;
662  }
663  }
664 
665  //Meta_Data will not be cleared
666  exp.clear(false);
667  exp.getSpectra().insert(exp.end(), std::make_move_iterator(exp_tmp.begin()),
668  std::make_move_iterator(exp_tmp.end()));
669 
670  // ... and add consensus spectra
671  exp.getSpectra().insert(exp.end(), std::make_move_iterator(merged_spectra.begin()),
672  std::make_move_iterator(merged_spectra.end()));
673 
674  }
675 
696  template <typename MapType>
697  void averageProfileSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
698  {
699  MapType exp_tmp; // temporary experiment for averaged spectra
700 
701  double mz_binning_width(param_.getValue("mz_binning_width"));
702  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
703 
704  unsigned progress = 0;
705  std::stringstream progress_message;
706  progress_message << "averaging profile spectra of MS level " << ms_level;
707  startProgress(0, spectra_to_average_over.size(), progress_message.str());
708 
709  // loop over blocks
710  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
711  {
712  setProgress(++progress);
713 
714  // loop over spectra in blocks
715  std::vector<double> mz_positions_all; // m/z positions from all spectra
716  for (const auto& spec : it->second)
717  {
718  // loop over m/z positions
719  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[spec.first].begin(); it_mz < exp[spec.first].end(); ++it_mz)
720  {
721  mz_positions_all.push_back(it_mz->getMZ());
722  }
723  }
724 
725  sort(mz_positions_all.begin(), mz_positions_all.end());
726 
727  std::vector<double> mz_positions; // positions at which the averaged spectrum should be evaluated
728  std::vector<double> intensities;
729  double last_mz = std::numeric_limits<double>::min(); // last m/z position pushed through from mz_position to mz_position_2
730  double delta_mz(mz_binning_width); // for m/z unit Da
731  for (const auto mz_pos : mz_positions_all)
732  {
733  if (mz_binning_unit == "ppm")
734  {
735  delta_mz = mz_binning_width * mz_pos / 1000000;
736  }
737 
738  if ((mz_pos - last_mz) > delta_mz)
739  {
740  mz_positions.push_back(mz_pos);
741  intensities.push_back(0.0);
742  last_mz = mz_pos;
743  }
744  }
745 
746  // loop over spectra in blocks
747  for (const auto& spec : it->second)
748  {
749  SplineInterpolatedPeaks spline(exp[spec.first]);
751 
752  // loop over m/z positions
753  for (Size i = spline.getPosMin(); i < mz_positions.size(); ++i)
754  {
755  if ((spline.getPosMin() < mz_positions[i]) && (mz_positions[i] < spline.getPosMax()))
756  {
757  intensities[i] += nav.eval(mz_positions[i]) * (spec.second); // spline-interpolated intensity * weight
758  }
759  }
760  }
761 
762  // update spectrum
763  typename MapType::SpectrumType average_spec = exp[it->first];
764  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
765 
766  // refill spectrum
767  for (Size i = 0; i < mz_positions.size(); ++i)
768  {
769  typename MapType::PeakType peak;
770  peak.setMZ(mz_positions[i]);
771  peak.setIntensity(intensities[i]);
772  average_spec.push_back(peak);
773  }
774 
775  // store spectrum temporarily
776  exp_tmp.addSpectrum(std::move(average_spec));
777  }
778 
779  endProgress();
780 
781  // loop over blocks
782  int n(0);
783  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
784  {
785  exp[it->first] = exp_tmp[n];
786  ++n;
787  }
788  }
789 
805  template <typename MapType>
806  void averageCentroidSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
807  {
808  MapType exp_tmp; // temporary experiment for averaged spectra
809 
810  double mz_binning_width(param_.getValue("mz_binning_width"));
811  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
812 
813  unsigned progress = 0;
814  ProgressLogger logger;
815  std::stringstream progress_message;
816  progress_message << "averaging centroid spectra of MS level " << ms_level;
817  logger.startProgress(0, spectra_to_average_over.size(), progress_message.str());
818 
819  // loop over blocks
820  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
821  {
822  logger.setProgress(++progress);
823 
824  // collect peaks from all spectra
825  // loop over spectra in blocks
826  std::vector<std::pair<double, double> > mz_intensity_all; // m/z positions and peak intensities from all spectra
827  for (const auto& weightedMZ: it->second)
828  {
829  // loop over m/z positions
830  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[weightedMZ.first].begin(); it_mz < exp[weightedMZ.first].end(); ++it_mz)
831  {
832  std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * weightedMZ.second)); // m/z, intensity * weight
833  mz_intensity_all.push_back(mz_intensity);
834  }
835  }
836 
837  sort(mz_intensity_all.begin(), mz_intensity_all.end());
838 
839  // generate new spectrum
840  std::vector<double> mz_new;
841  std::vector<double> intensity_new;
842  double last_mz = std::numeric_limits<double>::min();
843  double delta_mz = mz_binning_width;
844  double sum_mz(0);
845  double sum_intensity(0);
846  Size count(0);
847  for (const auto& mz_pos : mz_intensity_all)
848  {
849  if (mz_binning_unit == "ppm")
850  {
851  delta_mz = mz_binning_width * (mz_pos.first) / 1000000;
852  }
853 
854  if (((mz_pos.first - last_mz) > delta_mz) && (count > 0))
855  {
856  mz_new.push_back(sum_mz / count);
857  intensity_new.push_back(sum_intensity); // intensities already weighted
858 
859  sum_mz = 0;
860  sum_intensity = 0;
861 
862  last_mz = mz_pos.first;
863  count = 0;
864  }
865 
866  sum_mz += mz_pos.first;
867  sum_intensity += mz_pos.second;
868  ++count;
869  }
870  if (count > 0)
871  {
872  mz_new.push_back(sum_mz / count);
873  intensity_new.push_back(sum_intensity); // intensities already weighted
874  }
875 
876  // update spectrum
877  typename MapType::SpectrumType average_spec = exp[it->first];
878  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
879 
880  // refill spectrum
881  for (Size i = 0; i < mz_new.size(); ++i)
882  {
883  typename MapType::PeakType peak;
884  peak.setMZ(mz_new[i]);
885  peak.setIntensity(intensity_new[i]);
886  average_spec.push_back(peak);
887  }
888 
889  // store spectrum temporarily
890  exp_tmp.addSpectrum(std::move(average_spec));
891  }
892 
893  logger.endProgress();
894 
895  // loop over blocks
896  int n(0);
897  for (const auto& spectral_index : spectra_to_average_over)
898  {
899  exp[spectral_index.first] = std::move(exp_tmp[n]);
900  ++n;
901  }
902  }
903  };
904 }
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:444
#define OPENMS_LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:449
A basic LC-MS feature.
Definition: BaseFeature.h:33
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition: ClusterAnalyzer.h:26
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:37
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:84
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:66
void setParameters(const Param &param)
Sets the parameters.
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:42
Illegal self operation exception.
Definition: Exception.h:328
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:299
Not all required information provided.
Definition: Exception.h:155
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:46
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Iterator begin() noexcept
Definition: MSExperiment.h:156
Size size() const noexcept
The number of spectra.
Definition: MSExperiment.h:121
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Iterator end()
Definition: MSExperiment.h:171
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Base::const_iterator const_iterator
Definition: MSExperiment.h:91
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:79
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:44
double getRT() const
void setMSLevel(UInt ms_level)
Sets the MS level.
void sortByPosition()
Lexicographically sorts the peaks by their position.
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Management and storage of parameters / INI files.
Definition: Param.h:44
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:28
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:84
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:93
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:172
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:178
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:190
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:184
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:27
void setProgress(SignedSize value) const
Sets the current progress.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress(UInt64 bytes_processed=0) const
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:31
Definition: SpectraMerger.h:52
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:68
SpectraDistance_()
Definition: SpectraMerger.h:54
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:75
double mz_max_
Definition: SpectraMerger.h:94
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition: SpectraMerger.h:62
double rt_max_
Definition: SpectraMerger.h:93
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:39
void average(MapType &exp, const String &average_type, int ms_level=-1)
average over neighbouring spectra
Definition: SpectraMerger.h:286
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition: SpectraMerger.h:806
SpectraMerger(const SpectraMerger &source)
copy constructor
SpectraMerger()
default constructor
std::map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition: SpectraMerger.h:104
~SpectraMerger() override
destructor
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:185
SpectraMerger & operator=(SpectraMerger &&source)=default
move-assignment operator
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition: SpectraMerger.h:697
SpectraMerger & operator=(const SpectraMerger &source)
assignment operator
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:494
std::map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition: SpectraMerger.h:101
SpectraMerger(SpectraMerger &&source)=default
move constructor
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:134
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition: SpectrumAlignment.h:43
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition: SpectrumAlignment.h:62
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:45
@ PROFILE
profile data
Definition: SpectrumSettings.h:48
@ CENTROID
centroid data or stick data
Definition: SpectrumSettings.h:47
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software.
void setNativeID(const String &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
iterator class for access of spline packages
Definition: SplineInterpolatedPeaks.h:84
double eval(double pos)
returns spline interpolated intensity at this position (fast access since we can start search from la...
Data structure for spline interpolation of MS1 spectra and chromatograms.
Definition: SplineInterpolatedPeaks.h:34
SplineInterpolatedPeaks::Navigator getNavigator(double scaling=0.7)
returns an iterator for access of spline packages
double getPosMax() const
returns the maximum m/z (or RT) of the spectrum
double getPosMin() const
returns the minimum m/z (or RT) of the spectrum
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:72
unsigned int UInt
Unsigned integer type.
Definition: Types.h:64
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:104
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:29
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:81
FLASHIda C++ to C# (or vice versa) bridge functions The functions here are called in C# to invoke fun...
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19