OpenMS
SpectraMerger.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Chris Bielow, Andreas Bertsch, Lars Nilse $
33 // --------------------------------------------------------------------------
34 //
35 #pragma once
36 
49 
50 #include <vector>
51 
52 namespace OpenMS
53 {
54 
63  class OPENMS_DLLAPI SpectraMerger :
64  public DefaultParamHandler, public ProgressLogger
65  {
66 
67 protected:
68 
69  /* Determine distance between two spectra
70 
71  Distance is determined as
72 
73  (d_rt/rt_max_ + d_mz/mz_max_) / 2
74 
75  */
77  public DefaultParamHandler
78  {
79 public:
81  DefaultParamHandler("SpectraDistance")
82  {
83  defaults_.setValue("rt_tolerance", 10.0, "Maximal RT distance (in [s]) for two spectra's precursors.");
84  defaults_.setValue("mz_tolerance", 1.0, "Maximal m/z distance (in Da) for two spectra's precursors.");
85  defaultsToParam_(); // calls updateMembers_
86  }
87 
88  void updateMembers_() override
89  {
90  rt_max_ = (double) param_.getValue("rt_tolerance");
91  mz_max_ = (double) param_.getValue("mz_tolerance");
92  }
93 
94  double getSimilarity(const double d_rt, const double d_mz) const
95  {
96  // 1 - distance
97  return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
98  }
99 
100  // measure of SIMILARITY (not distance, i.e. 1-distance)!!
101  double operator()(const BaseFeature& first, const BaseFeature& second) const
102  {
103  // get RT distance:
104  double d_rt = fabs(first.getRT() - second.getRT());
105  double d_mz = fabs(first.getMZ() - second.getMZ());
106 
107  if (d_rt > rt_max_ || d_mz > mz_max_)
108  {
109  return 0;
110  }
111 
112  // calculate similarity (0-1):
113  double sim = getSimilarity(d_rt, d_mz);
114 
115  return sim;
116  }
117 
118 protected:
119  double rt_max_;
120  double mz_max_;
121 
122  }; // end of SpectraDistance
123 
124 public:
125 
127  typedef std::map<Size, std::vector<Size> > MergeBlocks;
128 
130  typedef std::map<Size, std::vector<std::pair<Size, double> > > AverageBlocks;
131 
132  // @name Constructors and Destructors
133  // @{
136 
138  SpectraMerger(const SpectraMerger& source);
139 
141  SpectraMerger(SpectraMerger&& source) = default;
142 
144  ~SpectraMerger() override;
145  // @}
146 
147  // @name Operators
148  // @{
151 
153  SpectraMerger& operator=(SpectraMerger&& source) = default;
154  // @}
155 
156  // @name Merging functions
157  // @{
159  template <typename MapType>
161  {
162  IntList ms_levels = param_.getValue("block_method:ms_levels");
163  Int rt_block_size(param_.getValue("block_method:rt_block_size"));
164  double rt_max_length = (param_.getValue("block_method:rt_max_length"));
165 
166  if (rt_max_length == 0) // no rt restriction set?
167  {
168  rt_max_length = (std::numeric_limits<double>::max)(); // set max rt span to very large value
169  }
170 
171  for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
172  {
173  MergeBlocks spectra_to_merge;
174  Size idx_block(0);
175  SignedSize block_size_count(rt_block_size + 1);
176  Size idx_spectrum(0);
177  for (typename MapType::const_iterator it1 = exp.begin(); it1 != exp.end(); ++it1)
178  {
179  if (Int(it1->getMSLevel()) == *it_mslevel)
180  {
181  // block full if it contains a maximum number of scans or if maximum rt length spanned
182  if (++block_size_count >= rt_block_size ||
183  exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
184  {
185  block_size_count = 0;
186  idx_block = idx_spectrum;
187  }
188  else
189  {
190  spectra_to_merge[idx_block].push_back(idx_spectrum);
191  }
192  }
193 
194  ++idx_spectrum;
195  }
196  // check if last block had sacrifice spectra
197  if (block_size_count == 0) //block just got initialized
198  {
199  spectra_to_merge[idx_block] = std::vector<Size>();
200  }
201 
202  // merge spectra, remove all old MS spectra and add new consensus spectra
203  mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
204  }
205 
206  exp.sortSpectra();
207  }
208 
210  template <typename MapType>
212  {
213 
214  // convert spectra's precursors to clusterizable data
215  Size data_size;
216  std::vector<BinaryTreeNode> tree;
217  std::map<Size, Size> index_mapping;
218  // local scope to save memory - we do not need the clustering stuff later
219  {
220  std::vector<BaseFeature> data;
221 
222  for (Size i = 0; i < exp.size(); ++i)
223  {
224  if (exp[i].getMSLevel() != 2)
225  {
226  continue;
227  }
228 
229  // remember which index in distance data ==> experiment index
230  index_mapping[data.size()] = i;
231 
232  // make cluster element
233  BaseFeature bf;
234  bf.setRT(exp[i].getRT());
235  const auto& pcs = exp[i].getPrecursors();
236  // keep the first Precursor
237  if (pcs.empty())
238  {
239  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Scan #") + String(i) + " does not contain any precursor information! Unable to cluster!");
240  }
241  if (pcs.size() > 1)
242  {
243  OPENMS_LOG_WARN << "More than one precursor found. Using first one!" << std::endl;
244  }
245  bf.setMZ(pcs[0].getMZ());
246  data.push_back(bf);
247  }
248  data_size = data.size();
249 
250  SpectraDistance_ llc;
251  llc.setParameters(param_.copy("precursor_method:", true));
252  SingleLinkage sl;
253  DistanceMatrix<float> dist; // will be filled
255 
256  // clustering ; threshold is implicitly at 1.0, i.e. distances of 1.0 (== similarity 0) will not be clustered
257  ch.cluster<BaseFeature, SpectraDistance_>(data, llc, sl, tree, dist);
258  }
259 
260  // extract the clusters
261  ClusterAnalyzer ca;
262  std::vector<std::vector<Size> > clusters;
263  // count number of real tree nodes (not the -1 ones):
264  Size node_count = 0;
265  for (Size ii = 0; ii < tree.size(); ++ii)
266  {
267  if (tree[ii].distance >= 1)
268  {
269  tree[ii].distance = -1; // manually set to disconnect, as SingleLinkage does not support it
270  }
271  if (tree[ii].distance != -1)
272  {
273  ++node_count;
274  }
275  }
276  ca.cut(data_size - node_count, tree, clusters);
277 
278  // convert to blocks
279  MergeBlocks spectra_to_merge;
280 
281  for (Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
282  {
283  if (clusters[i_outer].size() <= 1)
284  {
285  continue;
286  }
287  // init block with first cluster element
288  Size cl_index0 = clusters[i_outer][0];
289  spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
290  // add all other elements
291  for (Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
292  {
293  spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[clusters[i_outer][i_inner]]);
294  }
295  }
296 
297  // do it
298  mergeSpectra_(exp, spectra_to_merge, 2);
299 
300  exp.sortSpectra();
301  }
302 
311  static bool areMassesMatched(double mz1, double mz2, double tol_ppm, int max_c)
312  {
313  if (mz1 == mz2 || tol_ppm <= 0)
314  {
315  return true;
316  }
317 
318  const int min_c = 1;
319  const int max_iso_diff = 5; // maximum charge difference 5 is more than enough
320  const double max_charge_diff_ratio = 3.0; // maximum ratio between charges (large / small charge)
321 
322  for (int c1 = min_c; c1 <= max_c; ++c1)
323  {
324  double mass1 = (mz1 - Constants::PROTON_MASS_U) * c1;
325 
326  for (int c2 = min_c; c2 <= max_c; ++c2)
327  {
328  if (c1 / c2 > max_charge_diff_ratio)
329  {
330  continue;
331  }
332  if (c2 / c1 > max_charge_diff_ratio)
333  {
334  break;
335  }
336 
337  double mass2 = (mz2 - Constants::PROTON_MASS_U) * c2;
338 
339  if (fabs(mass1 - mass2) > max_iso_diff)
340  {
341  continue;
342  }
343  for (int i = -max_iso_diff; i <= max_iso_diff; ++i)
344  {
345  if (fabs(mass1 - mass2 + i * Constants::ISOTOPE_MASSDIFF_55K_U) < mass1 * tol_ppm * 1e-6)
346  {
347  return true;
348  }
349  }
350  }
351  }
352  return false;
353  }
354 
362  template <typename MapType>
363  void average(MapType& exp, const String& average_type, int ms_level = -1)
364  {
365  // MS level to be averaged
366  if (ms_level < 0)
367  {
368  ms_level = param_.getValue("average_gaussian:ms_level");
369  if (average_type == "tophat")
370  {
371  ms_level = param_.getValue("average_tophat:ms_level");
372  }
373  }
374 
375  // spectrum type (profile, centroid or automatic)
376  std::string spectrum_type = param_.getValue("average_gaussian:spectrum_type");
377  if (average_type == "tophat")
378  {
379  spectrum_type = std::string(param_.getValue("average_tophat:spectrum_type"));
380  }
381 
382  // parameters for Gaussian averaging
383  double fwhm(param_.getValue("average_gaussian:rt_FWHM"));
384  double factor = -4 * log(2.0) / (fwhm * fwhm); // numerical factor within Gaussian
385  double cutoff(param_.getValue("average_gaussian:cutoff"));
386  double precursor_mass_ppm = param_.getValue("average_gaussian:precursor_mass_tol");
387  int precursor_max_charge = param_.getValue("average_gaussian:precursor_max_charge");
388 
389  // parameters for Top-Hat averaging
390  bool unit(param_.getValue("average_tophat:rt_unit") == "scans"); // true if RT unit is 'scans', false if RT unit is 'seconds'
391  double range(param_.getValue("average_tophat:rt_range")); // range of spectra to be averaged over
392  double range_seconds = range / 2; // max. +/- <range_seconds> seconds from master spectrum
393  int range_scans = static_cast<int>(range); // in case of unit scans, the param is used as integer
394  if ((range_scans % 2) == 0)
395  {
396  ++range_scans;
397  }
398  range_scans = (range_scans - 1) / 2; // max. +/- <range_scans> scans from master spectrum
399 
400  AverageBlocks spectra_to_average_over;
401 
402  // loop over RT
403  int n(0); // spectrum index
404  int cntr(0); // spectrum counter
405  for (typename MapType::const_iterator it_rt = exp.begin(); it_rt != exp.end(); ++it_rt)
406  {
407  if (Int(it_rt->getMSLevel()) == ms_level)
408  {
409  int m; // spectrum index
410  int steps;
411  bool terminate_now;
412  typename MapType::const_iterator it_rt_2;
413 
414  // go forward (start at next downstream spectrum; the current spectrum will be covered when looking backwards)
415  steps = 0;
416  m = n + 1;
417  it_rt_2 = it_rt + 1;
418  terminate_now = false;
419  while (it_rt_2 != exp.end() && !terminate_now)
420  {
421  if (Int(it_rt_2->getMSLevel()) == ms_level)
422  {
423  bool add = true;
424  // if precursor_mass_ppm >=0, two spectra should have the same mass. otherwise it_rt_2 is skipped.
425  if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
426  it_rt_2->getPrecursors().size() > 0)
427  {
428  double mz1 = it_rt->getPrecursors()[0].getMZ();
429  double mz2 = it_rt_2->getPrecursors()[0].getMZ();
430  add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
431  }
432 
433  if (add)
434  {
435  double weight = 1;
436  if (average_type == "gaussian")
437  {
438  //factor * (rt_2 -rt)^2
439  double base = it_rt_2->getRT() - it_rt->getRT();
440  weight = std::exp(factor * base * base);
441  }
442  std::pair<Size, double> p(m, weight);
443  spectra_to_average_over[n].push_back(p);
444  }
445  ++steps;
446  }
447  if (average_type == "gaussian")
448  {
449  // Gaussian
450  double base = it_rt_2->getRT() - it_rt->getRT();
451  terminate_now = std::exp(factor * base * base) < cutoff;
452  }
453  else if (unit)
454  {
455  // Top-Hat with RT unit = scans
456  terminate_now = (steps > range_scans);
457  }
458  else
459  {
460  // Top-Hat with RT unit = seconds
461  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
462  }
463  ++m;
464  ++it_rt_2;
465  }
466 
467  // go backward
468  steps = 0;
469  m = n;
470  it_rt_2 = it_rt;
471  terminate_now = false;
472  while (it_rt_2 != exp.begin() && !terminate_now)
473  {
474  if (Int(it_rt_2->getMSLevel()) == ms_level)
475  {
476  bool add = true;
477  // if precursor_mass_ppm >=0, two spectra should have the same mass. otherwise it_rt_2 is skipped.
478  if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
479  it_rt_2->getPrecursors().size() > 0)
480  {
481  double mz1 = it_rt->getPrecursors()[0].getMZ();
482  double mz2 = it_rt_2->getPrecursors()[0].getMZ();
483  add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
484  }
485  if (add)
486  {
487  double weight = 1;
488  if (average_type == "gaussian")
489  {
490  double base = it_rt_2->getRT() - it_rt->getRT();
491  weight = std::exp(factor * base * base);
492  }
493  std::pair<Size, double> p(m, weight);
494  spectra_to_average_over[n].push_back(p);
495  }
496  ++steps;
497  }
498  if (average_type == "gaussian")
499  {
500  // Gaussian
501  double base = it_rt_2->getRT() - it_rt->getRT();
502  terminate_now = std::exp(factor * base * base) < cutoff;
503  }
504  else if (unit)
505  {
506  // Top-Hat with RT unit = scans
507  terminate_now = (steps > range_scans);
508  }
509  else
510  {
511  // Top-Hat with RT unit = seconds
512  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
513  }
514  --m;
515  --it_rt_2;
516  }
517  ++cntr;
518  }
519  ++n;
520  }
521 
522  if (cntr == 0)
523  {
524  //return;
525  throw Exception::InvalidParameter(__FILE__,
526  __LINE__,
527  OPENMS_PRETTY_FUNCTION,
528  "Input mzML does not have any spectra of MS level specified by ms_level.");
529  }
530 
531  // normalize weights
532  for (AverageBlocks::iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
533  {
534  double sum(0.0);
535  for (const auto& weight: it->second)
536  {
537  sum += weight.second;
538  }
539 
540  for (auto& weight: it->second)
541  {
542  weight.second /= sum;
543  }
544  }
545 
546  // determine type of spectral data (profile or centroided)
548  if (spectrum_type == "automatic")
549  {
550  Size idx = spectra_to_average_over.begin()->first; // index of first spectrum to be averaged
551  type = exp[idx].getType(true);
552  }
553  else if (spectrum_type == "profile")
554  {
556  }
557  else if (spectrum_type == "centroid")
558  {
560  }
561  else
562  {
563  throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION, "Spectrum type has to be one of automatic, profile or centroid.");
564  }
565 
566  // generate new spectra
567  if (type == SpectrumSettings::CENTROID)
568  {
569  averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
570  }
571  else
572  {
573  averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
574  }
575 
576  exp.sortSpectra();
577  }
578 
579  // @}
580 
581 protected:
582 
593  template <typename MapType>
594  void mergeSpectra_(MapType& exp, const MergeBlocks& spectra_to_merge, const UInt ms_level)
595  {
596  double mz_binning_width(param_.getValue("mz_binning_width"));
597  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
598 
599  // merge spectra
600  MapType merged_spectra;
601 
602  std::map<Size, Size> cluster_sizes;
603  std::set<Size> merged_indices;
604 
605  // set up alignment
606  SpectrumAlignment sas;
607  Param p;
608  p.setValue("tolerance", mz_binning_width);
609  if (!(mz_binning_unit == "Da" || mz_binning_unit == "ppm"))
610  {
611  throw Exception::IllegalSelfOperation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION); // sanity check
612  }
613 
614  p.setValue("is_relative_tolerance", mz_binning_unit == "Da" ? "false" : "true");
615  sas.setParameters(p);
616  std::vector<std::pair<Size, Size> > alignment;
617 
618  Size count_peaks_aligned(0);
619  Size count_peaks_overall(0);
620 
621  // each BLOCK
622  for (auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
623  {
624  ++cluster_sizes[it->second.size() + 1]; // for stats
625 
626  typename MapType::SpectrumType consensus_spec = exp[it->first];
627  consensus_spec.setMSLevel(ms_level);
628 
629  merged_indices.insert(it->first);
630 
631  double rt_average = consensus_spec.getRT();
632  double precursor_mz_average = 0.0;
633  Size precursor_count(0);
634  if (!consensus_spec.getPrecursors().empty())
635  {
636  precursor_mz_average = consensus_spec.getPrecursors()[0].getMZ();
637  ++precursor_count;
638  }
639 
640  count_peaks_overall += consensus_spec.size();
641 
642  String consensus_native_id = consensus_spec.getNativeID();
643 
644  // block elements
645  for (auto sit = it->second.begin(); sit != it->second.end(); ++sit)
646  {
647  consensus_spec.unify(exp[*sit]); // append meta info
648  merged_indices.insert(*sit);
649 
650  rt_average += exp[*sit].getRT();
651  if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
652  {
653  precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
654  ++precursor_count;
655  }
656 
657  // add native ID to consensus native ID, comma separated
658  consensus_native_id += ",";
659  consensus_native_id += exp[*sit].getNativeID();
660 
661  // merge data points
662  sas.getSpectrumAlignment(alignment, consensus_spec, exp[*sit]);
663  count_peaks_aligned += alignment.size();
664  count_peaks_overall += exp[*sit].size();
665 
666  Size align_index(0);
667  Size spec_b_index(0);
668 
669  // sanity check for number of peaks
670  Size spec_a = consensus_spec.size(), spec_b = exp[*sit].size(), align_size = alignment.size();
671  for (auto pit = exp[*sit].begin(); pit != exp[*sit].end(); ++pit)
672  {
673  if (alignment.empty() || alignment[align_index].second != spec_b_index)
674  // ... add unaligned peak
675  {
676  consensus_spec.push_back(*pit);
677  }
678  // or add aligned peak height to ALL corresponding existing peaks
679  else
680  {
681  Size counter(0);
682  Size copy_of_align_index(align_index);
683 
684  while (!alignment.empty() &&
685  copy_of_align_index < alignment.size() &&
686  alignment[copy_of_align_index].second == spec_b_index)
687  {
688  ++copy_of_align_index;
689  ++counter;
690  } // Count the number of peaks in a which correspond to a single b peak.
691 
692  while (!alignment.empty() &&
693  align_index < alignment.size() &&
694  alignment[align_index].second == spec_b_index)
695  {
696  consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
697  (pit->getIntensity() / (double)counter)); // add the intensity divided by the number of peaks
698  ++align_index; // this aligned peak was explained, wait for next aligned peak ...
699  if (align_index == alignment.size())
700  {
701  alignment.clear(); // end reached -> avoid going into this block again
702  }
703  }
704  align_size = align_size + 1 - counter; //Decrease align_size by number of
705  }
706  ++spec_b_index;
707  }
708  consensus_spec.sortByPosition(); // sort, otherwise next alignment will fail
709  if (spec_a + spec_b - align_size != consensus_spec.size())
710  {
711  OPENMS_LOG_WARN << "wrong number of features after merge. Expected: " << spec_a + spec_b - align_size << " got: " << consensus_spec.size() << "\n";
712  }
713  }
714  rt_average /= it->second.size() + 1;
715  consensus_spec.setRT(rt_average);
716 
717  // set new consensus native ID
718  consensus_spec.setNativeID(consensus_native_id);
719 
720  if (ms_level >= 2)
721  {
722  if (precursor_count)
723  {
724  precursor_mz_average /= precursor_count;
725  }
726  auto& pcs = consensus_spec.getPrecursors();
727  pcs.resize(1);
728  pcs[0].setMZ(precursor_mz_average);
729  consensus_spec.setPrecursors(pcs);
730  }
731 
732  if (consensus_spec.empty())
733  {
734  continue;
735  }
736  else
737  {
738  merged_spectra.addSpectrum(std::move(consensus_spec));
739  }
740  }
741 
742  OPENMS_LOG_INFO << "Cluster sizes:\n";
743  for (const auto& cl_size : cluster_sizes)
744  {
745  OPENMS_LOG_INFO << " size " << cl_size.first << ": " << cl_size.second << "x\n";
746  }
747 
748  char buffer[200];
749  sprintf(buffer, "%d/%d (%.2f %%) of blocked spectra", (int)count_peaks_aligned,
750  (int)count_peaks_overall, float(count_peaks_aligned) / float(count_peaks_overall) * 100.);
751  OPENMS_LOG_INFO << "Number of merged peaks: " << String(buffer) << "\n";
752 
753  // remove all spectra that were within a cluster
754  typename MapType::SpectrumType empty_spec;
755  MapType exp_tmp;
756  for (Size i = 0; i < exp.size(); ++i)
757  {
758  if (merged_indices.count(i) == 0) // save unclustered ones
759  {
760  exp_tmp.addSpectrum(exp[i]);
761  exp[i] = empty_spec;
762  }
763  }
764 
765  //Meta_Data will not be cleared
766  exp.clear(false);
767  exp.getSpectra().insert(exp.end(), std::make_move_iterator(exp_tmp.begin()),
768  std::make_move_iterator(exp_tmp.end()));
769 
770  // ... and add consensus spectra
771  exp.getSpectra().insert(exp.end(), std::make_move_iterator(merged_spectra.begin()),
772  std::make_move_iterator(merged_spectra.end()));
773 
774  }
775 
796  template <typename MapType>
797  void averageProfileSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
798  {
799  MapType exp_tmp; // temporary experiment for averaged spectra
800 
801  double mz_binning_width(param_.getValue("mz_binning_width"));
802  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
803 
804  unsigned progress = 0;
805  std::stringstream progress_message;
806  progress_message << "averaging profile spectra of MS level " << ms_level;
807  startProgress(0, spectra_to_average_over.size(), progress_message.str());
808 
809  // loop over blocks
810  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
811  {
812  setProgress(++progress);
813 
814  // loop over spectra in blocks
815  std::vector<double> mz_positions_all; // m/z positions from all spectra
816  for (const auto& spec : it->second)
817  {
818  // loop over m/z positions
819  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[spec.first].begin(); it_mz < exp[spec.first].end(); ++it_mz)
820  {
821  mz_positions_all.push_back(it_mz->getMZ());
822  }
823  }
824 
825  sort(mz_positions_all.begin(), mz_positions_all.end());
826 
827  std::vector<double> mz_positions; // positions at which the averaged spectrum should be evaluated
828  std::vector<double> intensities;
829  double last_mz = std::numeric_limits<double>::min(); // last m/z position pushed through from mz_position to mz_position_2
830  double delta_mz(mz_binning_width); // for m/z unit Da
831  for (const auto mz_pos : mz_positions_all)
832  {
833  if (mz_binning_unit == "ppm")
834  {
835  delta_mz = mz_binning_width * mz_pos / 1000000;
836  }
837 
838  if ((mz_pos - last_mz) > delta_mz)
839  {
840  mz_positions.push_back(mz_pos);
841  intensities.push_back(0.0);
842  last_mz = mz_pos;
843  }
844  }
845 
846  // loop over spectra in blocks
847  for (const auto& spec : it->second)
848  {
849  SplineInterpolatedPeaks spline(exp[spec.first]);
851 
852  // loop over m/z positions
853  for (Size i = spline.getPosMin(); i < mz_positions.size(); ++i)
854  {
855  if ((spline.getPosMin() < mz_positions[i]) && (mz_positions[i] < spline.getPosMax()))
856  {
857  intensities[i] += nav.eval(mz_positions[i]) * (spec.second); // spline-interpolated intensity * weight
858  }
859  }
860  }
861 
862  // update spectrum
863  typename MapType::SpectrumType average_spec = exp[it->first];
864  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
865 
866  // refill spectrum
867  for (Size i = 0; i < mz_positions.size(); ++i)
868  {
869  typename MapType::PeakType peak;
870  peak.setMZ(mz_positions[i]);
871  peak.setIntensity(intensities[i]);
872  average_spec.push_back(peak);
873  }
874 
875  // store spectrum temporarily
876  exp_tmp.addSpectrum(std::move(average_spec));
877  }
878 
879  endProgress();
880 
881  // loop over blocks
882  int n(0);
883  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
884  {
885  exp[it->first] = exp_tmp[n];
886  ++n;
887  }
888  }
889 
905  template <typename MapType>
906  void averageCentroidSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
907  {
908  MapType exp_tmp; // temporary experiment for averaged spectra
909 
910  double mz_binning_width(param_.getValue("mz_binning_width"));
911  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
912 
913  unsigned progress = 0;
914  ProgressLogger logger;
915  std::stringstream progress_message;
916  progress_message << "averaging centroid spectra of MS level " << ms_level;
917  logger.startProgress(0, spectra_to_average_over.size(), progress_message.str());
918 
919  // loop over blocks
920  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
921  {
922  logger.setProgress(++progress);
923 
924  // collect peaks from all spectra
925  // loop over spectra in blocks
926  std::vector<std::pair<double, double> > mz_intensity_all; // m/z positions and peak intensities from all spectra
927  for (const auto& weightedMZ: it->second)
928  {
929  // loop over m/z positions
930  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[weightedMZ.first].begin(); it_mz < exp[weightedMZ.first].end(); ++it_mz)
931  {
932  std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * weightedMZ.second)); // m/z, intensity * weight
933  mz_intensity_all.push_back(mz_intensity);
934  }
935  }
936 
937  sort(mz_intensity_all.begin(), mz_intensity_all.end());
938 
939  // generate new spectrum
940  std::vector<double> mz_new;
941  std::vector<double> intensity_new;
942  double last_mz = std::numeric_limits<double>::min();
943  double delta_mz = mz_binning_width;
944  double sum_mz(0);
945  double sum_intensity(0);
946  Size count(0);
947  for (const auto& mz_pos : mz_intensity_all)
948  {
949  if (mz_binning_unit == "ppm")
950  {
951  delta_mz = mz_binning_width * (mz_pos.first) / 1000000;
952  }
953 
954  if (((mz_pos.first - last_mz) > delta_mz) && (count > 0))
955  {
956  mz_new.push_back(sum_mz / count);
957  intensity_new.push_back(sum_intensity); // intensities already weighted
958 
959  sum_mz = 0;
960  sum_intensity = 0;
961 
962  last_mz = mz_pos.first;
963  count = 0;
964  }
965 
966  sum_mz += mz_pos.first;
967  sum_intensity += mz_pos.second;
968  ++count;
969  }
970  if (count > 0)
971  {
972  mz_new.push_back(sum_mz / count);
973  intensity_new.push_back(sum_intensity); // intensities already weighted
974  }
975 
976  // update spectrum
977  typename MapType::SpectrumType average_spec = exp[it->first];
978  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
979 
980  // refill spectrum
981  for (Size i = 0; i < mz_new.size(); ++i)
982  {
983  typename MapType::PeakType peak;
984  peak.setMZ(mz_new[i]);
985  peak.setIntensity(intensity_new[i]);
986  average_spec.push_back(peak);
987  }
988 
989  // store spectrum temporarily
990  exp_tmp.addSpectrum(std::move(average_spec));
991  }
992 
993  logger.endProgress();
994 
995  // loop over blocks
996  int n(0);
997  for (const auto& spectral_index : spectra_to_average_over)
998  {
999  exp[spectral_index.first] = std::move(exp_tmp[n]);
1000  ++n;
1001  }
1002  }
1003  };
1004 }
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:470
#define OPENMS_LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:475
A basic LC-MS feature.
Definition: BaseFeature.h:59
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition: ClusterAnalyzer.h:52
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Method to calculate a partition resulting from a certain step in clustering given by the number of cl...
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:64
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:112
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
void setParameters(const Param &param)
Sets the parameters.
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:68
Illegal self operation exception.
Definition: Exception.h:372
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:341
Not all required information provided.
Definition: Exception.h:188
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:72
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Iterator begin()
Definition: MSExperiment.h:182
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Iterator end()
Definition: MSExperiment.h:192
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Size size() const
The number of spectra.
Definition: MSExperiment.h:147
Base::const_iterator const_iterator
Definition: MSExperiment.h:117
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:105
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:70
double getRT() const
void setMSLevel(UInt ms_level)
Sets the MS level.
void sortByPosition()
Lexicographically sorts the peaks by their position.
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Management and storage of parameters / INI files.
Definition: Param.h:70
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:110
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:119
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:198
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:204
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:216
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:210
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:53
void setProgress(SignedSize value) const
Sets the current progress.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress() const
Ends the progress display.
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:57
Definition: SpectraMerger.h:78
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:94
SpectraDistance_()
Definition: SpectraMerger.h:80
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:101
double mz_max_
Definition: SpectraMerger.h:120
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition: SpectraMerger.h:88
double rt_max_
Definition: SpectraMerger.h:119
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:65
void average(MapType &exp, const String &average_type, int ms_level=-1)
average over neighbouring spectra
Definition: SpectraMerger.h:363
static bool areMassesMatched(double mz1, double mz2, double tol_ppm, int max_c)
check if the first and second mzs might be from the same mass
Definition: SpectraMerger.h:311
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition: SpectraMerger.h:906
SpectraMerger(const SpectraMerger &source)
copy constructor
SpectraMerger()
default constructor
std::map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition: SpectraMerger.h:130
~SpectraMerger() override
destructor
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:211
SpectraMerger & operator=(SpectraMerger &&source)=default
move-assignment operator
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition: SpectraMerger.h:797
SpectraMerger & operator=(const SpectraMerger &source)
assignment operator
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:594
std::map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition: SpectraMerger.h:127
SpectraMerger(SpectraMerger &&source)=default
move constructor
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:160
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition: SpectrumAlignment.h:69
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition: SpectrumAlignment.h:88
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:71
@ PROFILE
profile data
Definition: SpectrumSettings.h:74
@ CENTROID
centroid data or stick data
Definition: SpectrumSettings.h:73
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software.
void setNativeID(const String &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
iterator class for access of spline packages
Definition: SplineInterpolatedPeaks.h:110
double eval(double pos)
returns spline interpolated intensity at this position (fast access since we can start search from la...
Data structure for spline interpolation of MS1 spectra and chromatograms.
Definition: SplineInterpolatedPeaks.h:60
SplineInterpolatedPeaks::Navigator getNavigator(double scaling=0.7)
returns an iterator for access of spline packages
double getPosMax() const
returns the maximum m/z (or RT) of the spectrum
double getPosMin() const
returns the minimum m/z (or RT) of the spectrum
A more convenient string class.
Definition: String.h:60
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:107
const double ISOTOPE_MASSDIFF_55K_U
Definition: Constants.h:126
const double PROTON_MASS_U
Definition: Constants.h:116
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48