OpenMS  3.0.0
SpectraMerger.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Chris Bielow, Andreas Bertsch, Lars Nilse $
33 // --------------------------------------------------------------------------
34 //
35 #pragma once
36 
49 
50 #include <vector>
51 
52 namespace OpenMS
53 {
54 
63  class OPENMS_DLLAPI SpectraMerger :
64  public DefaultParamHandler, public ProgressLogger
65  {
66 
67 protected:
68 
69  /* Determine distance between two spectra
70 
71  Distance is determined as
72 
73  (d_rt/rt_max_ + d_mz/mz_max_) / 2
74 
75  */
77  public DefaultParamHandler
78  {
79 public:
81  DefaultParamHandler("SpectraDistance")
82  {
83  defaults_.setValue("rt_tolerance", 10.0, "Maximal RT distance (in [s]) for two spectra's precursors.");
84  defaults_.setValue("mz_tolerance", 1.0, "Maximal m/z distance (in Da) for two spectra's precursors.");
85  defaultsToParam_(); // calls updateMembers_
86  }
87 
88  void updateMembers_() override
89  {
90  rt_max_ = (double) param_.getValue("rt_tolerance");
91  mz_max_ = (double) param_.getValue("mz_tolerance");
92  }
93 
94  double getSimilarity(const double d_rt, const double d_mz) const
95  {
96  // 1 - distance
97  return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
98  }
99 
100  // measure of SIMILARITY (not distance, i.e. 1-distance)!!
101  double operator()(const BaseFeature& first, const BaseFeature& second) const
102  {
103  // get RT distance:
104  double d_rt = fabs(first.getRT() - second.getRT());
105  double d_mz = fabs(first.getMZ() - second.getMZ());
106 
107  if (d_rt > rt_max_ || d_mz > mz_max_)
108  {
109  return 0;
110  }
111 
112  // calculate similarity (0-1):
113  double sim = getSimilarity(d_rt, d_mz);
114 
115  return sim;
116  }
117 
118 protected:
119  double rt_max_;
120  double mz_max_;
121 
122  }; // end of SpectraDistance
123 
124 public:
125 
127  typedef std::map<Size, std::vector<Size> > MergeBlocks;
128 
130  typedef std::map<Size, std::vector<std::pair<Size, double> > > AverageBlocks;
131 
132  // @name Constructors and Destructors
133  // @{
135  SpectraMerger();
136 
138  SpectraMerger(const SpectraMerger& source);
139 
141  SpectraMerger(SpectraMerger&& source) = default;
142 
144  ~SpectraMerger() override;
145  // @}
146 
147  // @name Operators
148  // @{
150  SpectraMerger& operator=(const SpectraMerger& source);
151 
153  SpectraMerger& operator=(SpectraMerger&& source) = default;
154  // @}
155 
156  // @name Merging functions
157  // @{
159  template <typename MapType>
161  {
162  IntList ms_levels = param_.getValue("block_method:ms_levels");
163  Int rt_block_size(param_.getValue("block_method:rt_block_size"));
164  double rt_max_length = (param_.getValue("block_method:rt_max_length"));
165 
166  if (rt_max_length == 0) // no rt restriction set?
167  {
168  rt_max_length = (std::numeric_limits<double>::max)(); // set max rt span to very large value
169  }
170 
171  for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
172  {
173  MergeBlocks spectra_to_merge;
174  Size idx_block(0);
175  SignedSize block_size_count(rt_block_size + 1);
176  Size idx_spectrum(0);
177  for (typename MapType::const_iterator it1 = exp.begin(); it1 != exp.end(); ++it1)
178  {
179  if (Int(it1->getMSLevel()) == *it_mslevel)
180  {
181  // block full if it contains a maximum number of scans or if maximum rt length spanned
182  if (++block_size_count >= rt_block_size ||
183  exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
184  {
185  block_size_count = 0;
186  idx_block = idx_spectrum;
187  }
188  else
189  {
190  spectra_to_merge[idx_block].push_back(idx_spectrum);
191  }
192  }
193 
194  ++idx_spectrum;
195  }
196  // check if last block had sacrifice spectra
197  if (block_size_count == 0) //block just got initialized
198  {
199  spectra_to_merge[idx_block] = std::vector<Size>();
200  }
201 
202  // merge spectra, remove all old MS spectra and add new consensus spectra
203  mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
204  }
205 
206  exp.sortSpectra();
207  }
208 
210  template <typename MapType>
212  {
213 
214  // convert spectra's precursors to clusterizable data
215  Size data_size;
216  std::vector<BinaryTreeNode> tree;
217  std::map<Size, Size> index_mapping;
218  // local scope to save memory - we do not need the clustering stuff later
219  {
220  std::vector<BaseFeature> data;
221 
222  for (Size i = 0; i < exp.size(); ++i)
223  {
224  if (exp[i].getMSLevel() != 2)
225  {
226  continue;
227  }
228 
229  // remember which index in distance data ==> experiment index
230  index_mapping[data.size()] = i;
231 
232  // make cluster element
233  BaseFeature bf;
234  bf.setRT(exp[i].getRT());
235  const auto& pcs = exp[i].getPrecursors();
236  // keep the first Precursor
237  if (pcs.empty())
238  {
239  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Scan #") + String(i) + " does not contain any precursor information! Unable to cluster!");
240  }
241  if (pcs.size() > 1)
242  {
243  OPENMS_LOG_WARN << "More than one precursor found. Using first one!" << std::endl;
244  }
245  bf.setMZ(pcs[0].getMZ());
246  data.push_back(bf);
247  }
248  data_size = data.size();
249 
250  SpectraDistance_ llc;
251  llc.setParameters(param_.copy("precursor_method:", true));
252  SingleLinkage sl;
253  DistanceMatrix<float> dist; // will be filled
255 
256  // clustering ; threshold is implicitly at 1.0, i.e. distances of 1.0 (== similarity 0) will not be clustered
257  ch.cluster<BaseFeature, SpectraDistance_>(data, llc, sl, tree, dist);
258  }
259 
260  // extract the clusters
261  ClusterAnalyzer ca;
262  std::vector<std::vector<Size> > clusters;
263  // count number of real tree nodes (not the -1 ones):
264  Size node_count = 0;
265  for (Size ii = 0; ii < tree.size(); ++ii)
266  {
267  if (tree[ii].distance >= 1)
268  {
269  tree[ii].distance = -1; // manually set to disconnect, as SingleLinkage does not support it
270  }
271  if (tree[ii].distance != -1)
272  {
273  ++node_count;
274  }
275  }
276  ca.cut(data_size - node_count, tree, clusters);
277 
278  // convert to blocks
279  MergeBlocks spectra_to_merge;
280 
281  for (Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
282  {
283  if (clusters[i_outer].size() <= 1)
284  {
285  continue;
286  }
287  // init block with first cluster element
288  Size cl_index0 = clusters[i_outer][0];
289  spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
290  // add all other elements
291  for (Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
292  {
293  spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[clusters[i_outer][i_inner]]);
294  }
295  }
296 
297  // do it
298  mergeSpectra_(exp, spectra_to_merge, 2);
299 
300  exp.sortSpectra();
301  }
302 
311  static bool areMassesMatched(double mz1, double mz2, double tol_ppm, int max_c)
312  {
313  if (mz1 == mz2 || tol_ppm <= 0)
314  {
315  return true;
316  }
317 
318  const int min_c = 1;
319  const int max_iso_diff = 5; // maximum charge difference 5 is more than enough
320  const double max_charge_diff_ratio = 3.0; // maximum ratio between charges (large / small charge)
321 
322  for (int c1 = min_c; c1 <= max_c; ++c1)
323  {
324  double mass1 = (mz1 - Constants::PROTON_MASS_U) * c1;
325 
326  for (int c2 = min_c; c2 <= max_c; ++c2)
327  {
328  if (c1 / c2 > max_charge_diff_ratio)
329  {
330  continue;
331  }
332  if (c2 / c1 > max_charge_diff_ratio)
333  {
334  break;
335  }
336 
337  double mass2 = (mz2 - Constants::PROTON_MASS_U) * c2;
338 
339  if (fabs(mass1 - mass2) > max_iso_diff)
340  {
341  continue;
342  }
343  for (int i = -max_iso_diff; i <= max_iso_diff; ++i)
344  {
345  if (fabs(mass1 - mass2 + i * Constants::ISOTOPE_MASSDIFF_55K_U) < mass1 * tol_ppm * 1e-6)
346  {
347  return true;
348  }
349  }
350  }
351  }
352  return false;
353  }
354 
362  template <typename MapType>
363  void average(MapType& exp, const String& average_type, int ms_level = -1)
364  {
365  // MS level to be averaged
366  if (ms_level < 0)
367  {
368  ms_level = param_.getValue("average_gaussian:ms_level");
369  if (average_type == "tophat")
370  {
371  ms_level = param_.getValue("average_tophat:ms_level");
372  }
373  }
374 
375  // spectrum type (profile, centroid or automatic)
376  std::string spectrum_type = param_.getValue("average_gaussian:spectrum_type");
377  if (average_type == "tophat")
378  {
379  spectrum_type = std::string(param_.getValue("average_tophat:spectrum_type"));
380  }
381 
382  // parameters for Gaussian averaging
383  double fwhm(param_.getValue("average_gaussian:rt_FWHM"));
384  double factor = -4 * log(2.0) / (fwhm * fwhm); // numerical factor within Gaussian
385  double cutoff(param_.getValue("average_gaussian:cutoff"));
386  double precursor_mass_ppm = param_.getValue("average_gaussian:precursor_mass_tol");
387  int precursor_max_charge = param_.getValue("average_gaussian:precursor_max_charge");
388 
389  // parameters for Top-Hat averaging
390  bool unit(param_.getValue("average_tophat:rt_unit") == "scans"); // true if RT unit is 'scans', false if RT unit is 'seconds'
391  double range(param_.getValue("average_tophat:rt_range")); // range of spectra to be averaged over
392  double range_seconds = range / 2; // max. +/- <range_seconds> seconds from master spectrum
393  int range_scans = static_cast<int>(range); // in case of unit scans, the param is used as integer
394  if ((range_scans % 2) == 0)
395  {
396  ++range_scans;
397  }
398  range_scans = (range_scans - 1) / 2; // max. +/- <range_scans> scans from master spectrum
399 
400  AverageBlocks spectra_to_average_over;
401 
402  // loop over RT
403  int n(0); // spectrum index
404  int cntr(0); // spectrum counter
405  for (typename MapType::const_iterator it_rt = exp.begin(); it_rt != exp.end(); ++it_rt)
406  {
407  if (Int(it_rt->getMSLevel()) == ms_level)
408  {
409  int m; // spectrum index
410  int steps;
411  bool terminate_now;
412  typename MapType::const_iterator it_rt_2;
413 
414  // go forward (start at next downstream spectrum; the current spectrum will be covered when looking backwards)
415  steps = 0;
416  m = n + 1;
417  it_rt_2 = it_rt + 1;
418  terminate_now = false;
419  while (it_rt_2 != exp.end() && !terminate_now)
420  {
421  if (Int(it_rt_2->getMSLevel()) == ms_level)
422  {
423  bool add = true;
424  // if precursor_mass_ppm >=0, two spectra should have the same mass. otherwise it_rt_2 is skipped.
425  if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
426  it_rt_2->getPrecursors().size() > 0)
427  {
428  double mz1 = it_rt->getPrecursors()[0].getMZ();
429  double mz2 = it_rt_2->getPrecursors()[0].getMZ();
430  add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
431  }
432 
433  if (add)
434  {
435  double weight = 1;
436  if (average_type == "gaussian")
437  {
438  //factor * (rt_2 -rt)^2
439  double base = it_rt_2->getRT() - it_rt->getRT();
440  weight = std::exp(factor * base * base);
441  }
442  std::pair<Size, double> p(m, weight);
443  spectra_to_average_over[n].push_back(p);
444  }
445  ++steps;
446  }
447  if (average_type == "gaussian")
448  {
449  // Gaussian
450  double base = it_rt_2->getRT() - it_rt->getRT();
451  terminate_now = std::exp(factor * base * base) < cutoff;
452  }
453  else if (unit)
454  {
455  // Top-Hat with RT unit = scans
456  terminate_now = (steps > range_scans);
457  }
458  else
459  {
460  // Top-Hat with RT unit = seconds
461  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
462  }
463  ++m;
464  ++it_rt_2;
465  }
466 
467  // go backward
468  steps = 0;
469  m = n;
470  it_rt_2 = it_rt;
471  terminate_now = false;
472  while (it_rt_2 != exp.begin() && !terminate_now)
473  {
474  if (Int(it_rt_2->getMSLevel()) == ms_level)
475  {
476  bool add = true;
477  // if precursor_mass_ppm >=0, two spectra should have the same mass. otherwise it_rt_2 is skipped.
478  if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
479  it_rt_2->getPrecursors().size() > 0)
480  {
481  double mz1 = it_rt->getPrecursors()[0].getMZ();
482  double mz2 = it_rt_2->getPrecursors()[0].getMZ();
483  add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
484  }
485  if (add)
486  {
487  double weight = 1;
488  if (average_type == "gaussian")
489  {
490  double base = it_rt_2->getRT() - it_rt->getRT();
491  weight = std::exp(factor * base * base);
492  }
493  std::pair<Size, double> p(m, weight);
494  spectra_to_average_over[n].push_back(p);
495  }
496  ++steps;
497  }
498  if (average_type == "gaussian")
499  {
500  // Gaussian
501  double base = it_rt_2->getRT() - it_rt->getRT();
502  terminate_now = std::exp(factor * base * base) < cutoff;
503  }
504  else if (unit)
505  {
506  // Top-Hat with RT unit = scans
507  terminate_now = (steps > range_scans);
508  }
509  else
510  {
511  // Top-Hat with RT unit = seconds
512  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
513  }
514  --m;
515  --it_rt_2;
516  }
517  ++cntr;
518  }
519  ++n;
520  }
521 
522  if (cntr == 0)
523  {
524  //return;
525  throw Exception::InvalidParameter(__FILE__,
526  __LINE__,
527  OPENMS_PRETTY_FUNCTION,
528  "Input mzML does not have any spectra of MS level specified by ms_level.");
529  }
530 
531  // normalize weights
532  for (AverageBlocks::iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
533  {
534  double sum(0.0);
535  for (const auto& weight: it->second)
536  {
537  sum += weight.second;
538  }
539 
540  for (auto& weight: it->second)
541  {
542  weight.second /= sum;
543  }
544  }
545 
546  // determine type of spectral data (profile or centroided)
548  if (spectrum_type == "automatic")
549  {
550  Size idx = spectra_to_average_over.begin()->first; // index of first spectrum to be averaged
551  type = exp[idx].getType(true);
552  }
553  else if (spectrum_type == "profile")
554  {
556  }
557  else if (spectrum_type == "centroid")
558  {
560  }
561  else
562  {
563  throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION, "Spectrum type has to be one of automatic, profile or centroid.");
564  }
565 
566  // generate new spectra
567  if (type == SpectrumSettings::CENTROID)
568  {
569  averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
570  }
571  else
572  {
573  averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
574  }
575 
576  exp.sortSpectra();
577  }
578 
579  // @}
580 
581 protected:
582 
593  template <typename MapType>
594  void mergeSpectra_(MapType& exp, const MergeBlocks& spectra_to_merge, const UInt ms_level)
595  {
596  double mz_binning_width(param_.getValue("mz_binning_width"));
597  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
598 
599  // merge spectra
600  MapType merged_spectra;
601 
602  std::map<Size, Size> cluster_sizes;
603  std::set<Size> merged_indices;
604 
605  // set up alignment
606  SpectrumAlignment sas;
607  Param p;
608  p.setValue("tolerance", mz_binning_width);
609  if (!(mz_binning_unit == "Da" || mz_binning_unit == "ppm"))
610  {
611  throw Exception::IllegalSelfOperation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION); // sanity check
612  }
613 
614  p.setValue("is_relative_tolerance", mz_binning_unit == "Da" ? "false" : "true");
615  sas.setParameters(p);
616  std::vector<std::pair<Size, Size> > alignment;
617 
618  Size count_peaks_aligned(0);
619  Size count_peaks_overall(0);
620 
621  // each BLOCK
622  for (auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
623  {
624  ++cluster_sizes[it->second.size() + 1]; // for stats
625 
626  typename MapType::SpectrumType consensus_spec = exp[it->first];
627  consensus_spec.setMSLevel(ms_level);
628 
629  merged_indices.insert(it->first);
630 
631  double rt_average = consensus_spec.getRT();
632  double precursor_mz_average = 0.0;
633  Size precursor_count(0);
634  if (!consensus_spec.getPrecursors().empty())
635  {
636  precursor_mz_average = consensus_spec.getPrecursors()[0].getMZ();
637  ++precursor_count;
638  }
639 
640  count_peaks_overall += consensus_spec.size();
641 
642  String consensus_native_id = consensus_spec.getNativeID();
643 
644  // block elements
645  for (auto sit = it->second.begin(); sit != it->second.end(); ++sit)
646  {
647  consensus_spec.unify(exp[*sit]); // append meta info
648  merged_indices.insert(*sit);
649 
650  rt_average += exp[*sit].getRT();
651  if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
652  {
653  precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
654  ++precursor_count;
655  }
656 
657  // add native ID to consensus native ID, comma separated
658  consensus_native_id += ",";
659  consensus_native_id += exp[*sit].getNativeID();
660 
661  // merge data points
662  sas.getSpectrumAlignment(alignment, consensus_spec, exp[*sit]);
663  count_peaks_aligned += alignment.size();
664  count_peaks_overall += exp[*sit].size();
665 
666  Size align_index(0);
667  Size spec_b_index(0);
668 
669  // sanity check for number of peaks
670  Size spec_a = consensus_spec.size(), spec_b = exp[*sit].size(), align_size = alignment.size();
671  for (auto pit = exp[*sit].begin(); pit != exp[*sit].end(); ++pit)
672  {
673  if (alignment.empty() || alignment[align_index].second != spec_b_index)
674  // ... add unaligned peak
675  {
676  consensus_spec.push_back(*pit);
677  }
678  // or add aligned peak height to ALL corresponding existing peaks
679  else
680  {
681  Size counter(0);
682  Size copy_of_align_index(align_index);
683 
684  while (!alignment.empty() &&
685  copy_of_align_index < alignment.size() &&
686  alignment[copy_of_align_index].second == spec_b_index)
687  {
688  ++copy_of_align_index;
689  ++counter;
690  } // Count the number of peaks in a which correspond to a single b peak.
691 
692  while (!alignment.empty() &&
693  align_index < alignment.size() &&
694  alignment[align_index].second == spec_b_index)
695  {
696  consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
697  (pit->getIntensity() / (double)counter)); // add the intensity divided by the number of peaks
698  ++align_index; // this aligned peak was explained, wait for next aligned peak ...
699  if (align_index == alignment.size())
700  {
701  alignment.clear(); // end reached -> avoid going into this block again
702  }
703  }
704  align_size = align_size + 1 - counter; //Decrease align_size by number of
705  }
706  ++spec_b_index;
707  }
708  consensus_spec.sortByPosition(); // sort, otherwise next alignment will fail
709  if (spec_a + spec_b - align_size != consensus_spec.size())
710  {
711  OPENMS_LOG_WARN << "wrong number of features after merge. Expected: " << spec_a + spec_b - align_size << " got: " << consensus_spec.size() << "\n";
712  }
713  }
714  rt_average /= it->second.size() + 1;
715  consensus_spec.setRT(rt_average);
716 
717  // set new consensus native ID
718  consensus_spec.setNativeID(consensus_native_id);
719 
720  if (ms_level >= 2)
721  {
722  if (precursor_count)
723  {
724  precursor_mz_average /= precursor_count;
725  }
726  auto& pcs = consensus_spec.getPrecursors();
727  pcs.resize(1);
728  pcs[0].setMZ(precursor_mz_average);
729  consensus_spec.setPrecursors(pcs);
730  }
731 
732  if (consensus_spec.empty())
733  {
734  continue;
735  }
736  else
737  {
738  merged_spectra.addSpectrum(std::move(consensus_spec));
739  }
740  }
741 
742  OPENMS_LOG_INFO << "Cluster sizes:\n";
743  for (const auto& cl_size : cluster_sizes)
744  {
745  OPENMS_LOG_INFO << " size " << cl_size.first << ": " << cl_size.second << "x\n";
746  }
747 
748  char buffer[200];
749  sprintf(buffer, "%d/%d (%.2f %%) of blocked spectra", (int)count_peaks_aligned,
750  (int)count_peaks_overall, float(count_peaks_aligned) / float(count_peaks_overall) * 100.);
751  OPENMS_LOG_INFO << "Number of merged peaks: " << String(buffer) << "\n";
752 
753  // remove all spectra that were within a cluster
754  typename MapType::SpectrumType empty_spec;
755  MapType exp_tmp;
756  for (Size i = 0; i < exp.size(); ++i)
757  {
758  if (merged_indices.count(i) == 0) // save unclustered ones
759  {
760  exp_tmp.addSpectrum(exp[i]);
761  exp[i] = empty_spec;
762  }
763  }
764 
765  //Meta_Data will not be cleared
766  exp.clear(false);
767  exp.getSpectra().insert(exp.end(), std::make_move_iterator(exp_tmp.begin()),
768  std::make_move_iterator(exp_tmp.end()));
769 
770  // ... and add consensus spectra
771  exp.getSpectra().insert(exp.end(), std::make_move_iterator(merged_spectra.begin()),
772  std::make_move_iterator(merged_spectra.end()));
773 
774  }
775 
796  template <typename MapType>
797  void averageProfileSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
798  {
799  MapType exp_tmp; // temporary experiment for averaged spectra
800 
801  double mz_binning_width(param_.getValue("mz_binning_width"));
802  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
803 
804  unsigned progress = 0;
805  std::stringstream progress_message;
806  progress_message << "averaging profile spectra of MS level " << ms_level;
807  startProgress(0, spectra_to_average_over.size(), progress_message.str());
808 
809  // loop over blocks
810  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
811  {
812  setProgress(++progress);
813 
814  // loop over spectra in blocks
815  std::vector<double> mz_positions_all; // m/z positions from all spectra
816  for (const auto& spec : it->second)
817  {
818  // loop over m/z positions
819  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[spec.first].begin(); it_mz < exp[spec.first].end(); ++it_mz)
820  {
821  mz_positions_all.push_back(it_mz->getMZ());
822  }
823  }
824 
825  sort(mz_positions_all.begin(), mz_positions_all.end());
826 
827  std::vector<double> mz_positions; // positions at which the averaged spectrum should be evaluated
828  std::vector<double> intensities;
829  double last_mz = std::numeric_limits<double>::min(); // last m/z position pushed through from mz_position to mz_position_2
830  double delta_mz(mz_binning_width); // for m/z unit Da
831  for (const auto mz_pos : mz_positions_all)
832  {
833  if (mz_binning_unit == "ppm")
834  {
835  delta_mz = mz_binning_width * mz_pos / 1000000;
836  }
837 
838  if ((mz_pos - last_mz) > delta_mz)
839  {
840  mz_positions.push_back(mz_pos);
841  intensities.push_back(0.0);
842  last_mz = mz_pos;
843  }
844  }
845 
846  // loop over spectra in blocks
847  for (const auto& spec : it->second)
848  {
849  SplineInterpolatedPeaks spline(exp[spec.first]);
851 
852  // loop over m/z positions
853  for (Size i = spline.getPosMin(); i < mz_positions.size(); ++i)
854  {
855  if ((spline.getPosMin() < mz_positions[i]) && (mz_positions[i] < spline.getPosMax()))
856  {
857  intensities[i] += nav.eval(mz_positions[i]) * (spec.second); // spline-interpolated intensity * weight
858  }
859  }
860  }
861 
862  // update spectrum
863  typename MapType::SpectrumType average_spec = exp[it->first];
864  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
865 
866  // refill spectrum
867  for (Size i = 0; i < mz_positions.size(); ++i)
868  {
869  typename MapType::PeakType peak;
870  peak.setMZ(mz_positions[i]);
871  peak.setIntensity(intensities[i]);
872  average_spec.push_back(peak);
873  }
874 
875  // store spectrum temporarily
876  exp_tmp.addSpectrum(std::move(average_spec));
877  }
878 
879  endProgress();
880 
881  // loop over blocks
882  int n(0);
883  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
884  {
885  exp[it->first] = exp_tmp[n];
886  ++n;
887  }
888  }
889 
905  template <typename MapType>
906  void averageCentroidSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
907  {
908  MapType exp_tmp; // temporary experiment for averaged spectra
909 
910  double mz_binning_width(param_.getValue("mz_binning_width"));
911  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
912 
913  unsigned progress = 0;
914  ProgressLogger logger;
915  std::stringstream progress_message;
916  progress_message << "averaging centroid spectra of MS level " << ms_level;
917  logger.startProgress(0, spectra_to_average_over.size(), progress_message.str());
918 
919  // loop over blocks
920  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
921  {
922  logger.setProgress(++progress);
923 
924  // collect peaks from all spectra
925  // loop over spectra in blocks
926  std::vector<std::pair<double, double> > mz_intensity_all; // m/z positions and peak intensities from all spectra
927  for (const auto& weightedMZ: it->second)
928  {
929  // loop over m/z positions
930  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[weightedMZ.first].begin(); it_mz < exp[weightedMZ.first].end(); ++it_mz)
931  {
932  std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * weightedMZ.second)); // m/z, intensity * weight
933  mz_intensity_all.push_back(mz_intensity);
934  }
935  }
936 
937  sort(mz_intensity_all.begin(), mz_intensity_all.end());
938 
939  // generate new spectrum
940  std::vector<double> mz_new;
941  std::vector<double> intensity_new;
942  double last_mz = std::numeric_limits<double>::min();
943  double delta_mz = mz_binning_width;
944  double sum_mz(0);
945  double sum_intensity(0);
946  Size count(0);
947  for (const auto& mz_pos : mz_intensity_all)
948  {
949  if (mz_binning_unit == "ppm")
950  {
951  delta_mz = mz_binning_width * (mz_pos.first) / 1000000;
952  }
953 
954  if (((mz_pos.first - last_mz) > delta_mz) && (count > 0))
955  {
956  mz_new.push_back(sum_mz / count);
957  intensity_new.push_back(sum_intensity); // intensities already weighted
958 
959  sum_mz = 0;
960  sum_intensity = 0;
961 
962  last_mz = mz_pos.first;
963  count = 0;
964  }
965 
966  sum_mz += mz_pos.first;
967  sum_intensity += mz_pos.second;
968  ++count;
969  }
970  if (count > 0)
971  {
972  mz_new.push_back(sum_mz / count);
973  intensity_new.push_back(sum_intensity); // intensities already weighted
974  }
975 
976  // update spectrum
977  typename MapType::SpectrumType average_spec = exp[it->first];
978  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
979 
980  // refill spectrum
981  for (Size i = 0; i < mz_new.size(); ++i)
982  {
983  typename MapType::PeakType peak;
984  peak.setMZ(mz_new[i]);
985  peak.setIntensity(intensity_new[i]);
986  average_spec.push_back(peak);
987  }
988 
989  // store spectrum temporarily
990  exp_tmp.addSpectrum(std::move(average_spec));
991  }
992 
993  logger.endProgress();
994 
995  // loop over blocks
996  int n(0);
997  for (const auto& spectral_index : spectra_to_average_over)
998  {
999  exp[spectral_index.first] = std::move(exp_tmp[n]);
1000  ++n;
1001  }
1002  }
1003  };
1004 }
A more convenient string class.
Definition: String.h:58
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:107
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:204
void sortByPosition()
Lexicographically sorts the peaks by their position.
std::map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition: SpectraMerger.h:127
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode&#39;s)
Definition: ClusterAnalyzer.h:51
void endProgress() const
Ends the progress display.
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:70
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:594
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:67
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
#define OPENMS_LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:470
SplineInterpolatedPeaks::Navigator getNavigator(double scaling=0.7)
returns an iterator for access of spline packages
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
profile data
Definition: SpectrumSettings.h:74
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:160
Iterator begin()
Definition: MSExperiment.h:182
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:63
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Base::const_iterator const_iterator
Definition: MSExperiment.h:117
Size size() const
The number of spectra.
Definition: MSExperiment.h:147
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:119
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:110
void setParameters(const Param &param)
Sets the parameters.
const double ISOTOPE_MASSDIFF_55K_U
Definition: Constants.h:126
SpectraDistance_()
Definition: SpectraMerger.h:80
A basic LC-MS feature.
Definition: BaseFeature.h:58
const double PROTON_MASS_U
Definition: Constants.h:116
Iterator end()
Definition: MSExperiment.h:192
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method...
Definition: SpectraMerger.h:88
The representation of a 1D spectrum.
Definition: MSSpectrum.h:66
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:216
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:198
double getPosMax() const
returns the maximum m/z (or RT) of the spectrum
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition: SpectrumAlignment.h:88
double rt_max_
Definition: SpectraMerger.h:119
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:101
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:94
double eval(double pos)
returns spline interpolated intensity at this position (fast access since we can start search from la...
void setProgress(SignedSize value) const
Sets the current progress.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:53
void setMSLevel(UInt ms_level)
Sets the MS level.
Definition: SpectraMerger.h:76
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:339
static bool areMassesMatched(double mz1, double mz2, double tol_ppm, int max_c)
check if the first and second mzs might be from the same mass
Definition: SpectraMerger.h:311
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via &#39;tolerance&#39; parameter) ali...
Definition: SpectrumAlignment.h:67
void average(MapType &exp, const String &average_type, int ms_level=-1)
average over neighbouring spectra
Definition: SpectraMerger.h:363
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Management and storage of parameters / INI files.
Definition: Param.h:69
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition: SpectraMerger.h:906
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:70
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:210
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:55
double getPosMin() const
returns the minimum m/z (or RT) of the spectrum
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
Data structure for spline interpolation of MS1 spectra and chromatograms.
Definition: SplineInterpolatedPeaks.h:59
Illegal self operation exception.
Definition: Exception.h:370
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:105
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
double mz_max_
Definition: SpectraMerger.h:120
iterator class for access of spline packages
Definition: SplineInterpolatedPeaks.h:109
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
void clear(bool clear_meta_data)
Clears all data and meta data.
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:52
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition: SpectraMerger.h:797
void setNativeID(const String &native_id)
sets the native identifier for the spectrum, used by the acquisition software.
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Method to calculate a partition resulting from a certain step in clustering given by the number of cl...
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:211
const String & getNativeID() const
returns the native identifier for the spectrum, used by the acquisition software. ...
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:63
double getRT() const
centroid data or stick data
Definition: SpectrumSettings.h:73
int Int
Signed integer type.
Definition: Types.h:102
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:112
std::map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition: SpectraMerger.h:130
Not all required information provided.
Definition: Exception.h:186
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:465