OpenMS  3.0.0
SpectraMerger.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Chris Bielow, Andreas Bertsch, Lars Nilse $
33 // --------------------------------------------------------------------------
34 //
35 #pragma once
36 
49 #include <vector>
50 
51 namespace OpenMS
52 {
53 
62  class OPENMS_DLLAPI SpectraMerger :
63  public DefaultParamHandler, public ProgressLogger
64  {
65 
66 protected:
67 
68  /* Determine distance between two spectra
69 
70  Distance is determined as
71 
72  (d_rt/rt_max_ + d_mz/mz_max_) / 2
73 
74  */
76  public DefaultParamHandler
77  {
78 public:
80  DefaultParamHandler("SpectraDistance")
81  {
82  defaults_.setValue("rt_tolerance", 10.0, "Maximal RT distance (in [s]) for two spectra's precursors.");
83  defaults_.setValue("mz_tolerance", 1.0, "Maximal m/z distance (in Da) for two spectra's precursors.");
84  defaultsToParam_(); // calls updateMembers_
85  }
86 
87  void updateMembers_() override
88  {
89  rt_max_ = (double) param_.getValue("rt_tolerance");
90  mz_max_ = (double) param_.getValue("mz_tolerance");
91  }
92 
93  double getSimilarity(const double d_rt, const double d_mz) const
94  {
95  // 1 - distance
96  return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
97  }
98 
99  // measure of SIMILARITY (not distance, i.e. 1-distance)!!
100  double operator()(const BaseFeature& first, const BaseFeature& second) const
101  {
102  // get RT distance:
103  double d_rt = fabs(first.getRT() - second.getRT());
104  double d_mz = fabs(first.getMZ() - second.getMZ());
105 
106  if (d_rt > rt_max_ || d_mz > mz_max_)
107  {
108  return 0;
109  }
110 
111  // calculate similarity (0-1):
112  double sim = getSimilarity(d_rt, d_mz);
113 
114  return sim;
115  }
116 
117 protected:
118  double rt_max_;
119  double mz_max_;
120 
121  }; // end of SpectraDistance
122 
123 public:
124 
126  typedef std::map<Size, std::vector<Size> > MergeBlocks;
127 
129  typedef std::map<Size, std::vector<std::pair<Size, double> > > AverageBlocks;
130 
131  // @name Constructors and Destructors
132  // @{
135 
137  SpectraMerger(const SpectraMerger& source);
138 
140  SpectraMerger(SpectraMerger&& source) = default;
141 
143  ~SpectraMerger() override;
144  // @}
145 
146  // @name Operators
147  // @{
150 
152  SpectraMerger& operator=(SpectraMerger&& source) = default;
153  // @}
154 
155  // @name Merging functions
156  // @{
158  template <typename MapType>
160  {
161  IntList ms_levels = param_.getValue("block_method:ms_levels");
162  Int rt_block_size(param_.getValue("block_method:rt_block_size"));
163  double rt_max_length = (param_.getValue("block_method:rt_max_length"));
164 
165  if (rt_max_length == 0) // no rt restriction set?
166  {
167  rt_max_length = (std::numeric_limits<double>::max)(); // set max rt span to very large value
168  }
169 
170  for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
171  {
172  MergeBlocks spectra_to_merge;
173  Size idx_block(0);
174  SignedSize block_size_count(rt_block_size + 1);
175  Size idx_spectrum(0);
176  for (typename MapType::const_iterator it1 = exp.begin(); it1 != exp.end(); ++it1)
177  {
178  if (Int(it1->getMSLevel()) == *it_mslevel)
179  {
180  // block full if it contains a maximum number of scans or if maximum rt length spanned
181  if (++block_size_count >= rt_block_size ||
182  exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
183  {
184  block_size_count = 0;
185  idx_block = idx_spectrum;
186  }
187  else
188  {
189  spectra_to_merge[idx_block].push_back(idx_spectrum);
190  }
191  }
192 
193  ++idx_spectrum;
194  }
195  // check if last block had sacrifice spectra
196  if (block_size_count == 0) //block just got initialized
197  {
198  spectra_to_merge[idx_block] = std::vector<Size>();
199  }
200 
201  // merge spectra, remove all old MS spectra and add new consensus spectra
202  mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
203  }
204 
205  exp.sortSpectra();
206  }
207 
209  template <typename MapType>
211  {
212 
213  // convert spectra's precursors to clusterizable data
214  Size data_size;
215  std::vector<BinaryTreeNode> tree;
216  std::map<Size, Size> index_mapping;
217  // local scope to save memory - we do not need the clustering stuff later
218  {
219  std::vector<BaseFeature> data;
220 
221  for (Size i = 0; i < exp.size(); ++i)
222  {
223  if (exp[i].getMSLevel() != 2)
224  {
225  continue;
226  }
227 
228  // remember which index in distance data ==> experiment index
229  index_mapping[data.size()] = i;
230 
231  // make cluster element
232  BaseFeature bf;
233  bf.setRT(exp[i].getRT());
234  const auto& pcs = exp[i].getPrecursors();
235  // keep the first Precursor
236  if (pcs.empty())
237  {
238  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Scan #") + String(i) + " does not contain any precursor information! Unable to cluster!");
239  }
240  if (pcs.size() > 1)
241  {
242  OPENMS_LOG_WARN << "More than one precursor found. Using first one!" << std::endl;
243  }
244  bf.setMZ(pcs[0].getMZ());
245  data.push_back(bf);
246  }
247  data_size = data.size();
248 
249  SpectraDistance_ llc;
250  llc.setParameters(param_.copy("precursor_method:", true));
251  SingleLinkage sl;
252  DistanceMatrix<float> dist; // will be filled
254 
255  //ch.setThreshold(0.99);
256  // clustering ; threshold is implicitly at 1.0, i.e. distances of 1.0 (== similarity 0) will not be clustered
257  ch.cluster<BaseFeature, SpectraDistance_>(data, llc, sl, tree, dist);
258  }
259 
260  // extract the clusters
261  ClusterAnalyzer ca;
262  std::vector<std::vector<Size> > clusters;
263  // count number of real tree nodes (not the -1 ones):
264  Size node_count = 0;
265  for (Size ii = 0; ii < tree.size(); ++ii)
266  {
267  if (tree[ii].distance >= 1)
268  {
269  tree[ii].distance = -1; // manually set to disconnect, as SingleLinkage does not support it
270  }
271  if (tree[ii].distance != -1)
272  {
273  ++node_count;
274  }
275  }
276  ca.cut(data_size - node_count, tree, clusters);
277 
278  //std::cerr << "Treesize: " << (tree.size()+1) << " #clusters: " << clusters.size() << std::endl;
279  //std::cerr << "tree:\n" << ca.newickTree(tree, true) << "\n";
280 
281  // convert to blocks
282  MergeBlocks spectra_to_merge;
283 
284  for (Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
285  {
286  if (clusters[i_outer].size() <= 1)
287  {
288  continue;
289  }
290  // init block with first cluster element
291  Size cl_index0 = clusters[i_outer][0];
292  spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
293  // add all other elements
294  for (Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
295  {
296  spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[clusters[i_outer][i_inner]]);
297  }
298  }
299 
300  // do it
301  mergeSpectra_(exp, spectra_to_merge, 2);
302 
303  exp.sortSpectra();
304  }
305 
314  static bool areMassesMatched(double mz1, double mz2, double tol_ppm, int max_c)
315  {
316  if (mz1 == mz2 || tol_ppm <= 0)
317  {
318  return true;
319  }
320 
321  const int min_c = 1;
322  const int max_iso_diff = 5; // maximum charge difference 5 is more than enough
323  const double max_charge_diff_ratio = 3.0; // maximum ratio between charges (large / small charge)
324 
325  for (int c1 = min_c; c1 <= max_c; ++c1)
326  {
327  double mass1 = (mz1 - Constants::PROTON_MASS_U) * c1;
328 
329  for (int c2 = min_c; c2 <= max_c; ++c2)
330  {
331  if (c1 / c2 > max_charge_diff_ratio)
332  {
333  continue;
334  }
335  if (c2 / c1 > max_charge_diff_ratio)
336  {
337  break;
338  }
339 
340  double mass2 = (mz2 - Constants::PROTON_MASS_U) * c2;
341 
342  if (fabs(mass1 - mass2) > max_iso_diff)
343  {
344  continue;
345  }
346  for (int i = -max_iso_diff; i <= max_iso_diff; ++i)
347  {
348  if (fabs(mass1 - mass2 + i * Constants::ISOTOPE_MASSDIFF_55K_U) < mass1 * tol_ppm * 1e-6)
349  {
350  return true;
351  }
352  }
353  }
354  }
355  return false;
356  }
357 
365  template <typename MapType>
366  void average(MapType& exp, const String& average_type, int ms_level = -1)
367  {
368  // MS level to be averaged
369  if (ms_level < 0)
370  {
371  ms_level = param_.getValue("average_gaussian:ms_level");
372  if (average_type == "tophat")
373  {
374  ms_level = param_.getValue("average_tophat:ms_level");
375  }
376  }
377 
378  // spectrum type (profile, centroid or automatic)
379  std::string spectrum_type = param_.getValue("average_gaussian:spectrum_type");
380  if (average_type == "tophat")
381  {
382  spectrum_type = std::string(param_.getValue("average_tophat:spectrum_type"));
383  }
384 
385  // parameters for Gaussian averaging
386  double fwhm(param_.getValue("average_gaussian:rt_FWHM"));
387  double factor = -4 * log(2.0) / (fwhm * fwhm); // numerical factor within Gaussian
388  double cutoff(param_.getValue("average_gaussian:cutoff"));
389  double precursor_mass_ppm = param_.getValue("average_gaussian:precursor_mass_tol");
390  int precursor_max_charge = param_.getValue("average_gaussian:precursor_max_charge");
391 
392  // parameters for Top-Hat averaging
393  bool unit(param_.getValue("average_tophat:rt_unit") == "scans"); // true if RT unit is 'scans', false if RT unit is 'seconds'
394  double range(param_.getValue("average_tophat:rt_range")); // range of spectra to be averaged over
395  double range_seconds = range / 2; // max. +/- <range_seconds> seconds from master spectrum
396  int range_scans = static_cast<int>(range); // in case of unit scans, the param is used as integer
397  if ((range_scans % 2) == 0)
398  {
399  ++range_scans;
400  }
401  range_scans = (range_scans - 1) / 2; // max. +/- <range_scans> scans from master spectrum
402 
403  AverageBlocks spectra_to_average_over;
404 
405  // loop over RT
406  int n(0); // spectrum index
407  int cntr(0); // spectrum counter
408  for (typename MapType::const_iterator it_rt = exp.begin(); it_rt != exp.end(); ++it_rt)
409  {
410  if (Int(it_rt->getMSLevel()) == ms_level)
411  {
412  int m; // spectrum index
413  int steps;
414  bool terminate_now;
415  typename MapType::const_iterator it_rt_2;
416 
417  // go forward (start at next downstream spectrum; the current spectrum will be covered when looking backwards)
418  steps = 0;
419  m = n + 1;
420  it_rt_2 = it_rt + 1;
421  terminate_now = false;
422  while (it_rt_2 != exp.end() && !terminate_now)
423  {
424  if (Int(it_rt_2->getMSLevel()) == ms_level)
425  {
426  bool add = true;
427  // if precursor_mass_ppm >=0, two spectra should have the same mass. otherwise it_rt_2 is skipped.
428  if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
429  it_rt_2->getPrecursors().size() > 0)
430  {
431  double mz1 = it_rt->getPrecursors()[0].getMZ();
432  double mz2 = it_rt_2->getPrecursors()[0].getMZ();
433  add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
434  }
435 
436  if (add)
437  {
438  double weight = 1;
439  if (average_type == "gaussian")
440  {
441  //factor * (rt_2 -rt)^2
442  double base = it_rt_2->getRT() - it_rt->getRT();
443  weight = std::exp(factor * base * base);
444  }
445  std::pair<Size, double> p(m, weight);
446  spectra_to_average_over[n].push_back(p);
447  }
448  ++steps;
449  }
450  if (average_type == "gaussian")
451  {
452  // Gaussian
453  double base = it_rt_2->getRT() - it_rt->getRT();
454  terminate_now = std::exp(factor * base * base) < cutoff;
455  }
456  else if (unit)
457  {
458  // Top-Hat with RT unit = scans
459  terminate_now = (steps > range_scans);
460  }
461  else
462  {
463  // Top-Hat with RT unit = seconds
464  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
465  }
466  ++m;
467  ++it_rt_2;
468  }
469 
470  // go backward
471  steps = 0;
472  m = n;
473  it_rt_2 = it_rt;
474  terminate_now = false;
475  while (it_rt_2 != exp.begin() && !terminate_now)
476  {
477  if (Int(it_rt_2->getMSLevel()) == ms_level)
478  {
479  bool add = true;
480  // if precursor_mass_ppm >=0, two spectra should have the same mass. otherwise it_rt_2 is skipped.
481  if (precursor_mass_ppm >= 0 && ms_level >= 2 && it_rt->getPrecursors().size() > 0 &&
482  it_rt_2->getPrecursors().size() > 0)
483  {
484  double mz1 = it_rt->getPrecursors()[0].getMZ();
485  double mz2 = it_rt_2->getPrecursors()[0].getMZ();
486  add = areMassesMatched(mz1, mz2, precursor_mass_ppm, precursor_max_charge);
487  }
488  if (add)
489  {
490  double weight = 1;
491  if (average_type == "gaussian")
492  {
493  double base = it_rt_2->getRT() - it_rt->getRT();
494  weight = std::exp(factor * base * base);
495  }
496  std::pair<Size, double> p(m, weight);
497  spectra_to_average_over[n].push_back(p);
498  }
499  ++steps;
500  }
501  if (average_type == "gaussian")
502  {
503  // Gaussian
504  double base = it_rt_2->getRT() - it_rt->getRT();
505  terminate_now = std::exp(factor * base * base) < cutoff;
506  }
507  else if (unit)
508  {
509  // Top-Hat with RT unit = scans
510  terminate_now = (steps > range_scans);
511  }
512  else
513  {
514  // Top-Hat with RT unit = seconds
515  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
516  }
517  --m;
518  --it_rt_2;
519  }
520  ++cntr;
521  }
522  ++n;
523  }
524 
525  if (cntr == 0)
526  {
527  //return;
528  throw Exception::InvalidParameter(__FILE__,
529  __LINE__,
530  OPENMS_PRETTY_FUNCTION,
531  "Input mzML does not have any spectra of MS level specified by ms_level.");
532  }
533 
534  // normalize weights
535  for (AverageBlocks::iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
536  {
537  double sum(0.0);
538  for (const auto& weight: it->second)
539  {
540  sum += weight.second;
541  }
542 
543  for (auto& weight: it->second)
544  {
545  weight.second /= sum;
546  }
547  }
548 
549  // determine type of spectral data (profile or centroided)
551  if (spectrum_type == "automatic")
552  {
553  Size idx = spectra_to_average_over.begin()->first; // index of first spectrum to be averaged
554  type = exp[idx].getType(true);
555  }
556  else if (spectrum_type == "profile")
557  {
559  }
560  else if (spectrum_type == "centroid")
561  {
563  }
564  else
565  {
566  throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION, "Spectrum type has to be one of automatic, profile or centroid.");
567  }
568 
569  // generate new spectra
570  if (type == SpectrumSettings::CENTROID)
571  {
572  averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
573  }
574  else
575  {
576  averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
577  }
578 
579  exp.sortSpectra();
580  }
581 
582  // @}
583 
584 protected:
585 
596  template <typename MapType>
597  void mergeSpectra_(MapType& exp, const MergeBlocks& spectra_to_merge, const UInt ms_level)
598  {
599  double mz_binning_width(param_.getValue("mz_binning_width"));
600  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
601 
602  // merge spectra
603  MapType merged_spectra;
604 
605  std::map<Size, Size> cluster_sizes;
606  std::set<Size> merged_indices;
607 
608  // set up alignment
609  SpectrumAlignment sas;
610  Param p;
611  p.setValue("tolerance", mz_binning_width);
612  if (!(mz_binning_unit == "Da" || mz_binning_unit == "ppm"))
613  {
614  throw Exception::IllegalSelfOperation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION); // sanity check
615  }
616 
617  p.setValue("is_relative_tolerance", mz_binning_unit == "Da" ? "false" : "true");
618  sas.setParameters(p);
619  std::vector<std::pair<Size, Size> > alignment;
620 
621  Size count_peaks_aligned(0);
622  Size count_peaks_overall(0);
623 
624  // each BLOCK
625  for (auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
626  {
627  ++cluster_sizes[it->second.size() + 1]; // for stats
628 
629  typename MapType::SpectrumType consensus_spec = exp[it->first];
630  consensus_spec.setMSLevel(ms_level);
631 
632  //consensus_spec.unify(exp[it->first]); // append meta info
633  merged_indices.insert(it->first);
634 
635  //typename MapType::SpectrumType all_peaks = exp[it->first];
636  double rt_average = consensus_spec.getRT();
637  double precursor_mz_average = 0.0;
638  Size precursor_count(0);
639  if (!consensus_spec.getPrecursors().empty())
640  {
641  precursor_mz_average = consensus_spec.getPrecursors()[0].getMZ();
642  ++precursor_count;
643  }
644 
645  count_peaks_overall += consensus_spec.size();
646 
647  // block elements
648  for (auto sit = it->second.begin(); sit != it->second.end(); ++sit)
649  {
650  consensus_spec.unify(exp[*sit]); // append meta info
651  merged_indices.insert(*sit);
652 
653  rt_average += exp[*sit].getRT();
654  if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
655  {
656  precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
657  ++precursor_count;
658  }
659 
660  // merge data points
661  sas.getSpectrumAlignment(alignment, consensus_spec, exp[*sit]);
662  //std::cerr << "alignment of " << it->first << " with " << *sit << " yielded " << alignment.size() << " common peaks!\n";
663  count_peaks_aligned += alignment.size();
664  count_peaks_overall += exp[*sit].size();
665 
666  Size align_index(0);
667  Size spec_b_index(0);
668 
669  // sanity check for number of peaks
670  Size spec_a = consensus_spec.size(), spec_b = exp[*sit].size(), align_size = alignment.size();
671  for (auto pit = exp[*sit].begin(); pit != exp[*sit].end(); ++pit)
672  {
673  if (alignment.empty() || alignment[align_index].second != spec_b_index)
674  // ... add unaligned peak
675  {
676  consensus_spec.push_back(*pit);
677  }
678  // or add aligned peak height to ALL corresponding existing peaks
679  else
680  {
681  Size counter(0);
682  Size copy_of_align_index(align_index);
683 
684  while (!alignment.empty() &&
685  copy_of_align_index < alignment.size() &&
686  alignment[copy_of_align_index].second == spec_b_index)
687  {
688  ++copy_of_align_index;
689  ++counter;
690  } // Count the number of peaks in a which correspond to a single b peak.
691 
692  while (!alignment.empty() &&
693  align_index < alignment.size() &&
694  alignment[align_index].second == spec_b_index)
695  {
696  consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
697  (pit->getIntensity() / (double)counter)); // add the intensity divided by the number of peaks
698  ++align_index; // this aligned peak was explained, wait for next aligned peak ...
699  if (align_index == alignment.size())
700  {
701  alignment.clear(); // end reached -> avoid going into this block again
702  }
703  }
704  align_size = align_size + 1 - counter; //Decrease align_size by number of
705  }
706  ++spec_b_index;
707  }
708  consensus_spec.sortByPosition(); // sort, otherwise next alignment will fail
709  if (spec_a + spec_b - align_size != consensus_spec.size())
710  {
711  OPENMS_LOG_WARN << "wrong number of features after merge. Expected: " << spec_a + spec_b - align_size << " got: " << consensus_spec.size() << "\n";
712  }
713  }
714  rt_average /= it->second.size() + 1;
715  consensus_spec.setRT(rt_average);
716 
717  if (ms_level >= 2)
718  {
719  if (precursor_count)
720  {
721  precursor_mz_average /= precursor_count;
722  }
723  auto& pcs = consensus_spec.getPrecursors();
724  //if (pcs.size()>1) OPENMS_LOG_WARN << "Removing excessive precursors - leaving only one per MS2 spectrum.\n";
725  pcs.resize(1);
726  pcs[0].setMZ(precursor_mz_average);
727  consensus_spec.setPrecursors(pcs);
728  }
729 
730  if (consensus_spec.empty())
731  {
732  continue;
733  }
734  else
735  {
736  merged_spectra.addSpectrum(std::move(consensus_spec));
737  }
738  }
739 
740  OPENMS_LOG_INFO << "Cluster sizes:\n";
741  for (const auto& cl_size : cluster_sizes)
742  {
743  OPENMS_LOG_INFO << " size " << cl_size.first << ": " << cl_size.second << "x\n";
744  }
745 
746  char buffer[200];
747  sprintf(buffer, "%d/%d (%.2f %%) of blocked spectra", (int)count_peaks_aligned,
748  (int)count_peaks_overall, float(count_peaks_aligned) / float(count_peaks_overall) * 100.);
749  OPENMS_LOG_INFO << "Number of merged peaks: " << String(buffer) << "\n";
750 
751  // remove all spectra that were within a cluster
752  typename MapType::SpectrumType empty_spec;
753  MapType exp_tmp;
754  for (Size i = 0; i < exp.size(); ++i)
755  {
756  if (merged_indices.count(i) == 0) // save unclustered ones
757  {
758  exp_tmp.addSpectrum(exp[i]);
759  exp[i] = empty_spec;
760  }
761  }
762 
763  //typedef std::vector<typename MapType::SpectrumType> Base;
764  //exp.Base::operator=(exp_tmp);
765  //Meta_Data will not be cleared
766  exp.clear(false);
767  exp.getSpectra().insert(exp.end(), std::make_move_iterator(exp_tmp.begin()),
768  std::make_move_iterator(exp_tmp.end()));
769 
770  // exp.erase(remove_if(exp.begin(), exp.end(), InMSLevelRange<typename MapType::SpectrumType>(ListUtils::create<int>(String(ms_level)), false)), exp.end());
771 
772  // ... and add consensus spectra
773  exp.getSpectra().insert(exp.end(), std::make_move_iterator(merged_spectra.begin()),
774  std::make_move_iterator(merged_spectra.end()));
775 
776  }
777 
798  template <typename MapType>
799  void averageProfileSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
800  {
801  MapType exp_tmp; // temporary experiment for averaged spectra
802 
803  double mz_binning_width(param_.getValue("mz_binning_width"));
804  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
805 
806  unsigned progress = 0;
807  std::stringstream progress_message;
808  progress_message << "averaging profile spectra of MS level " << ms_level;
809  startProgress(0, spectra_to_average_over.size(), progress_message.str());
810 
811  // loop over blocks
812  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
813  {
814  setProgress(++progress);
815 
816  // loop over spectra in blocks
817  std::vector<double> mz_positions_all; // m/z positions from all spectra
818  for (const auto& spec : it->second)
819  {
820  // loop over m/z positions
821  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[spec.first].begin(); it_mz < exp[spec.first].end(); ++it_mz)
822  {
823  mz_positions_all.push_back(it_mz->getMZ());
824  }
825  }
826 
827  sort(mz_positions_all.begin(), mz_positions_all.end());
828 
829  std::vector<double> mz_positions; // positions at which the averaged spectrum should be evaluated
830  std::vector<double> intensities;
831  double last_mz = std::numeric_limits<double>::min(); // last m/z position pushed through from mz_position to mz_position_2
832  double delta_mz(mz_binning_width); // for m/z unit Da
833  for (const auto mz_pos : mz_positions_all)
834  {
835  if (mz_binning_unit == "ppm")
836  {
837  delta_mz = mz_binning_width * mz_pos / 1000000;
838  }
839 
840  if ((mz_pos - last_mz) > delta_mz)
841  {
842  mz_positions.push_back(mz_pos);
843  intensities.push_back(0.0);
844  last_mz = mz_pos;
845  }
846  }
847 
848  // loop over spectra in blocks
849  for (const auto& spec : it->second)
850  {
851  SplineInterpolatedPeaks spline(exp[spec.first]);
853 
854  // loop over m/z positions
855  for (Size i = spline.getPosMin(); i < mz_positions.size(); ++i)
856  {
857  if ((spline.getPosMin() < mz_positions[i]) && (mz_positions[i] < spline.getPosMax()))
858  {
859  intensities[i] += nav.eval(mz_positions[i]) * (spec.second); // spline-interpolated intensity * weight
860  }
861  }
862  }
863 
864  // update spectrum
865  typename MapType::SpectrumType average_spec = exp[it->first];
866  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
867  //average_spec.setMSLevel(ms_level);
868 
869  // refill spectrum
870  for (Size i = 0; i < mz_positions.size(); ++i)
871  {
872  typename MapType::PeakType peak;
873  peak.setMZ(mz_positions[i]);
874  peak.setIntensity(intensities[i]);
875  average_spec.push_back(peak);
876  }
877 
878  // store spectrum temporarily
879  exp_tmp.addSpectrum(std::move(average_spec));
880  }
881 
882  endProgress();
883 
884  // loop over blocks
885  int n(0);
886  //typename MapType::SpectrumType empty_spec;
887  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
888  {
889  exp[it->first] = exp_tmp[n];
890  //exp_tmp[n] = empty_spec;
891  ++n;
892  }
893  }
894 
910  template <typename MapType>
911  void averageCentroidSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
912  {
913  MapType exp_tmp; // temporary experiment for averaged spectra
914 
915  double mz_binning_width(param_.getValue("mz_binning_width"));
916  std::string mz_binning_unit(param_.getValue("mz_binning_width_unit"));
917 
918  unsigned progress = 0;
919  ProgressLogger logger;
920  std::stringstream progress_message;
921  progress_message << "averaging centroid spectra of MS level " << ms_level;
922  logger.startProgress(0, spectra_to_average_over.size(), progress_message.str());
923 
924  // loop over blocks
925  for (AverageBlocks::const_iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
926  {
927  logger.setProgress(++progress);
928 
929  // collect peaks from all spectra
930  // loop over spectra in blocks
931  std::vector<std::pair<double, double> > mz_intensity_all; // m/z positions and peak intensities from all spectra
932  for (const auto& weightedMZ: it->second)
933  {
934  // loop over m/z positions
935  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[weightedMZ.first].begin(); it_mz < exp[weightedMZ.first].end(); ++it_mz)
936  {
937  std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * weightedMZ.second)); // m/z, intensity * weight
938  mz_intensity_all.push_back(mz_intensity);
939  }
940  }
941 
942  sort(mz_intensity_all.begin(), mz_intensity_all.end());
943 
944  // generate new spectrum
945  std::vector<double> mz_new;
946  std::vector<double> intensity_new;
947  double last_mz = std::numeric_limits<double>::min();
948  double delta_mz = mz_binning_width;
949  double sum_mz(0);
950  double sum_intensity(0);
951  Size count(0);
952  for (const auto& mz_pos : mz_intensity_all)
953  {
954  if (mz_binning_unit == "ppm")
955  {
956  delta_mz = mz_binning_width * (mz_pos.first) / 1000000;
957  }
958 
959  if (((mz_pos.first - last_mz) > delta_mz) && (count > 0))
960  {
961  mz_new.push_back(sum_mz / count);
962  intensity_new.push_back(sum_intensity); // intensities already weighted
963 
964  sum_mz = 0;
965  sum_intensity = 0;
966 
967  last_mz = mz_pos.first;
968  count = 0;
969  }
970 
971  sum_mz += mz_pos.first;
972  sum_intensity += mz_pos.second;
973  ++count;
974  }
975  if (count > 0)
976  {
977  mz_new.push_back(sum_mz / count);
978  intensity_new.push_back(sum_intensity); // intensities already weighted
979  }
980 
981  // update spectrum
982  typename MapType::SpectrumType average_spec = exp[it->first];
983  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
984  //average_spec.setMSLevel(ms_level);
985 
986  // refill spectrum
987  for (Size i = 0; i < mz_new.size(); ++i)
988  {
989  typename MapType::PeakType peak;
990  peak.setMZ(mz_new[i]);
991  peak.setIntensity(intensity_new[i]);
992  average_spec.push_back(peak);
993  }
994 
995  // store spectrum temporarily
996  exp_tmp.addSpectrum(std::move(average_spec));
997  }
998 
999  logger.endProgress();
1000 
1001  // loop over blocks
1002  int n(0);
1003  for (const auto& spectral_index : spectra_to_average_over)
1004  {
1005  exp[spectral_index.first] = std::move(exp_tmp[n]);
1006  ++n;
1007  }
1008  }
1009  };
1010 }
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:465
#define OPENMS_LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:470
A basic LC-MS feature.
Definition: BaseFeature.h:59
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition: ClusterAnalyzer.h:52
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Method to calculate a partition resulting from a certain step in clustering given by the number of cl...
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:64
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:112
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
void setParameters(const Param &param)
Sets the parameters.
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:68
Illegal self operation exception.
Definition: Exception.h:372
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:341
Not all required information provided.
Definition: Exception.h:188
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:72
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
Iterator begin()
Definition: MSExperiment.h:149
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
Iterator end()
Definition: MSExperiment.h:159
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
Size size() const
Definition: MSExperiment.h:119
Base::const_iterator const_iterator
Definition: MSExperiment.h:117
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:105
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:70
double getRT() const
void setMSLevel(UInt ms_level)
Sets the MS level.
void sortByPosition()
Lexicographically sorts the peaks by their position.
void clear(bool clear_meta_data)
Clears all data and meta data.
void setRT(double rt)
Sets the absolute retention time (in seconds)
Management and storage of parameters / INI files.
Definition: Param.h:70
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:110
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:119
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:198
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:204
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:216
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:210
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:53
void setProgress(SignedSize value) const
Sets the current progress.
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void endProgress() const
Ends the progress display.
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:57
Definition: SpectraMerger.h:77
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:93
SpectraDistance_()
Definition: SpectraMerger.h:79
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:100
double mz_max_
Definition: SpectraMerger.h:119
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition: SpectraMerger.h:87
double rt_max_
Definition: SpectraMerger.h:118
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:64
void average(MapType &exp, const String &average_type, int ms_level=-1)
average over neighbouring spectra
Definition: SpectraMerger.h:366
static bool areMassesMatched(double mz1, double mz2, double tol_ppm, int max_c)
check if the first and second mzs might be from the same mass
Definition: SpectraMerger.h:314
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition: SpectraMerger.h:911
SpectraMerger(const SpectraMerger &source)
copy constructor
SpectraMerger()
default constructor
std::map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition: SpectraMerger.h:129
~SpectraMerger() override
destructor
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:210
SpectraMerger & operator=(SpectraMerger &&source)=default
move-assignment operator
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition: SpectraMerger.h:799
SpectraMerger & operator=(const SpectraMerger &source)
assignment operator
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:597
std::map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition: SpectraMerger.h:126
SpectraMerger(SpectraMerger &&source)=default
move constructor
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:159
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition: SpectrumAlignment.h:69
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition: SpectrumAlignment.h:88
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:71
@ PROFILE
profile data
Definition: SpectrumSettings.h:74
@ CENTROID
centroid data or stick data
Definition: SpectrumSettings.h:73
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
iterator class for access of spline packages
Definition: SplineInterpolatedPeaks.h:110
double eval(double pos)
returns spline interpolated intensity at this position (fast access since we can start search from la...
Data structure for spline interpolation of MS1 spectra and chromatograms.
Definition: SplineInterpolatedPeaks.h:60
SplineInterpolatedPeaks::Navigator getNavigator(double scaling=0.7)
returns an iterator for access of spline packages
double getPosMax() const
returns the maximum m/z (or RT) of the spectrum
double getPosMin() const
returns the minimum m/z (or RT) of the spectrum
A more convenient string class.
Definition: String.h:60
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:107
const double ISOTOPE_MASSDIFF_55K_U
Definition: Constants.h:126
const double PROTON_MASS_U
Definition: Constants.h:116
FLASHIda C++ to C# (or vice versa) bridge functions The functions here are called in C# to invoke fun...
Definition: FeatureDeconvolution.h:48