OpenMS  2.5.0
SpectraMerger.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Chris Bielow, Andreas Bertsch, Lars Nilse $
33 // --------------------------------------------------------------------------
34 //
35 #pragma once
36 
49 #include <vector>
50 
51 namespace OpenMS
52 {
53 
62  class OPENMS_DLLAPI SpectraMerger :
63  public DefaultParamHandler, public ProgressLogger
64  {
65 
66 protected:
67 
68  /* Determine distance between two spectra
69 
70  Distance is determined as
71 
72  (d_rt/rt_max_ + d_mz/mz_max_) / 2
73 
74  */
76  public DefaultParamHandler
77  {
78 public:
80  DefaultParamHandler("SpectraDistance")
81  {
82  defaults_.setValue("rt_tolerance", 10.0, "Maximal RT distance (in [s]) for two spectra's precursors.");
83  defaults_.setValue("mz_tolerance", 1.0, "Maximal m/z distance (in Da) for two spectra's precursors.");
84  defaultsToParam_(); // calls updateMembers_
85  }
86 
87  void updateMembers_() override
88  {
89  rt_max_ = (double) param_.getValue("rt_tolerance");
90  mz_max_ = (double) param_.getValue("mz_tolerance");
91  }
92 
93  double getSimilarity(const double d_rt, const double d_mz) const
94  {
95  // 1 - distance
96  return 1 - ((d_rt / rt_max_ + d_mz / mz_max_) / 2);
97  }
98 
99  // measure of SIMILARITY (not distance, i.e. 1-distance)!!
100  double operator()(const BaseFeature& first, const BaseFeature& second) const
101  {
102  // get RT distance:
103  double d_rt = fabs(first.getRT() - second.getRT());
104  double d_mz = fabs(first.getMZ() - second.getMZ());
105 
106  if (d_rt > rt_max_ || d_mz > mz_max_)
107  {
108  return 0;
109  }
110 
111  // calculate similarity (0-1):
112  double sim = getSimilarity(d_rt, d_mz);
113 
114  return sim;
115  }
116 
117 protected:
118  double rt_max_;
119  double mz_max_;
120 
121  }; // end of SpectraDistance
122 
123 public:
124 
127 
130 
131  // @name Constructors and Destructors
132  // @{
134  SpectraMerger();
135 
137  SpectraMerger(const SpectraMerger& source);
138 
140  ~SpectraMerger() override;
141  // @}
142 
143  // @name Operators
144  // @{
146  SpectraMerger& operator=(const SpectraMerger& source);
147  // @}
148 
149  // @name Merging functions
150  // @{
152  template <typename MapType>
154  {
155  IntList ms_levels = param_.getValue("block_method:ms_levels");
156  Int rt_block_size(param_.getValue("block_method:rt_block_size"));
157  double rt_max_length = (param_.getValue("block_method:rt_max_length"));
158 
159  if (rt_max_length == 0) // no rt restriction set?
160  {
161  rt_max_length = (std::numeric_limits<double>::max)(); // set max rt span to very large value
162  }
163 
164  for (IntList::iterator it_mslevel = ms_levels.begin(); it_mslevel < ms_levels.end(); ++it_mslevel)
165  {
166  MergeBlocks spectra_to_merge;
167  Size idx_block(0);
168  SignedSize block_size_count(rt_block_size + 1);
169  Size idx_spectrum(0);
170  for (typename MapType::const_iterator it1 = exp.begin(); it1 != exp.end(); ++it1)
171  {
172  if (Int(it1->getMSLevel()) == *it_mslevel)
173  {
174  // block full if it contains a maximum number of scans or if maximum rt length spanned
175  if (++block_size_count >= rt_block_size ||
176  exp[idx_spectrum].getRT() - exp[idx_block].getRT() > rt_max_length)
177  {
178  block_size_count = 0;
179  idx_block = idx_spectrum;
180  }
181  else
182  {
183  spectra_to_merge[idx_block].push_back(idx_spectrum);
184  }
185  }
186 
187  ++idx_spectrum;
188  }
189  // check if last block had sacrifice spectra
190  if (block_size_count == 0) //block just got initialized
191  {
192  spectra_to_merge[idx_block] = std::vector<Size>();
193  }
194 
195  // merge spectra, remove all old MS spectra and add new consensus spectra
196  mergeSpectra_(exp, spectra_to_merge, *it_mslevel);
197  }
198 
199  exp.sortSpectra();
200  }
201 
203  template <typename MapType>
205  {
206 
207  // convert spectra's precursors to clusterizable data
208  Size data_size;
209  std::vector<BinaryTreeNode> tree;
210  Map<Size, Size> index_mapping;
211  // local scope to save memory - we do not need the clustering stuff later
212  {
213  std::vector<BaseFeature> data;
214 
215  for (Size i = 0; i < exp.size(); ++i)
216  {
217  if (exp[i].getMSLevel() != 2)
218  {
219  continue;
220  }
221 
222  // remember which index in distance data ==> experiment index
223  index_mapping[data.size()] = i;
224 
225  // make cluster element
226  BaseFeature bf;
227  bf.setRT(exp[i].getRT());
228  std::vector<Precursor> pcs = exp[i].getPrecursors();
229  if (pcs.empty())
230  {
231  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Scan #") + String(i) + " does not contain any precursor information! Unable to cluster!");
232  }
233  if (pcs.size() > 1)
234  {
235  OPENMS_LOG_WARN << "More than one precursor found. Using first one!" << std::endl;
236  }
237  bf.setMZ(pcs[0].getMZ());
238  data.push_back(bf);
239  }
240  data_size = data.size();
241 
242  SpectraDistance_ llc;
243  llc.setParameters(param_.copy("precursor_method:", true));
244  SingleLinkage sl;
245  DistanceMatrix<float> dist; // will be filled
247 
248  //ch.setThreshold(0.99);
249  // clustering ; threshold is implicitly at 1.0, i.e. distances of 1.0 (== similarity 0) will not be clustered
250  ch.cluster<BaseFeature, SpectraDistance_>(data, llc, sl, tree, dist);
251  }
252 
253  // extract the clusters
254  ClusterAnalyzer ca;
255  std::vector<std::vector<Size> > clusters;
256  // count number of real tree nodes (not the -1 ones):
257  Size node_count = 0;
258  for (Size ii = 0; ii < tree.size(); ++ii)
259  {
260  if (tree[ii].distance >= 1)
261  {
262  tree[ii].distance = -1; // manually set to disconnect, as SingleLinkage does not support it
263  }
264  if (tree[ii].distance != -1)
265  {
266  ++node_count;
267  }
268  }
269  ca.cut(data_size - node_count, tree, clusters);
270 
271  //std::cerr << "Treesize: " << (tree.size()+1) << " #clusters: " << clusters.size() << std::endl;
272  //std::cerr << "tree:\n" << ca.newickTree(tree, true) << "\n";
273 
274  // convert to blocks
275  MergeBlocks spectra_to_merge;
276 
277  for (Size i_outer = 0; i_outer < clusters.size(); ++i_outer)
278  {
279  if (clusters[i_outer].size() <= 1)
280  {
281  continue;
282  }
283  // init block with first cluster element
284  Size cl_index0 = clusters[i_outer][0];
285  spectra_to_merge[index_mapping[cl_index0]] = std::vector<Size>();
286  // add all other elements
287  for (Size i_inner = 1; i_inner < clusters[i_outer].size(); ++i_inner)
288  {
289  Size cl_index = clusters[i_outer][i_inner];
290  spectra_to_merge[index_mapping[cl_index0]].push_back(index_mapping[cl_index]);
291  }
292  }
293 
294  // do it
295  mergeSpectra_(exp, spectra_to_merge, 2);
296 
297  exp.sortSpectra();
298  }
299 
306  template <typename MapType>
307  void average(MapType& exp, const String& average_type)
308  {
309  // MS level to be averaged
310  int ms_level = param_.getValue("average_gaussian:ms_level");
311  if (average_type == "tophat")
312  {
313  ms_level = param_.getValue("average_tophat:ms_level");
314  }
315 
316  // spectrum type (profile, centroid or automatic)
317  String spectrum_type = param_.getValue("average_gaussian:spectrum_type");
318  if (average_type == "tophat")
319  {
320  spectrum_type = param_.getValue("average_tophat:spectrum_type");
321  }
322 
323  // parameters for Gaussian averaging
324  double fwhm(param_.getValue("average_gaussian:rt_FWHM"));
325  double factor = -4 * log(2.0) / (fwhm * fwhm); // numerical factor within Gaussian
326  double cutoff(param_.getValue("average_gaussian:cutoff"));
327 
328  // parameters for Top-Hat averaging
329  bool unit(param_.getValue("average_tophat:rt_unit") == "scans"); // true if RT unit is 'scans', false if RT unit is 'seconds'
330  double range(param_.getValue("average_tophat:rt_range")); // range of spectra to be averaged over
331  double range_seconds = range / 2; // max. +/- <range_seconds> seconds from master spectrum
332  int range_scans = static_cast<int>(range); // in case of unit scans, the param is used as integer
333  if ((range_scans % 2) == 0)
334  {
335  ++range_scans;
336  }
337  range_scans = (range_scans - 1) / 2; // max. +/- <range_scans> scans from master spectrum
338 
339  AverageBlocks spectra_to_average_over;
340 
341  // loop over RT
342  int n(0); // spectrum index
343  for (typename MapType::const_iterator it_rt = exp.begin(); it_rt != exp.end(); ++it_rt)
344  {
345  if (Int(it_rt->getMSLevel()) == ms_level)
346  {
347  int m; // spectrum index
348  int steps;
349  bool terminate_now;
350  typename MapType::const_iterator it_rt_2;
351 
352  // go forward (start at next downstream spectrum; the current spectrum will be covered when looking backwards)
353  steps = 0;
354  m = n + 1;
355  it_rt_2 = it_rt + 1;
356  terminate_now = false;
357  while (it_rt_2 != exp.end() && !terminate_now)
358  {
359  if (Int(it_rt_2->getMSLevel()) == ms_level)
360  {
361  double weight = 1;
362  if (average_type == "gaussian")
363  {
364  weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
365  }
366  std::pair<Size, double> p(m, weight);
367  spectra_to_average_over[n].push_back(p);
368  ++steps;
369  }
370  if (average_type == "gaussian")
371  {
372  // Gaussian
373  terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
374  }
375  else if (unit)
376  {
377  // Top-Hat with RT unit = scans
378  terminate_now = (steps > range_scans);
379  }
380  else
381  {
382  // Top-Hat with RT unit = seconds
383  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
384  }
385  ++m;
386  ++it_rt_2;
387  }
388 
389  // go backward
390  steps = 0;
391  m = n;
392  it_rt_2 = it_rt;
393  terminate_now = false;
394  while (it_rt_2 != exp.begin() && !terminate_now)
395  {
396  if (Int(it_rt_2->getMSLevel()) == ms_level)
397  {
398  double weight = 1;
399  if (average_type == "gaussian")
400  {
401  weight = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2));
402  }
403  std::pair<Size, double> p(m, weight);
404  spectra_to_average_over[n].push_back(p);
405  ++steps;
406  }
407  if (average_type == "gaussian")
408  {
409  // Gaussian
410  terminate_now = std::exp(factor * pow(it_rt_2->getRT() - it_rt->getRT(), 2)) < cutoff;
411  }
412  else if (unit)
413  {
414  // Top-Hat with RT unit = scans
415  terminate_now = (steps > range_scans);
416  }
417  else
418  {
419  // Top-Hat with RT unit = seconds
420  terminate_now = (std::abs(it_rt_2->getRT() - it_rt->getRT()) > range_seconds);
421  }
422  --m;
423  --it_rt_2;
424  }
425 
426  }
427  ++n;
428  }
429 
430  // normalize weights
431  for (AverageBlocks::Iterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
432  {
433  double sum(0.0);
434  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
435  {
436  sum += it2->second;
437  }
438 
439  for (std::vector<std::pair<Size, double> >::iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
440  {
441  (*it2).second /= sum;
442  }
443  }
444 
445  // determine type of spectral data (profile or centroided)
447  if (spectrum_type == "automatic")
448  {
449  Size idx = spectra_to_average_over.begin()->first; // index of first spectrum to be averaged
450  type = exp[idx].getType(true);
451  }
452  else if (spectrum_type == "profile")
453  {
455  }
456  else if (spectrum_type == "centroid")
457  {
459  }
460  else
461  {
462  throw Exception::InvalidParameter(__FILE__,__LINE__,OPENMS_PRETTY_FUNCTION, "Spectrum type has to be one of automatic, profile or centroid.");
463  }
464 
465  // generate new spectra
466  if (type == SpectrumSettings::CENTROID)
467  {
468  averageCentroidSpectra_(exp, spectra_to_average_over, ms_level);
469  }
470  else
471  {
472  averageProfileSpectra_(exp, spectra_to_average_over, ms_level);
473  }
474 
475  exp.sortSpectra();
476  }
477 
478  // @}
479 
480 protected:
481 
492  template <typename MapType>
493  void mergeSpectra_(MapType& exp, const MergeBlocks& spectra_to_merge, const UInt ms_level)
494  {
495  double mz_binning_width(param_.getValue("mz_binning_width"));
496  String mz_binning_unit(param_.getValue("mz_binning_width_unit"));
497 
498  // merge spectra
499  MapType merged_spectra;
500 
501  Map<Size, Size> cluster_sizes;
502  std::set<Size> merged_indices;
503 
504  // set up alignment
505  SpectrumAlignment sas;
506  Param p;
507  p.setValue("tolerance", mz_binning_width);
508  if (!(mz_binning_unit == "Da" || mz_binning_unit == "ppm"))
509  {
510  throw Exception::IllegalSelfOperation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION); // sanity check
511  }
512 
513  p.setValue("is_relative_tolerance", mz_binning_unit == "Da" ? "false" : "true");
514  sas.setParameters(p);
515  std::vector<std::pair<Size, Size> > alignment;
516 
517  Size count_peaks_aligned(0);
518  Size count_peaks_overall(0);
519 
520  // each BLOCK
521  for (auto it = spectra_to_merge.begin(); it != spectra_to_merge.end(); ++it)
522  {
523  ++cluster_sizes[it->second.size() + 1]; // for stats
524 
525  typename MapType::SpectrumType consensus_spec = exp[it->first];
526  consensus_spec.setMSLevel(ms_level);
527 
528  //consensus_spec.unify(exp[it->first]); // append meta info
529  merged_indices.insert(it->first);
530 
531  //typename MapType::SpectrumType all_peaks = exp[it->first];
532  double rt_average = consensus_spec.getRT();
533  double precursor_mz_average = 0.0;
534  Size precursor_count(0);
535  if (!consensus_spec.getPrecursors().empty())
536  {
537  precursor_mz_average = consensus_spec.getPrecursors()[0].getMZ();
538  ++precursor_count;
539  }
540 
541  count_peaks_overall += consensus_spec.size();
542 
543  // block elements
544  for (auto sit = it->second.begin(); sit != it->second.end(); ++sit)
545  {
546  consensus_spec.unify(exp[*sit]); // append meta info
547  merged_indices.insert(*sit);
548 
549  rt_average += exp[*sit].getRT();
550  if (ms_level >= 2 && exp[*sit].getPrecursors().size() > 0)
551  {
552  precursor_mz_average += exp[*sit].getPrecursors()[0].getMZ();
553  ++precursor_count;
554  }
555 
556  // merge data points
557  sas.getSpectrumAlignment(alignment, consensus_spec, exp[*sit]);
558  //std::cerr << "alignment of " << it->first << " with " << *sit << " yielded " << alignment.size() << " common peaks!\n";
559  count_peaks_aligned += alignment.size();
560  count_peaks_overall += exp[*sit].size();
561 
562  Size align_index(0);
563  Size spec_b_index(0);
564 
565  // sanity check for number of peaks
566  Size spec_a = consensus_spec.size(), spec_b = exp[*sit].size(), align_size = alignment.size();
567  for (auto pit = exp[*sit].begin(); pit != exp[*sit].end(); ++pit)
568  {
569  if (alignment.size() == 0 || alignment[align_index].second != spec_b_index)
570  // ... add unaligned peak
571  {
572  consensus_spec.push_back(*pit);
573  }
574  // or add aligned peak height to ALL corresponding existing peaks
575  else
576  {
577  Size counter(0);
578  Size copy_of_align_index(align_index);
579 
580  while (alignment.size() > 0 &&
581  copy_of_align_index < alignment.size() &&
582  alignment[copy_of_align_index].second == spec_b_index)
583  {
584  ++copy_of_align_index;
585  ++counter;
586  } // Count the number of peaks in a which correspond to a single b peak.
587 
588  while (alignment.size() > 0 &&
589  align_index < alignment.size() &&
590  alignment[align_index].second == spec_b_index)
591  {
592  consensus_spec[alignment[align_index].first].setIntensity(consensus_spec[alignment[align_index].first].getIntensity() +
593  (pit->getIntensity() / (double)counter)); // add the intensity divided by the number of peaks
594  ++align_index; // this aligned peak was explained, wait for next aligned peak ...
595  if (align_index == alignment.size())
596  {
597  alignment.clear(); // end reached -> avoid going into this block again
598  }
599  }
600  align_size = align_size + 1 - counter; //Decrease align_size by number of
601  }
602  ++spec_b_index;
603  }
604  consensus_spec.sortByPosition(); // sort, otherwise next alignment will fail
605  if (spec_a + spec_b - align_size != consensus_spec.size())
606  {
607  OPENMS_LOG_WARN << "wrong number of features after merge. Expected: " << spec_a + spec_b - align_size << " got: " << consensus_spec.size() << "\n";
608  }
609  }
610  rt_average /= it->second.size() + 1;
611  consensus_spec.setRT(rt_average);
612 
613  if (ms_level >= 2)
614  {
615  if (precursor_count)
616  {
617  precursor_mz_average /= precursor_count;
618  }
619  std::vector<Precursor> pcs = consensus_spec.getPrecursors();
620  //if (pcs.size()>1) OPENMS_LOG_WARN << "Removing excessive precursors - leaving only one per MS2 spectrum.\n";
621  pcs.resize(1);
622  pcs[0].setMZ(precursor_mz_average);
623  consensus_spec.setPrecursors(pcs);
624  }
625 
626  if (consensus_spec.empty())
627  {
628  continue;
629  }
630  else
631  {
632  merged_spectra.addSpectrum(consensus_spec);
633  }
634  }
635 
636  OPENMS_LOG_INFO << "Cluster sizes:\n";
637  for (Map<Size, Size>::const_iterator it = cluster_sizes.begin(); it != cluster_sizes.end(); ++it)
638  {
639  OPENMS_LOG_INFO << " size " << it->first << ": " << it->second << "x\n";
640  }
641 
642  char buffer[200];
643  sprintf(buffer, "%d/%d (%.2f %%) of blocked spectra", (int)count_peaks_aligned,
644  (int)count_peaks_overall, float(count_peaks_aligned) / float(count_peaks_overall) * 100.);
645  OPENMS_LOG_INFO << "Number of merged peaks: " << String(buffer) << "\n";
646 
647  // remove all spectra that were within a cluster
648  typename MapType::SpectrumType empty_spec;
649  MapType exp_tmp;
650  for (Size i = 0; i < exp.size(); ++i)
651  {
652  if (merged_indices.count(i) == 0) // save unclustered ones
653  {
654  exp_tmp.addSpectrum(exp[i]);
655  exp[i] = empty_spec;
656  }
657  }
658 
659  //typedef std::vector<typename MapType::SpectrumType> Base;
660  //exp.Base::operator=(exp_tmp);
661  exp.clear(false);
662  exp.getSpectra().insert(exp.end(), exp_tmp.begin(), exp_tmp.end());
663 
664  // exp.erase(remove_if(exp.begin(), exp.end(), InMSLevelRange<typename MapType::SpectrumType>(ListUtils::create<int>(String(ms_level)), false)), exp.end());
665 
666  // ... and add consensus spectra
667  exp.getSpectra().insert(exp.end(), merged_spectra.begin(), merged_spectra.end());
668 
669  }
670 
691  template <typename MapType>
692  void averageProfileSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
693  {
694  MapType exp_tmp; // temporary experiment for averaged spectra
695 
696  double mz_binning_width(param_.getValue("mz_binning_width"));
697  String mz_binning_unit(param_.getValue("mz_binning_width_unit"));
698 
699  unsigned progress = 0;
700  std::stringstream progress_message;
701  progress_message << "averaging profile spectra of MS level " << ms_level;
702  startProgress(0, spectra_to_average_over.size(), progress_message.str());
703 
704  // loop over blocks
705  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
706  {
707  setProgress(++progress);
708 
709  // loop over spectra in blocks
710  std::vector<double> mz_positions_all; // m/z positions from all spectra
711  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
712  {
713  // loop over m/z positions
714  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[it2->first].begin(); it_mz < exp[it2->first].end(); ++it_mz)
715  {
716  mz_positions_all.push_back(it_mz->getMZ());
717  }
718  }
719 
720  sort(mz_positions_all.begin(), mz_positions_all.end());
721 
722  std::vector<double> mz_positions; // positions at which the averaged spectrum should be evaluated
723  std::vector<double> intensities;
724  double last_mz = std::numeric_limits<double>::min(); // last m/z position pushed through from mz_position to mz_position_2
725  double delta_mz(mz_binning_width); // for m/z unit Da
726  for (std::vector<double>::iterator it_mz = mz_positions_all.begin(); it_mz < mz_positions_all.end(); ++it_mz)
727  {
728  if (mz_binning_unit == "ppm")
729  {
730  delta_mz = mz_binning_width * (*it_mz) / 1000000;
731  }
732 
733  if (((*it_mz) - last_mz) > delta_mz)
734  {
735  mz_positions.push_back(*it_mz);
736  intensities.push_back(0.0);
737  last_mz = *it_mz;
738  }
739  }
740 
741  // loop over spectra in blocks
742  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
743  {
744  SplineInterpolatedPeaks spline(exp[it2->first]);
746 
747  // loop over m/z positions
748  for (Size i = 0; i < mz_positions.size(); ++i)
749  {
750  if ((spline.getPosMin() < mz_positions[i]) && (mz_positions[i] < spline.getPosMax()))
751  {
752  intensities[i] += nav.eval(mz_positions[i]) * (it2->second); // spline-interpolated intensity * weight
753  }
754  }
755  }
756 
757  // update spectrum
758  typename MapType::SpectrumType average_spec = exp[it->first];
759  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
760  //average_spec.setMSLevel(ms_level);
761 
762  // refill spectrum
763  for (Size i = 0; i < mz_positions.size(); ++i)
764  {
765  typename MapType::PeakType peak;
766  peak.setMZ(mz_positions[i]);
767  peak.setIntensity(intensities[i]);
768  average_spec.push_back(peak);
769  }
770 
771  // store spectrum temporarily
772  exp_tmp.addSpectrum(average_spec);
773  }
774 
775  endProgress();
776 
777  // loop over blocks
778  int n(0);
779  //typename MapType::SpectrumType empty_spec;
780  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
781  {
782  exp[it->first] = exp_tmp[n];
783  //exp_tmp[n] = empty_spec;
784  ++n;
785  }
786 
787  }
788 
804  template <typename MapType>
805  void averageCentroidSpectra_(MapType& exp, const AverageBlocks& spectra_to_average_over, const UInt ms_level)
806  {
807  MapType exp_tmp; // temporary experiment for averaged spectra
808 
809  double mz_binning_width(param_.getValue("mz_binning_width"));
810  String mz_binning_unit(param_.getValue("mz_binning_width_unit"));
811 
812  unsigned progress = 0;
813  ProgressLogger logger;
814  std::stringstream progress_message;
815  progress_message << "averaging centroid spectra of MS level " << ms_level;
816  logger.startProgress(0, spectra_to_average_over.size(), progress_message.str());
817 
818  // loop over blocks
819  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
820  {
821  logger.setProgress(++progress);
822 
823  // collect peaks from all spectra
824  // loop over spectra in blocks
825  std::vector<std::pair<double, double> > mz_intensity_all; // m/z positions and peak intensities from all spectra
826  for (std::vector<std::pair<Size, double> >::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
827  {
828  // loop over m/z positions
829  for (typename MapType::SpectrumType::ConstIterator it_mz = exp[it2->first].begin(); it_mz < exp[it2->first].end(); ++it_mz)
830  {
831  std::pair<double, double> mz_intensity(it_mz->getMZ(), (it_mz->getIntensity() * it2->second)); // m/z, intensity * weight
832  mz_intensity_all.push_back(mz_intensity);
833  }
834  }
835 
836  sort(mz_intensity_all.begin(), mz_intensity_all.end(), SpectraMerger::compareByFirst);
837 
838  // generate new spectrum
839  std::vector<double> mz_new;
840  std::vector<double> intensity_new;
841  double last_mz = std::numeric_limits<double>::min();
842  double delta_mz = mz_binning_width;
843  double sum_mz(0);
844  double sum_intensity(0);
845  Size count(0);
846  for (std::vector<std::pair<double, double> >::const_iterator it_mz = mz_intensity_all.begin(); it_mz != mz_intensity_all.end(); ++it_mz)
847  {
848  if (mz_binning_unit == "ppm")
849  {
850  delta_mz = mz_binning_width * (it_mz->first) / 1000000;
851  }
852 
853  if (((it_mz->first - last_mz) > delta_mz) && (count > 0))
854  {
855  mz_new.push_back(sum_mz / count);
856  intensity_new.push_back(sum_intensity); // intensities already weighted
857 
858  sum_mz = 0;
859  sum_intensity = 0;
860 
861  last_mz = it_mz->first;
862  count = 0;
863  }
864 
865  sum_mz += it_mz->first;
866  sum_intensity += it_mz->second;
867  ++count;
868  }
869  if (count > 0)
870  {
871  mz_new.push_back(sum_mz / count);
872  intensity_new.push_back(sum_intensity); // intensities already weighted
873  }
874 
875  // update spectrum
876  typename MapType::SpectrumType average_spec = exp[it->first];
877  average_spec.clear(false); // Precursors are part of the meta data, which are not deleted.
878  //average_spec.setMSLevel(ms_level);
879 
880  // refill spectrum
881  for (Size i = 0; i < mz_new.size(); ++i)
882  {
883  typename MapType::PeakType peak;
884  peak.setMZ(mz_new[i]);
885  peak.setIntensity(intensity_new[i]);
886  average_spec.push_back(peak);
887  }
888 
889  // store spectrum temporarily
890  exp_tmp.addSpectrum(average_spec);
891 
892  }
893 
894  logger.endProgress();
895 
896  // loop over blocks
897  int n(0);
898  for (AverageBlocks::ConstIterator it = spectra_to_average_over.begin(); it != spectra_to_average_over.end(); ++it)
899  {
900  exp[it->first] = exp_tmp[n];
901  ++n;
902  }
903 
904  }
905 
909  bool static compareByFirst(std::pair<double, double> i, std::pair<double, double> j)
910  {
911  return i.first < j.first;
912  }
913 
914  };
915 
916 }
OpenMS::SpectrumAlignment
Aligns the peaks of two sorted spectra Method 1: Using a banded (width via 'tolerance' parameter) ali...
Definition: SpectrumAlignment.h:67
OpenMS::Param
Management and storage of parameters / INI files.
Definition: Param.h:73
OpenMS::SpectraMerger::SpectraDistance_::getSimilarity
double getSimilarity(const double d_rt, const double d_mz) const
Definition: SpectraMerger.h:93
OpenMS::SpectraMerger
Merges blocks of MS or MS2 spectra.
Definition: SpectraMerger.h:62
OpenMS::Param::setValue
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
SingleLinkage.h
OpenMS::ProgressLogger::setProgress
void setProgress(SignedSize value) const
Sets the current progress.
OpenMS::DefaultParamHandler::getParameters
const Param & getParameters() const
Non-mutable access to the parameters.
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
OpenMS::SpectraMerger::AverageBlocks
Map< Size, std::vector< std::pair< Size, double > > > AverageBlocks
blocks of spectra (master-spectrum index to update to spectra to average over)
Definition: SpectraMerger.h:129
LogStream.h
ClusterAnalyzer.h
OpenMS::SplineInterpolatedPeaks::Navigator::eval
double eval(double pos)
returns spline interpolated intensity at this position (fast access since we can start search from la...
OpenMS::SpectraMerger::SpectraDistance_::operator()
double operator()(const BaseFeature &first, const BaseFeature &second) const
Definition: SpectraMerger.h:100
OPENMS_LOG_INFO
#define OPENMS_LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:465
OpenMS::SpectraMerger::SpectraDistance_::SpectraDistance_
SpectraDistance_()
Definition: SpectraMerger.h:79
OpenMS::Peak1D::setMZ
void setMZ(CoordinateType mz)
Mutable access to m/z.
Definition: Peak1D.h:121
OpenMS::Peak2D::setMZ
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:202
BaseFeature.h
OpenMS::SpectraMerger::averageCentroidSpectra_
void averageCentroidSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (centroid mode)
Definition: SpectraMerger.h:805
OpenMS::SpectraMerger::SpectraDistance_::rt_max_
double rt_max_
Definition: SpectraMerger.h:118
SpectraMerger.h
OpenMS::MSExperiment::size
Size size() const
Definition: MSExperiment.h:127
OpenMS::Peak2D::getMZ
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:196
OpenMS::Int
int Int
Signed integer type.
Definition: Types.h:102
OpenMS::ClusterHierarchical::cluster
void cluster(std::vector< Data > &data, const SimilarityComparator &comparator, const ClusterFunctor &clusterer, std::vector< BinaryTreeNode > &cluster_tree, DistanceMatrix< float > &original_distance)
Clustering function.
Definition: ClusterHierarchical.h:112
OpenMS::SpectrumSettings::setPrecursors
void setPrecursors(const std::vector< Precursor > &precursors)
sets the precursors
OpenMS::Exception::InvalidParameter
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:347
OpenMS::Peak1D::setIntensity
void setIntensity(IntensityType intensity)
Mutable access to the data point intensity (height)
Definition: Peak1D.h:112
OpenMS::Map
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:50
OpenMS::SpectraMerger::SpectraDistance_::updateMembers_
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Definition: SpectraMerger.h:87
OpenMS::MSExperiment::clear
void clear(bool clear_meta_data)
Clears all data and meta data.
OpenMS::SpectraMerger::SpectraDistance_::mz_max_
double mz_max_
Definition: SpectraMerger.h:119
OpenMS::MSExperiment::const_iterator
Base::const_iterator const_iterator
Definition: MSExperiment.h:125
MSExperiment.h
double
OpenMS::MSSpectrum::setMSLevel
void setMSLevel(UInt ms_level)
Sets the MS level.
OpenMS::SpectrumSettings::getPrecursors
const std::vector< Precursor > & getPrecursors() const
returns a const reference to the precursors
OpenMS::MSExperiment::begin
Iterator begin()
Definition: MSExperiment.h:157
OpenMS::Peak2D::getRT
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:208
OpenMS::SingleLinkage
SingleLinkage ClusterMethod.
Definition: SingleLinkage.h:57
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:144
MzMLFile.h
OpenMS::SplineInterpolatedPeaks::Navigator
iterator class for access of spline packages
Definition: SplineInterpolatedPeaks.h:111
int
OpenMS::DistanceMatrix
A two-dimensional distance matrix, similar to OpenMS::Matrix.
Definition: DistanceMatrix.h:67
OpenMS::ClusterAnalyzer
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition: ClusterAnalyzer.h:51
OpenMS::Exception::IllegalSelfOperation
Illegal self operation exception.
Definition: Exception.h:378
OpenMS::ProgressLogger
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
OpenMS::Exception::MissingInformation
Not all required information provided.
Definition: Exception.h:195
OpenMS::MSSpectrum::sortByPosition
void sortByPosition()
Lexicographically sorts the peaks by their position.
CompleteLinkage.h
SpectrumAlignment.h
OpenMS::MSExperiment::end
Iterator end()
Definition: MSExperiment.h:167
RangeUtils.h
OpenMS::ProgressLogger::endProgress
void endProgress() const
Ends the progress display.
OpenMS::SplineInterpolatedPeaks::getNavigator
SplineInterpolatedPeaks::Navigator getNavigator(double scaling=0.7)
returns an iterator for access of spline packages
OpenMS::ProgressLogger::setLogType
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
OpenMS::MSExperiment::getSpectra
const std::vector< MSSpectrum > & getSpectra() const
returns the spectrum list
OpenMS::MSExperiment::ConstIterator
std::vector< SpectrumType >::const_iterator ConstIterator
Non-mutable iterator.
Definition: MSExperiment.h:113
OpenMS::MSSpectrum::getRT
double getRT() const
OpenMS::Peak2D::setRT
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:214
OpenMS::DefaultParamHandler::setParameters
void setParameters(const Param &param)
Sets the parameters.
OpenMS::SpectrumSettings::SpectrumType
SpectrumType
Spectrum peak type.
Definition: SpectrumSettings.h:70
OpenMS::FileHandler::getType
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
OpenMS::SpectrumSettings::CENTROID
centroid data or stick data
Definition: SpectrumSettings.h:73
OpenMS::MSExperiment
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
TOPPBase.h
OpenMS::Peak1D
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:54
OpenMS::SpectraMerger::averageProfileSpectra_
void averageProfileSpectra_(MapType &exp, const AverageBlocks &spectra_to_average_over, const UInt ms_level)
average spectra (profile mode)
Definition: SpectraMerger.h:692
ProgressLogger.h
FileHandler.h
DefaultParamHandler.h
OpenMS::DefaultParamHandler
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
OpenMS::FileTypes::Type
Type
Actual file types enum.
Definition: FileTypes.h:58
OpenMS::SpectraMerger::MergeBlocks
Map< Size, std::vector< Size > > MergeBlocks
blocks of spectra (master-spectrum index to sacrifice-spectra(the ones being merged into the master-s...
Definition: SpectraMerger.h:126
OpenMS::SpectraMerger::mergeSpectra_
void mergeSpectra_(MapType &exp, const MergeBlocks &spectra_to_merge, const UInt ms_level)
merges blocks of spectra of a certain level
Definition: SpectraMerger.h:493
OpenMS::BaseFeature
A basic LC-MS feature.
Definition: BaseFeature.h:56
OpenMS::FileHandler::loadExperiment
bool loadExperiment(const String &filename, MSExperiment &exp, FileTypes::Type force_type=FileTypes::UNKNOWN, ProgressLogger::LogType log=ProgressLogger::NONE, const bool rewrite_source_file=true, const bool compute_hash=true)
Loads a file into an MSExperiment.
OpenMS::SpectrumSettings::PROFILE
profile data
Definition: SpectrumSettings.h:74
OpenMS::Map::Iterator
Base::iterator Iterator
Definition: Map.h:80
OpenMS::MSExperiment::sortSpectra
void sortSpectra(bool sort_mz=true)
Sorts the data points by retention time.
SplineInterpolatedPeaks.h
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::SpectrumSettings::unify
void unify(const SpectrumSettings &rhs)
merge another spectrum setting into this one (data is usually appended, except for spectrum type whic...
OpenMS::SignedSize
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
OpenMS::Math::sum
static double sum(IteratorType begin, IteratorType end)
Calculates the sum of a range of values.
Definition: StatisticFunctions.h:120
OpenMS::SpectraMerger::mergeSpectraPrecursors
void mergeSpectraPrecursors(MapType &exp)
merges spectra with similar precursors (must have MS2 level)
Definition: SpectraMerger.h:204
OpenMS::MSExperiment::addSpectrum
void addSpectrum(const MSSpectrum &spectrum)
adds a spectrum to the list
OpenMS::SpectraMerger::compareByFirst
static bool compareByFirst(std::pair< double, double > i, std::pair< double, double > j)
comparator for sorting peaks (m/z, intensity)
Definition: SpectraMerger.h:909
StandardTypes.h
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OPENMS_LOG_WARN
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
OpenMS::MSSpectrum
The representation of a 1D spectrum.
Definition: MSSpectrum.h:67
OpenMS::ClusterAnalyzer::cut
void cut(const Size cluster_quantity, const std::vector< BinaryTreeNode > &tree, std::vector< std::vector< Size > > &clusters)
Method to calculate a partition resulting from a certain step in clustering given by the number of cl...
OpenMS::SpectrumAlignment::getSpectrumAlignment
void getSpectrumAlignment(std::vector< std::pair< Size, Size > > &alignment, const SpectrumType1 &s1, const SpectrumType2 &s2) const
Definition: SpectrumAlignment.h:88
OpenMS::SpectraMerger::mergeSpectraBlockWise
void mergeSpectraBlockWise(MapType &exp)
Definition: SpectraMerger.h:153
OpenMS::MSSpectrum::clear
void clear(bool clear_meta_data)
Clears all data and meta data.
OpenMS::SplineInterpolatedPeaks::getPosMax
double getPosMax() const
returns the maximum m/z (or RT) of the spectrum
OpenMS::IntList
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::SpectraMerger::SpectraDistance_
Definition: SpectraMerger.h:75
OpenMS::SplineInterpolatedPeaks::getPosMin
double getPosMin() const
returns the minimum m/z (or RT) of the spectrum
OpenMS::Map::ConstIterator
Base::const_iterator ConstIterator
Definition: Map.h:81
OpenMS::FileHandler
Facilitates file handling by file type recognition.
Definition: FileHandler.h:62
OpenMS::SpectraMerger::average
void average(MapType &exp, const String &average_type)
average over neighbouring spectra
Definition: SpectraMerger.h:307
OpenMS::ClusterHierarchical
Hierarchical clustering with generic clustering functions.
Definition: ClusterHierarchical.h:63
ClusterHierarchical.h
OpenMS::FileHandler::storeExperiment
void storeExperiment(const String &filename, const MSExperiment &exp, ProgressLogger::LogType log=ProgressLogger::NONE)
Stores an MSExperiment to a file.
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
OpenMS::MSSpectrum::setRT
void setRT(double rt)
Sets the absolute retention time (in seconds)
OpenMS::SplineInterpolatedPeaks
Data structure for spline interpolation of MS1 spectra and chromatograms.
Definition: SplineInterpolatedPeaks.h:61
OpenMS::ProgressLogger::startProgress
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.