OpenMS
IsotopeWaveletTransform.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg$
6 // $Authors: $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
20 
21 #include <cmath>
22 #include <vector>
23 #include <map>
24 #include <sstream>
25 #include <fstream>
26 #include <iomanip>
27 
28 // TODO: move this to cpp and use STL once it is available in clang on Mac
29 #include <boost/math/special_functions/bessel.hpp>
30 
31 // This code has quite a few strange things in it triggering warnings which
32 // clutters the rest of the diagnostics
33 #pragma clang diagnostic push
34 #pragma clang diagnostic ignored "-Wfloat-equal"
35 #pragma clang diagnostic ignored "-Wconversion"
36 #pragma clang diagnostic ignored "-Wshorten-64-to-32"
37 
38 namespace OpenMS
39 {
40 
44  template <typename PeakType>
46  {
47 public:
48 
49 
51  struct BoxElement
52  {
53  double mz;
54  UInt c;
55  double score;
56  double intens;
57  double ref_intens;
58  double RT;
62  };
63 
64  typedef std::multimap<UInt, BoxElement> Box;
65 
66 
71  {
73 
74 public:
75 
78  reference_(nullptr), trans_intens_(nullptr)
79  {
80  }
81 
83  TransSpectrum(const MSSpectrum* reference) :
84  reference_(reference)
85  {
86  trans_intens_ = new std::vector<float>(reference_->size(), 0.0);
87  }
88 
90  virtual ~TransSpectrum()
91  {
92  delete (trans_intens_);
93  }
94 
95  virtual void destroy()
96  {
97  delete (trans_intens_);
98  trans_intens_ = NULL;
99  delete (reference_);
100  reference_ = NULL;
101  }
102 
104  inline double getRT() const
105  {
106  return reference_->getRT();
107  }
108 
110  inline double getMZ(const UInt i) const
111  {
112  return (*reference_)[i].getMZ();
113  }
114 
116  inline double getRefIntensity(const UInt i) const
117  {
118  return (*reference_)[i].getIntensity();
119  }
120 
122  inline double getTransIntensity(const UInt i) const
123  {
124  return (*trans_intens_)[i];
125  }
126 
128  inline void setTransIntensity(const UInt i, const double intens)
129  {
130  (*trans_intens_)[i] = intens;
131  }
132 
134  inline Size size() const
135  {
136  return trans_intens_->size();
137  }
138 
140  inline const MSSpectrum* getRefSpectrum()
141  {
142  return reference_;
143  }
144 
146  inline const MSSpectrum* getRefSpectrum() const
147  {
148  return reference_;
149  }
150 
153  inline typename MSSpectrum::const_iterator MZBegin(const double mz) const
154  {
155  return reference_->MZBegin(mz);
156  }
157 
160  inline typename MSSpectrum::const_iterator MZEnd(const double mz) const
161  {
162  return reference_->MZEnd(mz);
163  }
164 
167  inline typename MSSpectrum::const_iterator end() const
168  {
169  return reference_->end();
170  }
171 
174  inline typename MSSpectrum::const_iterator begin() const
175  {
176  return reference_->begin();
177  }
178 
179 protected:
180 
182  std::vector<float>* trans_intens_;
183 
184  };
185 
186 
187 
193  IsotopeWaveletTransform(const double min_mz, const double max_mz, const UInt max_charge, const Size max_scan_size = 0, const bool hr_data = false, const String intenstype = "ref");
194 
196  virtual ~IsotopeWaveletTransform();
197 
198 
203  virtual void getTransform(MSSpectrum& c_trans, const MSSpectrum& c_ref, const UInt c);
204 
209  virtual void getTransformHighRes(MSSpectrum& c_trans, const MSSpectrum& c_ref, const UInt c);
210 
230  virtual void identifyCharge(const MSSpectrum& candidates, const MSSpectrum& ref, const UInt scan_index, const UInt c,
231  const double ampl_cutoff, const bool check_PPMs);
232 
233  virtual void initializeScan(const MSSpectrum& c_ref, const UInt c = 0);
234 
241  void updateBoxStates(const PeakMap& map, const Size scan_index, const UInt RT_interleave,
242  const UInt RT_votes_cutoff, const Int front_bound = -1, const Int end_bound = -1);
243 
244 
249  FeatureMap mapSeeds2Features(const PeakMap& map, const UInt RT_votes_cutoff);
250 
252  virtual std::multimap<double, Box> getClosedBoxes()
253  { return closed_boxes_; }
254 
255 
260  inline double getLinearInterpolation(const typename MSSpectrum::const_iterator& left_iter, const double mz_pos, const typename MSSpectrum::const_iterator& right_iter)
261  {
262  return left_iter->getIntensity() + (right_iter->getIntensity() - left_iter->getIntensity()) / (right_iter->getMZ() - left_iter->getMZ()) * (mz_pos - left_iter->getMZ());
263  }
264 
271  inline double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b)
272  {
273  return intens_a + (intens_b - intens_a) / (mz_b - mz_a) * (mz_pos - mz_a);
274  }
275 
276  inline double getSigma() const
277  {
278  return sigma_;
279  }
280 
281  inline void setSigma(const double sigma)
282  {
283  sigma_ = sigma;
284  }
285 
286  virtual void computeMinSpacing(const MSSpectrum& c_ref);
287 
288  inline double getMinSpacing() const
289  {
290  return min_spacing_;
291  }
292 
293  inline Size getMaxScanSize() const
294  {
295  return max_scan_size_;
296  }
297 
298 protected:
299 
300 
304 
305 
306  inline void sampleTheCMarrWavelet_(const MSSpectrum& scan, const Int wavelet_length, const Int mz_index, const UInt charge);
307 
308 
315  virtual double scoreThis_(const TransSpectrum& candidate, const UInt peak_cutoff,
316  const double seed_mz, const UInt c, const double ampl_cutoff);
317 
324  virtual double scoreThis_(const MSSpectrum& candidate, const UInt peak_cutoff,
325  const double seed_mz, const UInt c, const double ampl_cutoff);
326 
327 
335  virtual bool checkPositionForPlausibility_(const TransSpectrum& candidate, const MSSpectrum& ref, const double seed_mz,
336  const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score);
337 
345  virtual bool checkPositionForPlausibility_(const MSSpectrum& candidate, const MSSpectrum& ref, const double seed_mz,
346  const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score);
347 
348  virtual std::pair<double, double> checkPPMTheoModel_(const MSSpectrum& ref, const double c_mz, const UInt c);
349 
350 
352  inline double getAvIntens_(const TransSpectrum& scan);
354  inline double getAvIntens_(const MSSpectrum& scan);
355 
357  inline double getSdIntens_(const TransSpectrum& scan, const double mean);
359  inline double getSdIntens_(const MSSpectrum& scan, const double mean);
360 
371  virtual void push2Box_(const double mz, const UInt scan, UInt c, const double score,
372  const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, const double ref_intens);
373 
389  virtual void push2TmpBox_(const double mz, const UInt scan, UInt charge, const double score,
390  const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end);
391 
397  inline double getAvMZSpacing_(const MSSpectrum& scan);
398 
399 
404  void clusterSeeds_(const TransSpectrum& candidates, const MSSpectrum& ref,
405  const UInt scan_index, const UInt c, const bool check_PPMs);
406 
411  virtual void clusterSeeds_(const MSSpectrum& candidates, const MSSpectrum& ref,
412  const UInt scan_index, const UInt c, const bool check_PPMs);
413 
414 
419  void extendBox_(const PeakMap& map, const Box& box);
420 
423  inline double peptideMassRule_(const double c_mass) const
424  {
425  double correction_fac = c_mass / Constants::PEPTIDE_MASS_RULE_BOUND;
426  double old_frac_mass = c_mass - (Int)(c_mass);
427  double new_mass = ((Int)(c_mass)) * (1. + Constants::PEPTIDE_MASS_RULE_FACTOR) - (Int)(correction_fac);
428  double new_frac_mass = new_mass - (Int)(new_mass);
429 
430  if (new_frac_mass - old_frac_mass > 0.5)
431  {
432  new_mass -= 1.;
433  }
434 
435  if (new_frac_mass - old_frac_mass < -0.5)
436  {
437  new_mass += 1.;
438  }
439 
440  return new_mass;
441  }
442 
446  inline double getPPMs_(const double mass_a, const double mass_b) const
447  {
448  return fabs(mass_a - mass_b) / (0.5 * (mass_a + mass_b)) * 1e6;
449  }
450 
451  //internally used data structures for the sweep line algorithm
452  std::multimap<double, Box> open_boxes_, closed_boxes_, end_boxes_, front_boxes_; //double = average m/z position
453  std::vector<std::multimap<double, Box> >* tmp_boxes_; //for each charge we need a separate container
454 
456  std::vector<double> c_mzs_, c_spacings_, psi_, prod_, xs_;
457  std::vector<double> interpol_xs_, interpol_ys_;
458 
461  bool hr_data_;
464  std::vector<int> indices_;
465 
467  std::vector<float> scores_, zeros_;
468  };
469 
470  template <typename PeakType>
471  bool intensityComparator(const PeakType& a, const PeakType& b)
472  {
473  return a.getIntensity() > b.getIntensity();
474  }
475 
476  template <typename PeakType>
478  {
479  return a.getIntensity() < b.getIntensity();
480  }
481 
482  template <typename PeakType>
484  {
485  return a->getIntensity() > b->getIntensity();
486  }
487 
488  template <typename PeakType>
489  bool positionComparator(const PeakType& a, const PeakType& b)
490  {
491  return a.getMZ() < b.getMZ();
492  }
493 
494  template <typename PeakType>
496  {
497  tmp_boxes_ = new std::vector<std::multimap<double, Box> >(1);
498  av_MZ_spacing_ = 1;
499  max_scan_size_ = 0;
500  max_mz_cutoff_ = 3;
501  max_num_peaks_per_pattern_ = 3;
502  hr_data_ = false;
503  intenstype_ = "ref";
504  }
505 
506  template <typename PeakType>
507  IsotopeWaveletTransform<PeakType>::IsotopeWaveletTransform(const double min_mz, const double max_mz, const UInt max_charge, const Size max_scan_size, const bool hr_data, String intenstype)
508  {
509  max_charge_ = max_charge;
510  max_scan_size_ = max_scan_size;
511  hr_data_ = hr_data;
512  intenstype_ = intenstype;
513  tmp_boxes_ = new std::vector<std::multimap<double, Box> >(max_charge);
514  if (max_scan_size <= 0) //only important for the CPU
515  {
516  IsotopeWavelet::init(max_mz, max_charge);
517  }
518 
519  av_MZ_spacing_ = 1;
520  max_mz_cutoff_ = IsotopeWavelet::getMzPeakCutOffAtMonoPos(max_mz, max_charge);
521  max_num_peaks_per_pattern_ = IsotopeWavelet::getNumPeakCutOff(max_mz, max_charge);
522 
523  Int size_estimate((Int)ceil(max_scan_size_ / (max_mz - min_mz)));
524  Int to_reserve((Int)ceil(size_estimate * max_num_peaks_per_pattern_ * Constants::IW_NEUTRON_MASS));
525  psi_.reserve(to_reserve); //The wavelet
526  prod_.reserve(to_reserve);
527  xs_.reserve(to_reserve);
530  }
531 
532  template <typename PeakType>
534  {
535  delete (tmp_boxes_);
536  }
537 
538  template <typename PeakType>
540  {
541  Int spec_size((Int)c_ref.size());
542  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
543  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
544  UInt charge = c + 1;
545  double value, T_boundary_left, T_boundary_right, old, c_diff, current, old_pos, my_local_MZ, my_local_lambda, origin, c_mz;
546 
547  for (Int my_local_pos = 0; my_local_pos < spec_size; ++my_local_pos)
548  {
549  value = 0; T_boundary_left = 0; T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge) / (double)charge;
550  old = 0; old_pos = (my_local_pos - from_max_to_left_ - 1 >= 0) ? c_ref[my_local_pos - from_max_to_left_ - 1].getMZ() : c_ref[0].getMZ() - min_spacing_;
551  my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ * charge);
552  c_diff = 0;
553  origin = -my_local_MZ + Constants::IW_QUARTER_NEUTRON_MASS / (double)charge;
554 
555  for (Int current_conv_pos = std::max(0, my_local_pos - from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
556  {
557  if (current_conv_pos >= spec_size)
558  {
559  value += 0.5 * old * min_spacing_;
560  break;
561  }
562 
563  c_mz = c_ref[current_conv_pos].getMZ();
564  c_diff = c_mz + origin;
565 
566  //Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
567  current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda(my_local_lambda, c_diff * charge + 1.) * c_ref[current_conv_pos].getIntensity() : 0;
568 
569  value += 0.5 * (current + old) * (c_mz - old_pos);
570 
571  old = current;
572  old_pos = c_mz;
573  }
574 
575 
576 
577  c_trans[my_local_pos].setIntensity(value);
578  }
579  }
580 
581  template <typename PeakType>
583  {
584  Int spec_size((Int)c_ref.size());
585  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
586  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
587  UInt charge = c + 1;
588  double value, T_boundary_left, T_boundary_right, c_diff, current, my_local_MZ, my_local_lambda, origin, c_mz;
589 
590  for (Int my_local_pos = 0; my_local_pos < spec_size; ++my_local_pos)
591  {
592  value = 0; T_boundary_left = 0; T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge) / (double)charge;
593 
594 
595  my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ * charge);
596  c_diff = 0;
597  origin = -my_local_MZ + Constants::IW_QUARTER_NEUTRON_MASS / (double)charge;
598 
599  for (Int current_conv_pos = std::max(0, my_local_pos - from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
600  {
601  if (current_conv_pos >= spec_size)
602  {
603  break;
604  }
605 
606  c_mz = c_ref[current_conv_pos].getMZ();
607  c_diff = c_mz + origin;
608 
609  //Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
610  current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda(my_local_lambda, c_diff * charge + 1.) * c_ref[current_conv_pos].getIntensity() : 0;
611 
612  value += current;
613  }
614 
615  c_trans[my_local_pos].setIntensity(value);
616  }
617  }
618 
619  template <typename PeakType>
621  {
622  data_length_ = (UInt) c_ref.size();
623  computeMinSpacing(c_ref);
624  Int wavelet_length = 0, quarter_length = 0;
625 
626  if (hr_data_) //We have to check this separately, because the simply estimation for LowRes data is destroyed by large gaps
627  {
628  UInt c_mz_cutoff;
629  typename MSSpectrum::const_iterator start_iter, end_iter;
630  for (UInt i = 0; i < data_length_; ++i)
631  {
632  c_mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[i].getMZ(), c + 1);
633  start_iter = c_ref.MZEnd(c_ref[i].getMZ());
634  end_iter = c_ref.MZBegin(c_ref[i].getMZ() + c_mz_cutoff);
635  wavelet_length = std::max((SignedSize) wavelet_length, distance(start_iter, end_iter) + 1);
636  end_iter = c_ref.MZEnd(c_ref[i].getMZ() - Constants::IW_QUARTER_NEUTRON_MASS / double(c + 1.));
637  quarter_length = std::max((SignedSize) quarter_length, distance(end_iter, start_iter) + 1);
638  }
639  }
640  else
641  {
642  //CHANGED
643  max_mz_cutoff_ = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[data_length_ - 1].getMZ(), max_charge_);
644  wavelet_length = (UInt) ceil(max_mz_cutoff_ / min_spacing_);
645  }
646  //... done
647 
648 
649  if (wavelet_length > (Int) c_ref.size())
650  {
651  std::cout << "Warning: the extremal length of the wavelet is larger (" << wavelet_length << ") than the number of data points (" << c_ref.size() << "). This might (!) severely affect the transform." << std::endl;
652  std::cout << "Minimal spacing: " << min_spacing_ << std::endl;
653  std::cout << "Warning/Error generated at scan with RT " << c_ref.getRT() << "." << std::endl;
654  }
655 
656  Int max_index = (UInt) (Constants::IW_QUARTER_NEUTRON_MASS / min_spacing_);
657  from_max_to_left_ = max_index;
658  from_max_to_right_ = wavelet_length - 1 - from_max_to_left_;
659  }
660 
661  template <typename PeakType>
663  {
664  min_spacing_ = INT_MAX;
665  for (UInt c_conv_pos = 1; c_conv_pos < c_ref.size(); ++c_conv_pos)
666  {
667  min_spacing_ = std::min(min_spacing_, c_ref[c_conv_pos].getMZ() - c_ref[c_conv_pos - 1].getMZ());
668  }
669  }
670 
671  template <typename PeakType>
673  const MSSpectrum& ref, const UInt scan_index, const UInt c, const double ampl_cutoff, const bool check_PPMs)
674  {
675  Size scan_size(candidates.size());
677  typename MSSpectrum::const_iterator iter_start, iter_end, seed_iter, iter2;
678  double mz_cutoff, seed_mz, c_av_intens = 0, c_score = 0, c_sd_intens = 0, threshold = 0, help_mz, share, share_pos, bwd, fwd;
679  UInt MZ_start, MZ_end;
680 
681  MSSpectrum diffed(candidates);
682  diffed[0].setIntensity(0); diffed[scan_size - 1].setIntensity(0);
683 
684 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
685  std::stringstream stream;
686  stream << "diffed_" << ref.getRT() << "_" << c + 1 << ".trans\0";
687  std::ofstream ofile(stream.str().c_str());
688 #endif
689 
690  if (!hr_data_) //LowRes data
691  {
692  for (UInt i = 0; i < scan_size - 2; ++i)
693  {
694  share = candidates[i + 1].getIntensity(); share_pos = candidates[i + 1].getMZ();
695  bwd = (share - candidates[i].getIntensity()) / (share_pos - candidates[i].getMZ());
696  fwd = (candidates[i + 2].getIntensity() - share) / (candidates[i + 2].getMZ() - share_pos);
697 
698  if (!(bwd >= 0 && fwd <= 0) || share > ref[i + 1].getIntensity())
699  {
700  diffed[i + 1].setIntensity(0);
701  }
702 
703 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
704  ofile << diffed[i + 1].getMZ() << "\t" << diffed[i + 1].getIntensity() << std::endl;
705 #endif
706  }
707  }
708  else //HighRes data
709  {
710  for (UInt i = 0; i < scan_size - 2; ++i)
711  {
712  share = candidates[i + 1].getIntensity(); share_pos = candidates[i + 1].getMZ();
713  bwd = (share - candidates[i].getIntensity()) / (share_pos - candidates[i].getMZ());
714  fwd = (candidates[i + 2].getIntensity() - share) / (candidates[i + 2].getMZ() - share_pos);
715 
716  if (!(bwd >= 0 && fwd <= 0))
717  {
718  diffed[i + 1].setIntensity(0);
719  }
720 
721 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
722  ofile << diffed[i + 1].getMZ() << "\t" << diffed[i + 1].getIntensity() << std::endl;
723 #endif
724  }
725  }
726 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
727  ofile.close();
728 #endif
729 
730  ConstRefVector<MSSpectrum > c_sorted_candidate(diffed.begin(), diffed.end());
731 
732  //Sort the transform in descending order according to the intensities present in the transform
733  c_sorted_candidate.sortByIntensity();
734 
735 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
736  std::stringstream stream2;
737  stream2 << "sorted_cpu_" << candidates.getRT() << "_" << c + 1 << ".trans\0";
738  std::ofstream ofile2(stream2.str().c_str());
739  for (iter = c_sorted_candidate.end() - 1; iter != c_sorted_candidate.begin(); --iter)
740  {
741  ofile2 << iter->getMZ() << "\t" << iter->getIntensity() << std::endl;
742  }
743  ofile2.close();
744 #endif
745 
746  std::vector<UInt> processed(scan_size, 0);
747 
748  if (ampl_cutoff < 0)
749  {
750  threshold = 0;
751  }
752  else
753  {
754  c_av_intens = getAvIntens_(candidates);
755  c_sd_intens = getSdIntens_(candidates, c_av_intens);
756  threshold = ampl_cutoff * c_sd_intens + c_av_intens;
757  }
758 
759  for (iter = c_sorted_candidate.end() - 1; iter != c_sorted_candidate.begin(); --iter)
760  {
761  if (iter->getIntensity() <= 0)
762  {
763  break;
764  }
765 
766  seed_mz = iter->getMZ();
767  seed_iter = ref.MZBegin(seed_mz);
768 
769  if (seed_iter == ref.end() || processed[distance(ref.begin(), seed_iter)])
770  {
771  continue;
772  }
773 
774  mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(seed_mz, c + 1);
775  //Mark the region as processed
776  //Do not move this further down, since we have to mark this as processed in any case,
777  //even when score <=0; otherwise we would look around the maximum's position unless
778  //any significant point is found
779  iter_start = ref.MZBegin(ref.begin(), seed_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.), seed_iter);
780  iter_end = ref.MZEnd(seed_iter, seed_mz + mz_cutoff / (c + 1.), ref.end());
781  if (iter_end == ref.end())
782  {
783  --iter_end;
784  }
785 
786  MZ_start = distance(ref.begin(), iter_start);
787  MZ_end = distance(ref.begin(), iter_end);
788 
789  memset(&(processed[MZ_start]), 1, sizeof(UInt) * (MZ_end - MZ_start + 1));
790 
791  c_score = scoreThis_(candidates, IsotopeWavelet::getNumPeakCutOff(seed_mz * (c + 1.)), seed_mz, c, threshold);
792 
793 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
794  if (trunc(seed_mz) == 874)
795  std::cout << seed_mz << "\t" << c_score << std::endl;
796 #endif
797 
798  if (c_score <= 0 && c_score != -1000)
799  {
800  continue;
801  }
802 
803  //Push the seed into its corresponding box (or create a new one, if necessary)
804  //Do ***NOT*** move this further down!
805  push2TmpBox_(seed_mz, scan_index, c, c_score, iter->getIntensity(), ref.getRT(), MZ_start, MZ_end);
806 
807  help_mz = seed_mz - Constants::IW_NEUTRON_MASS / (c + 1.);
808  iter2 = candidates.MZBegin(help_mz);
809  if (iter2 == candidates.end() || iter2 == candidates.begin())
810  {
811  continue;
812  }
813 
814  if (fabs(iter2->getMZ() - seed_mz) > 0.5 * Constants::IW_NEUTRON_MASS / (c + 1.))
815  {
816  //In the other case, we are too close to the peak, leading to incorrect derivatives.
817  if (iter2 != candidates.end())
818  {
819  push2TmpBox_(iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2 - 1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
820  }
821  }
822 
823  help_mz = seed_mz + Constants::IW_NEUTRON_MASS / (c + 1.);
824  iter2 = candidates.MZBegin(help_mz);
825  if (iter2 == candidates.end() || iter2 == candidates.begin())
826  {
827  continue;
828  }
829 
830  if (fabs(iter2->getMZ() - seed_mz) > 0.5 * Constants::IW_NEUTRON_MASS / (c + 1.))
831  {
832  //In the other case, we are too close to the peak, leading to incorrect derivatives.
833  if (iter2 != candidates.end())
834  {
835  push2TmpBox_(iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2 - 1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
836  }
837  }
838  }
839 
840  clusterSeeds_(candidates, ref, scan_index, c, check_PPMs);
841  }
842 
843  template <typename PeakType>
845  const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
846  {
847  double c_score = 0, c_val;
848  typename MSSpectrum::const_iterator c_left_iter2, c_right_iter2;
849  Int signal_size((Int)candidate.size());
850  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
851  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
852 
853  //p_h_ind indicates if we are looking for a whole or a peak
854  Int p_h_ind = 1, end = 4 * (peak_cutoff - 1) - 1; //4 times and not 2 times, since we move by 0.5 m/z entities
855 
856  std::vector<double> positions(end);
857  for (Int i = 0; i < end; ++i)
858  {
859  positions[i] = seed_mz - ((peak_cutoff - 1) * Constants::IW_NEUTRON_MASS - (i + 1) * Constants::IW_HALF_NEUTRON_MASS) / ((double)c + 1);
860  }
861 
862  double l_score = 0, mid_val = 0;
863  Int start_index = distance(candidate.begin(), candidate.MZBegin(positions[0])) - 1;
864  for (Int v = 1; v <= end; ++v, ++p_h_ind)
865  {
866  do
867  {
868  if (start_index < signal_size - 1)
869  ++start_index;
870  else
871  break;
872  }
873  while (candidate[start_index].getMZ() < positions[v - 1]);
874 
875  if (start_index <= 0 || start_index >= signal_size - 1) //unable to interpolate
876  {
877  continue;
878  }
879 
880  c_left_iter2 = candidate.begin() + start_index - 1;
881  c_right_iter2 = c_left_iter2 + 1;
882 
883  c_val = c_left_iter2->getIntensity() + (c_right_iter2->getIntensity() - c_left_iter2->getIntensity()) / (c_right_iter2->getMZ() - c_left_iter2->getMZ()) * (positions[v - 1] - c_left_iter2->getMZ());
884 
885  if (v == (int)(ceil(end / 2.)))
886  {
887  l_score = c_score;
888  mid_val = c_val;
889  }
890 
891  if (p_h_ind % 2 == 1) //I.e. a whole
892  {
893  c_score -= c_val;
894 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
895  if (trunc(seed_mz) == 874)
896  std::cout << -c_val << std::endl;
897 #endif
898  }
899  else
900  {
901  c_score += c_val;
902 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
903  if (trunc(seed_mz) == 874)
904  std::cout << c_val << std::endl;
905 #endif
906  }
907 
908 
909  start_index = distance(candidate.begin(), c_left_iter2);
910  }
911 
912 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
913  std::ofstream ofile_score("scores.dat", ios::app);
914  std::ofstream ofile_check_score("check_scores.dat", ios::app);
915  ofile_score.close();
916  ofile_check_score.close();
917 #endif
918 
919 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
920  if (trunc(seed_mz) == 874)
921  std::cout << "final_score: " << seed_mz << "\t" << c_score << "\t l_score: " << l_score << "\t" << c_score - l_score - mid_val << "\t" << c_score - mid_val << "\t" << ampl_cutoff << std::endl;
922 #endif
923 
924  if (c_score - mid_val <= 0)
925  {
926  return 0;
927  }
928 
929  if (c_score - mid_val <= ampl_cutoff)
930  {
931  return -1000;
932  }
933 
934  if (l_score <= 0 || c_score - l_score - mid_val <= 0)
935  {
936  return 0;
937  }
938 
939  return c_score;
940  }
941 
942  template <typename PeakType>
944  const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
945  {
946  double c_score = 0, c_val;
947  typename MSSpectrum::const_iterator c_left_iter2, c_right_iter2;
948  Int signal_size((Int)candidate.size());
949 
950  //p_h_ind indicates if we are looking for a whole or a peak
951  Int p_h_ind = 1, end = 4 * (peak_cutoff - 1) - 1; //4 times and not 2 times, since we move by 0.5 m/z entities
952 
953  std::vector<double> positions(end);
954  for (Int i = 0; i < end; ++i)
955  {
956  positions[i] = seed_mz - ((peak_cutoff - 1) * Constants::IW_NEUTRON_MASS - (i + 1) * Constants::IW_HALF_NEUTRON_MASS) / ((double)c + 1);
957  }
958 
959  double l_score = 0, mid_val = 0;
960  Int start_index = distance(candidate.begin(), candidate.MZBegin(positions[0])) - 1;
961  for (Int v = 1; v <= end; ++v, ++p_h_ind)
962  {
963  do
964  {
965  if (start_index < signal_size - 1)
966  ++start_index;
967  else
968  break;
969  }
970  while (candidate.getMZ(start_index) < positions[v - 1]);
971 
972  if (start_index <= 0 || start_index >= signal_size - 1) //unable to interpolate
973  {
974  continue;
975  }
976 
977  c_left_iter2 = candidate.begin() + start_index - 1;
978  c_right_iter2 = c_left_iter2 + 1;
979 
980  c_val = candidate.getTransIntensity(start_index - 1) + (candidate.getTransIntensity(start_index) - candidate.getTransIntensity(start_index - 1)) / (c_right_iter2->getMZ() - c_left_iter2->getMZ()) * (positions[v - 1] - c_left_iter2->getMZ());
981  if (v == (int)(ceil(end / 2.)))
982  {
983  l_score = c_score;
984  mid_val = c_val;
985  }
986 
987  if (p_h_ind % 2 == 1) //I.e. a whole
988  {
989  c_score -= c_val;
990  }
991  else
992  {
993  c_score += c_val;
994  }
995 
996  start_index = distance(candidate.begin(), c_left_iter2);
997  }
998 
999 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1000  std::ofstream ofile_score("scores.dat", ios::app);
1001  std::ofstream ofile_check_score("check_scores.dat", ios::app);
1002  ofile_score << c_check_point << "\t" << c_score << std::endl;
1003  ofile_score.close();
1004  ofile_check_score.close();
1005 #endif
1006 
1007  if (l_score <= 0 || c_score - l_score - mid_val <= 0 || c_score - mid_val <= ampl_cutoff)
1008  {
1009  return 0;
1010  }
1011 
1012  return c_score;
1013  }
1014 
1015  template <typename PeakType>
1017  const MSSpectrum& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
1018  {
1019  typename std::multimap<double, Box>::iterator iter;
1020  typename Box::iterator box_iter;
1021  std::vector<BoxElement> final_box;
1022  double c_mz, av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1023  double virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1024 
1025  for (iter = tmp_boxes_->at(c).begin(); iter != tmp_boxes_->at(c).end(); ++iter)
1026  {
1027 
1028  Box& c_box = iter->second;
1029  av_score = 0; av_mz = 0; av_intens = 0; av_abs_intens = 0; count = 0;
1030  virtual_av_mz = 0; virtual_av_intens = 0; virtual_av_abs_intens = 0; virtual_count = 0;
1031 
1032  //Now, let's get the RT boundaries for the box
1033  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1034  {
1035  if (box_iter->second.score == 0) //virtual helping point
1036  {
1037  if (count != 0)
1038  continue; //it is in any way not pure virtual
1039 
1040  c_mz = box_iter->second.mz;
1041  virtual_av_intens += box_iter->second.intens;
1042  virtual_av_abs_intens += fabs(box_iter->second.intens);
1043  virtual_av_mz += c_mz * fabs(box_iter->second.intens);
1044  ++virtual_count;
1045  }
1046  else
1047  {
1048  c_mz = box_iter->second.mz;
1049  av_score += box_iter->second.score;
1050  av_intens += box_iter->second.intens;
1051  av_abs_intens += fabs(box_iter->second.intens);
1052  av_mz += c_mz * fabs(box_iter->second.intens);
1053  ++count;
1054  }
1055  }
1056 
1057  if (count == 0) //pure virtual helping box
1058  {
1059  av_intens = virtual_av_intens / virtual_count;
1060  av_score = 0;
1061  av_mz = virtual_av_mz / virtual_av_abs_intens;
1062  }
1063  else
1064  {
1065  av_intens /= count;
1066  av_score /= count;
1067  av_mz /= av_abs_intens;
1068  }
1069 
1070  BoxElement c_box_element;
1071  c_box_element.mz = av_mz;
1072  c_box_element.c = c;
1073  c_box_element.score = av_score;
1074  c_box_element.intens = av_intens;
1075 
1076  c_box_element.RT = c_box.begin()->second.RT;
1077  final_box.push_back(c_box_element);
1078  }
1079 
1080  Size num_o_feature = final_box.size();
1081  if (num_o_feature == 0)
1082  {
1083  tmp_boxes_->at(c).clear();
1084  return;
1085  }
1086 
1087  //Computing the derivatives
1088  std::vector<double> bwd_diffs(num_o_feature, 0);
1089 
1090  bwd_diffs[0] = 0;
1091  for (Size i = 1; i < num_o_feature; ++i)
1092  {
1093  bwd_diffs[i] = (final_box[i].intens - final_box[i - 1].intens) / (final_box[i].mz - final_box[i - 1].mz);
1094  }
1095 
1096 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1097  std::ofstream ofile_bwd("bwd_cpu.dat");
1098  for (Size i = 0; i < num_o_feature; ++i)
1099  {
1100  ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
1101  }
1102  ofile_bwd.close();
1103 #endif
1104 
1105 
1106  for (Size i = 0; i < num_o_feature - 1; ++i)
1107  {
1108  while (i < num_o_feature - 2)
1109  {
1110  if (final_box[i].score > 0 || final_box[i].score == -1000) //this has been an helping point
1111  break;
1112  ++i;
1113  }
1114 
1115  if (bwd_diffs[i] > 0 && bwd_diffs[i + 1] < 0)
1116  {
1117  checkPositionForPlausibility_(candidate, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
1118  continue;
1119  }
1120  }
1121 
1122  tmp_boxes_->at(c).clear();
1123  }
1124 
1125  template <typename PeakType>
1127  {
1128  double av_intens = 0;
1129  for (UInt i = 0; i < scan.size(); ++i)
1130  {
1131  if (scan[i].getIntensity() >= 0)
1132  {
1133  av_intens += scan[i].getIntensity();
1134  }
1135  }
1136  return av_intens / (double)scan.size();
1137  }
1138 
1139  template <typename PeakType>
1141  {
1142  double res = 0, intens;
1143  for (UInt i = 0; i < scan.size(); ++i)
1144  {
1145  if (scan[i].getIntensity() >= 0)
1146  {
1147  intens = scan[i].getIntensity();
1148  res += (intens - mean) * (intens - mean);
1149  }
1150  }
1151  return sqrt(res / (double)(scan.size() - 1));
1152  }
1153 
1154  template <typename PeakType>
1155  double IsotopeWaveletTransform<PeakType>::getAvMZSpacing_(const MSSpectrum& scan) //, Int start_index, Int end_index)
1156  {
1157  std::vector<double> diffs(scan.size() - 1, 0);
1158  for (UInt i = 0; i < scan.size() - 1; ++i)
1159  {
1160  diffs[i] = scan[i + 1].getMZ() - scan[i].getMZ();
1161  }
1162 
1163  sort(diffs.begin(), diffs.end());
1164  double av_MZ_spacing = 0;
1165  for (UInt i = 0; i < diffs.size() / 2; ++i)
1166  {
1167  av_MZ_spacing += diffs[i];
1168  }
1169 
1170  return av_MZ_spacing / (diffs.size() / 2);
1171  }
1172 
1173  template <typename PeakType>
1175  {
1176  double av_intens = 0;
1177  for (UInt i = 0; i < scan.size(); ++i)
1178  {
1179  if (scan.getTransIntensity(i) >= 0)
1180  {
1181  av_intens += scan.getTransIntensity(i);
1182  }
1183  }
1184  return av_intens / (double)scan.size();
1185  }
1186 
1187  template <typename PeakType>
1189  {
1190  double res = 0, intens;
1191  for (UInt i = 0; i < scan.size(); ++i)
1192  {
1193  if (scan.getTransIntensity(i) >= 0)
1194  {
1195  intens = scan.getTransIntensity(i);
1196  res += (intens - mean) * (intens - mean);
1197  }
1198  }
1199  return sqrt(res / (double)(scan.size() - 1));
1200  }
1201 
1202  template <typename PeakType>
1203  void IsotopeWaveletTransform<PeakType>::push2Box_(const double mz, const UInt scan, UInt c,
1204  const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, double ref_intens)
1205  {
1206  const double dist_constraint(Constants::IW_HALF_NEUTRON_MASS / (double)max_charge_);
1207 
1208  typename std::multimap<double, Box>::iterator upper_iter(open_boxes_.upper_bound(mz));
1209  typename std::multimap<double, Box>::iterator lower_iter(open_boxes_.lower_bound(mz));
1210 
1211  if (lower_iter != open_boxes_.end())
1212  {
1213  //Ugly, but necessary due to the implementation of STL lower_bound
1214  if (mz != lower_iter->first && lower_iter != open_boxes_.begin())
1215  {
1216  --lower_iter;
1217  }
1218  }
1219 
1220  typename std::multimap<double, Box>::iterator insert_iter;
1221  bool create_new_box = true;
1222  if (lower_iter == open_boxes_.end()) //I.e. there is no open Box for that mz position
1223  {
1224  //There is another special case to be considered here:
1225  //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1226  //then the lower bound for the new mz value is box.end and this would usually force a new entry
1227  if (!open_boxes_.empty())
1228  {
1229  if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1230  {
1231  create_new_box = false;
1232  insert_iter = lower_iter;
1233  }
1234  }
1235  else
1236  {
1237  create_new_box = true;
1238  }
1239  }
1240  else
1241  {
1242  if (upper_iter == open_boxes_.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1243  {
1244  insert_iter = lower_iter;
1245  create_new_box = false;
1246  }
1247  else
1248  {
1249  create_new_box = true;
1250  }
1251  }
1252 
1253 
1254  if (upper_iter != open_boxes_.end() && lower_iter != open_boxes_.end())
1255  {
1256  //Here is the question if you should figure out the smallest charge .... and then
1257 
1258  //Figure out which entry is closer to m/z
1259  double dist_lower = fabs(lower_iter->first - mz);
1260  double dist_upper = fabs(upper_iter->first - mz);
1261  dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1262  dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1263 
1264  if (dist_lower >= dist_constraint && dist_upper >= dist_constraint) // they are both too far away
1265  {
1266  create_new_box = true;
1267  }
1268  else
1269  {
1270  insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1271  create_new_box = false;
1272  }
1273  }
1274 
1275  BoxElement element;
1276  element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens = intens; element.ref_intens = ref_intens;
1277  element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
1278 
1279 
1280  if (create_new_box == false)
1281  {
1282  std::pair<UInt, BoxElement> help2(scan, element);
1283  insert_iter->second.insert(help2);
1284 
1285  //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
1286  Box replacement(insert_iter->second);
1287 
1288  //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1289  //Also note that we already inserted the new entry, leading to size-1.
1290  double c_mz = insert_iter->first * (insert_iter->second.size() - 1) + mz;
1291  c_mz /= ((double) insert_iter->second.size());
1292 
1293  //Now let's remove the old and insert the new one
1294  open_boxes_.erase(insert_iter);
1295  std::pair<double, std::multimap<UInt, BoxElement> > help3(c_mz, replacement);
1296  open_boxes_.insert(help3);
1297  }
1298  else
1299  {
1300  std::pair<UInt, BoxElement> help2(scan, element);
1301  std::multimap<UInt, BoxElement> help3;
1302  help3.insert(help2);
1303  std::pair<double, std::multimap<UInt, BoxElement> > help4(mz, help3);
1304  open_boxes_.insert(help4);
1305  }
1306  }
1307 
1308  template <typename PeakType>
1309  void IsotopeWaveletTransform<PeakType>::push2TmpBox_(const double mz, const UInt scan, UInt c,
1310  const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end)
1311  {
1312  const double dist_constraint(Constants::IW_HALF_NEUTRON_MASS / (double)max_charge_);
1313 
1314  std::multimap<double, Box>& tmp_box(tmp_boxes_->at(c));
1315  typename std::multimap<double, Box>::iterator upper_iter(tmp_box.upper_bound(mz));
1316  typename std::multimap<double, Box>::iterator lower_iter(tmp_box.lower_bound(mz));
1317 
1318  if (lower_iter != tmp_box.end())
1319  {
1320  //Ugly, but necessary due to the implementation of STL lower_bound
1321  if (mz != lower_iter->first && lower_iter != tmp_box.begin())
1322  {
1323  --lower_iter;
1324  }
1325  }
1326 
1327  typename std::multimap<double, Box>::iterator insert_iter;
1328  bool create_new_box = true;
1329  if (lower_iter == tmp_box.end()) //I.e. there is no tmp Box for that mz position
1330  {
1331  //There is another special case to be considered here:
1332  //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1333  //then the lower bound for the new mz value is box.end and this would usually force a new entry
1334  if (!tmp_box.empty())
1335  {
1336  if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1337  {
1338  create_new_box = false;
1339  insert_iter = lower_iter;
1340  }
1341  }
1342  else
1343  {
1344  create_new_box = true;
1345  }
1346  }
1347  else
1348  {
1349  if (upper_iter == tmp_box.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1350  {
1351  insert_iter = lower_iter;
1352  create_new_box = false;
1353  }
1354  else
1355  {
1356  create_new_box = true;
1357  }
1358  }
1359 
1360 
1361  if (upper_iter != tmp_box.end() && lower_iter != tmp_box.end())
1362  {
1363  //Figure out which entry is closer to m/z
1364  double dist_lower = fabs(lower_iter->first - mz);
1365  double dist_upper = fabs(upper_iter->first - mz);
1366  dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1367  dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1368 
1369  if (dist_lower >= dist_constraint && dist_upper >= dist_constraint) // they are both too far away
1370  {
1371  create_new_box = true;
1372  }
1373  else
1374  {
1375  insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1376  create_new_box = false;
1377  }
1378  }
1379 
1380  BoxElement element;
1381  element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens = intens; element.ref_intens = -1000;
1382  element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
1383 
1384  if (create_new_box == false)
1385  {
1386  std::pair<UInt, BoxElement> help2(scan, element);
1387  insert_iter->second.insert(help2);
1388 
1389  //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
1390  Box replacement(insert_iter->second);
1391 
1392  //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1393  //Also note that we already inserted the new entry, leading to size-1.
1394  double c_mz = insert_iter->first * (insert_iter->second.size() - 1) + mz;
1395  c_mz /= ((double) insert_iter->second.size());
1396 
1397  //Now let's remove the old and insert the new one
1398  tmp_box.erase(insert_iter);
1399  std::pair<double, std::multimap<UInt, BoxElement> > help3(c_mz, replacement);
1400  tmp_box.insert(help3);
1401  }
1402  else
1403  {
1404  std::pair<UInt, BoxElement> help2(scan, element);
1405  std::multimap<UInt, BoxElement> help3;
1406  help3.insert(help2);
1407 
1408  std::pair<double, std::multimap<UInt, BoxElement> > help4(mz, help3);
1409  tmp_box.insert(help4);
1410  }
1411  }
1412 
1413  template <typename PeakType>
1414  void IsotopeWaveletTransform<PeakType>::updateBoxStates(const PeakMap& map, const Size scan_index, const UInt RT_interleave,
1415  const UInt RT_votes_cutoff, const Int front_bound, const Int end_bound)
1416  {
1417  typename std::multimap<double, Box>::iterator iter, iter2;
1418 
1419  if ((Int)scan_index == end_bound && end_bound != (Int)map.size() - 1)
1420  {
1421  for (iter = open_boxes_.begin(); iter != open_boxes_.end(); ++iter)
1422  {
1423 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1424  std::cout << "LOW THREAD insert in end_box " << iter->first << std::endl;
1425  typename Box::iterator dings;
1426  for (dings = iter->second.begin(); dings != iter->second.end(); ++dings)
1427  std::cout << map[dings->first].getRT() << "\t" << dings->second.c + 1 << std::endl;
1428 #endif
1429  end_boxes_.insert(*iter);
1430  }
1431  open_boxes_.clear();
1432  return;
1433  }
1434 
1435  for (iter = open_boxes_.begin(); iter != open_boxes_.end(); )
1436  {
1437 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1438  if (front_bound > 0)
1439  {
1440  std::cout << "HIGH THREAD open box. " << iter->first << "\t current scan index " << scan_index << "\t" << ((iter->second.begin()))->first << "\t of last scan " << map.size() - 1 << "\t" << front_bound << std::endl;
1441  }
1442 #endif
1443 
1444  //For each Box we need to figure out, if and when the last RT value has been inserted
1445  UInt lastScan = (--(iter->second.end()))->first;
1446  if (scan_index - lastScan > RT_interleave + 1 || scan_index == map.size() - 1) //I.e. close the box!
1447  {
1448  if (iter->second.begin()->first - front_bound <= RT_interleave + 1 && front_bound > 0)
1449  {
1450  iter2 = iter;
1451  ++iter2;
1452 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1453  std::cout << "HIGH THREAD insert in front_box " << iter->first << std::endl;
1454 #endif
1455  front_boxes_.insert(*iter);
1456  open_boxes_.erase(iter);
1457  iter = iter2;
1458  continue;
1459  }
1460 
1461  iter2 = iter;
1462  ++iter2;
1463  //Please do **NOT** simplify the upcoming lines.
1464  //The 'obvious' overhead is necessary since the object represented by iter might be erased
1465  //by push2Box which might be called by extendBox_.
1466  if (iter->second.size() >= RT_votes_cutoff)
1467  {
1468  //extendBox_ (map, iter->second);
1469  iter = iter2;
1470  closed_boxes_.insert(*(--iter));
1471  }
1472  open_boxes_.erase(iter);
1473  iter = iter2;
1474  }
1475  else
1476  {
1477  ++iter;
1478  }
1479  }
1480  }
1481 
1482  template <typename PeakType>
1484  {
1485 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1486  std::cout << "**** CHECKING FOR BOX EXTENSIONS ****" << std::endl;
1487 #endif
1488 
1489  //Determining the elution profile
1490  typename Box::const_iterator iter;
1491  std::vector<double> elution_profile(box.size());
1492  UInt index = 0;
1493  for (iter = box.begin(); iter != box.end(); ++iter, ++index)
1494  {
1495  for (Size i = iter->second.MZ_begin; i != iter->second.MZ_end; ++i)
1496  {
1497  elution_profile[index] += map[iter->second.RT_index][i].getIntensity();
1498  }
1499  elution_profile[index] /= iter->second.MZ_end - iter->second.MZ_begin + 1.;
1500  }
1501 
1502  double max = 0;
1503  Int max_index = INT_MIN;
1504  for (Size i = 0; i < elution_profile.size(); ++i)
1505  {
1506  if (elution_profile[i] > max)
1507  {
1508  max_index = i;
1509  max = elution_profile[i];
1510  }
1511  }
1512 
1513  Int max_extension = (Int)(elution_profile.size()) - 2 * max_index;
1514 
1515  double av_elution = 0;
1516  for (Size i = 0; i < elution_profile.size(); ++i)
1517  {
1518  av_elution += elution_profile[i];
1519  }
1520  av_elution /= (double)elution_profile.size();
1521 
1522  double sd_elution = 0;
1523  for (Size i = 0; i < elution_profile.size(); ++i)
1524  {
1525  sd_elution += (av_elution - elution_profile[i]) * (av_elution - elution_profile[i]);
1526  }
1527  sd_elution /= (double)(elution_profile.size() - 1);
1528  sd_elution = sqrt(sd_elution);
1529 
1530  //Determine average m/z monoisotopic pos
1531  double av_mz = 0;
1532  for (iter = box.begin(); iter != box.end(); ++iter, ++index)
1533  {
1534  av_mz += iter->second.mz;
1535 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1536  std::cout << iter->second.RT << "\t" << iter->second.mz << "\t" << iter->second.c + 1 << std::endl;
1537 #endif
1538  }
1539  av_mz /= (double)box.size();
1540 
1541 
1542  //Boundary check
1543  if ((Int)(box.begin()->second.RT_index) - 1 < 0)
1544  {
1545  return;
1546  }
1547 
1548  UInt pre_index = box.begin()->second.RT_index - 1;
1549  typename MSSpectrum::const_iterator c_iter = map[pre_index].MZBegin(av_mz);
1550  double pre_elution = 0;
1551 
1552  double mz_start = map[pre_index + 1][box.begin()->second.MZ_begin].getMZ();
1553  double mz_end = map[pre_index + 1][box.begin()->second.MZ_end].getMZ();
1554 
1555  typename MSSpectrum::const_iterator mz_start_iter = map[pre_index].MZBegin(mz_start), mz_end_iter = map[pre_index].MZBegin(mz_end);
1556  for (typename MSSpectrum::const_iterator mz_iter = mz_start_iter; mz_iter != mz_end_iter; ++mz_iter)
1557  {
1558  pre_elution += mz_iter->getIntensity();
1559  }
1560 
1561 
1562  //Do we need to extend at all?
1563  if (pre_elution <= av_elution - 2 * sd_elution)
1564  {
1565  return;
1566  }
1567 
1568  Int first_index = box.begin()->second.RT_index;
1569  for (Int i = 1; i < max_extension; ++i)
1570  {
1571  Int c_index = first_index - i;
1572  if (c_index < 0)
1573  {
1574  break;
1575  }
1576 
1577  //CHECK Majority vote for charge???????????????
1578 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1579  std::cout << box.begin()->second.RT << "\t" << av_mz << "\t" << box.begin()->second.c + 1 << "\t" << " extending the box " << std::endl;
1580 #endif
1581 
1582  push2Box_(av_mz, c_index, box.begin()->second.c, box.begin()->second.score, c_iter->getIntensity(),
1583  map[c_index].getRT(), box.begin()->second.MZ_begin, box.begin()->second.MZ_end);
1584  }
1585  }
1586 
1587  template <typename PeakType>
1589  const MSSpectrum& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
1590  {
1591  typename std::multimap<double, Box>::iterator iter;
1592  typename Box::iterator box_iter;
1593  std::vector<BoxElement> final_box;
1594  double c_mz, av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1595  double virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1596 
1597  for (iter = tmp_boxes_->at(c).begin(); iter != tmp_boxes_->at(c).end(); ++iter)
1598  {
1599  Box& c_box = iter->second;
1600  av_score = 0; av_mz = 0; av_intens = 0; av_abs_intens = 0; count = 0;
1601  virtual_av_mz = 0; virtual_av_intens = 0; virtual_av_abs_intens = 0; virtual_count = 0;
1602 
1603  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1604  {
1605  if (box_iter->second.score == 0) //virtual helping point
1606  {
1607  if (count != 0)
1608  continue; //it is in any way not pure virtual
1609 
1610  c_mz = box_iter->second.mz;
1611  virtual_av_intens += box_iter->second.intens;
1612  virtual_av_abs_intens += fabs(box_iter->second.intens);
1613  virtual_av_mz += c_mz * fabs(box_iter->second.intens);
1614  ++virtual_count;
1615  }
1616  else
1617  {
1618  c_mz = box_iter->second.mz;
1619  av_score += box_iter->second.score;
1620  av_intens += box_iter->second.intens;
1621  av_abs_intens += fabs(box_iter->second.intens);
1622  av_mz += c_mz * fabs(box_iter->second.intens);
1623  ++count;
1624  }
1625  }
1626 
1627  if (count == 0) //pure virtual helping box
1628  {
1629  av_intens = virtual_av_intens / virtual_count;
1630  av_score = 0;
1631  av_mz = virtual_av_mz / virtual_av_abs_intens;
1632  }
1633  else
1634  {
1635  av_intens /= count;
1636  av_score /= count;
1637  av_mz /= av_abs_intens;
1638  }
1639 
1640  BoxElement c_box_element;
1641  c_box_element.mz = av_mz;
1642  c_box_element.c = c;
1643  c_box_element.score = av_score;
1644  c_box_element.intens = av_intens;
1645 
1646  c_box_element.RT = c_box.begin()->second.RT;
1647 
1648  final_box.push_back(c_box_element);
1649  }
1650 
1651  UInt num_o_feature = final_box.size();
1652  if (num_o_feature == 0)
1653  {
1654  tmp_boxes_->at(c).clear();
1655  return;
1656  }
1657 
1658  //Computing the derivatives
1659  std::vector<double> bwd_diffs(num_o_feature, 0);
1660 
1661  bwd_diffs[0] = 0;
1662  for (UInt i = 1; i < num_o_feature; ++i)
1663  {
1664  bwd_diffs[i] = (final_box[i].intens - final_box[i - 1].intens) / (final_box[i].mz - final_box[i - 1].mz);
1665  }
1666 
1667 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1668  std::ofstream ofile_bwd("bwd_gpu.dat");
1669  for (UInt i = 0; i < num_o_feature; ++i)
1670  {
1671  ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
1672  }
1673  ofile_bwd.close();
1674 #endif
1675 
1676  for (UInt i = 0; i < num_o_feature - 1; ++i)
1677  {
1678  while (i < num_o_feature - 2)
1679  {
1680  if (final_box[i].score > 0 || final_box[i].score == -1000) //this has been an helping point
1681  break;
1682  ++i;
1683  }
1684 
1685  if (bwd_diffs[i] > 0 && bwd_diffs[i + 1] < 0)
1686  {
1687  checkPositionForPlausibility_(candidates, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
1688  continue;
1689  }
1690  }
1691 
1692  tmp_boxes_->at(c).clear();
1693  }
1694 
1695  template <typename PeakType>
1697  {
1698  FeatureMap feature_map;
1699  typename std::multimap<double, Box>::iterator iter;
1700  typename Box::iterator box_iter;
1701  UInt best_charge_index; double best_charge_score, c_mz, c_RT; UInt c_charge;
1702  double av_intens = 0, av_ref_intens = 0, av_score = 0, av_mz = 0, av_RT = 0, mz_cutoff, sum_of_ref_intenses_g;
1703 
1704  for (iter = closed_boxes_.begin(); iter != closed_boxes_.end(); ++iter)
1705  {
1706  sum_of_ref_intenses_g = 0;
1707  Box& c_box = iter->second;
1708  std::vector<double> charge_votes(max_charge_, 0), charge_binary_votes(max_charge_, 0);
1709  bool restart = false;
1710 
1711  //Let's first determine the charge
1712  //Therefore, we can use two types of votes: qualitative ones (charge_binary_votes) or quantitative ones (charge_votes)
1713  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1714  {
1715 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1716  if (trunc(box_iter->second.mz) == 874)
1717  std::cout << box_iter->second.c << "\t" << box_iter->second.intens << "\t" << box_iter->second.score << std::endl;
1718 #endif
1719 
1720  if (box_iter->second.score == -1000)
1721  {
1722  restart = true;
1723  break;
1724  }
1725 
1726  charge_votes[box_iter->second.c] += box_iter->second.intens; //score; Do not use score, can get problematic for charge state 2 vs 4
1727  ++charge_binary_votes[box_iter->second.c];
1728  }
1729 
1730  if (restart)
1731  {
1732  continue;
1733  }
1734 
1735  //... determining the best fitting charge
1736  best_charge_index = 0; best_charge_score = 0;
1737  for (UInt i = 0; i < max_charge_; ++i)
1738  {
1739  if (charge_votes[i] > best_charge_score)
1740  {
1741  best_charge_index = i;
1742  best_charge_score = charge_votes[i];
1743  }
1744  }
1745 
1746  //Pattern found in too few RT scan
1747  if (charge_binary_votes[best_charge_index] < RT_votes_cutoff && RT_votes_cutoff <= map.size())
1748  {
1749  continue;
1750  }
1751 
1752  c_charge = best_charge_index + 1; //that's the finally predicted charge state for the pattern
1753 
1754  av_intens = 0; av_ref_intens = 0; av_score = 0; av_mz = 0; av_RT = 0;
1755  //Now, let's get the RT boundaries for the box
1756  std::vector<DPosition<2> > point_set;
1757  double sum_of_ref_intenses_l;
1758  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1759  {
1760  sum_of_ref_intenses_l = 0;
1761  c_mz = box_iter->second.mz;
1762  c_RT = box_iter->second.RT;
1763 
1764  mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_mz, c_charge);
1765 
1766  point_set.push_back(DPosition<2>(c_RT, c_mz - Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge));
1767  //-1 since we are already at the first peak and +0.75, since this includes the last peak of the wavelet as a whole
1768  point_set.push_back(DPosition<2>(c_RT, c_mz + mz_cutoff / (double)c_charge));
1769 
1770 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1771  std::cout << "Intenstype: " << intenstype_ << std::endl;
1772 #endif
1773  if (intenstype_ == "ref")
1774  {
1775  //Find monoisotopic max
1776  const MSSpectrum& c_spec(map[box_iter->second.RT_index]);
1777  //'Correct' possible shift
1778  for (unsigned int i = 0; i < mz_cutoff; ++i)
1779  {
1780  typename MSSpectrum::const_iterator h_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge + Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge), hc_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge);
1781 
1782  hc_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge);
1783 
1784  while (h_iter != c_spec.begin())
1785  {
1786 
1787 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1788  if (trunc(c_mz) == 874)
1789  {
1790  std::cout << "cmz: " << c_mz + i * Constants::IW_NEUTRON_MASS / c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << h_iter->getMZ() << "\t" << h_iter->getIntensity() << std::endl;
1791  }
1792 #endif
1793 
1794  --h_iter;
1795  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1796  {
1797  hc_iter = h_iter;
1798  }
1799 
1800  if (c_mz + i * Constants::IW_NEUTRON_MASS / c_charge - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge)
1801  {
1802  break;
1803  }
1804  }
1805 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1806  if (trunc(c_mz) == 874)
1807  {
1808  std::cout << "c_mz: " << c_mz + i * Constants::IW_NEUTRON_MASS / c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << i * Constants::IW_NEUTRON_MASS / c_charge << "\t";
1809  }
1810 #endif
1811  sum_of_ref_intenses_l += hc_iter->getIntensity();
1812 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1813  if (trunc(c_mz) == 874)
1814  {
1815  std::cout << sum_of_ref_intenses_l << "********" << std::endl;
1816  }
1817 #endif
1818  }
1819  }
1820 
1821  if (best_charge_index == box_iter->second.c)
1822  {
1823  av_score += box_iter->second.score;
1824  av_intens += box_iter->second.intens;
1825  av_ref_intens += box_iter->second.ref_intens;
1826  sum_of_ref_intenses_g += sum_of_ref_intenses_l;
1827  av_mz += c_mz * box_iter->second.intens;
1828  }
1829  av_RT += c_RT;
1830  }
1831 
1832  av_mz /= av_intens;
1833  av_ref_intens /= (double)charge_binary_votes[best_charge_index];
1834  av_score /= (double)charge_binary_votes[best_charge_index];
1835  av_RT /= (double)c_box.size();
1836 
1837 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1838  if (trunc(av_mz) == 874)
1839  std::cout << av_mz << "\t" << best_charge_index << "\t" << best_charge_score << std::endl;
1840 #endif
1841 
1842  Feature c_feature;
1843  ConvexHull2D c_conv_hull;
1844  c_conv_hull.addPoints(point_set);
1845  c_feature.setCharge(c_charge);
1846  c_feature.setConvexHulls(std::vector<ConvexHull2D>(1, c_conv_hull));
1847 
1848  //This makes the intensity value independent of the m/z (the lambda) value (Skellam distribution)
1849  if (intenstype_ == "corrected")
1850  {
1851  double lambda = IsotopeWavelet::getLambdaL(av_mz * c_charge);
1852  av_intens /= exp(-2 * lambda) * boost::math::cyl_bessel_i(0.0, 2.0 * lambda);
1853  }
1854  if (intenstype_ == "ref")
1855  {
1856 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1857  if (trunc(c_mz) == 874)
1858  {
1859  std::cout << sum_of_ref_intenses_g << "####" << std::endl;
1860  }
1861 #endif
1862 
1863  av_intens = sum_of_ref_intenses_g;
1864  }
1865 
1866  c_feature.setMZ(av_mz);
1867  c_feature.setIntensity(av_intens);
1868  c_feature.setRT(av_RT);
1869  c_feature.setOverallQuality(av_score);
1870  feature_map.push_back(c_feature);
1871  }
1872 
1873  return feature_map;
1874  }
1875 
1876  template <typename PeakType>
1878  const MSSpectrum& ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
1879  {
1880  typename MSSpectrum::const_iterator iter, ref_iter;
1881  UInt peak_cutoff;
1882  peak_cutoff = IsotopeWavelet::getNumPeakCutOff(seed_mz, c + 1);
1883 
1884  iter = candidate.MZBegin(seed_mz);
1885  //we can ignore those cases
1886  if (iter == candidate.begin() || iter == candidate.end())
1887  {
1888  return false;
1889  }
1890 
1891  std::pair<double, double> reals;
1892  ref_iter = ref.MZBegin(seed_mz);
1893  //Correct the position
1894  double real_mz, real_intens;
1895  if (check_PPMs)
1896  {
1897  //reals = checkPPMTheoModel_(ref, iter->getMZ(), c);
1898  //real_mz = reals.first; real_intens = reals.second;
1899  //if (real_mz <= 0 || real_intens <= 0)
1900  //{
1901  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
1902  while (h_iter != ref.begin())
1903  {
1904  --h_iter;
1905  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1906  {
1907  hc_iter = h_iter;
1908  }
1909  else
1910  {
1911  break;
1912  }
1913 
1914  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
1915  {
1916  return false;
1917  }
1918  }
1919  reals = checkPPMTheoModel_(ref, h_iter->getMZ(), c);
1920  real_mz = reals.first; real_intens = reals.second;
1921  if (real_mz <= 0 || real_intens <= 0)
1922  {
1923  return false;
1924  }
1925  real_mz = h_iter->getMZ();
1926  real_intens = h_iter->getIntensity();
1927  //}
1928  }
1929  else
1930  {
1931  reals = std::pair<double, double>(seed_mz, ref_iter->getIntensity());
1932  real_mz = reals.first; real_intens = reals.second;
1933 
1934  if (real_mz <= 0 || real_intens <= 0)
1935  {
1936  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
1937  while (h_iter != ref.begin())
1938  {
1939  --h_iter;
1940  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1941  {
1942  hc_iter = h_iter;
1943  }
1944  else
1945  {
1946  break;
1947  }
1948 
1949  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
1950  {
1951  return false;
1952  }
1953  }
1954  real_mz = h_iter->getMZ(); real_intens = h_iter->getIntensity();
1955  if (real_mz <= 0 || real_intens <= 0)
1956  {
1957  return false;
1958  }
1959  real_mz = h_iter->getMZ();
1960  real_intens = h_iter->getIntensity();
1961  }
1962  }
1963 
1964  double c_score = scoreThis_(candidate, peak_cutoff, real_mz, c, 0);
1965 
1966  if (c_score <= 0)
1967  {
1968  return false;
1969  }
1970 
1971  double mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(real_mz, c + 1);
1972  typename MSSpectrum::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.));
1973  typename MSSpectrum::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz + mz_cutoff / (c + 1.), ref.end());
1974  if (real_r_MZ_iter == ref.end())
1975  {
1976  --real_r_MZ_iter;
1977  }
1978 
1979 
1980  UInt real_mz_begin = distance(ref.begin(), real_l_MZ_iter);
1981  UInt real_mz_end = distance(ref.begin(), real_r_MZ_iter);
1982 
1983  if (prev_score == -1000)
1984  {
1985  push2Box_(real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
1986  }
1987  else
1988  {
1989  push2Box_(real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
1990  }
1991  return true;
1992  }
1993 
1994  template <typename PeakType>
1996  const MSSpectrum& ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
1997  {
1998  typename MSSpectrum::const_iterator iter, ref_iter;
1999  UInt peak_cutoff;
2000  peak_cutoff = IsotopeWavelet::getNumPeakCutOff(seed_mz, c + 1);
2001 
2002  iter = candidate.MZBegin(seed_mz);
2003  //we can ignore those cases
2004  if (iter == candidate.begin() || iter == candidate.end())
2005  {
2006  return false;
2007  }
2008 
2009  std::pair<double, double> reals;
2010  ref_iter = ref.MZBegin(seed_mz);
2011  //Correct the position
2012  double real_mz, real_intens;
2013  if (check_PPMs)
2014  {
2015  reals = checkPPMTheoModel_(ref, iter->getMZ(), c);
2016  real_mz = reals.first; real_intens = reals.second;
2017  //if (real_mz <= 0 || real_intens <= 0)
2018  //{
2019  auto h_iter = ref_iter, hc_iter = ref_iter;
2020  while (h_iter != ref.begin())
2021  {
2022  --h_iter;
2023  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2024  {
2025  hc_iter = h_iter;
2026  }
2027  else
2028  {
2029  break;
2030  }
2031 
2032  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2033  {
2034  return false;
2035  }
2036  }
2037  ++h_iter;
2038  reals = checkPPMTheoModel_(ref, h_iter->getMZ(), c);
2039  real_mz = reals.first; real_intens = reals.second;
2040 
2041 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2042  std::cout << "Plausibility check old_mz: " << iter->getMZ() << "\t" << real_mz << std::endl;
2043 #endif
2044 
2045  if (real_mz <= 0 || real_intens <= 0)
2046  {
2047  return false;
2048  }
2049  real_mz = h_iter->getMZ();
2050  real_intens = h_iter->getIntensity();
2051  //}
2052  }
2053  else
2054  {
2055  reals = std::pair<double, double>(seed_mz, ref_iter->getIntensity());
2056  real_mz = reals.first; real_intens = reals.second;
2057 
2058  if (real_mz <= 0 || real_intens <= 0)
2059  {
2060  auto h_iter = ref_iter, hc_iter = ref_iter;
2061  while (h_iter != ref.begin())
2062  {
2063  --h_iter;
2064  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2065  {
2066  hc_iter = h_iter;
2067  }
2068  else
2069  {
2070  break;
2071  }
2072 
2073  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2074  {
2075  return false;
2076  }
2077  }
2078  real_mz = h_iter->getMZ(); real_intens = h_iter->getIntensity();
2079  if (real_mz <= 0 || real_intens <= 0)
2080  {
2081  return false;
2082  }
2083  real_mz = h_iter->getMZ();
2084  real_intens = h_iter->getIntensity();
2085  }
2086  }
2087 
2088  double c_score = scoreThis_(candidate, peak_cutoff, real_mz, c, 0);
2089 
2090  if (c_score <= 0)
2091  {
2092  return false;
2093  }
2094 
2095  double mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(real_mz, c + 1);
2096  typename MSSpectrum::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.));
2097  typename MSSpectrum::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz + mz_cutoff / (c + 1.), ref.end());
2098  if (real_r_MZ_iter == ref.end())
2099  {
2100  --real_r_MZ_iter;
2101  }
2102 
2103 
2104  UInt real_mz_begin = distance(ref.begin(), real_l_MZ_iter);
2105  UInt real_mz_end = distance(ref.begin(), real_r_MZ_iter);
2106 
2107  if (prev_score == -1000)
2108  {
2109  push2Box_(real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2110  }
2111  else
2112  {
2113  push2Box_(real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2114  }
2115 
2116  return true;
2117  }
2118 
2119  template <typename PeakType>
2120  std::pair<double, double> IsotopeWaveletTransform<PeakType>::checkPPMTheoModel_(const MSSpectrum& ref, const double c_mz, const UInt c)
2121  {
2122  double mass = c_mz * (c + 1) - Constants::IW_PROTON_MASS * (c);
2123  double ppms = getPPMs_(peptideMassRule_(mass), mass);
2125  {
2126 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2127  std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "REJECT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2128 #endif
2129  return std::pair<double, double>(-1, -1);
2130  }
2131 
2132 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2133  std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "ACCEPT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2134 #endif
2135  return std::pair<double, double>(c_mz, ref.MZBegin(c_mz)->getIntensity());
2136  }
2137 
2138 } //namespace
2139 
2140 #pragma clang diagnostic pop
2141 
2142 
void setCharge(const ChargeType &ch)
Set charge state.
Mutable iterator for the ConstRefVector.
Definition: ConstRefVector.h:196
This vector holds pointer to the elements of another container.
Definition: ConstRefVector.h:44
Iterator begin()
See std::vector documentation.
Definition: ConstRefVector.h:371
void sortByIntensity(bool reverse=false)
Sorting.
Definition: ConstRefVector.h:693
Iterator end()
See std::vector documentation.
Definition: ConstRefVector.h:377
A 2-dimensional hull representation in [counter]clockwise direction - depending on axis labelling.
Definition: ConvexHull2D.h:47
void addPoints(const PointArrayType &points)
void push_back(const VectorElement &f)
Definition: ExposedVector.h:161
A container for features.
Definition: FeatureMap.h:80
An LC-MS feature.
Definition: Feature.h:46
void setConvexHulls(const std::vector< ConvexHull2D > &hulls)
Set the convex hulls of single mass traces.
void setOverallQuality(QualityType q)
Set the overall quality.
Internally (only by GPUs) used data structure . It allows efficient data exchange between CPU and GPU...
Definition: IsotopeWaveletTransform.h:71
double getRT() const
Definition: IsotopeWaveletTransform.h:104
const MSSpectrum * getRefSpectrum() const
Definition: IsotopeWaveletTransform.h:146
TransSpectrum(const MSSpectrum *reference)
Definition: IsotopeWaveletTransform.h:83
double getRefIntensity(const UInt i) const
Definition: IsotopeWaveletTransform.h:116
TransSpectrum()
Definition: IsotopeWaveletTransform.h:77
MSSpectrum::const_iterator end() const
Definition: IsotopeWaveletTransform.h:167
double getMZ(const UInt i) const
Definition: IsotopeWaveletTransform.h:110
virtual ~TransSpectrum()
Definition: IsotopeWaveletTransform.h:90
std::vector< float > * trans_intens_
The intensities of the transform.
Definition: IsotopeWaveletTransform.h:182
void setTransIntensity(const UInt i, const double intens)
Definition: IsotopeWaveletTransform.h:128
const MSSpectrum * getRefSpectrum()
Definition: IsotopeWaveletTransform.h:140
MSSpectrum::const_iterator MZEnd(const double mz) const
Definition: IsotopeWaveletTransform.h:160
Size size() const
Definition: IsotopeWaveletTransform.h:134
virtual void destroy()
Definition: IsotopeWaveletTransform.h:95
double getTransIntensity(const UInt i) const
Definition: IsotopeWaveletTransform.h:122
const MSSpectrum * reference_
The reference spectrum.
Definition: IsotopeWaveletTransform.h:181
MSSpectrum::const_iterator MZBegin(const double mz) const
Definition: IsotopeWaveletTransform.h:153
MSSpectrum::const_iterator begin() const
Definition: IsotopeWaveletTransform.h:174
A class implementing the isotope wavelet transform. If you just want to find features using the isoto...
Definition: IsotopeWaveletTransform.h:46
void clusterSeeds_(const TransSpectrum &candidates, const MSSpectrum &ref, const UInt scan_index, const UInt c, const bool check_PPMs)
Clusters the seeds stored by push2TmpBox_.
Definition: IsotopeWaveletTransform.h:1588
virtual ~IsotopeWaveletTransform()
Destructor.
Definition: IsotopeWaveletTransform.h:533
Size max_scan_size_
Definition: IsotopeWaveletTransform.h:459
std::vector< double > prod_
Definition: IsotopeWaveletTransform.h:456
std::vector< float > zeros_
Definition: IsotopeWaveletTransform.h:467
UInt data_length_
Definition: IsotopeWaveletTransform.h:460
virtual void push2Box_(const double mz, const UInt scan, UInt c, const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, const double ref_intens)
Inserts a potential isotopic pattern into an open box or - if no such box exists - creates a new one.
Definition: IsotopeWaveletTransform.h:1203
std::vector< double > xs_
Definition: IsotopeWaveletTransform.h:456
double getPPMs_(const double mass_a, const double mass_b) const
Returns the parts-per-million deviation of the masses.
Definition: IsotopeWaveletTransform.h:446
std::multimap< double, Box > front_boxes_
Definition: IsotopeWaveletTransform.h:452
std::vector< double > c_spacings_
Definition: IsotopeWaveletTransform.h:456
UInt max_charge_
Definition: IsotopeWaveletTransform.h:460
Int from_max_to_right_
Definition: IsotopeWaveletTransform.h:463
virtual std::pair< double, double > checkPPMTheoModel_(const MSSpectrum &ref, const double c_mz, const UInt c)
Definition: IsotopeWaveletTransform.h:2120
Size getMaxScanSize() const
Definition: IsotopeWaveletTransform.h:293
std::multimap< double, Box > end_boxes_
Definition: IsotopeWaveletTransform.h:452
virtual double scoreThis_(const TransSpectrum &candidate, const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
Given a candidate for an isotopic pattern, this function computes the corresponding score.
Definition: IsotopeWaveletTransform.h:943
double score
The associated score.
Definition: IsotopeWaveletTransform.h:55
IsotopeWaveletTransform()
Default Constructor.
Definition: IsotopeWaveletTransform.h:495
std::vector< double > c_mzs_
Definition: IsotopeWaveletTransform.h:456
double mz
The monoisotopic position.
Definition: IsotopeWaveletTransform.h:53
double sigma_
Definition: IsotopeWaveletTransform.h:455
void updateBoxStates(const PeakMap &map, const Size scan_index, const UInt RT_interleave, const UInt RT_votes_cutoff, const Int front_bound=-1, const Int end_bound=-1)
A function keeping track of currently open and closed sweep line boxes. This function is used by the ...
Definition: IsotopeWaveletTransform.h:1414
std::vector< float > scores_
Definition: IsotopeWaveletTransform.h:467
std::multimap< double, Box > closed_boxes_
Definition: IsotopeWaveletTransform.h:452
std::vector< std::multimap< double, Box > > * tmp_boxes_
Definition: IsotopeWaveletTransform.h:453
std::vector< int > indices_
Definition: IsotopeWaveletTransform.h:464
UInt MZ_begin
Index.
Definition: IsotopeWaveletTransform.h:60
double ref_intens
Definition: IsotopeWaveletTransform.h:57
std::vector< double > interpol_xs_
Definition: IsotopeWaveletTransform.h:457
double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b)
Computes a linear (intensity) interpolation.
Definition: IsotopeWaveletTransform.h:271
virtual std::multimap< double, Box > getClosedBoxes()
Returns the closed boxes.
Definition: IsotopeWaveletTransform.h:252
UInt c
Note, this is not the charge (it is charge-1!!!)
Definition: IsotopeWaveletTransform.h:54
double getSdIntens_(const TransSpectrum &scan, const double mean)
Computes the standard deviation (neglecting negative values) of the (transformed) intensities of scan...
Definition: IsotopeWaveletTransform.h:1188
double av_MZ_spacing_
Definition: IsotopeWaveletTransform.h:455
virtual void getTransform(MSSpectrum &c_trans, const MSSpectrum &c_ref, const UInt c)
Computes the isotope wavelet transform of charge state c.
Definition: IsotopeWaveletTransform.h:539
double peptideMassRule_(const double c_mass) const
Returns the monoisotopic mass (with corresponding decimal values) we would expect at c_mass.
Definition: IsotopeWaveletTransform.h:423
Int from_max_to_left_
Definition: IsotopeWaveletTransform.h:463
String intenstype_
Definition: IsotopeWaveletTransform.h:462
std::vector< double > interpol_ys_
Definition: IsotopeWaveletTransform.h:457
double max_mz_cutoff_
Definition: IsotopeWaveletTransform.h:466
virtual void push2TmpBox_(const double mz, const UInt scan, UInt charge, const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end)
Essentially the same function as.
Definition: IsotopeWaveletTransform.h:1309
double getAvIntens_(const TransSpectrum &scan)
Computes the average (transformed) intensity (neglecting negative values) of scan.
Definition: IsotopeWaveletTransform.h:1174
double RT
The elution time (not the scan index)
Definition: IsotopeWaveletTransform.h:58
void extendBox_(const PeakMap &map, const Box &box)
A currently still necessary function that extends the box box in order to capture also signals whose ...
Definition: IsotopeWaveletTransform.h:1483
FeatureMap mapSeeds2Features(const PeakMap &map, const UInt RT_votes_cutoff)
Filters the candidates further more and maps the internally used data structures to the OpenMS framew...
Definition: IsotopeWaveletTransform.h:1696
virtual void identifyCharge(const MSSpectrum &candidates, const MSSpectrum &ref, const UInt scan_index, const UInt c, const double ampl_cutoff, const bool check_PPMs)
Given an isotope wavelet transformed spectrum candidates, this function assigns to every significant ...
Definition: IsotopeWaveletTransform.h:672
double getSigma() const
Definition: IsotopeWaveletTransform.h:276
UInt RT_index
The elution time (map) index.
Definition: IsotopeWaveletTransform.h:59
double intens
The transformed intensity at the monoisotopic mass.
Definition: IsotopeWaveletTransform.h:56
void sampleTheCMarrWavelet_(const MSSpectrum &scan, const Int wavelet_length, const Int mz_index, const UInt charge)
std::multimap< double, Box > open_boxes_
Definition: IsotopeWaveletTransform.h:452
double getLinearInterpolation(const typename MSSpectrum::const_iterator &left_iter, const double mz_pos, const typename MSSpectrum::const_iterator &right_iter)
Computes a linear (intensity) interpolation.
Definition: IsotopeWaveletTransform.h:260
virtual void initializeScan(const MSSpectrum &c_ref, const UInt c=0)
Definition: IsotopeWaveletTransform.h:620
double getAvMZSpacing_(const MSSpectrum &scan)
Computes the average MZ spacing of scan.
Definition: IsotopeWaveletTransform.h:1155
virtual void getTransformHighRes(MSSpectrum &c_trans, const MSSpectrum &c_ref, const UInt c)
Computes the isotope wavelet transform of charge state c.
Definition: IsotopeWaveletTransform.h:582
UInt MZ_end
Index.
Definition: IsotopeWaveletTransform.h:61
void setSigma(const double sigma)
Definition: IsotopeWaveletTransform.h:281
double min_spacing_
Definition: IsotopeWaveletTransform.h:466
virtual bool checkPositionForPlausibility_(const TransSpectrum &candidate, const MSSpectrum &ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
A ugly but necessary function to handle "off-by-1-Dalton predictions" due to idiosyncrasies of the da...
Definition: IsotopeWaveletTransform.h:1995
UInt max_num_peaks_per_pattern_
Definition: IsotopeWaveletTransform.h:460
double getMinSpacing() const
Definition: IsotopeWaveletTransform.h:288
bool hr_data_
Definition: IsotopeWaveletTransform.h:461
virtual void computeMinSpacing(const MSSpectrum &c_ref)
Definition: IsotopeWaveletTransform.h:662
std::multimap< UInt, BoxElement > Box
Key: RT index, value: BoxElement.
Definition: IsotopeWaveletTransform.h:64
std::vector< double > psi_
Definition: IsotopeWaveletTransform.h:456
Internally used data structure.
Definition: IsotopeWaveletTransform.h:52
static UInt getNumPeakCutOff(const double mass, const UInt z)
static IsotopeWavelet * init(const double max_m, const UInt max_charge)
static double getLambdaL(const double m)
Returns the mass-parameter lambda (linear fit).
static double getValueByLambda(const double lambda, const double tz1)
Returns the value of the isotope wavelet at position t via a fast table lookup. Usually,...
static UInt getMzPeakCutOffAtMonoPos(const double mass, const UInt z)
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:46
Iterator begin()
Definition: MSExperiment.h:156
Size size() const
The number of spectra.
Definition: MSExperiment.h:121
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:44
double getRT() const
Iterator MZBegin(CoordinateType mz)
Binary search for peak range begin.
Iterator MZEnd(CoordinateType mz)
Binary search for peak range end (returns the past-the-end iterator)
A 2-dimensional raw data point or peak.
Definition: Peak2D.h:29
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:172
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:178
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:190
IntensityType getIntensity() const
Definition: Peak2D.h:142
void setIntensity(IntensityType intensity)
Sets data point intensity (height)
Definition: Peak2D.h:148
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:76
unsigned int UInt
Unsigned integer type.
Definition: Types.h:68
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:108
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:94
const double IW_PROTON_MASS
Definition: IsotopeWaveletConstants.h:42
const double PEPTIDE_MASS_RULE_FACTOR
Definition: IsotopeWaveletConstants.h:23
const double IW_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:28
const unsigned int DEFAULT_NUM_OF_INTERPOLATION_POINTS
Definition: IsotopeWaveletConstants.h:17
const double c
Definition: Constants.h:188
const double PEPTIDE_MASS_RULE_THEO_PPM_BOUND
Definition: IsotopeWaveletConstants.h:25
const double PEPTIDE_MASS_RULE_BOUND
Definition: IsotopeWaveletConstants.h:24
const double IW_HALF_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:29
const double IW_QUARTER_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:30
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
bool intensityComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:471
bool intensityPointerComparator(PeakType *a, PeakType *b)
Definition: IsotopeWaveletTransform.h:483
bool positionComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:489
bool intensityAscendingComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:477