OpenMS  2.7.0
IsotopeWaveletTransform.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg$
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
46 #include <cmath>
47 #include <cmath>
48 #include <boost/math/special_functions/bessel.hpp>
49 #include <vector>
50 #include <map>
51 #include <sstream>
52 #include <fstream>
53 #include <iomanip>
54 
55 // This code has quite a few strange things in it triggering warnings which
56 // clutters the rest of the diagnostics
57 #pragma clang diagnostic push
58 #pragma clang diagnostic ignored "-Wfloat-equal"
59 #pragma clang diagnostic ignored "-Wconversion"
60 #pragma clang diagnostic ignored "-Wshorten-64-to-32"
61 
62 namespace OpenMS
63 {
64 
68  template <typename PeakType>
70  {
71 public:
72 
73 
75  struct BoxElement
76  {
77  double mz;
78  UInt c;
79  double score;
80  double intens;
81  double ref_intens;
82  double RT;
86  };
87 
88  typedef std::multimap<UInt, BoxElement> Box;
89 
90 
95  {
97 
98 public:
99 
102  reference_(nullptr), trans_intens_(nullptr)
103  {
104  }
105 
107  TransSpectrum(const MSSpectrum* reference) :
108  reference_(reference)
109  {
110  trans_intens_ = new std::vector<float>(reference_->size(), 0.0);
111  }
112 
114  virtual ~TransSpectrum()
115  {
116  delete (trans_intens_);
117  }
118 
119  virtual void destroy()
120  {
121  delete (trans_intens_);
122  trans_intens_ = NULL;
123  delete (reference_);
124  reference_ = NULL;
125  }
126 
128  inline double getRT() const
129  {
130  return reference_->getRT();
131  }
132 
134  inline double getMZ(const UInt i) const
135  {
136  return (*reference_)[i].getMZ();
137  }
138 
140  inline double getRefIntensity(const UInt i) const
141  {
142  return (*reference_)[i].getIntensity();
143  }
144 
146  inline double getTransIntensity(const UInt i) const
147  {
148  return (*trans_intens_)[i];
149  }
150 
152  inline void setTransIntensity(const UInt i, const double intens)
153  {
154  (*trans_intens_)[i] = intens;
155  }
156 
158  inline Size size() const
159  {
160  return trans_intens_->size();
161  }
162 
164  inline const MSSpectrum* getRefSpectrum()
165  {
166  return reference_;
167  }
168 
170  inline const MSSpectrum* getRefSpectrum() const
171  {
172  return reference_;
173  }
174 
177  inline typename MSSpectrum::const_iterator MZBegin(const double mz) const
178  {
179  return reference_->MZBegin(mz);
180  }
181 
184  inline typename MSSpectrum::const_iterator MZEnd(const double mz) const
185  {
186  return reference_->MZEnd(mz);
187  }
188 
191  inline typename MSSpectrum::const_iterator end() const
192  {
193  return reference_->end();
194  }
195 
198  inline typename MSSpectrum::const_iterator begin() const
199  {
200  return reference_->begin();
201  }
202 
203 protected:
204 
206  std::vector<float>* trans_intens_;
207 
208  };
209 
210 
211 
217  IsotopeWaveletTransform(const double min_mz, const double max_mz, const UInt max_charge, const Size max_scan_size = 0, const bool hr_data = false, const String intenstype = "ref");
218 
220  virtual ~IsotopeWaveletTransform();
221 
222 
227  virtual void getTransform(MSSpectrum& c_trans, const MSSpectrum& c_ref, const UInt c);
228 
233  virtual void getTransformHighRes(MSSpectrum& c_trans, const MSSpectrum& c_ref, const UInt c);
234 
254  virtual void identifyCharge(const MSSpectrum& candidates, const MSSpectrum& ref, const UInt scan_index, const UInt c,
255  const double ampl_cutoff, const bool check_PPMs);
256 
257  virtual void initializeScan(const MSSpectrum& c_ref, const UInt c = 0);
258 
265  void updateBoxStates(const PeakMap& map, const Size scan_index, const UInt RT_interleave,
266  const UInt RT_votes_cutoff, const Int front_bound = -1, const Int end_bound = -1);
267 
268 
273  FeatureMap mapSeeds2Features(const PeakMap& map, const UInt RT_votes_cutoff);
274 
276  virtual std::multimap<double, Box> getClosedBoxes()
277  { return closed_boxes_; }
278 
279 
284  inline double getLinearInterpolation(const typename MSSpectrum::const_iterator& left_iter, const double mz_pos, const typename MSSpectrum::const_iterator& right_iter)
285  {
286  return left_iter->getIntensity() + (right_iter->getIntensity() - left_iter->getIntensity()) / (right_iter->getMZ() - left_iter->getMZ()) * (mz_pos - left_iter->getMZ());
287  }
288 
295  inline double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b)
296  {
297  return intens_a + (intens_b - intens_a) / (mz_b - mz_a) * (mz_pos - mz_a);
298  }
299 
300  inline double getSigma() const
301  {
302  return sigma_;
303  }
304 
305  inline void setSigma(const double sigma)
306  {
307  sigma_ = sigma;
308  }
309 
310  virtual void computeMinSpacing(const MSSpectrum& c_ref);
311 
312  inline double getMinSpacing() const
313  {
314  return min_spacing_;
315  }
316 
317  inline Size getMaxScanSize() const
318  {
319  return max_scan_size_;
320  }
321 
322 protected:
323 
324 
328 
329 
330  inline void sampleTheCMarrWavelet_(const MSSpectrum& scan, const Int wavelet_length, const Int mz_index, const UInt charge);
331 
332 
339  virtual double scoreThis_(const TransSpectrum& candidate, const UInt peak_cutoff,
340  const double seed_mz, const UInt c, const double ampl_cutoff);
341 
348  virtual double scoreThis_(const MSSpectrum& candidate, const UInt peak_cutoff,
349  const double seed_mz, const UInt c, const double ampl_cutoff);
350 
351 
359  virtual bool checkPositionForPlausibility_(const TransSpectrum& candidate, const MSSpectrum& ref, const double seed_mz,
360  const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score);
361 
369  virtual bool checkPositionForPlausibility_(const MSSpectrum& candidate, const MSSpectrum& ref, const double seed_mz,
370  const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score);
371 
372  virtual std::pair<double, double> checkPPMTheoModel_(const MSSpectrum& ref, const double c_mz, const UInt c);
373 
374 
376  inline double getAvIntens_(const TransSpectrum& scan);
378  inline double getAvIntens_(const MSSpectrum& scan);
379 
381  inline double getSdIntens_(const TransSpectrum& scan, const double mean);
383  inline double getSdIntens_(const MSSpectrum& scan, const double mean);
384 
395  virtual void push2Box_(const double mz, const UInt scan, UInt c, const double score,
396  const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, const double ref_intens);
397 
413  virtual void push2TmpBox_(const double mz, const UInt scan, UInt charge, const double score,
414  const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end);
415 
421  inline double getAvMZSpacing_(const MSSpectrum& scan);
422 
423 
428  void clusterSeeds_(const TransSpectrum& candidates, const MSSpectrum& ref,
429  const UInt scan_index, const UInt c, const bool check_PPMs);
430 
435  virtual void clusterSeeds_(const MSSpectrum& candidates, const MSSpectrum& ref,
436  const UInt scan_index, const UInt c, const bool check_PPMs);
437 
438 
443  void extendBox_(const PeakMap& map, const Box& box);
444 
447  inline double peptideMassRule_(const double c_mass) const
448  {
449  double correction_fac = c_mass / Constants::PEPTIDE_MASS_RULE_BOUND;
450  double old_frac_mass = c_mass - (Int)(c_mass);
451  double new_mass = ((Int)(c_mass)) * (1. + Constants::PEPTIDE_MASS_RULE_FACTOR) - (Int)(correction_fac);
452  double new_frac_mass = new_mass - (Int)(new_mass);
453 
454  if (new_frac_mass - old_frac_mass > 0.5)
455  {
456  new_mass -= 1.;
457  }
458 
459  if (new_frac_mass - old_frac_mass < -0.5)
460  {
461  new_mass += 1.;
462  }
463 
464  return new_mass;
465  }
466 
470  inline double getPPMs_(const double mass_a, const double mass_b) const
471  {
472  return fabs(mass_a - mass_b) / (0.5 * (mass_a + mass_b)) * 1e6;
473  }
474 
475  //internally used data structures for the sweep line algorithm
476  std::multimap<double, Box> open_boxes_, closed_boxes_, end_boxes_, front_boxes_; //double = average m/z position
477  std::vector<std::multimap<double, Box> >* tmp_boxes_; //for each charge we need a separate container
478 
480  std::vector<double> c_mzs_, c_spacings_, psi_, prod_, xs_;
481  std::vector<double> interpol_xs_, interpol_ys_;
482 
485  bool hr_data_;
488  std::vector<int> indices_;
489 
491  std::vector<float> scores_, zeros_;
492  };
493 
494  template <typename PeakType>
495  bool intensityComparator(const PeakType& a, const PeakType& b)
496  {
497  return a.getIntensity() > b.getIntensity();
498  }
499 
500  template <typename PeakType>
502  {
503  return a.getIntensity() < b.getIntensity();
504  }
505 
506  template <typename PeakType>
508  {
509  return a->getIntensity() > b->getIntensity();
510  }
511 
512  template <typename PeakType>
513  bool positionComparator(const PeakType& a, const PeakType& b)
514  {
515  return a.getMZ() < b.getMZ();
516  }
517 
518  template <typename PeakType>
520  {
521  tmp_boxes_ = new std::vector<std::multimap<double, Box> >(1);
522  av_MZ_spacing_ = 1;
523  max_scan_size_ = 0;
524  max_mz_cutoff_ = 3;
525  max_num_peaks_per_pattern_ = 3;
526  hr_data_ = false;
527  intenstype_ = "ref";
528  }
529 
530  template <typename PeakType>
531  IsotopeWaveletTransform<PeakType>::IsotopeWaveletTransform(const double min_mz, const double max_mz, const UInt max_charge, const Size max_scan_size, const bool hr_data, String intenstype)
532  {
533  max_charge_ = max_charge;
534  max_scan_size_ = max_scan_size;
535  hr_data_ = hr_data;
536  intenstype_ = intenstype;
537  tmp_boxes_ = new std::vector<std::multimap<double, Box> >(max_charge);
538  if (max_scan_size <= 0) //only important for the CPU
539  {
540  IsotopeWavelet::init(max_mz, max_charge);
541  }
542 
543  av_MZ_spacing_ = 1;
544  max_mz_cutoff_ = IsotopeWavelet::getMzPeakCutOffAtMonoPos(max_mz, max_charge);
545  max_num_peaks_per_pattern_ = IsotopeWavelet::getNumPeakCutOff(max_mz, max_charge);
546 
547  Int size_estimate((Int)ceil(max_scan_size_ / (max_mz - min_mz)));
548  Int to_reserve((Int)ceil(size_estimate * max_num_peaks_per_pattern_ * Constants::IW_NEUTRON_MASS));
549  psi_.reserve(to_reserve); //The wavelet
550  prod_.reserve(to_reserve);
551  xs_.reserve(to_reserve);
554  }
555 
556  template <typename PeakType>
558  {
559  delete (tmp_boxes_);
560  }
561 
562  template <typename PeakType>
564  {
565  Int spec_size((Int)c_ref.size());
566  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
567  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
568  UInt charge = c + 1;
569  double value, T_boundary_left, T_boundary_right, old, c_diff, current, old_pos, my_local_MZ, my_local_lambda, origin, c_mz;
570 
571  for (Int my_local_pos = 0; my_local_pos < spec_size; ++my_local_pos)
572  {
573  value = 0; T_boundary_left = 0; T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge) / (double)charge;
574  old = 0; old_pos = (my_local_pos - from_max_to_left_ - 1 >= 0) ? c_ref[my_local_pos - from_max_to_left_ - 1].getMZ() : c_ref[0].getMZ() - min_spacing_;
575  my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ * charge);
576  c_diff = 0;
577  origin = -my_local_MZ + Constants::IW_QUARTER_NEUTRON_MASS / (double)charge;
578 
579  for (Int current_conv_pos = std::max(0, my_local_pos - from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
580  {
581  if (current_conv_pos >= spec_size)
582  {
583  value += 0.5 * old * min_spacing_;
584  break;
585  }
586 
587  c_mz = c_ref[current_conv_pos].getMZ();
588  c_diff = c_mz + origin;
589 
590  //Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
591  current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda(my_local_lambda, c_diff * charge + 1.) * c_ref[current_conv_pos].getIntensity() : 0;
592 
593  value += 0.5 * (current + old) * (c_mz - old_pos);
594 
595  old = current;
596  old_pos = c_mz;
597  }
598 
599 
600 
601  c_trans[my_local_pos].setIntensity(value);
602  }
603  }
604 
605  template <typename PeakType>
607  {
608  Int spec_size((Int)c_ref.size());
609  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
610  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
611  UInt charge = c + 1;
612  double value, T_boundary_left, T_boundary_right, c_diff, current, my_local_MZ, my_local_lambda, origin, c_mz;
613 
614  for (Int my_local_pos = 0; my_local_pos < spec_size; ++my_local_pos)
615  {
616  value = 0; T_boundary_left = 0; T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge) / (double)charge;
617 
618 
619  my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ * charge);
620  c_diff = 0;
621  origin = -my_local_MZ + Constants::IW_QUARTER_NEUTRON_MASS / (double)charge;
622 
623  for (Int current_conv_pos = std::max(0, my_local_pos - from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
624  {
625  if (current_conv_pos >= spec_size)
626  {
627  break;
628  }
629 
630  c_mz = c_ref[current_conv_pos].getMZ();
631  c_diff = c_mz + origin;
632 
633  //Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
634  current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda(my_local_lambda, c_diff * charge + 1.) * c_ref[current_conv_pos].getIntensity() : 0;
635 
636  value += current;
637  }
638 
639  c_trans[my_local_pos].setIntensity(value);
640  }
641  }
642 
643  template <typename PeakType>
645  {
646  data_length_ = (UInt) c_ref.size();
647  computeMinSpacing(c_ref);
648  Int wavelet_length = 0, quarter_length = 0;
649 
650  if (hr_data_) //We have to check this separately, because the simply estimation for LowRes data is destroyed by large gaps
651  {
652  UInt c_mz_cutoff;
653  typename MSSpectrum::const_iterator start_iter, end_iter;
654  for (UInt i = 0; i < data_length_; ++i)
655  {
656  c_mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[i].getMZ(), c + 1);
657  start_iter = c_ref.MZEnd(c_ref[i].getMZ());
658  end_iter = c_ref.MZBegin(c_ref[i].getMZ() + c_mz_cutoff);
659  wavelet_length = std::max((SignedSize) wavelet_length, distance(start_iter, end_iter) + 1);
660  end_iter = c_ref.MZEnd(c_ref[i].getMZ() - Constants::IW_QUARTER_NEUTRON_MASS / double(c + 1.));
661  quarter_length = std::max((SignedSize) quarter_length, distance(end_iter, start_iter) + 1);
662  }
663  }
664  else
665  {
666  //CHANGED
667  max_mz_cutoff_ = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[data_length_ - 1].getMZ(), max_charge_);
668  wavelet_length = (UInt) ceil(max_mz_cutoff_ / min_spacing_);
669  }
670  //... done
671 
672 
673  if (wavelet_length > (Int) c_ref.size())
674  {
675  std::cout << "Warning: the extremal length of the wavelet is larger (" << wavelet_length << ") than the number of data points (" << c_ref.size() << "). This might (!) severely affect the transform." << std::endl;
676  std::cout << "Minimal spacing: " << min_spacing_ << std::endl;
677  std::cout << "Warning/Error generated at scan with RT " << c_ref.getRT() << "." << std::endl;
678  }
679 
680  Int max_index = (UInt) (Constants::IW_QUARTER_NEUTRON_MASS / min_spacing_);
681  from_max_to_left_ = max_index;
682  from_max_to_right_ = wavelet_length - 1 - from_max_to_left_;
683  }
684 
685  template <typename PeakType>
687  {
688  min_spacing_ = INT_MAX;
689  for (UInt c_conv_pos = 1; c_conv_pos < c_ref.size(); ++c_conv_pos)
690  {
691  min_spacing_ = std::min(min_spacing_, c_ref[c_conv_pos].getMZ() - c_ref[c_conv_pos - 1].getMZ());
692  }
693  }
694 
695  template <typename PeakType>
697  const MSSpectrum& ref, const UInt scan_index, const UInt c, const double ampl_cutoff, const bool check_PPMs)
698  {
699  Size scan_size(candidates.size());
701  typename MSSpectrum::const_iterator iter_start, iter_end, seed_iter, iter2;
702  double mz_cutoff, seed_mz, c_av_intens = 0, c_score = 0, c_sd_intens = 0, threshold = 0, help_mz, share, share_pos, bwd, fwd;
703  UInt MZ_start, MZ_end;
704 
705  MSSpectrum diffed(candidates);
706  diffed[0].setIntensity(0); diffed[scan_size - 1].setIntensity(0);
707 
708 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
709  std::stringstream stream;
710  stream << "diffed_" << ref.getRT() << "_" << c + 1 << ".trans\0";
711  std::ofstream ofile(stream.str().c_str());
712 #endif
713 
714  if (!hr_data_) //LowRes data
715  {
716  for (UInt i = 0; i < scan_size - 2; ++i)
717  {
718  share = candidates[i + 1].getIntensity(); share_pos = candidates[i + 1].getMZ();
719  bwd = (share - candidates[i].getIntensity()) / (share_pos - candidates[i].getMZ());
720  fwd = (candidates[i + 2].getIntensity() - share) / (candidates[i + 2].getMZ() - share_pos);
721 
722  if (!(bwd >= 0 && fwd <= 0) || share > ref[i + 1].getIntensity())
723  {
724  diffed[i + 1].setIntensity(0);
725  }
726 
727 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
728  ofile << diffed[i + 1].getMZ() << "\t" << diffed[i + 1].getIntensity() << std::endl;
729 #endif
730  }
731  }
732  else //HighRes data
733  {
734  for (UInt i = 0; i < scan_size - 2; ++i)
735  {
736  share = candidates[i + 1].getIntensity(); share_pos = candidates[i + 1].getMZ();
737  bwd = (share - candidates[i].getIntensity()) / (share_pos - candidates[i].getMZ());
738  fwd = (candidates[i + 2].getIntensity() - share) / (candidates[i + 2].getMZ() - share_pos);
739 
740  if (!(bwd >= 0 && fwd <= 0))
741  {
742  diffed[i + 1].setIntensity(0);
743  }
744 
745 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
746  ofile << diffed[i + 1].getMZ() << "\t" << diffed[i + 1].getIntensity() << std::endl;
747 #endif
748  }
749  }
750 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
751  ofile.close();
752 #endif
753 
754  ConstRefVector<MSSpectrum > c_sorted_candidate(diffed.begin(), diffed.end());
755 
756  //Sort the transform in descending order according to the intensities present in the transform
757  c_sorted_candidate.sortByIntensity();
758 
759 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
760  std::stringstream stream2;
761  stream2 << "sorted_cpu_" << candidates.getRT() << "_" << c + 1 << ".trans\0";
762  std::ofstream ofile2(stream2.str().c_str());
763  for (iter = c_sorted_candidate.end() - 1; iter != c_sorted_candidate.begin(); --iter)
764  {
765  ofile2 << iter->getMZ() << "\t" << iter->getIntensity() << std::endl;
766  }
767  ofile2.close();
768 #endif
769 
770  std::vector<UInt> processed(scan_size, 0);
771 
772  if (ampl_cutoff < 0)
773  {
774  threshold = 0;
775  }
776  else
777  {
778  c_av_intens = getAvIntens_(candidates);
779  c_sd_intens = getSdIntens_(candidates, c_av_intens);
780  threshold = ampl_cutoff * c_sd_intens + c_av_intens;
781  }
782 
783  for (iter = c_sorted_candidate.end() - 1; iter != c_sorted_candidate.begin(); --iter)
784  {
785  if (iter->getIntensity() <= 0)
786  {
787  break;
788  }
789 
790  seed_mz = iter->getMZ();
791  seed_iter = ref.MZBegin(seed_mz);
792 
793  if (seed_iter == ref.end() || processed[distance(ref.begin(), seed_iter)])
794  {
795  continue;
796  }
797 
798  mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(seed_mz, c + 1);
799  //Mark the region as processed
800  //Do not move this further down, since we have to mark this as processed in any case,
801  //even when score <=0; otherwise we would look around the maximum's position unless
802  //any significant point is found
803  iter_start = ref.MZBegin(ref.begin(), seed_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.), seed_iter);
804  iter_end = ref.MZEnd(seed_iter, seed_mz + mz_cutoff / (c + 1.), ref.end());
805  if (iter_end == ref.end())
806  {
807  --iter_end;
808  }
809 
810  MZ_start = distance(ref.begin(), iter_start);
811  MZ_end = distance(ref.begin(), iter_end);
812 
813  memset(&(processed[MZ_start]), 1, sizeof(UInt) * (MZ_end - MZ_start + 1));
814 
815  c_score = scoreThis_(candidates, IsotopeWavelet::getNumPeakCutOff(seed_mz * (c + 1.)), seed_mz, c, threshold);
816 
817 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
818  if (trunc(seed_mz) == 874)
819  std::cout << seed_mz << "\t" << c_score << std::endl;
820 #endif
821 
822  if (c_score <= 0 && c_score != -1000)
823  {
824  continue;
825  }
826 
827  //Push the seed into its corresponding box (or create a new one, if necessary)
828  //Do ***NOT*** move this further down!
829  push2TmpBox_(seed_mz, scan_index, c, c_score, iter->getIntensity(), ref.getRT(), MZ_start, MZ_end);
830 
831  help_mz = seed_mz - Constants::IW_NEUTRON_MASS / (c + 1.);
832  iter2 = candidates.MZBegin(help_mz);
833  if (iter2 == candidates.end() || iter2 == candidates.begin())
834  {
835  continue;
836  }
837 
838  if (fabs(iter2->getMZ() - seed_mz) > 0.5 * Constants::IW_NEUTRON_MASS / (c + 1.))
839  {
840  //In the other case, we are too close to the peak, leading to incorrect derivatives.
841  if (iter2 != candidates.end())
842  {
843  push2TmpBox_(iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2 - 1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
844  }
845  }
846 
847  help_mz = seed_mz + Constants::IW_NEUTRON_MASS / (c + 1.);
848  iter2 = candidates.MZBegin(help_mz);
849  if (iter2 == candidates.end() || iter2 == candidates.begin())
850  {
851  continue;
852  }
853 
854  if (fabs(iter2->getMZ() - seed_mz) > 0.5 * Constants::IW_NEUTRON_MASS / (c + 1.))
855  {
856  //In the other case, we are too close to the peak, leading to incorrect derivatives.
857  if (iter2 != candidates.end())
858  {
859  push2TmpBox_(iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2 - 1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
860  }
861  }
862  }
863 
864  clusterSeeds_(candidates, ref, scan_index, c, check_PPMs);
865  }
866 
867  template <typename PeakType>
869  const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
870  {
871  double c_score = 0, c_val;
872  typename MSSpectrum::const_iterator c_left_iter2, c_right_iter2;
873  Int signal_size((Int)candidate.size());
874  //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
875  //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
876 
877  //p_h_ind indicates if we are looking for a whole or a peak
878  Int p_h_ind = 1, end = 4 * (peak_cutoff - 1) - 1; //4 times and not 2 times, since we move by 0.5 m/z entities
879 
880  std::vector<double> positions(end);
881  for (Int i = 0; i < end; ++i)
882  {
883  positions[i] = seed_mz - ((peak_cutoff - 1) * Constants::IW_NEUTRON_MASS - (i + 1) * Constants::IW_HALF_NEUTRON_MASS) / ((double)c + 1);
884  }
885 
886  double l_score = 0, mid_val = 0;
887  Int start_index = distance(candidate.begin(), candidate.MZBegin(positions[0])) - 1;
888  for (Int v = 1; v <= end; ++v, ++p_h_ind)
889  {
890  do
891  {
892  if (start_index < signal_size - 1)
893  ++start_index;
894  else
895  break;
896  }
897  while (candidate[start_index].getMZ() < positions[v - 1]);
898 
899  if (start_index <= 0 || start_index >= signal_size - 1) //unable to interpolate
900  {
901  continue;
902  }
903 
904  c_left_iter2 = candidate.begin() + start_index - 1;
905  c_right_iter2 = c_left_iter2 + 1;
906 
907  c_val = c_left_iter2->getIntensity() + (c_right_iter2->getIntensity() - c_left_iter2->getIntensity()) / (c_right_iter2->getMZ() - c_left_iter2->getMZ()) * (positions[v - 1] - c_left_iter2->getMZ());
908 
909  if (v == (int)(ceil(end / 2.)))
910  {
911  l_score = c_score;
912  mid_val = c_val;
913  }
914 
915  if (p_h_ind % 2 == 1) //I.e. a whole
916  {
917  c_score -= c_val;
918 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
919  if (trunc(seed_mz) == 874)
920  std::cout << -c_val << std::endl;
921 #endif
922  }
923  else
924  {
925  c_score += c_val;
926 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
927  if (trunc(seed_mz) == 874)
928  std::cout << c_val << std::endl;
929 #endif
930  }
931 
932 
933  start_index = distance(candidate.begin(), c_left_iter2);
934  }
935 
936 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
937  std::ofstream ofile_score("scores.dat", ios::app);
938  std::ofstream ofile_check_score("check_scores.dat", ios::app);
939  ofile_score.close();
940  ofile_check_score.close();
941 #endif
942 
943 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
944  if (trunc(seed_mz) == 874)
945  std::cout << "final_score: " << seed_mz << "\t" << c_score << "\t l_score: " << l_score << "\t" << c_score - l_score - mid_val << "\t" << c_score - mid_val << "\t" << ampl_cutoff << std::endl;
946 #endif
947 
948  if (c_score - mid_val <= 0)
949  {
950  return 0;
951  }
952 
953  if (c_score - mid_val <= ampl_cutoff)
954  {
955  return -1000;
956  }
957 
958  if (l_score <= 0 || c_score - l_score - mid_val <= 0)
959  {
960  return 0;
961  }
962 
963  return c_score;
964  }
965 
966  template <typename PeakType>
968  const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
969  {
970  double c_score = 0, c_val;
971  typename MSSpectrum::const_iterator c_left_iter2, c_right_iter2;
972  Int signal_size((Int)candidate.size());
973 
974  //p_h_ind indicates if we are looking for a whole or a peak
975  Int p_h_ind = 1, end = 4 * (peak_cutoff - 1) - 1; //4 times and not 2 times, since we move by 0.5 m/z entities
976 
977  std::vector<double> positions(end);
978  for (Int i = 0; i < end; ++i)
979  {
980  positions[i] = seed_mz - ((peak_cutoff - 1) * Constants::IW_NEUTRON_MASS - (i + 1) * Constants::IW_HALF_NEUTRON_MASS) / ((double)c + 1);
981  }
982 
983  double l_score = 0, mid_val = 0;
984  Int start_index = distance(candidate.begin(), candidate.MZBegin(positions[0])) - 1;
985  for (Int v = 1; v <= end; ++v, ++p_h_ind)
986  {
987  do
988  {
989  if (start_index < signal_size - 1)
990  ++start_index;
991  else
992  break;
993  }
994  while (candidate.getMZ(start_index) < positions[v - 1]);
995 
996  if (start_index <= 0 || start_index >= signal_size - 1) //unable to interpolate
997  {
998  continue;
999  }
1000 
1001  c_left_iter2 = candidate.begin() + start_index - 1;
1002  c_right_iter2 = c_left_iter2 + 1;
1003 
1004  c_val = candidate.getTransIntensity(start_index - 1) + (candidate.getTransIntensity(start_index) - candidate.getTransIntensity(start_index - 1)) / (c_right_iter2->getMZ() - c_left_iter2->getMZ()) * (positions[v - 1] - c_left_iter2->getMZ());
1005  if (v == (int)(ceil(end / 2.)))
1006  {
1007  l_score = c_score;
1008  mid_val = c_val;
1009  }
1010 
1011  if (p_h_ind % 2 == 1) //I.e. a whole
1012  {
1013  c_score -= c_val;
1014  }
1015  else
1016  {
1017  c_score += c_val;
1018  }
1019 
1020  start_index = distance(candidate.begin(), c_left_iter2);
1021  }
1022 
1023 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1024  std::ofstream ofile_score("scores.dat", ios::app);
1025  std::ofstream ofile_check_score("check_scores.dat", ios::app);
1026  ofile_score << c_check_point << "\t" << c_score << std::endl;
1027  ofile_score.close();
1028  ofile_check_score.close();
1029 #endif
1030 
1031  if (l_score <= 0 || c_score - l_score - mid_val <= 0 || c_score - mid_val <= ampl_cutoff)
1032  {
1033  return 0;
1034  }
1035 
1036  return c_score;
1037  }
1038 
1039  template <typename PeakType>
1041  const MSSpectrum& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
1042  {
1043  typename std::multimap<double, Box>::iterator iter;
1044  typename Box::iterator box_iter;
1045  std::vector<BoxElement> final_box;
1046  double c_mz, av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1047  double virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1048 
1049  for (iter = tmp_boxes_->at(c).begin(); iter != tmp_boxes_->at(c).end(); ++iter)
1050  {
1051 
1052  Box& c_box = iter->second;
1053  av_score = 0; av_mz = 0; av_intens = 0; av_abs_intens = 0; count = 0;
1054  virtual_av_mz = 0; virtual_av_intens = 0; virtual_av_abs_intens = 0; virtual_count = 0;
1055 
1056  //Now, let's get the RT boundaries for the box
1057  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1058  {
1059  if (box_iter->second.score == 0) //virtual helping point
1060  {
1061  if (count != 0)
1062  continue; //it is in any way not pure virtual
1063 
1064  c_mz = box_iter->second.mz;
1065  virtual_av_intens += box_iter->second.intens;
1066  virtual_av_abs_intens += fabs(box_iter->second.intens);
1067  virtual_av_mz += c_mz * fabs(box_iter->second.intens);
1068  ++virtual_count;
1069  }
1070  else
1071  {
1072  c_mz = box_iter->second.mz;
1073  av_score += box_iter->second.score;
1074  av_intens += box_iter->second.intens;
1075  av_abs_intens += fabs(box_iter->second.intens);
1076  av_mz += c_mz * fabs(box_iter->second.intens);
1077  ++count;
1078  }
1079  }
1080 
1081  if (count == 0) //pure virtual helping box
1082  {
1083  av_intens = virtual_av_intens / virtual_count;
1084  av_score = 0;
1085  av_mz = virtual_av_mz / virtual_av_abs_intens;
1086  }
1087  else
1088  {
1089  av_intens /= count;
1090  av_score /= count;
1091  av_mz /= av_abs_intens;
1092  }
1093 
1094  BoxElement c_box_element;
1095  c_box_element.mz = av_mz;
1096  c_box_element.c = c;
1097  c_box_element.score = av_score;
1098  c_box_element.intens = av_intens;
1099 
1100  c_box_element.RT = c_box.begin()->second.RT;
1101  final_box.push_back(c_box_element);
1102  }
1103 
1104  Size num_o_feature = final_box.size();
1105  if (num_o_feature == 0)
1106  {
1107  tmp_boxes_->at(c).clear();
1108  return;
1109  }
1110 
1111  //Computing the derivatives
1112  std::vector<double> bwd_diffs(num_o_feature, 0);
1113 
1114  bwd_diffs[0] = 0;
1115  for (Size i = 1; i < num_o_feature; ++i)
1116  {
1117  bwd_diffs[i] = (final_box[i].intens - final_box[i - 1].intens) / (final_box[i].mz - final_box[i - 1].mz);
1118  }
1119 
1120 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1121  std::ofstream ofile_bwd("bwd_cpu.dat");
1122  for (Size i = 0; i < num_o_feature; ++i)
1123  {
1124  ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
1125  }
1126  ofile_bwd.close();
1127 #endif
1128 
1129 
1130  for (Size i = 0; i < num_o_feature - 1; ++i)
1131  {
1132  while (i < num_o_feature - 2)
1133  {
1134  if (final_box[i].score > 0 || final_box[i].score == -1000) //this has been an helping point
1135  break;
1136  ++i;
1137  }
1138 
1139  if (bwd_diffs[i] > 0 && bwd_diffs[i + 1] < 0)
1140  {
1141  checkPositionForPlausibility_(candidate, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
1142  continue;
1143  }
1144  }
1145 
1146  tmp_boxes_->at(c).clear();
1147  }
1148 
1149  template <typename PeakType>
1151  {
1152  double av_intens = 0;
1153  for (UInt i = 0; i < scan.size(); ++i)
1154  {
1155  if (scan[i].getIntensity() >= 0)
1156  {
1157  av_intens += scan[i].getIntensity();
1158  }
1159  }
1160  return av_intens / (double)scan.size();
1161  }
1162 
1163  template <typename PeakType>
1165  {
1166  double res = 0, intens;
1167  for (UInt i = 0; i < scan.size(); ++i)
1168  {
1169  if (scan[i].getIntensity() >= 0)
1170  {
1171  intens = scan[i].getIntensity();
1172  res += (intens - mean) * (intens - mean);
1173  }
1174  }
1175  return sqrt(res / (double)(scan.size() - 1));
1176  }
1177 
1178  template <typename PeakType>
1179  double IsotopeWaveletTransform<PeakType>::getAvMZSpacing_(const MSSpectrum& scan) //, Int start_index, Int end_index)
1180  {
1181  std::vector<double> diffs(scan.size() - 1, 0);
1182  for (UInt i = 0; i < scan.size() - 1; ++i)
1183  {
1184  diffs[i] = scan[i + 1].getMZ() - scan[i].getMZ();
1185  }
1186 
1187  sort(diffs.begin(), diffs.end());
1188  double av_MZ_spacing = 0;
1189  for (UInt i = 0; i < diffs.size() / 2; ++i)
1190  {
1191  av_MZ_spacing += diffs[i];
1192  }
1193 
1194  return av_MZ_spacing / (diffs.size() / 2);
1195  }
1196 
1197  template <typename PeakType>
1199  {
1200  double av_intens = 0;
1201  for (UInt i = 0; i < scan.size(); ++i)
1202  {
1203  if (scan.getTransIntensity(i) >= 0)
1204  {
1205  av_intens += scan.getTransIntensity(i);
1206  }
1207  }
1208  return av_intens / (double)scan.size();
1209  }
1210 
1211  template <typename PeakType>
1213  {
1214  double res = 0, intens;
1215  for (UInt i = 0; i < scan.size(); ++i)
1216  {
1217  if (scan.getTransIntensity(i) >= 0)
1218  {
1219  intens = scan.getTransIntensity(i);
1220  res += (intens - mean) * (intens - mean);
1221  }
1222  }
1223  return sqrt(res / (double)(scan.size() - 1));
1224  }
1225 
1226  template <typename PeakType>
1227  void IsotopeWaveletTransform<PeakType>::push2Box_(const double mz, const UInt scan, UInt c,
1228  const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, double ref_intens)
1229  {
1230  const double dist_constraint(Constants::IW_HALF_NEUTRON_MASS / (double)max_charge_);
1231 
1232  typename std::multimap<double, Box>::iterator upper_iter(open_boxes_.upper_bound(mz));
1233  typename std::multimap<double, Box>::iterator lower_iter(open_boxes_.lower_bound(mz));
1234 
1235  if (lower_iter != open_boxes_.end())
1236  {
1237  //Ugly, but necessary due to the implementation of STL lower_bound
1238  if (mz != lower_iter->first && lower_iter != open_boxes_.begin())
1239  {
1240  --lower_iter;
1241  }
1242  }
1243 
1244  typename std::multimap<double, Box>::iterator insert_iter;
1245  bool create_new_box = true;
1246  if (lower_iter == open_boxes_.end()) //I.e. there is no open Box for that mz position
1247  {
1248  //There is another special case to be considered here:
1249  //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1250  //then the lower bound for the new mz value is box.end and this would usually force a new entry
1251  if (!open_boxes_.empty())
1252  {
1253  if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1254  {
1255  create_new_box = false;
1256  insert_iter = lower_iter;
1257  }
1258  }
1259  else
1260  {
1261  create_new_box = true;
1262  }
1263  }
1264  else
1265  {
1266  if (upper_iter == open_boxes_.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1267  {
1268  insert_iter = lower_iter;
1269  create_new_box = false;
1270  }
1271  else
1272  {
1273  create_new_box = true;
1274  }
1275  }
1276 
1277 
1278  if (upper_iter != open_boxes_.end() && lower_iter != open_boxes_.end())
1279  {
1280  //Here is the question if you should figure out the smallest charge .... and then
1281 
1282  //Figure out which entry is closer to m/z
1283  double dist_lower = fabs(lower_iter->first - mz);
1284  double dist_upper = fabs(upper_iter->first - mz);
1285  dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1286  dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1287 
1288  if (dist_lower >= dist_constraint && dist_upper >= dist_constraint) // they are both too far away
1289  {
1290  create_new_box = true;
1291  }
1292  else
1293  {
1294  insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1295  create_new_box = false;
1296  }
1297  }
1298 
1299  BoxElement element;
1300  element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens = intens; element.ref_intens = ref_intens;
1301  element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
1302 
1303 
1304  if (create_new_box == false)
1305  {
1306  std::pair<UInt, BoxElement> help2(scan, element);
1307  insert_iter->second.insert(help2);
1308 
1309  //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
1310  Box replacement(insert_iter->second);
1311 
1312  //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1313  //Also note that we already inserted the new entry, leading to size-1.
1314  double c_mz = insert_iter->first * (insert_iter->second.size() - 1) + mz;
1315  c_mz /= ((double) insert_iter->second.size());
1316 
1317  //Now let's remove the old and insert the new one
1318  open_boxes_.erase(insert_iter);
1319  std::pair<double, std::multimap<UInt, BoxElement> > help3(c_mz, replacement);
1320  open_boxes_.insert(help3);
1321  }
1322  else
1323  {
1324  std::pair<UInt, BoxElement> help2(scan, element);
1325  std::multimap<UInt, BoxElement> help3;
1326  help3.insert(help2);
1327  std::pair<double, std::multimap<UInt, BoxElement> > help4(mz, help3);
1328  open_boxes_.insert(help4);
1329  }
1330  }
1331 
1332  template <typename PeakType>
1333  void IsotopeWaveletTransform<PeakType>::push2TmpBox_(const double mz, const UInt scan, UInt c,
1334  const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end)
1335  {
1336  const double dist_constraint(Constants::IW_HALF_NEUTRON_MASS / (double)max_charge_);
1337 
1338  std::multimap<double, Box>& tmp_box(tmp_boxes_->at(c));
1339  typename std::multimap<double, Box>::iterator upper_iter(tmp_box.upper_bound(mz));
1340  typename std::multimap<double, Box>::iterator lower_iter(tmp_box.lower_bound(mz));
1341 
1342  if (lower_iter != tmp_box.end())
1343  {
1344  //Ugly, but necessary due to the implementation of STL lower_bound
1345  if (mz != lower_iter->first && lower_iter != tmp_box.begin())
1346  {
1347  --lower_iter;
1348  }
1349  }
1350 
1351  typename std::multimap<double, Box>::iterator insert_iter;
1352  bool create_new_box = true;
1353  if (lower_iter == tmp_box.end()) //I.e. there is no tmp Box for that mz position
1354  {
1355  //There is another special case to be considered here:
1356  //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1357  //then the lower bound for the new mz value is box.end and this would usually force a new entry
1358  if (!tmp_box.empty())
1359  {
1360  if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1361  {
1362  create_new_box = false;
1363  insert_iter = lower_iter;
1364  }
1365  }
1366  else
1367  {
1368  create_new_box = true;
1369  }
1370  }
1371  else
1372  {
1373  if (upper_iter == tmp_box.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1374  {
1375  insert_iter = lower_iter;
1376  create_new_box = false;
1377  }
1378  else
1379  {
1380  create_new_box = true;
1381  }
1382  }
1383 
1384 
1385  if (upper_iter != tmp_box.end() && lower_iter != tmp_box.end())
1386  {
1387  //Figure out which entry is closer to m/z
1388  double dist_lower = fabs(lower_iter->first - mz);
1389  double dist_upper = fabs(upper_iter->first - mz);
1390  dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1391  dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1392 
1393  if (dist_lower >= dist_constraint && dist_upper >= dist_constraint) // they are both too far away
1394  {
1395  create_new_box = true;
1396  }
1397  else
1398  {
1399  insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1400  create_new_box = false;
1401  }
1402  }
1403 
1404  BoxElement element;
1405  element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens = intens; element.ref_intens = -1000;
1406  element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
1407 
1408  if (create_new_box == false)
1409  {
1410  std::pair<UInt, BoxElement> help2(scan, element);
1411  insert_iter->second.insert(help2);
1412 
1413  //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
1414  Box replacement(insert_iter->second);
1415 
1416  //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1417  //Also note that we already inserted the new entry, leading to size-1.
1418  double c_mz = insert_iter->first * (insert_iter->second.size() - 1) + mz;
1419  c_mz /= ((double) insert_iter->second.size());
1420 
1421  //Now let's remove the old and insert the new one
1422  tmp_box.erase(insert_iter);
1423  std::pair<double, std::multimap<UInt, BoxElement> > help3(c_mz, replacement);
1424  tmp_box.insert(help3);
1425  }
1426  else
1427  {
1428  std::pair<UInt, BoxElement> help2(scan, element);
1429  std::multimap<UInt, BoxElement> help3;
1430  help3.insert(help2);
1431 
1432  std::pair<double, std::multimap<UInt, BoxElement> > help4(mz, help3);
1433  tmp_box.insert(help4);
1434  }
1435  }
1436 
1437  template <typename PeakType>
1438  void IsotopeWaveletTransform<PeakType>::updateBoxStates(const PeakMap& map, const Size scan_index, const UInt RT_interleave,
1439  const UInt RT_votes_cutoff, const Int front_bound, const Int end_bound)
1440  {
1441  typename std::multimap<double, Box>::iterator iter, iter2;
1442 
1443  if ((Int)scan_index == end_bound && end_bound != (Int)map.size() - 1)
1444  {
1445  for (iter = open_boxes_.begin(); iter != open_boxes_.end(); ++iter)
1446  {
1447 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1448  std::cout << "LOW THREAD insert in end_box " << iter->first << std::endl;
1449  typename Box::iterator dings;
1450  for (dings = iter->second.begin(); dings != iter->second.end(); ++dings)
1451  std::cout << map[dings->first].getRT() << "\t" << dings->second.c + 1 << std::endl;
1452 #endif
1453  end_boxes_.insert(*iter);
1454  }
1455  open_boxes_.clear();
1456  return;
1457  }
1458 
1459  for (iter = open_boxes_.begin(); iter != open_boxes_.end(); )
1460  {
1461 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1462  if (front_bound > 0)
1463  {
1464  std::cout << "HIGH THREAD open box. " << iter->first << "\t current scan index " << scan_index << "\t" << ((iter->second.begin()))->first << "\t of last scan " << map.size() - 1 << "\t" << front_bound << std::endl;
1465  }
1466 #endif
1467 
1468  //For each Box we need to figure out, if and when the last RT value has been inserted
1469  UInt lastScan = (--(iter->second.end()))->first;
1470  if (scan_index - lastScan > RT_interleave + 1 || scan_index == map.size() - 1) //I.e. close the box!
1471  {
1472  if (iter->second.begin()->first - front_bound <= RT_interleave + 1 && front_bound > 0)
1473  {
1474  iter2 = iter;
1475  ++iter2;
1476 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1477  std::cout << "HIGH THREAD insert in front_box " << iter->first << std::endl;
1478 #endif
1479  front_boxes_.insert(*iter);
1480  open_boxes_.erase(iter);
1481  iter = iter2;
1482  continue;
1483  }
1484 
1485  iter2 = iter;
1486  ++iter2;
1487  //Please do **NOT** simplify the upcoming lines.
1488  //The 'obvious' overhead is necessary since the object represented by iter might be erased
1489  //by push2Box which might be called by extendBox_.
1490  if (iter->second.size() >= RT_votes_cutoff)
1491  {
1492  //extendBox_ (map, iter->second);
1493  iter = iter2;
1494  closed_boxes_.insert(*(--iter));
1495  }
1496  open_boxes_.erase(iter);
1497  iter = iter2;
1498  }
1499  else
1500  {
1501  ++iter;
1502  }
1503  }
1504  }
1505 
1506  template <typename PeakType>
1508  {
1509 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1510  std::cout << "**** CHECKING FOR BOX EXTENSIONS ****" << std::endl;
1511 #endif
1512 
1513  //Determining the elution profile
1514  typename Box::const_iterator iter;
1515  std::vector<double> elution_profile(box.size());
1516  UInt index = 0;
1517  for (iter = box.begin(); iter != box.end(); ++iter, ++index)
1518  {
1519  for (Size i = iter->second.MZ_begin; i != iter->second.MZ_end; ++i)
1520  {
1521  elution_profile[index] += map[iter->second.RT_index][i].getIntensity();
1522  }
1523  elution_profile[index] /= iter->second.MZ_end - iter->second.MZ_begin + 1.;
1524  }
1525 
1526  double max = 0;
1527  Int max_index = INT_MIN;
1528  for (Size i = 0; i < elution_profile.size(); ++i)
1529  {
1530  if (elution_profile[i] > max)
1531  {
1532  max_index = i;
1533  max = elution_profile[i];
1534  }
1535  }
1536 
1537  Int max_extension = (Int)(elution_profile.size()) - 2 * max_index;
1538 
1539  double av_elution = 0;
1540  for (Size i = 0; i < elution_profile.size(); ++i)
1541  {
1542  av_elution += elution_profile[i];
1543  }
1544  av_elution /= (double)elution_profile.size();
1545 
1546  double sd_elution = 0;
1547  for (Size i = 0; i < elution_profile.size(); ++i)
1548  {
1549  sd_elution += (av_elution - elution_profile[i]) * (av_elution - elution_profile[i]);
1550  }
1551  sd_elution /= (double)(elution_profile.size() - 1);
1552  sd_elution = sqrt(sd_elution);
1553 
1554  //Determine average m/z monoisotopic pos
1555  double av_mz = 0;
1556  for (iter = box.begin(); iter != box.end(); ++iter, ++index)
1557  {
1558  av_mz += iter->second.mz;
1559 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1560  std::cout << iter->second.RT << "\t" << iter->second.mz << "\t" << iter->second.c + 1 << std::endl;
1561 #endif
1562  }
1563  av_mz /= (double)box.size();
1564 
1565 
1566  //Boundary check
1567  if ((Int)(box.begin()->second.RT_index) - 1 < 0)
1568  {
1569  return;
1570  }
1571 
1572  UInt pre_index = box.begin()->second.RT_index - 1;
1573  typename MSSpectrum::const_iterator c_iter = map[pre_index].MZBegin(av_mz);
1574  double pre_elution = 0;
1575 
1576  double mz_start = map[pre_index + 1][box.begin()->second.MZ_begin].getMZ();
1577  double mz_end = map[pre_index + 1][box.begin()->second.MZ_end].getMZ();
1578 
1579  typename MSSpectrum::const_iterator mz_start_iter = map[pre_index].MZBegin(mz_start), mz_end_iter = map[pre_index].MZBegin(mz_end);
1580  for (typename MSSpectrum::const_iterator mz_iter = mz_start_iter; mz_iter != mz_end_iter; ++mz_iter)
1581  {
1582  pre_elution += mz_iter->getIntensity();
1583  }
1584 
1585 
1586  //Do we need to extend at all?
1587  if (pre_elution <= av_elution - 2 * sd_elution)
1588  {
1589  return;
1590  }
1591 
1592  Int c_index = max_extension;
1593  Int first_index = box.begin()->second.RT_index;
1594  for (Int i = 1; i < max_extension; ++i)
1595  {
1596  c_index = first_index - i;
1597  if (c_index < 0)
1598  {
1599  break;
1600  }
1601 
1602  //CHECK Majority vote for charge???????????????
1603 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1604  std::cout << box.begin()->second.RT << "\t" << av_mz << "\t" << box.begin()->second.c + 1 << "\t" << " extending the box " << std::endl;
1605 #endif
1606 
1607  push2Box_(av_mz, c_index, box.begin()->second.c, box.begin()->second.score, c_iter->getIntensity(),
1608  map[c_index].getRT(), box.begin()->second.MZ_begin, box.begin()->second.MZ_end);
1609  }
1610  }
1611 
1612  template <typename PeakType>
1614  const MSSpectrum& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
1615  {
1616  typename std::multimap<double, Box>::iterator iter;
1617  typename Box::iterator box_iter;
1618  std::vector<BoxElement> final_box;
1619  double c_mz, av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1620  double virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1621 
1622  for (iter = tmp_boxes_->at(c).begin(); iter != tmp_boxes_->at(c).end(); ++iter)
1623  {
1624  Box& c_box = iter->second;
1625  av_score = 0; av_mz = 0; av_intens = 0; av_abs_intens = 0; count = 0;
1626  virtual_av_mz = 0; virtual_av_intens = 0; virtual_av_abs_intens = 0; virtual_count = 0;
1627 
1628  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1629  {
1630  if (box_iter->second.score == 0) //virtual helping point
1631  {
1632  if (count != 0)
1633  continue; //it is in any way not pure virtual
1634 
1635  c_mz = box_iter->second.mz;
1636  virtual_av_intens += box_iter->second.intens;
1637  virtual_av_abs_intens += fabs(box_iter->second.intens);
1638  virtual_av_mz += c_mz * fabs(box_iter->second.intens);
1639  ++virtual_count;
1640  }
1641  else
1642  {
1643  c_mz = box_iter->second.mz;
1644  av_score += box_iter->second.score;
1645  av_intens += box_iter->second.intens;
1646  av_abs_intens += fabs(box_iter->second.intens);
1647  av_mz += c_mz * fabs(box_iter->second.intens);
1648  ++count;
1649  }
1650  }
1651 
1652  if (count == 0) //pure virtual helping box
1653  {
1654  av_intens = virtual_av_intens / virtual_count;
1655  av_score = 0;
1656  av_mz = virtual_av_mz / virtual_av_abs_intens;
1657  }
1658  else
1659  {
1660  av_intens /= count;
1661  av_score /= count;
1662  av_mz /= av_abs_intens;
1663  }
1664 
1665  BoxElement c_box_element;
1666  c_box_element.mz = av_mz;
1667  c_box_element.c = c;
1668  c_box_element.score = av_score;
1669  c_box_element.intens = av_intens;
1670 
1671  c_box_element.RT = c_box.begin()->second.RT;
1672 
1673  final_box.push_back(c_box_element);
1674  }
1675 
1676  UInt num_o_feature = final_box.size();
1677  if (num_o_feature == 0)
1678  {
1679  tmp_boxes_->at(c).clear();
1680  return;
1681  }
1682 
1683  //Computing the derivatives
1684  std::vector<double> bwd_diffs(num_o_feature, 0);
1685 
1686  bwd_diffs[0] = 0;
1687  for (UInt i = 1; i < num_o_feature; ++i)
1688  {
1689  bwd_diffs[i] = (final_box[i].intens - final_box[i - 1].intens) / (final_box[i].mz - final_box[i - 1].mz);
1690  }
1691 
1692 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1693  std::ofstream ofile_bwd("bwd_gpu.dat");
1694  for (UInt i = 0; i < num_o_feature; ++i)
1695  {
1696  ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
1697  }
1698  ofile_bwd.close();
1699 #endif
1700 
1701  for (UInt i = 0; i < num_o_feature - 1; ++i)
1702  {
1703  while (i < num_o_feature - 2)
1704  {
1705  if (final_box[i].score > 0 || final_box[i].score == -1000) //this has been an helping point
1706  break;
1707  ++i;
1708  }
1709 
1710  if (bwd_diffs[i] > 0 && bwd_diffs[i + 1] < 0)
1711  {
1712  checkPositionForPlausibility_(candidates, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
1713  continue;
1714  }
1715  }
1716 
1717  tmp_boxes_->at(c).clear();
1718  }
1719 
1720  template <typename PeakType>
1722  {
1723  FeatureMap feature_map;
1724  typename std::multimap<double, Box>::iterator iter;
1725  typename Box::iterator box_iter;
1726  UInt best_charge_index; double best_charge_score, c_mz, c_RT; UInt c_charge;
1727  double av_intens = 0, av_ref_intens = 0, av_score = 0, av_mz = 0, av_RT = 0, mz_cutoff, sum_of_ref_intenses_g;
1728  bool restart = false;
1729 
1730  for (iter = closed_boxes_.begin(); iter != closed_boxes_.end(); ++iter)
1731  {
1732  sum_of_ref_intenses_g = 0;
1733  Box& c_box = iter->second;
1734  std::vector<double> charge_votes(max_charge_, 0), charge_binary_votes(max_charge_, 0);
1735  restart = false;
1736 
1737  //Let's first determine the charge
1738  //Therefore, we can use two types of votes: qualitative ones (charge_binary_votes) or quantitative ones (charge_votes)
1739  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1740  {
1741 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1742  if (trunc(box_iter->second.mz) == 874)
1743  std::cout << box_iter->second.c << "\t" << box_iter->second.intens << "\t" << box_iter->second.score << std::endl;
1744 #endif
1745 
1746  if (box_iter->second.score == -1000)
1747  {
1748  restart = true;
1749  break;
1750  }
1751 
1752  charge_votes[box_iter->second.c] += box_iter->second.intens; //score; Do not use score, can get problematic for charge state 2 vs 4
1753  ++charge_binary_votes[box_iter->second.c];
1754  }
1755 
1756  if (restart)
1757  {
1758  continue;
1759  }
1760 
1761  //... determining the best fitting charge
1762  best_charge_index = 0; best_charge_score = 0;
1763  for (UInt i = 0; i < max_charge_; ++i)
1764  {
1765  if (charge_votes[i] > best_charge_score)
1766  {
1767  best_charge_index = i;
1768  best_charge_score = charge_votes[i];
1769  }
1770  }
1771 
1772  //Pattern found in too few RT scan
1773  if (charge_binary_votes[best_charge_index] < RT_votes_cutoff && RT_votes_cutoff <= map.size())
1774  {
1775  continue;
1776  }
1777 
1778  c_charge = best_charge_index + 1; //that's the finally predicted charge state for the pattern
1779 
1780  av_intens = 0; av_ref_intens = 0; av_score = 0; av_mz = 0; av_RT = 0;
1781  //Now, let's get the RT boundaries for the box
1782  std::vector<DPosition<2> > point_set;
1783  double sum_of_ref_intenses_l;
1784  for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1785  {
1786  sum_of_ref_intenses_l = 0;
1787  c_mz = box_iter->second.mz;
1788  c_RT = box_iter->second.RT;
1789 
1790  mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_mz, c_charge);
1791 
1792  point_set.push_back(DPosition<2>(c_RT, c_mz - Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge));
1793  //-1 since we are already at the first peak and +0.75, since this includes the last peak of the wavelet as a whole
1794  point_set.push_back(DPosition<2>(c_RT, c_mz + mz_cutoff / (double)c_charge));
1795 
1796 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1797  std::cout << "Intenstype: " << intenstype_ << std::endl;
1798 #endif
1799  if (intenstype_ == "ref")
1800  {
1801  //Find monoisotopic max
1802  const MSSpectrum& c_spec(map[box_iter->second.RT_index]);
1803  //'Correct' possible shift
1804  for (unsigned int i = 0; i < mz_cutoff; ++i)
1805  {
1806  typename MSSpectrum::const_iterator h_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge + Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge), hc_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge);
1807 
1808  hc_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge);
1809 
1810  while (h_iter != c_spec.begin())
1811  {
1812 
1813 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1814  if (trunc(c_mz) == 874)
1815  {
1816  std::cout << "cmz: " << c_mz + i * Constants::IW_NEUTRON_MASS / c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << h_iter->getMZ() << "\t" << h_iter->getIntensity() << std::endl;
1817  }
1818 #endif
1819 
1820  --h_iter;
1821  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1822  {
1823  hc_iter = h_iter;
1824  }
1825 
1826  if (c_mz + i * Constants::IW_NEUTRON_MASS / c_charge - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge)
1827  {
1828  break;
1829  }
1830  }
1831 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1832  if (trunc(c_mz) == 874)
1833  {
1834  std::cout << "c_mz: " << c_mz + i * Constants::IW_NEUTRON_MASS / c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << i * Constants::IW_NEUTRON_MASS / c_charge << "\t";
1835  }
1836 #endif
1837  sum_of_ref_intenses_l += hc_iter->getIntensity();
1838 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1839  if (trunc(c_mz) == 874)
1840  {
1841  std::cout << sum_of_ref_intenses_l << "********" << std::endl;
1842  }
1843 #endif
1844  }
1845  }
1846 
1847  if (best_charge_index == box_iter->second.c)
1848  {
1849  av_score += box_iter->second.score;
1850  av_intens += box_iter->second.intens;
1851  av_ref_intens += box_iter->second.ref_intens;
1852  sum_of_ref_intenses_g += sum_of_ref_intenses_l;
1853  av_mz += c_mz * box_iter->second.intens;
1854  }
1855  av_RT += c_RT;
1856  }
1857 
1858  av_mz /= av_intens;
1859  av_ref_intens /= (double)charge_binary_votes[best_charge_index];
1860  av_score /= (double)charge_binary_votes[best_charge_index];
1861  av_RT /= (double)c_box.size();
1862 
1863 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1864  if (trunc(av_mz) == 874)
1865  std::cout << av_mz << "\t" << best_charge_index << "\t" << best_charge_score << std::endl;
1866 #endif
1867 
1868  Feature c_feature;
1869  ConvexHull2D c_conv_hull;
1870  c_conv_hull.addPoints(point_set);
1871  c_feature.setCharge(c_charge);
1872  c_feature.setConvexHulls(std::vector<ConvexHull2D>(1, c_conv_hull));
1873 
1874  //This makes the intensity value independent of the m/z (the lambda) value (Skellam distribution)
1875  if (intenstype_ == "corrected")
1876  {
1877  double lambda = IsotopeWavelet::getLambdaL(av_mz * c_charge);
1878  av_intens /= exp(-2 * lambda) * boost::math::cyl_bessel_i(0, 2 * lambda);
1879  }
1880  if (intenstype_ == "ref")
1881  {
1882 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1883  if (trunc(c_mz) == 874)
1884  {
1885  std::cout << sum_of_ref_intenses_g << "####" << std::endl;
1886  }
1887 #endif
1888 
1889  av_intens = sum_of_ref_intenses_g;
1890  }
1891 
1892  c_feature.setMZ(av_mz);
1893  c_feature.setIntensity(av_intens);
1894  c_feature.setRT(av_RT);
1895  c_feature.setOverallQuality(av_score);
1896  feature_map.push_back(c_feature);
1897  }
1898 
1899  return feature_map;
1900  }
1901 
1902  template <typename PeakType>
1904  const MSSpectrum& ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
1905  {
1906  typename MSSpectrum::const_iterator iter, ref_iter;
1907  UInt peak_cutoff;
1908  peak_cutoff = IsotopeWavelet::getNumPeakCutOff(seed_mz, c + 1);
1909 
1910  iter = candidate.MZBegin(seed_mz);
1911  //we can ignore those cases
1912  if (iter == candidate.begin() || iter == candidate.end())
1913  {
1914  return false;
1915  }
1916 
1917  std::pair<double, double> reals;
1918  ref_iter = ref.MZBegin(seed_mz);
1919  //Correct the position
1920  double real_mz, real_intens;
1921  if (check_PPMs)
1922  {
1923  reals = checkPPMTheoModel_(ref, iter->getMZ(), c);
1924  real_mz = reals.first; real_intens = reals.second;
1925  //if (real_mz <= 0 || real_intens <= 0)
1926  //{
1927  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
1928  while (h_iter != ref.begin())
1929  {
1930  --h_iter;
1931  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1932  {
1933  hc_iter = h_iter;
1934  }
1935  else
1936  {
1937  break;
1938  }
1939 
1940  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
1941  {
1942  return false;
1943  }
1944  }
1945  reals = checkPPMTheoModel_(ref, h_iter->getMZ(), c);
1946  real_mz = reals.first; real_intens = reals.second;
1947  if (real_mz <= 0 || real_intens <= 0)
1948  {
1949  return false;
1950  }
1951  real_mz = h_iter->getMZ();
1952  real_intens = h_iter->getIntensity();
1953  //}
1954  }
1955  else
1956  {
1957  reals = std::pair<double, double>(seed_mz, ref_iter->getIntensity());
1958  real_mz = reals.first; real_intens = reals.second;
1959 
1960  if (real_mz <= 0 || real_intens <= 0)
1961  {
1962  typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
1963  while (h_iter != ref.begin())
1964  {
1965  --h_iter;
1966  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1967  {
1968  hc_iter = h_iter;
1969  }
1970  else
1971  {
1972  break;
1973  }
1974 
1975  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
1976  {
1977  return false;
1978  }
1979  }
1980  real_mz = h_iter->getMZ(); real_intens = h_iter->getIntensity();
1981  if (real_mz <= 0 || real_intens <= 0)
1982  {
1983  return false;
1984  }
1985  real_mz = h_iter->getMZ();
1986  real_intens = h_iter->getIntensity();
1987  }
1988  }
1989 
1990  double c_score = scoreThis_(candidate, peak_cutoff, real_mz, c, 0);
1991 
1992  if (c_score <= 0)
1993  {
1994  return false;
1995  }
1996 
1997  double mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(real_mz, c + 1);
1998  typename MSSpectrum::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.));
1999  typename MSSpectrum::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz + mz_cutoff / (c + 1.), ref.end());
2000  if (real_r_MZ_iter == ref.end())
2001  {
2002  --real_r_MZ_iter;
2003  }
2004 
2005 
2006  UInt real_mz_begin = distance(ref.begin(), real_l_MZ_iter);
2007  UInt real_mz_end = distance(ref.begin(), real_r_MZ_iter);
2008 
2009  if (prev_score == -1000)
2010  {
2011  push2Box_(real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2012  }
2013  else
2014  {
2015  push2Box_(real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2016  }
2017  return true;
2018  }
2019 
2020  template <typename PeakType>
2022  const MSSpectrum& ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
2023  {
2024  typename MSSpectrum::const_iterator iter, ref_iter;
2025  UInt peak_cutoff;
2026  peak_cutoff = IsotopeWavelet::getNumPeakCutOff(seed_mz, c + 1);
2027 
2028  iter = candidate.MZBegin(seed_mz);
2029  //we can ignore those cases
2030  if (iter == candidate.begin() || iter == candidate.end())
2031  {
2032  return false;
2033  }
2034 
2035  std::pair<double, double> reals;
2036  ref_iter = ref.MZBegin(seed_mz);
2037  //Correct the position
2038  double real_mz, real_intens;
2039  if (check_PPMs)
2040  {
2041  reals = checkPPMTheoModel_(ref, iter->getMZ(), c);
2042  real_mz = reals.first; real_intens = reals.second;
2043  //if (real_mz <= 0 || real_intens <= 0)
2044  //{
2045  auto h_iter = ref_iter, hc_iter = ref_iter;
2046  while (h_iter != ref.begin())
2047  {
2048  --h_iter;
2049  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2050  {
2051  hc_iter = h_iter;
2052  }
2053  else
2054  {
2055  break;
2056  }
2057 
2058  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2059  {
2060  return false;
2061  }
2062  }
2063  ++h_iter;
2064  reals = checkPPMTheoModel_(ref, h_iter->getMZ(), c);
2065  real_mz = reals.first; real_intens = reals.second;
2066 
2067 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2068  std::cout << "Plausibility check old_mz: " << iter->getMZ() << "\t" << real_mz << std::endl;
2069 #endif
2070 
2071  if (real_mz <= 0 || real_intens <= 0)
2072  {
2073  return false;
2074  }
2075  real_mz = h_iter->getMZ();
2076  real_intens = h_iter->getIntensity();
2077  //}
2078  }
2079  else
2080  {
2081  reals = std::pair<double, double>(seed_mz, ref_iter->getIntensity());
2082  real_mz = reals.first; real_intens = reals.second;
2083 
2084  if (real_mz <= 0 || real_intens <= 0)
2085  {
2086  auto h_iter = ref_iter, hc_iter = ref_iter;
2087  while (h_iter != ref.begin())
2088  {
2089  --h_iter;
2090  if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2091  {
2092  hc_iter = h_iter;
2093  }
2094  else
2095  {
2096  break;
2097  }
2098 
2099  if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2100  {
2101  return false;
2102  }
2103  }
2104  real_mz = h_iter->getMZ(); real_intens = h_iter->getIntensity();
2105  if (real_mz <= 0 || real_intens <= 0)
2106  {
2107  return false;
2108  }
2109  real_mz = h_iter->getMZ();
2110  real_intens = h_iter->getIntensity();
2111  }
2112  }
2113 
2114  double c_score = scoreThis_(candidate, peak_cutoff, real_mz, c, 0);
2115 
2116  if (c_score <= 0)
2117  {
2118  return false;
2119  }
2120 
2121  double mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(real_mz, c + 1);
2122  typename MSSpectrum::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.));
2123  typename MSSpectrum::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz + mz_cutoff / (c + 1.), ref.end());
2124  if (real_r_MZ_iter == ref.end())
2125  {
2126  --real_r_MZ_iter;
2127  }
2128 
2129 
2130  UInt real_mz_begin = distance(ref.begin(), real_l_MZ_iter);
2131  UInt real_mz_end = distance(ref.begin(), real_r_MZ_iter);
2132 
2133  if (prev_score == -1000)
2134  {
2135  push2Box_(real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2136  }
2137  else
2138  {
2139  push2Box_(real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2140  }
2141 
2142  return true;
2143  }
2144 
2145  template <typename PeakType>
2146  std::pair<double, double> IsotopeWaveletTransform<PeakType>::checkPPMTheoModel_(const MSSpectrum& ref, const double c_mz, const UInt c)
2147  {
2148  double mass = c_mz * (c + 1) - Constants::IW_PROTON_MASS * (c);
2149  double ppms = getPPMs_(peptideMassRule_(mass), mass);
2151  {
2152 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2153  std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "REJECT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2154 #endif
2155  return std::pair<double, double>(-1, -1);
2156  }
2157 
2158 #ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2159  std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "ACCEPT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2160 #endif
2161  return std::pair<double, double>(c_mz, ref.MZBegin(c_mz)->getIntensity());
2162  }
2163 
2164 } //namespace
2165 
2166 #pragma clang diagnostic pop
2167 
2168 
void setCharge(const ChargeType &ch)
Set charge state.
Mutable iterator for the ConstRefVector.
Definition: ConstRefVector.h:223
This vector holds pointer to the elements of another container.
Definition: ConstRefVector.h:71
Iterator begin()
See std::vector documentation.
Definition: ConstRefVector.h:398
void sortByIntensity(bool reverse=false)
Sorting.
Definition: ConstRefVector.h:720
Iterator end()
See std::vector documentation.
Definition: ConstRefVector.h:404
A 2-dimensional hull representation in [counter]clockwise direction - depending on axis labelling.
Definition: ConvexHull2D.h:73
void addPoints(const PointArrayType &points)
A container for features.
Definition: FeatureMap.h:105
An LC-MS feature.
Definition: Feature.h:72
void setConvexHulls(const std::vector< ConvexHull2D > &hulls)
Set the convex hulls of single mass traces.
void setOverallQuality(QualityType q)
Set the overall quality.
Internally (only by GPUs) used data structure . It allows efficient data exchange between CPU and GPU...
Definition: IsotopeWaveletTransform.h:95
double getRT() const
Definition: IsotopeWaveletTransform.h:128
const MSSpectrum * getRefSpectrum() const
Definition: IsotopeWaveletTransform.h:170
TransSpectrum(const MSSpectrum *reference)
Definition: IsotopeWaveletTransform.h:107
double getRefIntensity(const UInt i) const
Definition: IsotopeWaveletTransform.h:140
TransSpectrum()
Definition: IsotopeWaveletTransform.h:101
MSSpectrum::const_iterator end() const
Definition: IsotopeWaveletTransform.h:191
double getMZ(const UInt i) const
Definition: IsotopeWaveletTransform.h:134
virtual ~TransSpectrum()
Definition: IsotopeWaveletTransform.h:114
std::vector< float > * trans_intens_
The intensities of the transform.
Definition: IsotopeWaveletTransform.h:206
void setTransIntensity(const UInt i, const double intens)
Definition: IsotopeWaveletTransform.h:152
const MSSpectrum * getRefSpectrum()
Definition: IsotopeWaveletTransform.h:164
MSSpectrum::const_iterator MZEnd(const double mz) const
Definition: IsotopeWaveletTransform.h:184
Size size() const
Definition: IsotopeWaveletTransform.h:158
virtual void destroy()
Definition: IsotopeWaveletTransform.h:119
double getTransIntensity(const UInt i) const
Definition: IsotopeWaveletTransform.h:146
const MSSpectrum * reference_
The reference spectrum.
Definition: IsotopeWaveletTransform.h:205
MSSpectrum::const_iterator MZBegin(const double mz) const
Definition: IsotopeWaveletTransform.h:177
MSSpectrum::const_iterator begin() const
Definition: IsotopeWaveletTransform.h:198
A class implementing the isotope wavelet transform. If you just want to find features using the isoto...
Definition: IsotopeWaveletTransform.h:70
void clusterSeeds_(const TransSpectrum &candidates, const MSSpectrum &ref, const UInt scan_index, const UInt c, const bool check_PPMs)
Clusters the seeds stored by push2TmpBox_.
Definition: IsotopeWaveletTransform.h:1613
virtual ~IsotopeWaveletTransform()
Destructor.
Definition: IsotopeWaveletTransform.h:557
Size max_scan_size_
Definition: IsotopeWaveletTransform.h:483
std::vector< double > prod_
Definition: IsotopeWaveletTransform.h:480
std::vector< float > zeros_
Definition: IsotopeWaveletTransform.h:491
UInt data_length_
Definition: IsotopeWaveletTransform.h:484
virtual void push2Box_(const double mz, const UInt scan, UInt c, const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, const double ref_intens)
Inserts a potential isotopic pattern into an open box or - if no such box exists - creates a new one.
Definition: IsotopeWaveletTransform.h:1227
std::vector< double > xs_
Definition: IsotopeWaveletTransform.h:480
double getPPMs_(const double mass_a, const double mass_b) const
Returns the parts-per-million deviation of the masses.
Definition: IsotopeWaveletTransform.h:470
std::multimap< double, Box > front_boxes_
Definition: IsotopeWaveletTransform.h:476
std::vector< double > c_spacings_
Definition: IsotopeWaveletTransform.h:480
UInt max_charge_
Definition: IsotopeWaveletTransform.h:484
Int from_max_to_right_
Definition: IsotopeWaveletTransform.h:487
virtual std::pair< double, double > checkPPMTheoModel_(const MSSpectrum &ref, const double c_mz, const UInt c)
Definition: IsotopeWaveletTransform.h:2146
Size getMaxScanSize() const
Definition: IsotopeWaveletTransform.h:317
std::multimap< double, Box > end_boxes_
Definition: IsotopeWaveletTransform.h:476
virtual double scoreThis_(const TransSpectrum &candidate, const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
Given a candidate for an isotopic pattern, this function computes the corresponding score.
Definition: IsotopeWaveletTransform.h:967
double score
The associated score.
Definition: IsotopeWaveletTransform.h:79
IsotopeWaveletTransform()
Default Constructor.
Definition: IsotopeWaveletTransform.h:519
std::vector< double > c_mzs_
Definition: IsotopeWaveletTransform.h:480
double mz
The monoisotopic position.
Definition: IsotopeWaveletTransform.h:77
double sigma_
Definition: IsotopeWaveletTransform.h:479
void updateBoxStates(const PeakMap &map, const Size scan_index, const UInt RT_interleave, const UInt RT_votes_cutoff, const Int front_bound=-1, const Int end_bound=-1)
A function keeping track of currently open and closed sweep line boxes. This function is used by the ...
Definition: IsotopeWaveletTransform.h:1438
std::vector< float > scores_
Definition: IsotopeWaveletTransform.h:491
std::multimap< double, Box > closed_boxes_
Definition: IsotopeWaveletTransform.h:476
std::vector< std::multimap< double, Box > > * tmp_boxes_
Definition: IsotopeWaveletTransform.h:477
std::vector< int > indices_
Definition: IsotopeWaveletTransform.h:488
UInt MZ_begin
Index.
Definition: IsotopeWaveletTransform.h:84
double ref_intens
Definition: IsotopeWaveletTransform.h:81
std::vector< double > interpol_xs_
Definition: IsotopeWaveletTransform.h:481
double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b)
Computes a linear (intensity) interpolation.
Definition: IsotopeWaveletTransform.h:295
virtual std::multimap< double, Box > getClosedBoxes()
Returns the closed boxes.
Definition: IsotopeWaveletTransform.h:276
UInt c
Note, this is not the charge (it is charge-1!!!)
Definition: IsotopeWaveletTransform.h:78
double getSdIntens_(const TransSpectrum &scan, const double mean)
Computes the standard deviation (neglecting negative values) of the (transformed) intensities of scan...
Definition: IsotopeWaveletTransform.h:1212
double av_MZ_spacing_
Definition: IsotopeWaveletTransform.h:479
virtual void getTransform(MSSpectrum &c_trans, const MSSpectrum &c_ref, const UInt c)
Computes the isotope wavelet transform of charge state c.
Definition: IsotopeWaveletTransform.h:563
double peptideMassRule_(const double c_mass) const
Returns the monoisotopic mass (with corresponding decimal values) we would expect at c_mass.
Definition: IsotopeWaveletTransform.h:447
Int from_max_to_left_
Definition: IsotopeWaveletTransform.h:487
String intenstype_
Definition: IsotopeWaveletTransform.h:486
std::vector< double > interpol_ys_
Definition: IsotopeWaveletTransform.h:481
double max_mz_cutoff_
Definition: IsotopeWaveletTransform.h:490
virtual void push2TmpBox_(const double mz, const UInt scan, UInt charge, const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end)
Essentially the same function as.
Definition: IsotopeWaveletTransform.h:1333
double getAvIntens_(const TransSpectrum &scan)
Computes the average (transformed) intensity (neglecting negative values) of scan.
Definition: IsotopeWaveletTransform.h:1198
double RT
The elution time (not the scan index)
Definition: IsotopeWaveletTransform.h:82
void extendBox_(const PeakMap &map, const Box &box)
A currently still necessary function that extends the box box in order to capture also signals whose ...
Definition: IsotopeWaveletTransform.h:1507
FeatureMap mapSeeds2Features(const PeakMap &map, const UInt RT_votes_cutoff)
Filters the candidates further more and maps the internally used data structures to the OpenMS framew...
Definition: IsotopeWaveletTransform.h:1721
virtual void identifyCharge(const MSSpectrum &candidates, const MSSpectrum &ref, const UInt scan_index, const UInt c, const double ampl_cutoff, const bool check_PPMs)
Given an isotope wavelet transformed spectrum candidates, this function assigns to every significant ...
Definition: IsotopeWaveletTransform.h:696
double getSigma() const
Definition: IsotopeWaveletTransform.h:300
UInt RT_index
The elution time (map) index.
Definition: IsotopeWaveletTransform.h:83
double intens
The transformed intensity at the monoisotopic mass.
Definition: IsotopeWaveletTransform.h:80
void sampleTheCMarrWavelet_(const MSSpectrum &scan, const Int wavelet_length, const Int mz_index, const UInt charge)
std::multimap< double, Box > open_boxes_
Definition: IsotopeWaveletTransform.h:476
double getLinearInterpolation(const typename MSSpectrum::const_iterator &left_iter, const double mz_pos, const typename MSSpectrum::const_iterator &right_iter)
Computes a linear (intensity) interpolation.
Definition: IsotopeWaveletTransform.h:284
virtual void initializeScan(const MSSpectrum &c_ref, const UInt c=0)
Definition: IsotopeWaveletTransform.h:644
double getAvMZSpacing_(const MSSpectrum &scan)
Computes the average MZ spacing of scan.
Definition: IsotopeWaveletTransform.h:1179
virtual void getTransformHighRes(MSSpectrum &c_trans, const MSSpectrum &c_ref, const UInt c)
Computes the isotope wavelet transform of charge state c.
Definition: IsotopeWaveletTransform.h:606
UInt MZ_end
Index.
Definition: IsotopeWaveletTransform.h:85
void setSigma(const double sigma)
Definition: IsotopeWaveletTransform.h:305
double min_spacing_
Definition: IsotopeWaveletTransform.h:490
virtual bool checkPositionForPlausibility_(const TransSpectrum &candidate, const MSSpectrum &ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
A ugly but necessary function to handle "off-by-1-Dalton predictions" due to idiosyncrasies of the da...
Definition: IsotopeWaveletTransform.h:2021
UInt max_num_peaks_per_pattern_
Definition: IsotopeWaveletTransform.h:484
double getMinSpacing() const
Definition: IsotopeWaveletTransform.h:312
bool hr_data_
Definition: IsotopeWaveletTransform.h:485
virtual void computeMinSpacing(const MSSpectrum &c_ref)
Definition: IsotopeWaveletTransform.h:686
std::multimap< UInt, BoxElement > Box
Key: RT index, value: BoxElement.
Definition: IsotopeWaveletTransform.h:88
std::vector< double > psi_
Definition: IsotopeWaveletTransform.h:480
Internally used data structure.
Definition: IsotopeWaveletTransform.h:76
static UInt getNumPeakCutOff(const double mass, const UInt z)
static IsotopeWavelet * init(const double max_m, const UInt max_charge)
static double getLambdaL(const double m)
Returns the mass-parameter lambda (linear fit).
static double getValueByLambda(const double lambda, const double tz1)
Returns the value of the isotope wavelet at position t via a fast table lookup. Usually,...
static UInt getMzPeakCutOffAtMonoPos(const double mass, const UInt z)
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:80
Iterator begin()
Definition: MSExperiment.h:157
Size size() const
Definition: MSExperiment.h:127
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:71
double getRT() const
Iterator MZBegin(CoordinateType mz)
Binary search for peak range begin.
Iterator MZEnd(CoordinateType mz)
Binary search for peak range end (returns the past-the-end iterator)
A 2-dimensional raw data point or peak.
Definition: Peak2D.h:55
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:196
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:202
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:214
IntensityType getIntensity() const
Definition: Peak2D.h:166
void setIntensity(IntensityType intensity)
Non-mutable access to the data point intensity (height)
Definition: Peak2D.h:172
A more convenient string class.
Definition: String.h:61
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:133
const double IW_PROTON_MASS
Definition: IsotopeWaveletConstants.h:68
const double PEPTIDE_MASS_RULE_FACTOR
Definition: IsotopeWaveletConstants.h:49
const double IW_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:54
const unsigned int DEFAULT_NUM_OF_INTERPOLATION_POINTS
Definition: IsotopeWaveletConstants.h:43
const double c
Definition: Constants.h:209
const double PEPTIDE_MASS_RULE_THEO_PPM_BOUND
Definition: IsotopeWaveletConstants.h:51
const double PEPTIDE_MASS_RULE_BOUND
Definition: IsotopeWaveletConstants.h:50
const double IW_HALF_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:55
const double IW_QUARTER_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:56
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
bool intensityComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:495
bool intensityPointerComparator(PeakType *a, PeakType *b)
Definition: IsotopeWaveletTransform.h:507
bool positionComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:513
bool intensityAscendingComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:501