OpenMS
IsotopeWaveletTransform.h
Go to the documentation of this file.
1// Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg$
6// $Authors: $
7// --------------------------------------------------------------------------
8
9#pragma once
10
20
21#include <cmath>
22#include <vector>
23#include <map>
24#include <sstream>
25#include <fstream>
26#include <iomanip>
27
28// TODO: move this to cpp and use STL once it is available in clang on Mac
29#include <boost/math/special_functions/bessel.hpp>
30
31// This code has quite a few strange things in it triggering warnings which
32// clutters the rest of the diagnostics
33#pragma clang diagnostic push
34#pragma clang diagnostic ignored "-Wfloat-equal"
35#pragma clang diagnostic ignored "-Wconversion"
36#pragma clang diagnostic ignored "-Wshorten-64-to-32"
37
38namespace OpenMS
39{
40
44 template <typename PeakType>
46 {
47public:
48
49
52 {
53 double mz;
55 double score;
56 double intens;
57 double ref_intens;
58 double RT;
62 };
63
64 typedef std::multimap<UInt, BoxElement> Box;
65
66
71 {
73
74public:
75
78 reference_(nullptr), trans_intens_(nullptr)
79 {
80 }
81
83 TransSpectrum(const MSSpectrum* reference) :
84 reference_(reference)
85 {
86 trans_intens_ = new std::vector<float>(reference_->size(), 0.0);
87 }
88
91 {
92 delete (trans_intens_);
93 }
94
95 virtual void destroy()
96 {
97 delete (trans_intens_);
98 trans_intens_ = NULL;
99 delete (reference_);
100 reference_ = NULL;
101 }
102
104 inline double getRT() const
105 {
106 return reference_->getRT();
107 }
108
110 inline double getMZ(const UInt i) const
111 {
112 return (*reference_)[i].getMZ();
113 }
114
116 inline double getRefIntensity(const UInt i) const
117 {
118 return (*reference_)[i].getIntensity();
119 }
120
122 inline double getTransIntensity(const UInt i) const
123 {
124 return (*trans_intens_)[i];
125 }
126
128 inline void setTransIntensity(const UInt i, const double intens)
129 {
130 (*trans_intens_)[i] = intens;
131 }
132
134 inline Size size() const
135 {
136 return trans_intens_->size();
137 }
138
141 {
142 return reference_;
143 }
144
146 inline const MSSpectrum* getRefSpectrum() const
147 {
148 return reference_;
149 }
150
153 inline typename MSSpectrum::const_iterator MZBegin(const double mz) const
154 {
155 return reference_->MZBegin(mz);
156 }
157
160 inline typename MSSpectrum::const_iterator MZEnd(const double mz) const
161 {
162 return reference_->MZEnd(mz);
163 }
164
167 inline typename MSSpectrum::const_iterator end() const
168 {
169 return reference_->end();
170 }
171
174 inline typename MSSpectrum::const_iterator begin() const
175 {
176 return reference_->begin();
177 }
178
179protected:
180
182 std::vector<float>* trans_intens_;
183
184 };
185
186
187
193 IsotopeWaveletTransform(const double min_mz, const double max_mz, const UInt max_charge, const Size max_scan_size = 0, const bool hr_data = false, const String intenstype = "ref");
194
196 virtual ~IsotopeWaveletTransform();
197
198
203 virtual void getTransform(MSSpectrum& c_trans, const MSSpectrum& c_ref, const UInt c);
204
209 virtual void getTransformHighRes(MSSpectrum& c_trans, const MSSpectrum& c_ref, const UInt c);
210
230 virtual void identifyCharge(const MSSpectrum& candidates, const MSSpectrum& ref, const UInt scan_index, const UInt c,
231 const double ampl_cutoff, const bool check_PPMs);
232
233 virtual void initializeScan(const MSSpectrum& c_ref, const UInt c = 0);
234
241 void updateBoxStates(const PeakMap& map, const Size scan_index, const UInt RT_interleave,
242 const UInt RT_votes_cutoff, const Int front_bound = -1, const Int end_bound = -1);
243
244
249 FeatureMap mapSeeds2Features(const PeakMap& map, const UInt RT_votes_cutoff);
250
252 virtual std::multimap<double, Box> getClosedBoxes()
253 { return closed_boxes_; }
254
255
260 inline double getLinearInterpolation(const typename MSSpectrum::const_iterator& left_iter, const double mz_pos, const typename MSSpectrum::const_iterator& right_iter)
261 {
262 return left_iter->getIntensity() + (right_iter->getIntensity() - left_iter->getIntensity()) / (right_iter->getMZ() - left_iter->getMZ()) * (mz_pos - left_iter->getMZ());
263 }
264
271 inline double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b)
272 {
273 return intens_a + (intens_b - intens_a) / (mz_b - mz_a) * (mz_pos - mz_a);
274 }
275
276 inline double getSigma() const
277 {
278 return sigma_;
279 }
280
281 inline void setSigma(const double sigma)
282 {
283 sigma_ = sigma;
284 }
285
286 virtual void computeMinSpacing(const MSSpectrum& c_ref);
287
288 inline double getMinSpacing() const
289 {
290 return min_spacing_;
291 }
292
293 inline Size getMaxScanSize() const
294 {
295 return max_scan_size_;
296 }
297
298protected:
299
300
304
305
306 inline void sampleTheCMarrWavelet_(const MSSpectrum& scan, const Int wavelet_length, const Int mz_index, const UInt charge);
307
308
315 virtual double scoreThis_(const TransSpectrum& candidate, const UInt peak_cutoff,
316 const double seed_mz, const UInt c, const double ampl_cutoff);
317
324 virtual double scoreThis_(const MSSpectrum& candidate, const UInt peak_cutoff,
325 const double seed_mz, const UInt c, const double ampl_cutoff);
326
327
335 virtual bool checkPositionForPlausibility_(const TransSpectrum& candidate, const MSSpectrum& ref, const double seed_mz,
336 const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score);
337
345 virtual bool checkPositionForPlausibility_(const MSSpectrum& candidate, const MSSpectrum& ref, const double seed_mz,
346 const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score);
347
348 virtual std::pair<double, double> checkPPMTheoModel_(const MSSpectrum& ref, const double c_mz, const UInt c);
349
350
352 inline double getAvIntens_(const TransSpectrum& scan);
354 inline double getAvIntens_(const MSSpectrum& scan);
355
357 inline double getSdIntens_(const TransSpectrum& scan, const double mean);
359 inline double getSdIntens_(const MSSpectrum& scan, const double mean);
360
371 virtual void push2Box_(const double mz, const UInt scan, UInt c, const double score,
372 const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, const double ref_intens);
373
389 virtual void push2TmpBox_(const double mz, const UInt scan, UInt charge, const double score,
390 const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end);
391
397 inline double getAvMZSpacing_(const MSSpectrum& scan);
398
399
404 void clusterSeeds_(const TransSpectrum& candidates, const MSSpectrum& ref,
405 const UInt scan_index, const UInt c, const bool check_PPMs);
406
411 virtual void clusterSeeds_(const MSSpectrum& candidates, const MSSpectrum& ref,
412 const UInt scan_index, const UInt c, const bool check_PPMs);
413
414
419 void extendBox_(const PeakMap& map, const Box& box);
420
423 inline double peptideMassRule_(const double c_mass) const
424 {
425 double correction_fac = c_mass / Constants::PEPTIDE_MASS_RULE_BOUND;
426 double old_frac_mass = c_mass - (Int)(c_mass);
427 double new_mass = ((Int)(c_mass)) * (1. + Constants::PEPTIDE_MASS_RULE_FACTOR) - (Int)(correction_fac);
428 double new_frac_mass = new_mass - (Int)(new_mass);
429
430 if (new_frac_mass - old_frac_mass > 0.5)
431 {
432 new_mass -= 1.;
433 }
434
435 if (new_frac_mass - old_frac_mass < -0.5)
436 {
437 new_mass += 1.;
438 }
439
440 return new_mass;
441 }
442
446 inline double getPPMs_(const double mass_a, const double mass_b) const
447 {
448 return fabs(mass_a - mass_b) / (0.5 * (mass_a + mass_b)) * 1e6;
449 }
450
451 //internally used data structures for the sweep line algorithm
452 std::multimap<double, Box> open_boxes_, closed_boxes_, end_boxes_, front_boxes_; //double = average m/z position
453 std::vector<std::multimap<double, Box> >* tmp_boxes_; //for each charge we need a separate container
454
456 std::vector<double> c_mzs_, c_spacings_, psi_, prod_, xs_;
457 std::vector<double> interpol_xs_, interpol_ys_;
458
464 std::vector<int> indices_;
465
467 std::vector<float> scores_, zeros_;
468 };
469
470 template <typename PeakType>
471 bool intensityComparator(const PeakType& a, const PeakType& b)
472 {
473 return a.getIntensity() > b.getIntensity();
474 }
475
476 template <typename PeakType>
478 {
479 return a.getIntensity() < b.getIntensity();
480 }
481
482 template <typename PeakType>
484 {
485 return a->getIntensity() > b->getIntensity();
486 }
487
488 template <typename PeakType>
489 bool positionComparator(const PeakType& a, const PeakType& b)
490 {
491 return a.getMZ() < b.getMZ();
492 }
493
494 template <typename PeakType>
496 {
497 tmp_boxes_ = new std::vector<std::multimap<double, Box> >(1);
498 av_MZ_spacing_ = 1;
499 max_scan_size_ = 0;
500 max_mz_cutoff_ = 3;
501 max_num_peaks_per_pattern_ = 3;
502 hr_data_ = false;
503 intenstype_ = "ref";
504 }
505
506 template <typename PeakType>
507 IsotopeWaveletTransform<PeakType>::IsotopeWaveletTransform(const double min_mz, const double max_mz, const UInt max_charge, const Size max_scan_size, const bool hr_data, String intenstype)
508 {
509 max_charge_ = max_charge;
510 max_scan_size_ = max_scan_size;
511 hr_data_ = hr_data;
512 intenstype_ = intenstype;
513 tmp_boxes_ = new std::vector<std::multimap<double, Box> >(max_charge);
514 if (max_scan_size <= 0) //only important for the CPU
515 {
516 IsotopeWavelet::init(max_mz, max_charge);
517 }
518
519 av_MZ_spacing_ = 1;
520 max_mz_cutoff_ = IsotopeWavelet::getMzPeakCutOffAtMonoPos(max_mz, max_charge);
521 max_num_peaks_per_pattern_ = IsotopeWavelet::getNumPeakCutOff(max_mz, max_charge);
522
523 Int size_estimate((Int)ceil(max_scan_size_ / (max_mz - min_mz)));
524 Int to_reserve((Int)ceil(size_estimate * max_num_peaks_per_pattern_ * Constants::IW_NEUTRON_MASS));
525 psi_.reserve(to_reserve); //The wavelet
526 prod_.reserve(to_reserve);
527 xs_.reserve(to_reserve);
530 }
531
532 template <typename PeakType>
534 {
535 delete (tmp_boxes_);
536 }
537
538 template <typename PeakType>
540 {
541 Int spec_size((Int)c_ref.size());
542 //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
543 //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
544 UInt charge = c + 1;
545 double value, T_boundary_left, T_boundary_right, old, c_diff, current, old_pos, my_local_MZ, my_local_lambda, origin, c_mz;
546
547 for (Int my_local_pos = 0; my_local_pos < spec_size; ++my_local_pos)
548 {
549 value = 0; T_boundary_left = 0; T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge) / (double)charge;
550 old = 0; old_pos = (my_local_pos - from_max_to_left_ - 1 >= 0) ? c_ref[my_local_pos - from_max_to_left_ - 1].getMZ() : c_ref[0].getMZ() - min_spacing_;
551 my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ * charge);
552 c_diff = 0;
553 origin = -my_local_MZ + Constants::IW_QUARTER_NEUTRON_MASS / (double)charge;
554
555 for (Int current_conv_pos = std::max(0, my_local_pos - from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
556 {
557 if (current_conv_pos >= spec_size)
558 {
559 value += 0.5 * old * min_spacing_;
560 break;
561 }
562
563 c_mz = c_ref[current_conv_pos].getMZ();
564 c_diff = c_mz + origin;
565
566 //Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
567 current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda(my_local_lambda, c_diff * charge + 1.) * c_ref[current_conv_pos].getIntensity() : 0;
568
569 value += 0.5 * (current + old) * (c_mz - old_pos);
570
571 old = current;
572 old_pos = c_mz;
573 }
574
575
576
577 c_trans[my_local_pos].setIntensity(value);
578 }
579 }
580
581 template <typename PeakType>
583 {
584 Int spec_size((Int)c_ref.size());
585 //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
586 //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
587 UInt charge = c + 1;
588 double value, T_boundary_left, T_boundary_right, c_diff, current, my_local_MZ, my_local_lambda, origin, c_mz;
589
590 for (Int my_local_pos = 0; my_local_pos < spec_size; ++my_local_pos)
591 {
592 value = 0; T_boundary_left = 0; T_boundary_right = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[my_local_pos].getMZ(), charge) / (double)charge;
593
594
595 my_local_MZ = c_ref[my_local_pos].getMZ(); my_local_lambda = IsotopeWavelet::getLambdaL(my_local_MZ * charge);
596 c_diff = 0;
597 origin = -my_local_MZ + Constants::IW_QUARTER_NEUTRON_MASS / (double)charge;
598
599 for (Int current_conv_pos = std::max(0, my_local_pos - from_max_to_left_); c_diff < T_boundary_right; ++current_conv_pos)
600 {
601 if (current_conv_pos >= spec_size)
602 {
603 break;
604 }
605
606 c_mz = c_ref[current_conv_pos].getMZ();
607 c_diff = c_mz + origin;
608
609 //Attention! The +1. has nothing to do with the charge, it is caused by the wavelet's formula (tz1).
610 current = c_diff > T_boundary_left && c_diff <= T_boundary_right ? IsotopeWavelet::getValueByLambda(my_local_lambda, c_diff * charge + 1.) * c_ref[current_conv_pos].getIntensity() : 0;
611
612 value += current;
613 }
614
615 c_trans[my_local_pos].setIntensity(value);
616 }
617 }
618
619 template <typename PeakType>
621 {
622 data_length_ = (UInt) c_ref.size();
623 computeMinSpacing(c_ref);
624 Int wavelet_length = 0, quarter_length = 0;
625
626 if (hr_data_) //We have to check this separately, because the simply estimation for LowRes data is destroyed by large gaps
627 {
628 UInt c_mz_cutoff;
629 typename MSSpectrum::const_iterator start_iter, end_iter;
630 for (UInt i = 0; i < data_length_; ++i)
631 {
632 c_mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[i].getMZ(), c + 1);
633 start_iter = c_ref.MZEnd(c_ref[i].getMZ());
634 end_iter = c_ref.MZBegin(c_ref[i].getMZ() + c_mz_cutoff);
635 wavelet_length = std::max((SignedSize) wavelet_length, distance(start_iter, end_iter) + 1);
636 end_iter = c_ref.MZEnd(c_ref[i].getMZ() - Constants::IW_QUARTER_NEUTRON_MASS / double(c + 1.));
637 quarter_length = std::max((SignedSize) quarter_length, distance(end_iter, start_iter) + 1);
638 }
639 }
640 else
641 {
642 //CHANGED
643 max_mz_cutoff_ = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_ref[data_length_ - 1].getMZ(), max_charge_);
644 wavelet_length = (UInt) ceil(max_mz_cutoff_ / min_spacing_);
645 }
646 //... done
647
648
649 if (wavelet_length > (Int) c_ref.size())
650 {
651 std::cout << "Warning: the extremal length of the wavelet is larger (" << wavelet_length << ") than the number of data points (" << c_ref.size() << "). This might (!) severely affect the transform." << std::endl;
652 std::cout << "Minimal spacing: " << min_spacing_ << std::endl;
653 std::cout << "Warning/Error generated at scan with RT " << c_ref.getRT() << "." << std::endl;
654 }
655
656 Int max_index = (UInt) (Constants::IW_QUARTER_NEUTRON_MASS / min_spacing_);
657 from_max_to_left_ = max_index;
658 from_max_to_right_ = wavelet_length - 1 - from_max_to_left_;
659 }
660
661 template <typename PeakType>
663 {
664 min_spacing_ = INT_MAX;
665 for (UInt c_conv_pos = 1; c_conv_pos < c_ref.size(); ++c_conv_pos)
666 {
667 min_spacing_ = std::min(min_spacing_, c_ref[c_conv_pos].getMZ() - c_ref[c_conv_pos - 1].getMZ());
668 }
669 }
670
671 template <typename PeakType>
673 const MSSpectrum& ref, const UInt scan_index, const UInt c, const double ampl_cutoff, const bool check_PPMs)
674 {
675 Size scan_size(candidates.size());
677 typename MSSpectrum::const_iterator iter_start, iter_end, seed_iter, iter2;
678 double mz_cutoff, seed_mz, c_av_intens = 0, c_score = 0, c_sd_intens = 0, threshold = 0, help_mz, share, share_pos, bwd, fwd;
679 UInt MZ_start, MZ_end;
680
681 MSSpectrum diffed(candidates);
682 diffed[0].setIntensity(0); diffed[scan_size - 1].setIntensity(0);
683
684#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
685 std::stringstream stream;
686 stream << "diffed_" << ref.getRT() << "_" << c + 1 << ".trans\0";
687 std::ofstream ofile(stream.str().c_str());
688#endif
689
690 if (!hr_data_) //LowRes data
691 {
692 for (UInt i = 0; i < scan_size - 2; ++i)
693 {
694 share = candidates[i + 1].getIntensity(); share_pos = candidates[i + 1].getMZ();
695 bwd = (share - candidates[i].getIntensity()) / (share_pos - candidates[i].getMZ());
696 fwd = (candidates[i + 2].getIntensity() - share) / (candidates[i + 2].getMZ() - share_pos);
697
698 if (!(bwd >= 0 && fwd <= 0) || share > ref[i + 1].getIntensity())
699 {
700 diffed[i + 1].setIntensity(0);
701 }
702
703#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
704 ofile << diffed[i + 1].getMZ() << "\t" << diffed[i + 1].getIntensity() << std::endl;
705#endif
706 }
707 }
708 else //HighRes data
709 {
710 for (UInt i = 0; i < scan_size - 2; ++i)
711 {
712 share = candidates[i + 1].getIntensity(); share_pos = candidates[i + 1].getMZ();
713 bwd = (share - candidates[i].getIntensity()) / (share_pos - candidates[i].getMZ());
714 fwd = (candidates[i + 2].getIntensity() - share) / (candidates[i + 2].getMZ() - share_pos);
715
716 if (!(bwd >= 0 && fwd <= 0))
717 {
718 diffed[i + 1].setIntensity(0);
719 }
720
721#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
722 ofile << diffed[i + 1].getMZ() << "\t" << diffed[i + 1].getIntensity() << std::endl;
723#endif
724 }
725 }
726#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
727 ofile.close();
728#endif
729
730 ConstRefVector<MSSpectrum > c_sorted_candidate(diffed.begin(), diffed.end());
731
732 //Sort the transform in descending order according to the intensities present in the transform
733 c_sorted_candidate.sortByIntensity();
734
735#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
736 std::stringstream stream2;
737 stream2 << "sorted_cpu_" << candidates.getRT() << "_" << c + 1 << ".trans\0";
738 std::ofstream ofile2(stream2.str().c_str());
739 for (iter = c_sorted_candidate.end() - 1; iter != c_sorted_candidate.begin(); --iter)
740 {
741 ofile2 << iter->getMZ() << "\t" << iter->getIntensity() << std::endl;
742 }
743 ofile2.close();
744#endif
745
746 std::vector<UInt> processed(scan_size, 0);
747
748 if (ampl_cutoff < 0)
749 {
750 threshold = 0;
751 }
752 else
753 {
754 c_av_intens = getAvIntens_(candidates);
755 c_sd_intens = getSdIntens_(candidates, c_av_intens);
756 threshold = ampl_cutoff * c_sd_intens + c_av_intens;
757 }
758
759 for (iter = c_sorted_candidate.end() - 1; iter != c_sorted_candidate.begin(); --iter)
760 {
761 if (iter->getIntensity() <= 0)
762 {
763 break;
764 }
765
766 seed_mz = iter->getMZ();
767 seed_iter = ref.MZBegin(seed_mz);
768
769 if (seed_iter == ref.end() || processed[distance(ref.begin(), seed_iter)])
770 {
771 continue;
772 }
773
774 mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(seed_mz, c + 1);
775 //Mark the region as processed
776 //Do not move this further down, since we have to mark this as processed in any case,
777 //even when score <=0; otherwise we would look around the maximum's position unless
778 //any significant point is found
779 iter_start = ref.MZBegin(ref.begin(), seed_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.), seed_iter);
780 iter_end = ref.MZEnd(seed_iter, seed_mz + mz_cutoff / (c + 1.), ref.end());
781 if (iter_end == ref.end())
782 {
783 --iter_end;
784 }
785
786 MZ_start = distance(ref.begin(), iter_start);
787 MZ_end = distance(ref.begin(), iter_end);
788
789 memset(&(processed[MZ_start]), 1, sizeof(UInt) * (MZ_end - MZ_start + 1));
790
791 c_score = scoreThis_(candidates, IsotopeWavelet::getNumPeakCutOff(seed_mz * (c + 1.)), seed_mz, c, threshold);
792
793#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
794 if (trunc(seed_mz) == 874)
795 std::cout << seed_mz << "\t" << c_score << std::endl;
796#endif
797
798 if (c_score <= 0 && c_score != -1000)
799 {
800 continue;
801 }
802
803 //Push the seed into its corresponding box (or create a new one, if necessary)
804 //Do ***NOT*** move this further down!
805 push2TmpBox_(seed_mz, scan_index, c, c_score, iter->getIntensity(), ref.getRT(), MZ_start, MZ_end);
806
807 help_mz = seed_mz - Constants::IW_NEUTRON_MASS / (c + 1.);
808 iter2 = candidates.MZBegin(help_mz);
809 if (iter2 == candidates.end() || iter2 == candidates.begin())
810 {
811 continue;
812 }
813
814 if (fabs(iter2->getMZ() - seed_mz) > 0.5 * Constants::IW_NEUTRON_MASS / (c + 1.))
815 {
816 //In the other case, we are too close to the peak, leading to incorrect derivatives.
817 if (iter2 != candidates.end())
818 {
819 push2TmpBox_(iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2 - 1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
820 }
821 }
822
823 help_mz = seed_mz + Constants::IW_NEUTRON_MASS / (c + 1.);
824 iter2 = candidates.MZBegin(help_mz);
825 if (iter2 == candidates.end() || iter2 == candidates.begin())
826 {
827 continue;
828 }
829
830 if (fabs(iter2->getMZ() - seed_mz) > 0.5 * Constants::IW_NEUTRON_MASS / (c + 1.))
831 {
832 //In the other case, we are too close to the peak, leading to incorrect derivatives.
833 if (iter2 != candidates.end())
834 {
835 push2TmpBox_(iter2->getMZ(), scan_index, c, 0, getLinearInterpolation(iter2 - 1, help_mz, iter2), ref.getRT(), MZ_start, MZ_end);
836 }
837 }
838 }
839
840 clusterSeeds_(candidates, ref, scan_index, c, check_PPMs);
841 }
842
843 template <typename PeakType>
845 const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
846 {
847 double c_score = 0, c_val;
848 typename MSSpectrum::const_iterator c_left_iter2, c_right_iter2;
849 Int signal_size((Int)candidate.size());
850 //in the very unlikely case that size_t will not fit to int anymore this will be a problem of course
851 //for the sake of simplicity (we need here a signed int) we do not cast at every following comparison individually
852
853 //p_h_ind indicates if we are looking for a whole or a peak
854 Int p_h_ind = 1, end = 4 * (peak_cutoff - 1) - 1; //4 times and not 2 times, since we move by 0.5 m/z entities
855
856 std::vector<double> positions(end);
857 for (Int i = 0; i < end; ++i)
858 {
859 positions[i] = seed_mz - ((peak_cutoff - 1) * Constants::IW_NEUTRON_MASS - (i + 1) * Constants::IW_HALF_NEUTRON_MASS) / ((double)c + 1);
860 }
861
862 double l_score = 0, mid_val = 0;
863 Int start_index = distance(candidate.begin(), candidate.MZBegin(positions[0])) - 1;
864 for (Int v = 1; v <= end; ++v, ++p_h_ind)
865 {
866 do
867 {
868 if (start_index < signal_size - 1)
869 ++start_index;
870 else
871 break;
872 }
873 while (candidate[start_index].getMZ() < positions[v - 1]);
874
875 if (start_index <= 0 || start_index >= signal_size - 1) //unable to interpolate
876 {
877 continue;
878 }
879
880 c_left_iter2 = candidate.begin() + start_index - 1;
881 c_right_iter2 = c_left_iter2 + 1;
882
883 c_val = c_left_iter2->getIntensity() + (c_right_iter2->getIntensity() - c_left_iter2->getIntensity()) / (c_right_iter2->getMZ() - c_left_iter2->getMZ()) * (positions[v - 1] - c_left_iter2->getMZ());
884
885 if (v == (int)(ceil(end / 2.)))
886 {
887 l_score = c_score;
888 mid_val = c_val;
889 }
890
891 if (p_h_ind % 2 == 1) //I.e. a whole
892 {
893 c_score -= c_val;
894#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
895 if (trunc(seed_mz) == 874)
896 std::cout << -c_val << std::endl;
897#endif
898 }
899 else
900 {
901 c_score += c_val;
902#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
903 if (trunc(seed_mz) == 874)
904 std::cout << c_val << std::endl;
905#endif
906 }
907
908
909 start_index = distance(candidate.begin(), c_left_iter2);
910 }
911
912#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
913 std::ofstream ofile_score("scores.dat", ios::app);
914 std::ofstream ofile_check_score("check_scores.dat", ios::app);
915 ofile_score.close();
916 ofile_check_score.close();
917#endif
918
919#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
920 if (trunc(seed_mz) == 874)
921 std::cout << "final_score: " << seed_mz << "\t" << c_score << "\t l_score: " << l_score << "\t" << c_score - l_score - mid_val << "\t" << c_score - mid_val << "\t" << ampl_cutoff << std::endl;
922#endif
923
924 if (c_score - mid_val <= 0)
925 {
926 return 0;
927 }
928
929 if (c_score - mid_val <= ampl_cutoff)
930 {
931 return -1000;
932 }
933
934 if (l_score <= 0 || c_score - l_score - mid_val <= 0)
935 {
936 return 0;
937 }
938
939 return c_score;
940 }
941
942 template <typename PeakType>
944 const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
945 {
946 double c_score = 0, c_val;
947 typename MSSpectrum::const_iterator c_left_iter2, c_right_iter2;
948 Int signal_size((Int)candidate.size());
949
950 //p_h_ind indicates if we are looking for a whole or a peak
951 Int p_h_ind = 1, end = 4 * (peak_cutoff - 1) - 1; //4 times and not 2 times, since we move by 0.5 m/z entities
952
953 std::vector<double> positions(end);
954 for (Int i = 0; i < end; ++i)
955 {
956 positions[i] = seed_mz - ((peak_cutoff - 1) * Constants::IW_NEUTRON_MASS - (i + 1) * Constants::IW_HALF_NEUTRON_MASS) / ((double)c + 1);
957 }
958
959 double l_score = 0, mid_val = 0;
960 Int start_index = distance(candidate.begin(), candidate.MZBegin(positions[0])) - 1;
961 for (Int v = 1; v <= end; ++v, ++p_h_ind)
962 {
963 do
964 {
965 if (start_index < signal_size - 1)
966 ++start_index;
967 else
968 break;
969 }
970 while (candidate.getMZ(start_index) < positions[v - 1]);
971
972 if (start_index <= 0 || start_index >= signal_size - 1) //unable to interpolate
973 {
974 continue;
975 }
976
977 c_left_iter2 = candidate.begin() + start_index - 1;
978 c_right_iter2 = c_left_iter2 + 1;
979
980 c_val = candidate.getTransIntensity(start_index - 1) + (candidate.getTransIntensity(start_index) - candidate.getTransIntensity(start_index - 1)) / (c_right_iter2->getMZ() - c_left_iter2->getMZ()) * (positions[v - 1] - c_left_iter2->getMZ());
981 if (v == (int)(ceil(end / 2.)))
982 {
983 l_score = c_score;
984 mid_val = c_val;
985 }
986
987 if (p_h_ind % 2 == 1) //I.e. a whole
988 {
989 c_score -= c_val;
990 }
991 else
992 {
993 c_score += c_val;
994 }
995
996 start_index = distance(candidate.begin(), c_left_iter2);
997 }
998
999#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1000 std::ofstream ofile_score("scores.dat", ios::app);
1001 std::ofstream ofile_check_score("check_scores.dat", ios::app);
1002 ofile_score << c_check_point << "\t" << c_score << std::endl;
1003 ofile_score.close();
1004 ofile_check_score.close();
1005#endif
1006
1007 if (l_score <= 0 || c_score - l_score - mid_val <= 0 || c_score - mid_val <= ampl_cutoff)
1008 {
1009 return 0;
1010 }
1011
1012 return c_score;
1013 }
1014
1015 template <typename PeakType>
1017 const MSSpectrum& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
1018 {
1019 typename std::multimap<double, Box>::iterator iter;
1020 typename Box::iterator box_iter;
1021 std::vector<BoxElement> final_box;
1022 double c_mz, av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1023 double virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1024
1025 for (iter = tmp_boxes_->at(c).begin(); iter != tmp_boxes_->at(c).end(); ++iter)
1026 {
1027
1028 Box& c_box = iter->second;
1029 av_score = 0; av_mz = 0; av_intens = 0; av_abs_intens = 0; count = 0;
1030 virtual_av_mz = 0; virtual_av_intens = 0; virtual_av_abs_intens = 0; virtual_count = 0;
1031
1032 //Now, let's get the RT boundaries for the box
1033 for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1034 {
1035 if (box_iter->second.score == 0) //virtual helping point
1036 {
1037 if (count != 0)
1038 continue; //it is in any way not pure virtual
1039
1040 c_mz = box_iter->second.mz;
1041 virtual_av_intens += box_iter->second.intens;
1042 virtual_av_abs_intens += fabs(box_iter->second.intens);
1043 virtual_av_mz += c_mz * fabs(box_iter->second.intens);
1044 ++virtual_count;
1045 }
1046 else
1047 {
1048 c_mz = box_iter->second.mz;
1049 av_score += box_iter->second.score;
1050 av_intens += box_iter->second.intens;
1051 av_abs_intens += fabs(box_iter->second.intens);
1052 av_mz += c_mz * fabs(box_iter->second.intens);
1053 ++count;
1054 }
1055 }
1056
1057 if (count == 0) //pure virtual helping box
1058 {
1059 av_intens = virtual_av_intens / virtual_count;
1060 av_score = 0;
1061 av_mz = virtual_av_mz / virtual_av_abs_intens;
1062 }
1063 else
1064 {
1065 av_intens /= count;
1066 av_score /= count;
1067 av_mz /= av_abs_intens;
1068 }
1069
1070 BoxElement c_box_element;
1071 c_box_element.mz = av_mz;
1072 c_box_element.c = c;
1073 c_box_element.score = av_score;
1074 c_box_element.intens = av_intens;
1075
1076 c_box_element.RT = c_box.begin()->second.RT;
1077 final_box.push_back(c_box_element);
1078 }
1079
1080 Size num_o_feature = final_box.size();
1081 if (num_o_feature == 0)
1082 {
1083 tmp_boxes_->at(c).clear();
1084 return;
1085 }
1086
1087 //Computing the derivatives
1088 std::vector<double> bwd_diffs(num_o_feature, 0);
1089
1090 bwd_diffs[0] = 0;
1091 for (Size i = 1; i < num_o_feature; ++i)
1092 {
1093 bwd_diffs[i] = (final_box[i].intens - final_box[i - 1].intens) / (final_box[i].mz - final_box[i - 1].mz);
1094 }
1095
1096#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1097 std::ofstream ofile_bwd("bwd_cpu.dat");
1098 for (Size i = 0; i < num_o_feature; ++i)
1099 {
1100 ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
1101 }
1102 ofile_bwd.close();
1103#endif
1104
1105
1106 for (Size i = 0; i < num_o_feature - 1; ++i)
1107 {
1108 while (i < num_o_feature - 2)
1109 {
1110 if (final_box[i].score > 0 || final_box[i].score == -1000) //this has been an helping point
1111 break;
1112 ++i;
1113 }
1114
1115 if (bwd_diffs[i] > 0 && bwd_diffs[i + 1] < 0)
1116 {
1117 checkPositionForPlausibility_(candidate, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
1118 continue;
1119 }
1120 }
1121
1122 tmp_boxes_->at(c).clear();
1123 }
1124
1125 template <typename PeakType>
1127 {
1128 double av_intens = 0;
1129 for (UInt i = 0; i < scan.size(); ++i)
1130 {
1131 if (scan[i].getIntensity() >= 0)
1132 {
1133 av_intens += scan[i].getIntensity();
1134 }
1135 }
1136 return av_intens / (double)scan.size();
1137 }
1138
1139 template <typename PeakType>
1141 {
1142 double res = 0, intens;
1143 for (UInt i = 0; i < scan.size(); ++i)
1144 {
1145 if (scan[i].getIntensity() >= 0)
1146 {
1147 intens = scan[i].getIntensity();
1148 res += (intens - mean) * (intens - mean);
1149 }
1150 }
1151 return sqrt(res / (double)(scan.size() - 1));
1152 }
1153
1154 template <typename PeakType>
1155 double IsotopeWaveletTransform<PeakType>::getAvMZSpacing_(const MSSpectrum& scan) //, Int start_index, Int end_index)
1156 {
1157 std::vector<double> diffs(scan.size() - 1, 0);
1158 for (UInt i = 0; i < scan.size() - 1; ++i)
1159 {
1160 diffs[i] = scan[i + 1].getMZ() - scan[i].getMZ();
1161 }
1162
1163 sort(diffs.begin(), diffs.end());
1164 double av_MZ_spacing = 0;
1165 for (UInt i = 0; i < diffs.size() / 2; ++i)
1166 {
1167 av_MZ_spacing += diffs[i];
1168 }
1169
1170 return av_MZ_spacing / (diffs.size() / 2);
1171 }
1172
1173 template <typename PeakType>
1175 {
1176 double av_intens = 0;
1177 for (UInt i = 0; i < scan.size(); ++i)
1178 {
1179 if (scan.getTransIntensity(i) >= 0)
1180 {
1181 av_intens += scan.getTransIntensity(i);
1182 }
1183 }
1184 return av_intens / (double)scan.size();
1185 }
1186
1187 template <typename PeakType>
1189 {
1190 double res = 0, intens;
1191 for (UInt i = 0; i < scan.size(); ++i)
1192 {
1193 if (scan.getTransIntensity(i) >= 0)
1194 {
1195 intens = scan.getTransIntensity(i);
1196 res += (intens - mean) * (intens - mean);
1197 }
1198 }
1199 return sqrt(res / (double)(scan.size() - 1));
1200 }
1201
1202 template <typename PeakType>
1203 void IsotopeWaveletTransform<PeakType>::push2Box_(const double mz, const UInt scan, UInt c,
1204 const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, double ref_intens)
1205 {
1206 const double dist_constraint(Constants::IW_HALF_NEUTRON_MASS / (double)max_charge_);
1207
1208 typename std::multimap<double, Box>::iterator upper_iter(open_boxes_.upper_bound(mz));
1209 typename std::multimap<double, Box>::iterator lower_iter(open_boxes_.lower_bound(mz));
1210
1211 if (lower_iter != open_boxes_.end())
1212 {
1213 //Ugly, but necessary due to the implementation of STL lower_bound
1214 if (mz != lower_iter->first && lower_iter != open_boxes_.begin())
1215 {
1216 --lower_iter;
1217 }
1218 }
1219
1220 typename std::multimap<double, Box>::iterator insert_iter;
1221 bool create_new_box = true;
1222 if (lower_iter == open_boxes_.end()) //I.e. there is no open Box for that mz position
1223 {
1224 //There is another special case to be considered here:
1225 //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1226 //then the lower bound for the new mz value is box.end and this would usually force a new entry
1227 if (!open_boxes_.empty())
1228 {
1229 if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1230 {
1231 create_new_box = false;
1232 insert_iter = lower_iter;
1233 }
1234 }
1235 else
1236 {
1237 create_new_box = true;
1238 }
1239 }
1240 else
1241 {
1242 if (upper_iter == open_boxes_.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1243 {
1244 insert_iter = lower_iter;
1245 create_new_box = false;
1246 }
1247 else
1248 {
1249 create_new_box = true;
1250 }
1251 }
1252
1253
1254 if (upper_iter != open_boxes_.end() && lower_iter != open_boxes_.end())
1255 {
1256 //Here is the question if you should figure out the smallest charge .... and then
1257
1258 //Figure out which entry is closer to m/z
1259 double dist_lower = fabs(lower_iter->first - mz);
1260 double dist_upper = fabs(upper_iter->first - mz);
1261 dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1262 dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1263
1264 if (dist_lower >= dist_constraint && dist_upper >= dist_constraint) // they are both too far away
1265 {
1266 create_new_box = true;
1267 }
1268 else
1269 {
1270 insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1271 create_new_box = false;
1272 }
1273 }
1274
1275 BoxElement element;
1276 element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens = intens; element.ref_intens = ref_intens;
1277 element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
1278
1279
1280 if (create_new_box == false)
1281 {
1282 std::pair<UInt, BoxElement> help2(scan, element);
1283 insert_iter->second.insert(help2);
1284
1285 //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
1286 Box replacement(insert_iter->second);
1287
1288 //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1289 //Also note that we already inserted the new entry, leading to size-1.
1290 double c_mz = insert_iter->first * (insert_iter->second.size() - 1) + mz;
1291 c_mz /= ((double) insert_iter->second.size());
1292
1293 //Now let's remove the old and insert the new one
1294 open_boxes_.erase(insert_iter);
1295 std::pair<double, std::multimap<UInt, BoxElement> > help3(c_mz, replacement);
1296 open_boxes_.insert(help3);
1297 }
1298 else
1299 {
1300 std::pair<UInt, BoxElement> help2(scan, element);
1301 std::multimap<UInt, BoxElement> help3;
1302 help3.insert(help2);
1303 std::pair<double, std::multimap<UInt, BoxElement> > help4(mz, help3);
1304 open_boxes_.insert(help4);
1305 }
1306 }
1307
1308 template <typename PeakType>
1310 const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end)
1311 {
1312 const double dist_constraint(Constants::IW_HALF_NEUTRON_MASS / (double)max_charge_);
1313
1314 std::multimap<double, Box>& tmp_box(tmp_boxes_->at(c));
1315 typename std::multimap<double, Box>::iterator upper_iter(tmp_box.upper_bound(mz));
1316 typename std::multimap<double, Box>::iterator lower_iter(tmp_box.lower_bound(mz));
1317
1318 if (lower_iter != tmp_box.end())
1319 {
1320 //Ugly, but necessary due to the implementation of STL lower_bound
1321 if (mz != lower_iter->first && lower_iter != tmp_box.begin())
1322 {
1323 --lower_iter;
1324 }
1325 }
1326
1327 typename std::multimap<double, Box>::iterator insert_iter;
1328 bool create_new_box = true;
1329 if (lower_iter == tmp_box.end()) //I.e. there is no tmp Box for that mz position
1330 {
1331 //There is another special case to be considered here:
1332 //Assume that the current box contains only a single element that is (slightly) smaller than the new mz value,
1333 //then the lower bound for the new mz value is box.end and this would usually force a new entry
1334 if (!tmp_box.empty())
1335 {
1336 if (fabs((--lower_iter)->first - mz) < dist_constraint) //matching box
1337 {
1338 create_new_box = false;
1339 insert_iter = lower_iter;
1340 }
1341 }
1342 else
1343 {
1344 create_new_box = true;
1345 }
1346 }
1347 else
1348 {
1349 if (upper_iter == tmp_box.end() && fabs(lower_iter->first - mz) < dist_constraint) //Found matching Box
1350 {
1351 insert_iter = lower_iter;
1352 create_new_box = false;
1353 }
1354 else
1355 {
1356 create_new_box = true;
1357 }
1358 }
1359
1360
1361 if (upper_iter != tmp_box.end() && lower_iter != tmp_box.end())
1362 {
1363 //Figure out which entry is closer to m/z
1364 double dist_lower = fabs(lower_iter->first - mz);
1365 double dist_upper = fabs(upper_iter->first - mz);
1366 dist_lower = (dist_lower < dist_constraint) ? dist_lower : INT_MAX;
1367 dist_upper = (dist_upper < dist_constraint) ? dist_upper : INT_MAX;
1368
1369 if (dist_lower >= dist_constraint && dist_upper >= dist_constraint) // they are both too far away
1370 {
1371 create_new_box = true;
1372 }
1373 else
1374 {
1375 insert_iter = (dist_lower < dist_upper) ? lower_iter : upper_iter;
1376 create_new_box = false;
1377 }
1378 }
1379
1380 BoxElement element;
1381 element.c = c; element.mz = mz; element.score = score; element.RT = rt; element.intens = intens; element.ref_intens = -1000;
1382 element.RT_index = scan; element.MZ_begin = MZ_begin; element.MZ_end = MZ_end;
1383
1384 if (create_new_box == false)
1385 {
1386 std::pair<UInt, BoxElement> help2(scan, element);
1387 insert_iter->second.insert(help2);
1388
1389 //Unfortunately, we need to change the m/z key to the average of all keys inserted in that box.
1390 Box replacement(insert_iter->second);
1391
1392 //We cannot divide both m/z by 2, since we already inserted some m/zs whose weight would be lowered.
1393 //Also note that we already inserted the new entry, leading to size-1.
1394 double c_mz = insert_iter->first * (insert_iter->second.size() - 1) + mz;
1395 c_mz /= ((double) insert_iter->second.size());
1396
1397 //Now let's remove the old and insert the new one
1398 tmp_box.erase(insert_iter);
1399 std::pair<double, std::multimap<UInt, BoxElement> > help3(c_mz, replacement);
1400 tmp_box.insert(help3);
1401 }
1402 else
1403 {
1404 std::pair<UInt, BoxElement> help2(scan, element);
1405 std::multimap<UInt, BoxElement> help3;
1406 help3.insert(help2);
1407
1408 std::pair<double, std::multimap<UInt, BoxElement> > help4(mz, help3);
1409 tmp_box.insert(help4);
1410 }
1411 }
1412
1413 template <typename PeakType>
1414 void IsotopeWaveletTransform<PeakType>::updateBoxStates(const PeakMap& map, const Size scan_index, const UInt RT_interleave,
1415 const UInt RT_votes_cutoff, const Int front_bound, const Int end_bound)
1416 {
1417 typename std::multimap<double, Box>::iterator iter, iter2;
1418
1419 if ((Int)scan_index == end_bound && end_bound != (Int)map.size() - 1)
1420 {
1421 for (iter = open_boxes_.begin(); iter != open_boxes_.end(); ++iter)
1422 {
1423#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1424 std::cout << "LOW THREAD insert in end_box " << iter->first << std::endl;
1425 typename Box::iterator dings;
1426 for (dings = iter->second.begin(); dings != iter->second.end(); ++dings)
1427 std::cout << map[dings->first].getRT() << "\t" << dings->second.c + 1 << std::endl;
1428#endif
1429 end_boxes_.insert(*iter);
1430 }
1431 open_boxes_.clear();
1432 return;
1433 }
1434
1435 for (iter = open_boxes_.begin(); iter != open_boxes_.end(); )
1436 {
1437#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1438 if (front_bound > 0)
1439 {
1440 std::cout << "HIGH THREAD open box. " << iter->first << "\t current scan index " << scan_index << "\t" << ((iter->second.begin()))->first << "\t of last scan " << map.size() - 1 << "\t" << front_bound << std::endl;
1441 }
1442#endif
1443
1444 //For each Box we need to figure out, if and when the last RT value has been inserted
1445 UInt lastScan = (--(iter->second.end()))->first;
1446 if (scan_index - lastScan > RT_interleave + 1 || scan_index == map.size() - 1) //I.e. close the box!
1447 {
1448 if (iter->second.begin()->first - front_bound <= RT_interleave + 1 && front_bound > 0)
1449 {
1450 iter2 = iter;
1451 ++iter2;
1452#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1453 std::cout << "HIGH THREAD insert in front_box " << iter->first << std::endl;
1454#endif
1455 front_boxes_.insert(*iter);
1456 open_boxes_.erase(iter);
1457 iter = iter2;
1458 continue;
1459 }
1460
1461 iter2 = iter;
1462 ++iter2;
1463 //Please do **NOT** simplify the upcoming lines.
1464 //The 'obvious' overhead is necessary since the object represented by iter might be erased
1465 //by push2Box which might be called by extendBox_.
1466 if (iter->second.size() >= RT_votes_cutoff)
1467 {
1468 //extendBox_ (map, iter->second);
1469 iter = iter2;
1470 closed_boxes_.insert(*(--iter));
1471 }
1472 open_boxes_.erase(iter);
1473 iter = iter2;
1474 }
1475 else
1476 {
1477 ++iter;
1478 }
1479 }
1480 }
1481
1482 template <typename PeakType>
1484 {
1485#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1486 std::cout << "**** CHECKING FOR BOX EXTENSIONS ****" << std::endl;
1487#endif
1488
1489 //Determining the elution profile
1490 typename Box::const_iterator iter;
1491 std::vector<double> elution_profile(box.size());
1492 UInt index = 0;
1493 for (iter = box.begin(); iter != box.end(); ++iter, ++index)
1494 {
1495 for (Size i = iter->second.MZ_begin; i != iter->second.MZ_end; ++i)
1496 {
1497 elution_profile[index] += map[iter->second.RT_index][i].getIntensity();
1498 }
1499 elution_profile[index] /= iter->second.MZ_end - iter->second.MZ_begin + 1.;
1500 }
1501
1502 double max = 0;
1503 Int max_index = INT_MIN;
1504 for (Size i = 0; i < elution_profile.size(); ++i)
1505 {
1506 if (elution_profile[i] > max)
1507 {
1508 max_index = i;
1509 max = elution_profile[i];
1510 }
1511 }
1512
1513 Int max_extension = (Int)(elution_profile.size()) - 2 * max_index;
1514
1515 double av_elution = 0;
1516 for (Size i = 0; i < elution_profile.size(); ++i)
1517 {
1518 av_elution += elution_profile[i];
1519 }
1520 av_elution /= (double)elution_profile.size();
1521
1522 double sd_elution = 0;
1523 for (Size i = 0; i < elution_profile.size(); ++i)
1524 {
1525 sd_elution += (av_elution - elution_profile[i]) * (av_elution - elution_profile[i]);
1526 }
1527 sd_elution /= (double)(elution_profile.size() - 1);
1528 sd_elution = sqrt(sd_elution);
1529
1530 //Determine average m/z monoisotopic pos
1531 double av_mz = 0;
1532 for (iter = box.begin(); iter != box.end(); ++iter, ++index)
1533 {
1534 av_mz += iter->second.mz;
1535#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1536 std::cout << iter->second.RT << "\t" << iter->second.mz << "\t" << iter->second.c + 1 << std::endl;
1537#endif
1538 }
1539 av_mz /= (double)box.size();
1540
1541
1542 //Boundary check
1543 if ((Int)(box.begin()->second.RT_index) - 1 < 0)
1544 {
1545 return;
1546 }
1547
1548 UInt pre_index = box.begin()->second.RT_index - 1;
1549 typename MSSpectrum::const_iterator c_iter = map[pre_index].MZBegin(av_mz);
1550 double pre_elution = 0;
1551
1552 double mz_start = map[pre_index + 1][box.begin()->second.MZ_begin].getMZ();
1553 double mz_end = map[pre_index + 1][box.begin()->second.MZ_end].getMZ();
1554
1555 typename MSSpectrum::const_iterator mz_start_iter = map[pre_index].MZBegin(mz_start), mz_end_iter = map[pre_index].MZBegin(mz_end);
1556 for (typename MSSpectrum::const_iterator mz_iter = mz_start_iter; mz_iter != mz_end_iter; ++mz_iter)
1557 {
1558 pre_elution += mz_iter->getIntensity();
1559 }
1560
1561
1562 //Do we need to extend at all?
1563 if (pre_elution <= av_elution - 2 * sd_elution)
1564 {
1565 return;
1566 }
1567
1568 Int first_index = box.begin()->second.RT_index;
1569 for (Int i = 1; i < max_extension; ++i)
1570 {
1571 Int c_index = first_index - i;
1572 if (c_index < 0)
1573 {
1574 break;
1575 }
1576
1577 //CHECK Majority vote for charge???????????????
1578#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1579 std::cout << box.begin()->second.RT << "\t" << av_mz << "\t" << box.begin()->second.c + 1 << "\t" << " extending the box " << std::endl;
1580#endif
1581
1582 push2Box_(av_mz, c_index, box.begin()->second.c, box.begin()->second.score, c_iter->getIntensity(),
1583 map[c_index].getRT(), box.begin()->second.MZ_begin, box.begin()->second.MZ_end);
1584 }
1585 }
1586
1587 template <typename PeakType>
1589 const MSSpectrum& ref, const UInt scan_index, const UInt c, const bool check_PPMs)
1590 {
1591 typename std::multimap<double, Box>::iterator iter;
1592 typename Box::iterator box_iter;
1593 std::vector<BoxElement> final_box;
1594 double c_mz, av_score = 0, av_mz = 0, av_intens = 0, av_abs_intens = 0, count = 0;
1595 double virtual_av_mz = 0, virtual_av_intens = 0, virtual_av_abs_intens = 0, virtual_count = 0;
1596
1597 for (iter = tmp_boxes_->at(c).begin(); iter != tmp_boxes_->at(c).end(); ++iter)
1598 {
1599 Box& c_box = iter->second;
1600 av_score = 0; av_mz = 0; av_intens = 0; av_abs_intens = 0; count = 0;
1601 virtual_av_mz = 0; virtual_av_intens = 0; virtual_av_abs_intens = 0; virtual_count = 0;
1602
1603 for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1604 {
1605 if (box_iter->second.score == 0) //virtual helping point
1606 {
1607 if (count != 0)
1608 continue; //it is in any way not pure virtual
1609
1610 c_mz = box_iter->second.mz;
1611 virtual_av_intens += box_iter->second.intens;
1612 virtual_av_abs_intens += fabs(box_iter->second.intens);
1613 virtual_av_mz += c_mz * fabs(box_iter->second.intens);
1614 ++virtual_count;
1615 }
1616 else
1617 {
1618 c_mz = box_iter->second.mz;
1619 av_score += box_iter->second.score;
1620 av_intens += box_iter->second.intens;
1621 av_abs_intens += fabs(box_iter->second.intens);
1622 av_mz += c_mz * fabs(box_iter->second.intens);
1623 ++count;
1624 }
1625 }
1626
1627 if (count == 0) //pure virtual helping box
1628 {
1629 av_intens = virtual_av_intens / virtual_count;
1630 av_score = 0;
1631 av_mz = virtual_av_mz / virtual_av_abs_intens;
1632 }
1633 else
1634 {
1635 av_intens /= count;
1636 av_score /= count;
1637 av_mz /= av_abs_intens;
1638 }
1639
1640 BoxElement c_box_element;
1641 c_box_element.mz = av_mz;
1642 c_box_element.c = c;
1643 c_box_element.score = av_score;
1644 c_box_element.intens = av_intens;
1645
1646 c_box_element.RT = c_box.begin()->second.RT;
1647
1648 final_box.push_back(c_box_element);
1649 }
1650
1651 UInt num_o_feature = final_box.size();
1652 if (num_o_feature == 0)
1653 {
1654 tmp_boxes_->at(c).clear();
1655 return;
1656 }
1657
1658 //Computing the derivatives
1659 std::vector<double> bwd_diffs(num_o_feature, 0);
1660
1661 bwd_diffs[0] = 0;
1662 for (UInt i = 1; i < num_o_feature; ++i)
1663 {
1664 bwd_diffs[i] = (final_box[i].intens - final_box[i - 1].intens) / (final_box[i].mz - final_box[i - 1].mz);
1665 }
1666
1667#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1668 std::ofstream ofile_bwd("bwd_gpu.dat");
1669 for (UInt i = 0; i < num_o_feature; ++i)
1670 {
1671 ofile_bwd << final_box[i].mz << "\t" << bwd_diffs[i] << std::endl;
1672 }
1673 ofile_bwd.close();
1674#endif
1675
1676 for (UInt i = 0; i < num_o_feature - 1; ++i)
1677 {
1678 while (i < num_o_feature - 2)
1679 {
1680 if (final_box[i].score > 0 || final_box[i].score == -1000) //this has been an helping point
1681 break;
1682 ++i;
1683 }
1684
1685 if (bwd_diffs[i] > 0 && bwd_diffs[i + 1] < 0)
1686 {
1687 checkPositionForPlausibility_(candidates, ref, final_box[i].mz, final_box[i].c, scan_index, check_PPMs, final_box[i].intens, final_box[i].score);
1688 continue;
1689 }
1690 }
1691
1692 tmp_boxes_->at(c).clear();
1693 }
1694
1695 template <typename PeakType>
1697 {
1698 FeatureMap feature_map;
1699 typename std::multimap<double, Box>::iterator iter;
1700 typename Box::iterator box_iter;
1701 UInt best_charge_index; double best_charge_score, c_mz, c_RT; UInt c_charge;
1702 double av_intens = 0, av_ref_intens = 0, av_score = 0, av_mz = 0, av_RT = 0, mz_cutoff, sum_of_ref_intenses_g;
1703
1704 for (iter = closed_boxes_.begin(); iter != closed_boxes_.end(); ++iter)
1705 {
1706 sum_of_ref_intenses_g = 0;
1707 Box& c_box = iter->second;
1708 std::vector<double> charge_votes(max_charge_, 0), charge_binary_votes(max_charge_, 0);
1709 bool restart = false;
1710
1711 //Let's first determine the charge
1712 //Therefore, we can use two types of votes: qualitative ones (charge_binary_votes) or quantitative ones (charge_votes)
1713 for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1714 {
1715#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1716 if (trunc(box_iter->second.mz) == 874)
1717 std::cout << box_iter->second.c << "\t" << box_iter->second.intens << "\t" << box_iter->second.score << std::endl;
1718#endif
1719
1720 if (box_iter->second.score == -1000)
1721 {
1722 restart = true;
1723 break;
1724 }
1725
1726 charge_votes[box_iter->second.c] += box_iter->second.intens; //score; Do not use score, can get problematic for charge state 2 vs 4
1727 ++charge_binary_votes[box_iter->second.c];
1728 }
1729
1730 if (restart)
1731 {
1732 continue;
1733 }
1734
1735 //... determining the best fitting charge
1736 best_charge_index = 0; best_charge_score = 0;
1737 for (UInt i = 0; i < max_charge_; ++i)
1738 {
1739 if (charge_votes[i] > best_charge_score)
1740 {
1741 best_charge_index = i;
1742 best_charge_score = charge_votes[i];
1743 }
1744 }
1745
1746 //Pattern found in too few RT scan
1747 if (charge_binary_votes[best_charge_index] < RT_votes_cutoff && RT_votes_cutoff <= map.size())
1748 {
1749 continue;
1750 }
1751
1752 c_charge = best_charge_index + 1; //that's the finally predicted charge state for the pattern
1753
1754 av_intens = 0; av_ref_intens = 0; av_score = 0; av_mz = 0; av_RT = 0;
1755 //Now, let's get the RT boundaries for the box
1756 std::vector<DPosition<2> > point_set;
1757 double sum_of_ref_intenses_l;
1758 for (box_iter = c_box.begin(); box_iter != c_box.end(); ++box_iter)
1759 {
1760 sum_of_ref_intenses_l = 0;
1761 c_mz = box_iter->second.mz;
1762 c_RT = box_iter->second.RT;
1763
1764 mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(c_mz, c_charge);
1765
1766 point_set.push_back(DPosition<2>(c_RT, c_mz - Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge));
1767 //-1 since we are already at the first peak and +0.75, since this includes the last peak of the wavelet as a whole
1768 point_set.push_back(DPosition<2>(c_RT, c_mz + mz_cutoff / (double)c_charge));
1769
1770#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1771 std::cout << "Intenstype: " << intenstype_ << std::endl;
1772#endif
1773 if (intenstype_ == "ref")
1774 {
1775 //Find monoisotopic max
1776 const MSSpectrum& c_spec(map[box_iter->second.RT_index]);
1777 //'Correct' possible shift
1778 for (unsigned int i = 0; i < mz_cutoff; ++i)
1779 {
1780 typename MSSpectrum::const_iterator h_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge + Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge), hc_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge);
1781
1782 hc_iter = c_spec.MZBegin(c_mz + i * Constants::IW_NEUTRON_MASS / c_charge);
1783
1784 while (h_iter != c_spec.begin())
1785 {
1786
1787#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1788 if (trunc(c_mz) == 874)
1789 {
1790 std::cout << "cmz: " << c_mz + i * Constants::IW_NEUTRON_MASS / c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << h_iter->getMZ() << "\t" << h_iter->getIntensity() << std::endl;
1791 }
1792#endif
1793
1794 --h_iter;
1795 if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1796 {
1797 hc_iter = h_iter;
1798 }
1799
1800 if (c_mz + i * Constants::IW_NEUTRON_MASS / c_charge - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (double)c_charge)
1801 {
1802 break;
1803 }
1804 }
1805#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1806 if (trunc(c_mz) == 874)
1807 {
1808 std::cout << "c_mz: " << c_mz + i * Constants::IW_NEUTRON_MASS / c_charge << "\t" << hc_iter->getMZ() << "\t" << hc_iter->getIntensity() << "\t" << i * Constants::IW_NEUTRON_MASS / c_charge << "\t";
1809 }
1810#endif
1811 sum_of_ref_intenses_l += hc_iter->getIntensity();
1812#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1813 if (trunc(c_mz) == 874)
1814 {
1815 std::cout << sum_of_ref_intenses_l << "********" << std::endl;
1816 }
1817#endif
1818 }
1819 }
1820
1821 if (best_charge_index == box_iter->second.c)
1822 {
1823 av_score += box_iter->second.score;
1824 av_intens += box_iter->second.intens;
1825 av_ref_intens += box_iter->second.ref_intens;
1826 sum_of_ref_intenses_g += sum_of_ref_intenses_l;
1827 av_mz += c_mz * box_iter->second.intens;
1828 }
1829 av_RT += c_RT;
1830 }
1831
1832 av_mz /= av_intens;
1833 av_ref_intens /= (double)charge_binary_votes[best_charge_index];
1834 av_score /= (double)charge_binary_votes[best_charge_index];
1835 av_RT /= (double)c_box.size();
1836
1837#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1838 if (trunc(av_mz) == 874)
1839 std::cout << av_mz << "\t" << best_charge_index << "\t" << best_charge_score << std::endl;
1840#endif
1841
1842 Feature c_feature;
1843 ConvexHull2D c_conv_hull;
1844 c_conv_hull.addPoints(point_set);
1845 c_feature.setCharge(c_charge);
1846 c_feature.setConvexHulls(std::vector<ConvexHull2D>(1, c_conv_hull));
1847
1848 //This makes the intensity value independent of the m/z (the lambda) value (Skellam distribution)
1849 if (intenstype_ == "corrected")
1850 {
1851 double lambda = IsotopeWavelet::getLambdaL(av_mz * c_charge);
1852 av_intens /= exp(-2 * lambda) * boost::math::cyl_bessel_i(0.0, 2.0 * lambda);
1853 }
1854 if (intenstype_ == "ref")
1855 {
1856#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
1857 if (trunc(c_mz) == 874)
1858 {
1859 std::cout << sum_of_ref_intenses_g << "####" << std::endl;
1860 }
1861#endif
1862
1863 av_intens = sum_of_ref_intenses_g;
1864 }
1865
1866 c_feature.setMZ(av_mz);
1867 c_feature.setIntensity(av_intens);
1868 c_feature.setRT(av_RT);
1869 c_feature.setOverallQuality(av_score);
1870 feature_map.push_back(c_feature);
1871 }
1872
1873 return feature_map;
1874 }
1875
1876 template <typename PeakType>
1878 const MSSpectrum& ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
1879 {
1880 typename MSSpectrum::const_iterator iter, ref_iter;
1881 UInt peak_cutoff;
1882 peak_cutoff = IsotopeWavelet::getNumPeakCutOff(seed_mz, c + 1);
1883
1884 iter = candidate.MZBegin(seed_mz);
1885 //we can ignore those cases
1886 if (iter == candidate.begin() || iter == candidate.end())
1887 {
1888 return false;
1889 }
1890
1891 std::pair<double, double> reals;
1892 ref_iter = ref.MZBegin(seed_mz);
1893 //Correct the position
1894 double real_mz, real_intens;
1895 if (check_PPMs)
1896 {
1897 //reals = checkPPMTheoModel_(ref, iter->getMZ(), c);
1898 //real_mz = reals.first; real_intens = reals.second;
1899 //if (real_mz <= 0 || real_intens <= 0)
1900 //{
1901 typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
1902 while (h_iter != ref.begin())
1903 {
1904 --h_iter;
1905 if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1906 {
1907 hc_iter = h_iter;
1908 }
1909 else
1910 {
1911 break;
1912 }
1913
1914 if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
1915 {
1916 return false;
1917 }
1918 }
1919 reals = checkPPMTheoModel_(ref, h_iter->getMZ(), c);
1920 real_mz = reals.first; real_intens = reals.second;
1921 if (real_mz <= 0 || real_intens <= 0)
1922 {
1923 return false;
1924 }
1925 real_mz = h_iter->getMZ();
1926 real_intens = h_iter->getIntensity();
1927 //}
1928 }
1929 else
1930 {
1931 reals = std::pair<double, double>(seed_mz, ref_iter->getIntensity());
1932 real_mz = reals.first; real_intens = reals.second;
1933
1934 if (real_mz <= 0 || real_intens <= 0)
1935 {
1936 typename MSSpectrum::const_iterator h_iter = ref_iter, hc_iter = ref_iter;
1937 while (h_iter != ref.begin())
1938 {
1939 --h_iter;
1940 if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
1941 {
1942 hc_iter = h_iter;
1943 }
1944 else
1945 {
1946 break;
1947 }
1948
1949 if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
1950 {
1951 return false;
1952 }
1953 }
1954 real_mz = h_iter->getMZ(); real_intens = h_iter->getIntensity();
1955 if (real_mz <= 0 || real_intens <= 0)
1956 {
1957 return false;
1958 }
1959 real_mz = h_iter->getMZ();
1960 real_intens = h_iter->getIntensity();
1961 }
1962 }
1963
1964 double c_score = scoreThis_(candidate, peak_cutoff, real_mz, c, 0);
1965
1966 if (c_score <= 0)
1967 {
1968 return false;
1969 }
1970
1971 double mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(real_mz, c + 1);
1972 typename MSSpectrum::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.));
1973 typename MSSpectrum::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz + mz_cutoff / (c + 1.), ref.end());
1974 if (real_r_MZ_iter == ref.end())
1975 {
1976 --real_r_MZ_iter;
1977 }
1978
1979
1980 UInt real_mz_begin = distance(ref.begin(), real_l_MZ_iter);
1981 UInt real_mz_end = distance(ref.begin(), real_r_MZ_iter);
1982
1983 if (prev_score == -1000)
1984 {
1985 push2Box_(real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
1986 }
1987 else
1988 {
1989 push2Box_(real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
1990 }
1991 return true;
1992 }
1993
1994 template <typename PeakType>
1996 const MSSpectrum& ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
1997 {
1998 typename MSSpectrum::const_iterator iter, ref_iter;
1999 UInt peak_cutoff;
2000 peak_cutoff = IsotopeWavelet::getNumPeakCutOff(seed_mz, c + 1);
2001
2002 iter = candidate.MZBegin(seed_mz);
2003 //we can ignore those cases
2004 if (iter == candidate.begin() || iter == candidate.end())
2005 {
2006 return false;
2007 }
2008
2009 std::pair<double, double> reals;
2010 ref_iter = ref.MZBegin(seed_mz);
2011 //Correct the position
2012 double real_mz, real_intens;
2013 if (check_PPMs)
2014 {
2015 reals = checkPPMTheoModel_(ref, iter->getMZ(), c);
2016 real_mz = reals.first; real_intens = reals.second;
2017 //if (real_mz <= 0 || real_intens <= 0)
2018 //{
2019 auto h_iter = ref_iter, hc_iter = ref_iter;
2020 while (h_iter != ref.begin())
2021 {
2022 --h_iter;
2023 if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2024 {
2025 hc_iter = h_iter;
2026 }
2027 else
2028 {
2029 break;
2030 }
2031
2032 if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2033 {
2034 return false;
2035 }
2036 }
2037 ++h_iter;
2038 reals = checkPPMTheoModel_(ref, h_iter->getMZ(), c);
2039 real_mz = reals.first; real_intens = reals.second;
2040
2041#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2042 std::cout << "Plausibility check old_mz: " << iter->getMZ() << "\t" << real_mz << std::endl;
2043#endif
2044
2045 if (real_mz <= 0 || real_intens <= 0)
2046 {
2047 return false;
2048 }
2049 real_mz = h_iter->getMZ();
2050 real_intens = h_iter->getIntensity();
2051 //}
2052 }
2053 else
2054 {
2055 reals = std::pair<double, double>(seed_mz, ref_iter->getIntensity());
2056 real_mz = reals.first; real_intens = reals.second;
2057
2058 if (real_mz <= 0 || real_intens <= 0)
2059 {
2060 auto h_iter = ref_iter, hc_iter = ref_iter;
2061 while (h_iter != ref.begin())
2062 {
2063 --h_iter;
2064 if (h_iter->getIntensity() > hc_iter->getIntensity() || (h_iter->getIntensity() == hc_iter->getIntensity() && hc_iter->getIntensity() == 0))
2065 {
2066 hc_iter = h_iter;
2067 }
2068 else
2069 {
2070 break;
2071 }
2072
2073 if (seed_mz - h_iter->getMZ() > Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.))
2074 {
2075 return false;
2076 }
2077 }
2078 real_mz = h_iter->getMZ(); real_intens = h_iter->getIntensity();
2079 if (real_mz <= 0 || real_intens <= 0)
2080 {
2081 return false;
2082 }
2083 real_mz = h_iter->getMZ();
2084 real_intens = h_iter->getIntensity();
2085 }
2086 }
2087
2088 double c_score = scoreThis_(candidate, peak_cutoff, real_mz, c, 0);
2089
2090 if (c_score <= 0)
2091 {
2092 return false;
2093 }
2094
2095 double mz_cutoff = IsotopeWavelet::getMzPeakCutOffAtMonoPos(real_mz, c + 1);
2096 typename MSSpectrum::const_iterator real_l_MZ_iter = ref.MZBegin(real_mz - Constants::IW_QUARTER_NEUTRON_MASS / (c + 1.));
2097 typename MSSpectrum::const_iterator real_r_MZ_iter = ref.MZBegin(real_l_MZ_iter, real_mz + mz_cutoff / (c + 1.), ref.end());
2098 if (real_r_MZ_iter == ref.end())
2099 {
2100 --real_r_MZ_iter;
2101 }
2102
2103
2104 UInt real_mz_begin = distance(ref.begin(), real_l_MZ_iter);
2105 UInt real_mz_end = distance(ref.begin(), real_r_MZ_iter);
2106
2107 if (prev_score == -1000)
2108 {
2109 push2Box_(real_mz, scan_index, c, prev_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2110 }
2111 else
2112 {
2113 push2Box_(real_mz, scan_index, c, c_score, transintens, ref.getRT(), real_mz_begin, real_mz_end, real_intens);
2114 }
2115
2116 return true;
2117 }
2118
2119 template <typename PeakType>
2120 std::pair<double, double> IsotopeWaveletTransform<PeakType>::checkPPMTheoModel_(const MSSpectrum& ref, const double c_mz, const UInt c)
2121 {
2122 double mass = c_mz * (c + 1) - Constants::IW_PROTON_MASS * (c);
2123 double ppms = getPPMs_(peptideMassRule_(mass), mass);
2125 {
2126#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2127 std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "REJECT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2128#endif
2129 return std::pair<double, double>(-1, -1);
2130 }
2131
2132#ifdef OPENMS_DEBUG_ISOTOPE_WAVELET
2133 std::cout << ::std::setprecision(8) << std::fixed << c_mz << "\t =(" << "ISO_WAVE" << ")> " << "ACCEPT \t" << ppms << " (rule: " << peptideMassRule_(mass) << " got: " << mass << ")" << std::endl;
2134#endif
2135 return std::pair<double, double>(c_mz, ref.MZBegin(c_mz)->getIntensity());
2136 }
2137
2138} //namespace
2139
2140#pragma clang diagnostic pop
2141
2142
void setCharge(const ChargeType &ch)
Set charge state.
Mutable iterator for the ConstRefVector.
Definition: ConstRefVector.h:196
This vector holds pointer to the elements of another container.
Definition: ConstRefVector.h:44
Iterator begin()
See std::vector documentation.
Definition: ConstRefVector.h:371
void sortByIntensity(bool reverse=false)
Sorting.
Definition: ConstRefVector.h:693
Iterator end()
See std::vector documentation.
Definition: ConstRefVector.h:377
A 2-dimensional hull representation in [counter]clockwise direction - depending on axis labelling.
Definition: ConvexHull2D.h:47
void addPoints(const PointArrayType &points)
void push_back(const VectorElement &f)
Definition: ExposedVector.h:161
A container for features.
Definition: FeatureMap.h:80
An LC-MS feature.
Definition: Feature.h:46
void setConvexHulls(const std::vector< ConvexHull2D > &hulls)
Set the convex hulls of single mass traces.
void setOverallQuality(QualityType q)
Set the overall quality.
Internally (only by GPUs) used data structure . It allows efficient data exchange between CPU and GPU...
Definition: IsotopeWaveletTransform.h:71
double getRT() const
Definition: IsotopeWaveletTransform.h:104
TransSpectrum(const MSSpectrum *reference)
Definition: IsotopeWaveletTransform.h:83
double getRefIntensity(const UInt i) const
Definition: IsotopeWaveletTransform.h:116
TransSpectrum()
Definition: IsotopeWaveletTransform.h:77
MSSpectrum::const_iterator end() const
Definition: IsotopeWaveletTransform.h:167
const MSSpectrum * getRefSpectrum()
Definition: IsotopeWaveletTransform.h:140
double getMZ(const UInt i) const
Definition: IsotopeWaveletTransform.h:110
virtual ~TransSpectrum()
Definition: IsotopeWaveletTransform.h:90
const MSSpectrum * getRefSpectrum() const
Definition: IsotopeWaveletTransform.h:146
std::vector< float > * trans_intens_
The intensities of the transform.
Definition: IsotopeWaveletTransform.h:182
void setTransIntensity(const UInt i, const double intens)
Definition: IsotopeWaveletTransform.h:128
MSSpectrum::const_iterator MZEnd(const double mz) const
Definition: IsotopeWaveletTransform.h:160
Size size() const
Definition: IsotopeWaveletTransform.h:134
virtual void destroy()
Definition: IsotopeWaveletTransform.h:95
double getTransIntensity(const UInt i) const
Definition: IsotopeWaveletTransform.h:122
const MSSpectrum * reference_
The reference spectrum.
Definition: IsotopeWaveletTransform.h:181
MSSpectrum::const_iterator MZBegin(const double mz) const
Definition: IsotopeWaveletTransform.h:153
MSSpectrum::const_iterator begin() const
Definition: IsotopeWaveletTransform.h:174
A class implementing the isotope wavelet transform. If you just want to find features using the isoto...
Definition: IsotopeWaveletTransform.h:46
void clusterSeeds_(const TransSpectrum &candidates, const MSSpectrum &ref, const UInt scan_index, const UInt c, const bool check_PPMs)
Clusters the seeds stored by push2TmpBox_.
Definition: IsotopeWaveletTransform.h:1588
virtual ~IsotopeWaveletTransform()
Destructor.
Definition: IsotopeWaveletTransform.h:533
Size max_scan_size_
Definition: IsotopeWaveletTransform.h:459
std::vector< double > prod_
Definition: IsotopeWaveletTransform.h:456
std::vector< float > zeros_
Definition: IsotopeWaveletTransform.h:467
UInt data_length_
Definition: IsotopeWaveletTransform.h:460
virtual void push2Box_(const double mz, const UInt scan, UInt c, const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end, const double ref_intens)
Inserts a potential isotopic pattern into an open box or - if no such box exists - creates a new one.
Definition: IsotopeWaveletTransform.h:1203
std::vector< double > xs_
Definition: IsotopeWaveletTransform.h:456
double getPPMs_(const double mass_a, const double mass_b) const
Returns the parts-per-million deviation of the masses.
Definition: IsotopeWaveletTransform.h:446
std::multimap< double, Box > front_boxes_
Definition: IsotopeWaveletTransform.h:452
std::vector< double > c_spacings_
Definition: IsotopeWaveletTransform.h:456
UInt max_charge_
Definition: IsotopeWaveletTransform.h:460
Int from_max_to_right_
Definition: IsotopeWaveletTransform.h:463
virtual std::pair< double, double > checkPPMTheoModel_(const MSSpectrum &ref, const double c_mz, const UInt c)
Definition: IsotopeWaveletTransform.h:2120
Size getMaxScanSize() const
Definition: IsotopeWaveletTransform.h:293
std::multimap< double, Box > end_boxes_
Definition: IsotopeWaveletTransform.h:452
virtual double scoreThis_(const TransSpectrum &candidate, const UInt peak_cutoff, const double seed_mz, const UInt c, const double ampl_cutoff)
Given a candidate for an isotopic pattern, this function computes the corresponding score.
Definition: IsotopeWaveletTransform.h:943
double score
The associated score.
Definition: IsotopeWaveletTransform.h:55
IsotopeWaveletTransform()
Default Constructor.
Definition: IsotopeWaveletTransform.h:495
std::vector< double > c_mzs_
Definition: IsotopeWaveletTransform.h:456
double mz
The monoisotopic position.
Definition: IsotopeWaveletTransform.h:53
double sigma_
Definition: IsotopeWaveletTransform.h:455
void updateBoxStates(const PeakMap &map, const Size scan_index, const UInt RT_interleave, const UInt RT_votes_cutoff, const Int front_bound=-1, const Int end_bound=-1)
A function keeping track of currently open and closed sweep line boxes. This function is used by the ...
Definition: IsotopeWaveletTransform.h:1414
std::vector< float > scores_
Definition: IsotopeWaveletTransform.h:467
std::multimap< double, Box > closed_boxes_
Definition: IsotopeWaveletTransform.h:452
std::vector< std::multimap< double, Box > > * tmp_boxes_
Definition: IsotopeWaveletTransform.h:453
std::vector< int > indices_
Definition: IsotopeWaveletTransform.h:464
UInt MZ_begin
Index.
Definition: IsotopeWaveletTransform.h:60
double ref_intens
Definition: IsotopeWaveletTransform.h:57
std::vector< double > interpol_xs_
Definition: IsotopeWaveletTransform.h:457
double getLinearInterpolation(const double mz_a, const double intens_a, const double mz_pos, const double mz_b, const double intens_b)
Computes a linear (intensity) interpolation.
Definition: IsotopeWaveletTransform.h:271
UInt c
Note, this is not the charge (it is charge-1!!!)
Definition: IsotopeWaveletTransform.h:54
double getSdIntens_(const TransSpectrum &scan, const double mean)
Computes the standard deviation (neglecting negative values) of the (transformed) intensities of scan...
Definition: IsotopeWaveletTransform.h:1188
double av_MZ_spacing_
Definition: IsotopeWaveletTransform.h:455
virtual void getTransform(MSSpectrum &c_trans, const MSSpectrum &c_ref, const UInt c)
Computes the isotope wavelet transform of charge state c.
Definition: IsotopeWaveletTransform.h:539
double peptideMassRule_(const double c_mass) const
Returns the monoisotopic mass (with corresponding decimal values) we would expect at c_mass.
Definition: IsotopeWaveletTransform.h:423
Int from_max_to_left_
Definition: IsotopeWaveletTransform.h:463
String intenstype_
Definition: IsotopeWaveletTransform.h:462
std::vector< double > interpol_ys_
Definition: IsotopeWaveletTransform.h:457
double max_mz_cutoff_
Definition: IsotopeWaveletTransform.h:466
virtual void push2TmpBox_(const double mz, const UInt scan, UInt charge, const double score, const double intens, const double rt, const UInt MZ_begin, const UInt MZ_end)
Essentially the same function as.
Definition: IsotopeWaveletTransform.h:1309
double getAvIntens_(const TransSpectrum &scan)
Computes the average (transformed) intensity (neglecting negative values) of scan.
Definition: IsotopeWaveletTransform.h:1174
double RT
The elution time (not the scan index)
Definition: IsotopeWaveletTransform.h:58
void extendBox_(const PeakMap &map, const Box &box)
A currently still necessary function that extends the box box in order to capture also signals whose ...
Definition: IsotopeWaveletTransform.h:1483
FeatureMap mapSeeds2Features(const PeakMap &map, const UInt RT_votes_cutoff)
Filters the candidates further more and maps the internally used data structures to the OpenMS framew...
Definition: IsotopeWaveletTransform.h:1696
virtual void identifyCharge(const MSSpectrum &candidates, const MSSpectrum &ref, const UInt scan_index, const UInt c, const double ampl_cutoff, const bool check_PPMs)
Given an isotope wavelet transformed spectrum candidates, this function assigns to every significant ...
Definition: IsotopeWaveletTransform.h:672
double getSigma() const
Definition: IsotopeWaveletTransform.h:276
UInt RT_index
The elution time (map) index.
Definition: IsotopeWaveletTransform.h:59
double intens
The transformed intensity at the monoisotopic mass.
Definition: IsotopeWaveletTransform.h:56
void sampleTheCMarrWavelet_(const MSSpectrum &scan, const Int wavelet_length, const Int mz_index, const UInt charge)
std::multimap< double, Box > open_boxes_
Definition: IsotopeWaveletTransform.h:452
double getLinearInterpolation(const typename MSSpectrum::const_iterator &left_iter, const double mz_pos, const typename MSSpectrum::const_iterator &right_iter)
Computes a linear (intensity) interpolation.
Definition: IsotopeWaveletTransform.h:260
virtual void initializeScan(const MSSpectrum &c_ref, const UInt c=0)
Definition: IsotopeWaveletTransform.h:620
double getAvMZSpacing_(const MSSpectrum &scan)
Computes the average MZ spacing of scan.
Definition: IsotopeWaveletTransform.h:1155
virtual void getTransformHighRes(MSSpectrum &c_trans, const MSSpectrum &c_ref, const UInt c)
Computes the isotope wavelet transform of charge state c.
Definition: IsotopeWaveletTransform.h:582
UInt MZ_end
Index.
Definition: IsotopeWaveletTransform.h:61
void setSigma(const double sigma)
Definition: IsotopeWaveletTransform.h:281
double min_spacing_
Definition: IsotopeWaveletTransform.h:466
virtual bool checkPositionForPlausibility_(const TransSpectrum &candidate, const MSSpectrum &ref, const double seed_mz, const UInt c, const UInt scan_index, const bool check_PPMs, const double transintens, const double prev_score)
A ugly but necessary function to handle "off-by-1-Dalton predictions" due to idiosyncrasies of the da...
Definition: IsotopeWaveletTransform.h:1995
UInt max_num_peaks_per_pattern_
Definition: IsotopeWaveletTransform.h:460
double getMinSpacing() const
Definition: IsotopeWaveletTransform.h:288
virtual std::multimap< double, Box > getClosedBoxes()
Returns the closed boxes.
Definition: IsotopeWaveletTransform.h:252
bool hr_data_
Definition: IsotopeWaveletTransform.h:461
virtual void computeMinSpacing(const MSSpectrum &c_ref)
Definition: IsotopeWaveletTransform.h:662
std::multimap< UInt, BoxElement > Box
Key: RT index, value: BoxElement.
Definition: IsotopeWaveletTransform.h:64
std::vector< double > psi_
Definition: IsotopeWaveletTransform.h:456
Internally used data structure.
Definition: IsotopeWaveletTransform.h:52
static UInt getNumPeakCutOff(const double mass, const UInt z)
static IsotopeWavelet * init(const double max_m, const UInt max_charge)
static double getLambdaL(const double m)
Returns the mass-parameter lambda (linear fit).
static double getValueByLambda(const double lambda, const double tz1)
Returns the value of the isotope wavelet at position t via a fast table lookup. Usually,...
static UInt getMzPeakCutOffAtMonoPos(const double mass, const UInt z)
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:46
Iterator begin()
Definition: MSExperiment.h:156
Size size() const
The number of spectra.
Definition: MSExperiment.h:121
void clear(bool clear_meta_data)
Clears all data and meta data.
The representation of a 1D spectrum.
Definition: MSSpectrum.h:44
double getRT() const
Iterator MZBegin(CoordinateType mz)
Binary search for peak range begin.
Iterator MZEnd(CoordinateType mz)
Binary search for peak range end (returns the past-the-end iterator)
A 2-dimensional raw data point or peak.
Definition: Peak2D.h:29
CoordinateType getMZ() const
Returns the m/z coordinate (index 1)
Definition: Peak2D.h:172
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1)
Definition: Peak2D.h:178
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0)
Definition: Peak2D.h:190
IntensityType getIntensity() const
Definition: Peak2D.h:142
void setIntensity(IntensityType intensity)
Sets data point intensity (height)
Definition: Peak2D.h:148
A more convenient string class.
Definition: String.h:34
int Int
Signed integer type.
Definition: Types.h:76
unsigned int UInt
Unsigned integer type.
Definition: Types.h:68
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:108
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
static double mean(IteratorType begin, IteratorType end)
Calculates the mean of a range of values.
Definition: StatisticFunctions.h:94
const double IW_PROTON_MASS
Definition: IsotopeWaveletConstants.h:42
const double PEPTIDE_MASS_RULE_FACTOR
Definition: IsotopeWaveletConstants.h:23
const double IW_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:28
const unsigned int DEFAULT_NUM_OF_INTERPOLATION_POINTS
Definition: IsotopeWaveletConstants.h:17
const double c
Definition: Constants.h:188
const double PEPTIDE_MASS_RULE_THEO_PPM_BOUND
Definition: IsotopeWaveletConstants.h:25
const double PEPTIDE_MASS_RULE_BOUND
Definition: IsotopeWaveletConstants.h:24
const double IW_HALF_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:29
const double IW_QUARTER_NEUTRON_MASS
Definition: IsotopeWaveletConstants.h:30
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
bool intensityAscendingComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:477
bool positionComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:489
bool intensityComparator(const PeakType &a, const PeakType &b)
Definition: IsotopeWaveletTransform.h:471
bool intensityPointerComparator(PeakType *a, PeakType *b)
Definition: IsotopeWaveletTransform.h:483