OpenMS
Loading...
Searching...
No Matches
PeakIntegrator.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Douglas McCloskey, Pasquale Domenico Colaianni $
6// $Authors: Douglas McCloskey, Pasquale Domenico Colaianni $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <OpenMS/config.h> // OPENMS_DLLAPI
19
20namespace OpenMS
21{
85 class OPENMS_DLLAPI PeakIntegrator :
87 {
88public:
92 ~PeakIntegrator() override;
93
98 struct PeakArea
99 {
103 double area = 0.0;
107 double height = 0.0;
111 double apex_pos = 0.0;
116 };
118
124 {
128 double area = 0.0;
132 double height = 0.0;
133 };
135
142 {
146 double width_at_5 = 0.0;
150 double width_at_10 = 0.0;
154 double width_at_50 = 0.0;
158 double start_position_at_5 = 0.0;
162 double start_position_at_10 = 0.0;
166 double start_position_at_50 = 0.0;
170 double end_position_at_5 = 0.0;
174 double end_position_at_10 = 0.0;
178 double end_position_at_50 = 0.0;
182 double total_width = 0.0;
196 double tailing_factor = 0.0;
206 double asymmetry_factor = 0.0;
211 double slope_of_baseline = 0.0;
216 double baseline_delta_2_height = 0.0;
220 Int points_across_baseline = 0;
224 Int points_across_half_height = 0;
225 };
227
235 static constexpr const char* INTEGRATION_TYPE_INTENSITYSUM = "intensity_sum";
237 static constexpr const char* INTEGRATION_TYPE_TRAPEZOID = "trapezoid";
239 static constexpr const char* INTEGRATION_TYPE_SIMPSON = "simpson";
241 static constexpr const char* BASELINE_TYPE_BASETOBASE = "base_to_base";
243 static constexpr const char* BASELINE_TYPE_VERTICALDIVISION = "vertical_division";
245 static constexpr const char* BASELINE_TYPE_VERTICALDIVISION_MIN = "vertical_division_min";
247 static constexpr const char* BASELINE_TYPE_VERTICALDIVISION_MAX = "vertical_division_max";
249
269 const MSChromatogram& chromatogram, const double left, const double right
270 ) const;
271
292 ) const;
293
313 const MSSpectrum& spectrum, const double left, const double right
314 ) const;
315
336 ) const;
337
363 const MSChromatogram& chromatogram, const double left, const double right,
364 const double peak_apex_pos
365 ) const;
366
393 const double peak_apex_pos
394 ) const;
395
421 const MSSpectrum& spectrum, const double left, const double right,
422 const double peak_apex_pos
423 ) const;
424
451 const double peak_apex_pos
452 ) const;
453
474 const MSChromatogram& chromatogram, const double left, const double right,
475 const double peak_height, const double peak_apex_pos
476 ) const;
477
499 const double peak_height, const double peak_apex_pos
500 ) const;
501
522 const MSSpectrum& spectrum, const double left, const double right,
523 const double peak_height, const double peak_apex_pos
524 ) const;
525
547 const double peak_height, const double peak_apex_pos
548 ) const;
549
551
552protected:
553 void updateMembers_() override;
554
555 template <typename PeakContainerT>
556 PeakArea integratePeak_(const PeakContainerT& pc, double left, double right) const
557 {
558 OPENMS_PRECONDITION(left <= right, "Left peak boundary must be smaller than right boundary!") // otherwise the code below will segfault (due to PosBegin/PosEnd)
559 PeakContainerT emg_pc;
560 const PeakContainerT& p = EMGPreProcess_(pc, emg_pc, left, right);
561
562 std::function<double(const double, const double)>
563 compute_peak_area_trapezoid = [&p](const double left, const double right)
564 {
565 double peak_area { 0.0 };
566 for (typename PeakContainerT::ConstIterator it = p.PosBegin(left); it != p.PosEnd(right) - 1; ++it)
567 {
568 peak_area += ((it + 1)->getPos() - it->getPos()) * ((it->getIntensity() + (it + 1)->getIntensity()) / 2.0);
569 }
570 return peak_area;
571 };
572
573 std::function<double(const double, const double)>
574 compute_peak_area_intensity_sum = [&p](const double left, const double right)
575 {
576 // OPENMS_LOG_WARN << "WARNING: intensity_sum method is being used." << std::endl;
577 double peak_area { 0.0 };
578 for (typename PeakContainerT::ConstIterator it = p.PosBegin(left); it != p.PosEnd(right); ++it)
579 {
580 peak_area += it->getIntensity();
581 }
582 return peak_area;
583 };
584
585 PeakArea pa;
586 pa.apex_pos = (left + right) / 2; // initial estimate, to avoid apex being outside of [left,right]
587 UInt n_points = std::distance(p.PosBegin(left), p.PosEnd(right));
588 for (auto it = p.PosBegin(left); it != p.PosEnd(right); ++it) //OMS_CODING_TEST_EXCLUDE
589 {
590 pa.hull_points.push_back(DPosition<2>(it->getPos(), it->getIntensity()));
591 if (pa.height < it->getIntensity())
592 {
593 pa.height = it->getIntensity();
594 pa.apex_pos = it->getPos();
595 }
596 }
597
598 if (integration_type_ == INTEGRATION_TYPE_TRAPEZOID)
599 {
600 if (n_points >= 2)
601 {
602 pa.area = compute_peak_area_trapezoid(left, right);
603 }
604 }
605 else if (integration_type_ == INTEGRATION_TYPE_SIMPSON)
606 {
607 if (n_points == 2)
608 {
609 OPENMS_LOG_WARN << std::endl << "PeakIntegrator::integratePeak:"
610 "number of points is 2, falling back to `trapezoid`." << std::endl;
611 pa.area = compute_peak_area_trapezoid(left, right);
612 }
613 else if (n_points > 2)
614 {
615 if (n_points % 2)
616 {
617 pa.area = simpson_(p.PosBegin(left), p.PosEnd(right));
618 }
619 else
620 {
621 double areas[4] = {-1.0, -1.0, -1.0, -1.0};
622 areas[0] = simpson_(p.PosBegin(left), p.PosEnd(right) - 1); // without last point
623 areas[1] = simpson_(p.PosBegin(left) + 1, p.PosEnd(right)); // without first point
624 if (p.begin() <= p.PosBegin(left) - 1)
625 {
626 areas[2] = simpson_(p.PosBegin(left) - 1, p.PosEnd(right)); // with one more point on the left
627 }
628 if (p.PosEnd(right) < p.end())
629 {
630 areas[3] = simpson_(p.PosBegin(left), p.PosEnd(right) + 1); // with one more point on the right
631 }
632 UInt valids = 0;
633 for (const auto& area : areas)
634 {
635 if (area != -1.0)
636 {
637 pa.area += area;
638 ++valids;
639 }
640 }
641 pa.area /= valids;
642 }
643 }
644 }
645 else if (integration_type_ == INTEGRATION_TYPE_INTENSITYSUM)
646 {
647 pa.area = compute_peak_area_intensity_sum(left, right);
648 }
649 else
650 {
651 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Please set a valid value for the parameter \"integration_type\".");
652 }
653
654 return pa;
655 }
656
657
658
659 template <typename PeakContainerT>
661 const PeakContainerT& pc, double left, double right,
662 const double peak_apex_pos
663 ) const
664 {
665 PeakContainerT emg_pc;
666 const PeakContainerT& p = EMGPreProcess_(pc, emg_pc, left, right);
667
668 const double int_l = p.PosBegin(left)->getIntensity();
669 const double int_r = (p.PosEnd(right) - 1)->getIntensity();
670 const double delta_int = int_r - int_l;
671 const double delta_pos = (p.PosEnd(right) - 1)->getPos() - p.PosBegin(left)->getPos();
672 const double min_int_pos = int_r <= int_l ? (p.PosEnd(right) - 1)->getPos() : p.PosBegin(left)->getPos();
673 const double delta_int_apex = std::fabs(delta_int) * std::fabs(min_int_pos - peak_apex_pos) / delta_pos;
674 double area {0.0};
675 double height {0.0};
676 if (baseline_type_ == BASELINE_TYPE_BASETOBASE)
677 {
678 height = std::min(int_r, int_l) + delta_int_apex;
679 if (integration_type_ == INTEGRATION_TYPE_TRAPEZOID || integration_type_ == INTEGRATION_TYPE_SIMPSON)
680 {
681 // formula for calculating the background using the trapezoidal rule
682 // area = intensity_min*delta_pos + 0.5*delta_int*delta_pos;
683 area = delta_pos * (std::min(int_r, int_l) + 0.5 * std::fabs(delta_int));
684 }
685 else if (integration_type_ == INTEGRATION_TYPE_INTENSITYSUM)
686 {
687 // calculate the background using an estimator of the form
688 // y = mx + b
689 // where x = rt or mz, m = slope, b = left intensity
690 // sign of delta_int will determine line direction
691 // area += delta_int / delta_pos * (it->getPos() - left) + int_l;
692 double pos_sum = 0.0; // rt or mz
693 for (auto it = p.PosBegin(left); it != p.PosEnd(right); ++it) //OMS_CODING_TEST_EXCLUDE
694 {
695 pos_sum += it->getPos();
696 }
697 UInt n_points = std::distance(p.PosBegin(left), p.PosEnd(right));
698
699 // We construct the background area as the sum of a rectangular part
700 // and a triangle on top. The triangle is constructed as the sum of the
701 // line's y value at each sampled point: \sum_{i=0}^{n} (x_i - x_0) * m
702 const double rectangle_area = n_points * int_l;
703 const double slope = delta_int / delta_pos;
704 const double triangle_area = (pos_sum - n_points * p.PosBegin(left)->getPos()) * slope;
705 area = triangle_area + rectangle_area;
706 }
707 }
708 else if (baseline_type_ == BASELINE_TYPE_VERTICALDIVISION || baseline_type_ == BASELINE_TYPE_VERTICALDIVISION_MIN)
709 {
710 height = std::min(int_r, int_l);
711 if (integration_type_ == INTEGRATION_TYPE_TRAPEZOID || integration_type_ == INTEGRATION_TYPE_SIMPSON)
712 {
713 area = delta_pos * std::min(int_r, int_l);
714 }
715 else if (integration_type_ == INTEGRATION_TYPE_INTENSITYSUM)
716 {
717 area = std::min(int_r, int_l) * std::distance(p.PosBegin(left), p.PosEnd(right));;
718 }
719 }
720 else if (baseline_type_ == BASELINE_TYPE_VERTICALDIVISION_MAX)
721 {
722 height = std::max(int_r, int_l);
723 if (integration_type_ == INTEGRATION_TYPE_TRAPEZOID || integration_type_ == INTEGRATION_TYPE_SIMPSON)
724 {
725 area = delta_pos * std::max(int_r, int_l);
726 }
727 else if (integration_type_ == INTEGRATION_TYPE_INTENSITYSUM)
728 {
729 area = std::max(int_r, int_l) * std::distance(p.PosBegin(left), p.PosEnd(right));
730 }
731 }
732 else
733 {
734 throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Please set a valid value for the parameter \"baseline_type\".");
735 }
737 pb.area = area;
738 pb.height = height;
739 return pb;
740 }
741
756 template <typename PeakContainerConstIteratorT>
757 double simpson_(PeakContainerConstIteratorT it_begin, PeakContainerConstIteratorT it_end) const
758 {
759 double integral = 0.0;
760 for (auto it = it_begin + 1; it < it_end - 1; it = it + 2) //OMS_CODING_TEST_EXCLUDE
761 {
762 const double h = it->getPos() - (it - 1)->getPos();
763 const double k = (it + 1)->getPos() - it->getPos();
764 const double y_h = (it - 1)->getIntensity();
765 const double y_0 = it->getIntensity();
766 const double y_k = (it + 1)->getIntensity();
767 integral += (1.0 / 6.0) * (h + k) * ((2.0 - k / h) * y_h + (pow(h + k, 2) / (h * k)) * y_0 + (2.0 - h / k) * y_k);
768 }
769 return integral;
770 }
771
772
773 template <typename PeakContainerT>
775 const PeakContainerT& pc, double left, double right,
776 const double peak_height, const double peak_apex_pos
777 ) const
778 {
780
781 if (pc.empty()) return psm; // return all '0'
782
783 // enforce order: left <= peakapex <= right
784 if (!(left <= peak_apex_pos && peak_apex_pos <= right)) throw Exception::InvalidRange(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION);
785
786 PeakContainerT emg_pc;
787 const PeakContainerT& p = EMGPreProcess_(pc, emg_pc, left, right);
788
789 typename PeakContainerT::ConstIterator it_PosBegin_l = p.PosBegin(left);
790 typename PeakContainerT::ConstIterator it_PosEnd_apex = p.PosBegin(peak_apex_pos); // if peak_apex_pos is correct, this will get the underlying iterator
791 typename PeakContainerT::ConstIterator it_PosEnd_r = p.PosEnd(right); // past the end. Do not dereference (might be the true .end())
792 for (auto it = it_PosBegin_l; it != it_PosEnd_r; ++it) //OMS_CODING_TEST_EXCLUDE
793 {
794 // points across the peak
796 if (it->getIntensity() >= 0.5 * peak_height)
797 {
799 }
800 }
801 // positions at peak heights
802 psm.start_position_at_5 = findPosAtPeakHeightPercent_(it_PosBegin_l, it_PosEnd_apex, p.end(), peak_height, 0.05, true);
803 psm.start_position_at_10 = findPosAtPeakHeightPercent_(it_PosBegin_l, it_PosEnd_apex, p.end(), peak_height, 0.1, true);
804 psm.start_position_at_50 = findPosAtPeakHeightPercent_(it_PosBegin_l, it_PosEnd_apex, p.end(), peak_height, 0.5, true);
805 psm.end_position_at_5 = findPosAtPeakHeightPercent_(it_PosEnd_apex, it_PosEnd_r, p.end(), peak_height, 0.05, false);
806 psm.end_position_at_10 = findPosAtPeakHeightPercent_(it_PosEnd_apex, it_PosEnd_r, p.end(), peak_height, 0.1, false);
807 psm.end_position_at_50 = findPosAtPeakHeightPercent_(it_PosEnd_apex, it_PosEnd_r, p.end(), peak_height, 0.5, false);
808 // peak widths
812 psm.total_width = (p.PosEnd(right) - 1)->getPos() - p.PosBegin(left)->getPos();
813 psm.slope_of_baseline = (p.PosEnd(right) - 1)->getIntensity() - p.PosBegin(left)->getIntensity();
814 if (peak_height != 0.0) // avoid division by zero
815 {
816 psm.baseline_delta_2_height = psm.slope_of_baseline / peak_height;
817 }
818 // Source of tailing_factor and asymmetry_factor formulas:
819 // USP 40 - NF 35 The United States Pharmacopeia and National Formulary - Supplementary
820
821 // Can only compute if start and peak apex are different
822 if (psm.start_position_at_5 != peak_apex_pos)
823 {
824 psm.tailing_factor = psm.width_at_5 / (2*(peak_apex_pos - psm.start_position_at_5));
825 }
826 if (psm.start_position_at_10 != peak_apex_pos)
827 {
828 psm.asymmetry_factor = (psm.end_position_at_10 - peak_apex_pos) / (peak_apex_pos - psm.start_position_at_10);
829 }
830 return psm;
831 }
832
833
834
855 template <typename PeakContainerConstIteratorT>
857 PeakContainerConstIteratorT it_left, // must not be past the end
858 PeakContainerConstIteratorT it_right, // might be past the end
859 PeakContainerConstIteratorT it_end, // definitely past-the-end
860 const double peak_height,
861 const double percent,
862 const bool is_left_half) const
863 {
864 // no points in range
865 if (it_left == it_end) throw Exception::InvalidRange(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION);
866
867 // only one point in range
868 if (it_left == it_right) return it_left->getPos();
869
870 const double percent_intensity = peak_height * percent;
871 PeakContainerConstIteratorT closest;
872 if (is_left_half)
873 {
874 closest = it_left;
875 for (
876 PeakContainerConstIteratorT it = it_left;
877 it < it_right && it->getIntensity() <= percent_intensity;
878 closest = it++
879 ) {}
880 }
881 else // right half; search from right to left
882 {
883 closest = it_right - 1; // make sure we can deference it
884 for (
885 PeakContainerConstIteratorT it = it_right - 1;
886 it >= it_left && it->getIntensity() <= percent_intensity;
887 closest = it--
888 ) {}
889 }
890 return closest->getPos();
891 }
892
893private:
899
903 String integration_type_ = INTEGRATION_TYPE_INTENSITYSUM;
908 String baseline_type_ = BASELINE_TYPE_BASETOBASE;
910
914
915
929 template <typename PeakContainerT>
930 const PeakContainerT& EMGPreProcess_(
931 const PeakContainerT& pc,
932 PeakContainerT& emg_pc,
933 double& left,
934 double& right
935 ) const
936 {
937 if (fit_EMG_)
938 {
939 emg_.fitEMGPeakModel(pc, emg_pc, left, right);
940 left = emg_pc.front().getPos();
941 right = emg_pc.back().getPos();
942 return emg_pc;
943 }
944 return pc;
945 }
946 };
947}
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition LogStream.h:447
std::vector< PointType > PointArrayType
Definition ConvexHull2D.h:52
Representation of a coordinate in D-dimensional space.
Definition DPosition.h:32
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Fit peaks to an Exponentially Modified Gaussian (EMG) model using gradient descent.
Definition EmgGradientDescent.h:66
void fitEMGPeakModel(const PeakContainerT &input_peak, PeakContainerT &output_peak, const double left_pos=0.0, const double right_pos=0.0) const
Fit the given peak (either MSChromatogram or MSSpectrum) to the EMG peak model.
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition Exception.h:317
Invalid range exception.
Definition Exception.h:257
The representation of a chromatogram.
Definition MSChromatogram.h:30
ContainerType::const_iterator ConstIterator
Non-mutable iterator.
Definition MSChromatogram.h:66
The representation of a 1D spectrum.
Definition MSSpectrum.h:44
ContainerType::const_iterator ConstIterator
Non-mutable iterator.
Definition MSSpectrum.h:120
Management and storage of parameters / INI files.
Definition Param.h:46
Compute the area, background and shape metrics of a peak.
Definition PeakIntegrator.h:87
Int points_across_half_height
Definition PeakIntegrator.h:224
bool fit_EMG_
Enable/disable EMG peak model fitting.
Definition PeakIntegrator.h:912
PeakArea integratePeak(const MSChromatogram &chromatogram, const double left, const double right) const
Compute the area of a peak contained in a MSChromatogram.
double apex_pos
Definition PeakIntegrator.h:111
ConvexHull2D::PointArrayType hull_points
Definition PeakIntegrator.h:115
PeakBackground estimateBackground_(const PeakContainerT &pc, double left, double right, const double peak_apex_pos) const
Definition PeakIntegrator.h:660
PeakArea integratePeak(const MSChromatogram &chromatogram, MSChromatogram::ConstIterator &left, MSChromatogram::ConstIterator &right) const
Compute the area of a peak contained in a MSChromatogram.
double width_at_5
Definition PeakIntegrator.h:146
Int points_across_baseline
Definition PeakIntegrator.h:220
PeakShapeMetrics calculatePeakShapeMetrics(const MSSpectrum &spectrum, MSSpectrum::ConstIterator &left, MSSpectrum::ConstIterator &right, const double peak_height, const double peak_apex_pos) const
Calculate peak's shape metrics.
double width_at_50
Definition PeakIntegrator.h:154
double end_position_at_50
Definition PeakIntegrator.h:178
const PeakContainerT & EMGPreProcess_(const PeakContainerT &pc, PeakContainerT &emg_pc, double &left, double &right) const
Fit the peak to the EMG model.
Definition PeakIntegrator.h:930
double findPosAtPeakHeightPercent_(PeakContainerConstIteratorT it_left, PeakContainerConstIteratorT it_right, PeakContainerConstIteratorT it_end, const double peak_height, const double percent, const bool is_left_half) const
Find the position (RT/MZ) at a given percentage of peak's height.
Definition PeakIntegrator.h:856
double baseline_delta_2_height
Definition PeakIntegrator.h:216
EmgGradientDescent emg_
Definition PeakIntegrator.h:913
double height
Definition PeakIntegrator.h:107
double end_position_at_10
Definition PeakIntegrator.h:174
double simpson_(PeakContainerConstIteratorT it_begin, PeakContainerConstIteratorT it_end) const
Simpson's rule algorithm.
Definition PeakIntegrator.h:757
~PeakIntegrator() override
Destructor.
PeakShapeMetrics calculatePeakShapeMetrics(const MSChromatogram &chromatogram, MSChromatogram::ConstIterator &left, MSChromatogram::ConstIterator &right, const double peak_height, const double peak_apex_pos) const
Calculate peak's shape metrics.
PeakShapeMetrics calculatePeakShapeMetrics_(const PeakContainerT &pc, double left, double right, const double peak_height, const double peak_apex_pos) const
Definition PeakIntegrator.h:774
PeakBackground estimateBackground(const MSSpectrum &spectrum, MSSpectrum::ConstIterator &left, MSSpectrum::ConstIterator &right, const double peak_apex_pos) const
Estimate the background of a peak contained in a MSSpectrum.
double width_at_10
Definition PeakIntegrator.h:150
PeakArea integratePeak(const MSSpectrum &spectrum, MSSpectrum::ConstIterator &left, MSSpectrum::ConstIterator &right) const
Compute the area of a peak contained in a MSSpectrum.
double total_width
Definition PeakIntegrator.h:182
PeakShapeMetrics calculatePeakShapeMetrics(const MSChromatogram &chromatogram, const double left, const double right, const double peak_height, const double peak_apex_pos) const
Calculate peak's shape metrics.
double end_position_at_5
Definition PeakIntegrator.h:170
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
double start_position_at_50
Definition PeakIntegrator.h:166
void getDefaultParameters(Param &params)
double tailing_factor
Definition PeakIntegrator.h:196
PeakShapeMetrics calculatePeakShapeMetrics(const MSSpectrum &spectrum, const double left, const double right, const double peak_height, const double peak_apex_pos) const
Calculate peak's shape metrics.
double slope_of_baseline
Definition PeakIntegrator.h:211
PeakBackground estimateBackground(const MSChromatogram &chromatogram, const double left, const double right, const double peak_apex_pos) const
Estimate the background of a peak contained in a MSChromatogram.
double start_position_at_10
Definition PeakIntegrator.h:162
PeakArea integratePeak(const MSSpectrum &spectrum, const double left, const double right) const
Compute the area of a peak contained in a MSSpectrum.
double asymmetry_factor
Definition PeakIntegrator.h:206
double start_position_at_5
Definition PeakIntegrator.h:158
double area
Definition PeakIntegrator.h:103
PeakBackground estimateBackground(const MSSpectrum &spectrum, const double left, const double right, const double peak_apex_pos) const
Estimate the background of a peak contained in a MSSpectrum.
PeakBackground estimateBackground(const MSChromatogram &chromatogram, MSChromatogram::ConstIterator &left, MSChromatogram::ConstIterator &right, const double peak_apex_pos) const
Estimate the background of a peak contained in a MSChromatogram.
PeakArea integratePeak_(const PeakContainerT &pc, double left, double right) const
Definition PeakIntegrator.h:556
PeakIntegrator()
Constructor.
Definition PeakIntegrator.h:99
Definition PeakIntegrator.h:124
Definition PeakIntegrator.h:142
A more convenient string class.
Definition String.h:34
int Int
Signed integer type.
Definition Types.h:72
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
#define OPENMS_PRECONDITION(condition, message)
Precondition macro.
Definition openms/include/OpenMS/CONCEPT/Macros.h:94
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19