2.7.0/html/LinearRegression_8h_source.html

 // --------------------------------------------------------------------------

 //                   OpenMS -- Open-Source Mass Spectrometry

 // --------------------------------------------------------------------------

 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,

 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.

 //

 // This software is released under a three-clause BSD license:

 //  * Redistributions of source code must retain the above copyright

 //    notice, this list of conditions and the following disclaimer.

 //  * Redistributions in binary form must reproduce the above copyright

 //    notice, this list of conditions and the following disclaimer in the

 //    documentation and/or other materials provided with the distribution.

 //  * Neither the name of any author or any participating institution

 //    may be used to endorse or promote products derived from this software

 //    without specific prior written permission.

 // For a full list of authors, refer to the file AUTHORS.

 // --------------------------------------------------------------------------

 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING

 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,

 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,

 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;

 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,

 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR

 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF

 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 //

 // --------------------------------------------------------------------------

 // $Maintainer: Timo Sachsenberg $

 // $Authors: $

 // --------------------------------------------------------------------------


 #pragma once


 #include <OpenMS/CONCEPT/Types.h>

 #include <OpenMS/CONCEPT/Exception.h>

 #include <OpenMS/DATASTRUCTURES/String.h>

 #include <OpenMS/MATH/STATISTICS/RegressionUtils.h>


 #include "Wm5Vector2.h"

 #include "Wm5ApprLineFit2.h"

 #include "Wm5LinearSystem.h"


 #include <cmath>

 #include <vector>


 namespace OpenMS

 {

   namespace Math

   {

     /**
       @brief Least-squares fits to a straight line model Y = intercept + slope * X.

       The fit is computed either unweighted (computeRegression()) or with
       per-point weights (computeRegressionWeighted()). The fitted coefficients
       and the statistics stored by the fit are available through the getters.

       Instances are not copyable.
     */
     class OPENMS_DLLAPI LinearRegression
     {
 public:

       /// Constructor; every coefficient and statistic starts at 0.
       LinearRegression() :
         intercept_(0),
         slope_(0),
         x_intercept_(0),
         lower_(0),
         upper_(0),
         t_star_(0),
         r_squared_(0),
         stand_dev_residuals_(0),
         mean_residuals_(0),
         stand_error_slope_(0),
         chi_squared_(0),
         rsd_(0)
       {
       }

       /// Destructor.
       virtual ~LinearRegression() = default;

       /**
         @brief Computes the best-fit (unweighted) linear regression coefficients
                of the model Y = intercept + slope * X for the dataset (x, y).

         @param confidence_interval_P Passed on to computeGoodness_().
         @param x_begin Begin of the x values.
         @param x_end End (past-the-end) of the x values.
         @param y_begin Begin of the y values; must provide as many values as the x range.
         @param compute_goodness If true (and more than two points are given),
                computeGoodness_() is called after a successful fit.

         @throws Exception::UnableToFit if the line fit fails.
       */
       template <typename Iterator>
       void computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, bool compute_goodness = true);

       /**
         @brief Computes the best-fit linear regression coefficients of the model
                Y = intercept + slope * X for the weighted dataset (x, y).

         @param confidence_interval_P Passed on to computeGoodness_().
         @param x_begin Begin of the x values.
         @param x_end End (past-the-end) of the x values.
         @param y_begin Begin of the y values; must provide as many values as the x range.
         @param w_begin Begin of the weights; must provide as many values as the x range.
         @param compute_goodness If true (and more than two points are given),
                computeGoodness_() is called after a successful fit.

         @throws Exception::UnableToFit if the linear system is singular.
       */
       template <typename Iterator>
       void computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, bool compute_goodness = true);

       /// Non-mutable access to the y-intercept of the straight line.
       double getIntercept() const;
       /// Non-mutable access to the slope of the straight line.
       double getSlope() const;
       /// Non-mutable access to the x-intercept of the straight line.
       double getXIntercept() const;
       /// Non-mutable access to the lower border of confidence interval.
       double getLower() const;
       /// Non-mutable access to the upper border of confidence interval.
       double getUpper() const;
       /// Non-mutable access to the value of the t-distribution.
       double getTValue() const;
       /// Non-mutable access to the squared Pearson coefficient.
       double getRSquared() const;
       /// Non-mutable access to the standard deviation of the residuals.
       double getStandDevRes() const;
       /// Non-mutable access to the residual mean.
       double getMeanRes() const;
       /// Non-mutable access to the standard error of the slope.
       double getStandErrSlope() const;
       /// Non-mutable access to the chi squared value.
       double getChiSquared() const;
       /// Non-mutable access to relative standard deviation.
       double getRSD() const;

       /// Given x, compute y = slope * x + intercept.
       static inline double computePointY(double x, double slope, double intercept)
       {
         return slope * x + intercept;
       }

 protected:

       /// The intercept of the fitted line with the y-axis.
       double intercept_;
       /// The slope of the fitted line.
       double slope_;
       /// The intercept of the fitted line with the x-axis.
       double x_intercept_;
       /// The lower bound of the confidence interval.
       double lower_;
       /// The upper bound of the confidence interval.
       double upper_;
       /// The value of the t-statistic.
       double t_star_;
       /// The squared correlation coefficient (Pearson).
       double r_squared_;
       /// The standard deviation of the residuals.
       double stand_dev_residuals_;
       /// Mean of residuals.
       double mean_residuals_;
       /// The standard error of the slope.
       double stand_error_slope_;
       /// The value of the Chi Squared statistic.
       double chi_squared_;
       /// The relative standard deviation.
       double rsd_;

       /// Computes the goodness of the fitted regression line.
       void computeGoodness_(const std::vector<Wm5::Vector2d>& points, double confidence_interval_P);

       /// Compute the chi squared of a linear fit (sum of squared residuals).
       template <typename Iterator>
       double computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept);

       /// Compute the chi squared of a weighted linear fit (weighted sum of squared residuals).
       template <typename Iterator>
       double computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept);

 private:

       // Copying is not supported. Deleting the operations (instead of leaving
       // them declared but unimplemented) turns accidental use into a
       // compile-time error rather than a link-time error.

       /// Copy constructor (deleted).
       LinearRegression(const LinearRegression& arg) = delete;
       /// Assignment operator (deleted).
       LinearRegression& operator=(const LinearRegression& arg) = delete;

     }; //class


     /**
       @brief Sum of squared residuals of the line y = slope * x + intercept.

       The x range [x_begin, x_end) and the range starting at y_begin must
       provide the same number of values; y is advanced in lock-step with x.

       @return Sum over all points of (y - computePointY(x, slope, intercept))^2.
     */
     template <typename Iterator>
     double LinearRegression::computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept)
     {
       double sum_of_squares = 0.0;

       Iterator y_it = y_begin;
       for (Iterator x_it = x_begin; x_it != x_end; ++x_it, ++y_it)
       {
         // vertical distance between the observed y and the line at x
         const auto residual = *y_it - computePointY(*x_it, slope, intercept);
         sum_of_squares += std::pow(residual, 2);
       }

       return sum_of_squares;
     }


     /**
       @brief Weighted sum of squared residuals of the line y = slope * x + intercept.

       The x range [x_begin, x_end) and the ranges starting at y_begin and
       w_begin must all provide the same number of values; y and w are
       advanced in lock-step with x.

       @return Sum over all points of w * (y - computePointY(x, slope, intercept))^2.
     */
     template <typename Iterator>
     double LinearRegression::computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept)
     {
       double weighted_sum = 0.0;

       Iterator y_it = y_begin;
       Iterator w_it = w_begin;
       for (Iterator x_it = x_begin; x_it != x_end; ++x_it, ++y_it, ++w_it)
       {
         // vertical distance between the observed y and the line at x
         const auto residual = *y_it - computePointY(*x_it, slope, intercept);
         weighted_sum += *w_it * std::pow(residual, 2);
       }

       return weighted_sum;
     }


     /**
       @brief Unweighted least-squares fit of Y_hat = intercept_ + slope_ * X.

       Stores slope_, intercept_ and chi_squared_ and, on success, optionally
       the goodness statistics via computeGoodness_().

       @param confidence_interval_P Passed on to computeGoodness_().
       @param x_begin Begin of the x values.
       @param x_end End (past-the-end) of the x values.
       @param y_begin Begin of the y values; must provide as many values as the x range.
       @param compute_goodness If true and more than two points are given,
              computeGoodness_() is called after a successful fit.

       @throws Exception::UnableToFit if the line fit reports failure.
     */
     template <typename Iterator>
     void LinearRegression::computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, bool compute_goodness)
     {
       std::vector<Wm5::Vector2d> points = iteratorRange2Wm5Vectors(x_begin, x_end, y_begin);

       // Compute the unweighted linear fit.
       // Get the intercept and the slope of the regression Y_hat=intercept_+slope_*X
       // and the value of Chi squared (sum( (y - eval(x))^2)
       //
       // points.data() is used instead of &points.front(): front() on an empty
       // vector is undefined behaviour, whereas data() is always valid to call.
       // An empty range is then left to the fitter, which is expected to report
       // failure so that UnableToFit is thrown below.
       bool pass = Wm5::HeightLineFit2<double>(static_cast<int>(points.size()), points.data(), slope_, intercept_);
       chi_squared_ = computeChiSquare(x_begin, x_end, y_begin, slope_, intercept_);

       if (pass)
       {
         // goodness statistics are only computed for more than two points
         if (compute_goodness && points.size() > 2) computeGoodness_(points, confidence_interval_P);
       }
       else
       {
         throw Exception::UnableToFit(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
             "UnableToFit-LinearRegression", String("Could not fit a linear model to the data (") + points.size() + " points).");
       }
     }


     /**
       @brief Weighted least-squares fit of Y_hat = intercept_ + slope_ * X.

       Builds the weighted 2x2 linear system and solves it for slope and
       intercept. chi_squared_ is always updated (using the previous
       coefficients if the system is singular); goodness statistics are only
       computed after a successful solve.

       @param confidence_interval_P Passed on to computeGoodness_().
       @param x_begin Begin of the x values.
       @param x_end End (past-the-end) of the x values.
       @param y_begin Begin of the y values; must provide as many values as the x range.
       @param w_begin Begin of the weights; must provide as many values as the x range.
       @param compute_goodness If true and more than two points are given,
              computeGoodness_() is called after a successful fit.

       @throws Exception::UnableToFit if the linear system is singular.
     */
     template <typename Iterator>
     void LinearRegression::computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, bool compute_goodness)
     {
       std::vector<Wm5::Vector2d> points = iteratorRange2Wm5Vectors(x_begin, x_end, y_begin);

       // Accumulate the weighted sums of the linear system (adapted from
       // GeometricTools Wm5ApprLineFit2.cpp, extended by weights).
       const int point_count = static_cast<int>(points.size());
       double sumW = 0;
       double sumX = 0;
       double sumY = 0;
       double sumXX = 0;
       double sumXY = 0;

       Iterator weight_it = w_begin;
       for (int idx = 0; idx < point_count; ++idx, ++weight_it)
       {
         const auto& weight = *weight_it;
         const double x = points[idx].X();
         const double y = points[idx].Y();

         sumX += weight * x;
         sumY += weight * y;
         sumXX += weight * x * x;
         sumXY += weight * x * y;
         sumW += weight;
       }

       // Linear system lhs * solution = rhs with solution = (slope, intercept)
       double lhs[2][2] =
       {
         {sumXX, sumX},
         {sumX, sumW}
       };
       double rhs[2] =
       {
         sumXY,
         sumY
       };
       double solution[2];

       const bool solvable = Wm5::LinearSystem<double>().Solve2(lhs, rhs, solution);
       if (solvable)
       {
         slope_ = solution[0];
         intercept_ = solution[1];
       }

       // evaluated even when the solve failed, i.e. with the coefficients held so far
       chi_squared_ = computeWeightedChiSquare(x_begin, x_end, y_begin, w_begin, slope_, intercept_);

       if (!solvable)
       {
         throw Exception::UnableToFit(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
             "UnableToFit-LinearRegression", "Could not fit a linear model to the data");
       }

       // goodness statistics are only computed for more than two points
       if (compute_goodness && points.size() > 2)
       {
         computeGoodness_(points, confidence_interval_P);
       }
     }


   } // namespace Math

 } // namespace OpenMS


Exception.h

RegressionUtils.h

String.h

Types.h

Iterator

OpenMS::Exception::UnableToFit
Exception used if an error occurred while fitting a model to a given dataset.
Definition: Exception.h:684

OpenMS::Math::LinearRegression
This class offers functions to perform least-squares fits to a straight line model,...
Definition: LinearRegression.h:70

OpenMS::Math::LinearRegression::computeRegression
void computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, bool compute_goodness=true)
This function computes the best-fit linear regression coefficients (intercept, slope) of the model Y = intercept + slope * X for the dataset (x, y).
Definition: LinearRegression.h:253

OpenMS::Math::LinearRegression::r_squared_
double r_squared_
The squared correlation coefficient (Pearson)
Definition: LinearRegression.h:188

OpenMS::Math::LinearRegression::getRSquared
double getRSquared() const
Non-mutable access to the squared Pearson coefficient.

OpenMS::Math::LinearRegression::computeGoodness_
void computeGoodness_(const std::vector< Wm5::Vector2d > &points, double confidence_interval_P)
Computes the goodness of the fitted regression line.

OpenMS::Math::LinearRegression::getIntercept
double getIntercept() const
Non-mutable access to the y-intercept of the straight line.

OpenMS::Math::LinearRegression::x_intercept_
double x_intercept_
The intercept of the fitted line with the x-axis.
Definition: LinearRegression.h:180

OpenMS::Math::LinearRegression::getUpper
double getUpper() const
Non-mutable access to the upper border of confidence interval.

OpenMS::Math::LinearRegression::LinearRegression
LinearRegression()
Constructor.
Definition: LinearRegression.h:74

OpenMS::Math::LinearRegression::lower_
double lower_
The lower bound of the confidence interval.
Definition: LinearRegression.h:182

OpenMS::Math::LinearRegression::~LinearRegression
virtual ~LinearRegression()=default
Destructor.

OpenMS::Math::LinearRegression::getXIntercept
double getXIntercept() const
Non-mutable access to the x-intercept of the straight line.

OpenMS::Math::LinearRegression::computePointY
static double computePointY(double x, double slope, double intercept)
given x compute y = slope * x + intercept
Definition: LinearRegression.h:168

OpenMS::Math::LinearRegression::upper_
double upper_
The upper bound of the confidence interval.
Definition: LinearRegression.h:184

OpenMS::Math::LinearRegression::operator=
LinearRegression & operator=(const LinearRegression &arg)
Not implemented.

OpenMS::Math::LinearRegression::computeWeightedChiSquare
double computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept)
Compute the chi squared of a weighted linear fit.
Definition: LinearRegression.h:238

OpenMS::Math::LinearRegression::t_star_
double t_star_
The value of the t-statistic.
Definition: LinearRegression.h:186

OpenMS::Math::LinearRegression::getRSD
double getRSD() const
Non-mutable access to relative standard deviation.

OpenMS::Math::LinearRegression::computeChiSquare
double computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept)
Compute the chi squared of a linear fit.
Definition: LinearRegression.h:223

OpenMS::Math::LinearRegression::getTValue
double getTValue() const
Non-mutable access to the value of the t-distribution.

OpenMS::Math::LinearRegression::getStandErrSlope
double getStandErrSlope() const
Non-mutable access to the standard error of the slope.

OpenMS::Math::LinearRegression::getSlope
double getSlope() const
Non-mutable access to the slope of the straight line.

OpenMS::Math::LinearRegression::getChiSquared
double getChiSquared() const
Non-mutable access to the chi squared value.

OpenMS::Math::LinearRegression::chi_squared_
double chi_squared_
The value of the Chi Squared statistic.
Definition: LinearRegression.h:196

OpenMS::Math::LinearRegression::intercept_
double intercept_
The intercept of the fitted line with the y-axis.
Definition: LinearRegression.h:176

OpenMS::Math::LinearRegression::slope_
double slope_
The slope of the fitted line.
Definition: LinearRegression.h:178

OpenMS::Math::LinearRegression::getLower
double getLower() const
Non-mutable access to the lower border of confidence interval.

OpenMS::Math::LinearRegression::mean_residuals_
double mean_residuals_
Mean of residuals.
Definition: LinearRegression.h:192

OpenMS::Math::LinearRegression::stand_dev_residuals_
double stand_dev_residuals_
The standard deviation of the residuals.
Definition: LinearRegression.h:190

OpenMS::Math::LinearRegression::computeRegressionWeighted
void computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, bool compute_goodness=true)
This function computes the best-fit linear regression coefficients (intercept, slope) of the model Y = intercept + slope * X for the weighted dataset (x, y).
Definition: LinearRegression.h:275

OpenMS::Math::LinearRegression::LinearRegression
LinearRegression(const LinearRegression &arg)
Not implemented.

OpenMS::Math::LinearRegression::rsd_
double rsd_
The relative standard deviation.
Definition: LinearRegression.h:198

OpenMS::Math::LinearRegression::getMeanRes
double getMeanRes() const
Non-mutable access to the residual mean.

OpenMS::Math::LinearRegression::stand_error_slope_
double stand_error_slope_
The standard error of the slope.
Definition: LinearRegression.h:194

OpenMS::Math::LinearRegression::getStandDevRes
double getStandDevRes() const
Non-mutable access to the standard deviation of the residuals.

OpenMS::String
A more convenient string class.
Definition: String.h:61

OpenMS::Math::iteratorRange2Wm5Vectors
std::vector< Wm5::Vector2d > iteratorRange2Wm5Vectors(Iterator x_begin, Iterator x_end, Iterator y_begin)
Copies the distance(x_begin,x_end) elements starting at x_begin and y_begin into the Wm5::Vector.
Definition: RegressionUtils.h:44

OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47