OpenMS  2.7.0
LinearRegression.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Types.h>
41 
42 #include "Wm5Vector2.h"
43 #include "Wm5ApprLineFit2.h"
44 #include "Wm5LinearSystem.h"
45 
46 #include <cmath>
47 #include <vector>
48 
49 namespace OpenMS
50 {
51  namespace Math
52  {
/**
  @brief This class offers functions to perform least-squares fits to a straight line model.

  The fit results (slope, intercepts, chi squared and the goodness-of-fit
  statistics) are stored in the protected members and exposed through the
  const getters below.

  NOTE(review): this listing is lossy. According to the cross-reference list
  of this page, the constructor head (LinearRegression.h:74), the members
  stand_dev_residuals_ (190), mean_residuals_ (192) and stand_error_slope_ (194),
  and the "not implemented" copy constructor / assignment operator are part of
  the class but are not shown here.
*/
69  class OPENMS_DLLAPI LinearRegression
70  {
71 public:
72 
/// Constructor: initializes every stored fit result to 0.
/// NOTE(review): the declaration line `LinearRegression()` preceding this initializer list is missing from the listing.
75  intercept_(0),
76  slope_(0),
77  x_intercept_(0),
78  lower_(0),
79  upper_(0),
80  t_star_(0),
81  r_squared_(0),
82  stand_dev_residuals_(0),
83  mean_residuals_(0),
84  stand_error_slope_(0),
85  chi_squared_(0),
86  rsd_(0)
87  {
88  }
89 
/// Destructor.
91  virtual ~LinearRegression() = default;
92 
/// Computes the best-fit linear regression coefficients (intercept, slope) for the dataset
/// given by the x range [x_begin, x_end) and the y values starting at y_begin.
/// The goodness of the fit is only evaluated if compute_goodness is true (the default).
114  template <typename Iterator>
115  void computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, bool compute_goodness = true);
116 
/// Computes the best-fit linear regression coefficients (intercept, slope) for the weighted
/// dataset; the weights are read from the range starting at w_begin.
/// The goodness of the fit is only evaluated if compute_goodness is true (the default).
139  template <typename Iterator>
140  void computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, bool compute_goodness = true);
141 
/// Non-mutable access to the y-intercept of the straight line.
143  double getIntercept() const;
/// Non-mutable access to the slope of the straight line.
145  double getSlope() const;
/// Non-mutable access to the x-intercept of the straight line.
147  double getXIntercept() const;
/// Non-mutable access to the lower border of confidence interval.
149  double getLower() const;
/// Non-mutable access to the upper border of confidence interval.
151  double getUpper() const;
/// Non-mutable access to the value of the t-distribution.
153  double getTValue() const;
/// Non-mutable access to the squared Pearson coefficient.
155  double getRSquared() const;
/// Non-mutable access to the standard deviation of the residuals.
157  double getStandDevRes() const;
/// Non-mutable access to the residual mean.
159  double getMeanRes() const;
/// Non-mutable access to the standard error of the slope.
161  double getStandErrSlope() const;
/// Non-mutable access to the chi squared value.
163  double getChiSquared() const;
/// Non-mutable access to relative standard deviation.
165  double getRSD() const;
166 
/// Given x, compute y = slope * x + intercept.
168  static inline double computePointY(double x, double slope, double intercept)
169  {
170  return slope * x + intercept;
171  }
172 
173 protected:
174 
/// The intercept of the fitted line with the y-axis.
176  double intercept_;
/// The slope of the fitted line.
178  double slope_;
/// The intercept of the fitted line with the x-axis.
180  double x_intercept_;
/// The lower bound of the confidence interval.
182  double lower_;
/// The upper bound of the confidence interval.
184  double upper_;
/// The value of the t-statistic.
186  double t_star_;
/// The squared correlation coefficient (Pearson).
188  double r_squared_;
/// The value of the Chi Squared statistic.
196  double chi_squared_;
/// The relative standard deviation.
198  double rsd_;
199 
200 
/// Computes the goodness of the fitted regression line.
202  void computeGoodness_(const std::vector<Wm5::Vector2d>& points, double confidence_interval_P);
203 
/// Compute the chi squared of a linear fit.
205  template <typename Iterator>
206  double computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept);
207 
/// Compute the chi squared of a weighted linear fit.
209  template <typename Iterator>
210  double computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept);
211 
212 private:
213 
// NOTE(review): the copy constructor and assignment operator (listed as "Not implemented"
// in the cross-reference list of this page) would be declared here; they are not shown in this listing.
218 
219  }; //class
220 
221  //x, y, w must be of same size
222  template <typename Iterator>
223  double LinearRegression::computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept)
224  {
225  double chi_squared = 0.0;
226  Iterator xIter = x_begin;
227  Iterator yIter = y_begin;
228  for (; xIter != x_end; ++xIter, ++yIter)
229  {
230  chi_squared += std::pow(*yIter - computePointY(*xIter, slope, intercept), 2);
231  }
232 
233  return chi_squared;
234  }
235 
236  //x, y, w must be of same size
237  template <typename Iterator>
238  double LinearRegression::computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept)
239  {
240  double chi_squared = 0.0;
241  Iterator xIter = x_begin;
242  Iterator yIter = y_begin;
243  Iterator wIter = w_begin;
244  for (; xIter != x_end; ++xIter, ++yIter, ++wIter)
245  {
246  chi_squared += *wIter * std::pow(*yIter - computePointY(*xIter, slope, intercept), 2);
247  }
248 
249  return chi_squared;
250  }
251 
252  template <typename Iterator>
253  void LinearRegression::computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, bool compute_goodness)
254  {
255  std::vector<Wm5::Vector2d> points = iteratorRange2Wm5Vectors(x_begin, x_end, y_begin);
256 
257  // Compute the unweighted linear fit.
258  // Get the intercept and the slope of the regression Y_hat=intercept_+slope_*X
259  // and the value of Chi squared (sum( (y - evel(x))^2)
260  bool pass = Wm5::HeightLineFit2<double>(static_cast<int>(points.size()), &points.front(), slope_, intercept_);
261  chi_squared_ = computeChiSquare(x_begin, x_end, y_begin, slope_, intercept_);
262 
263  if (pass)
264  {
265  if (compute_goodness && points.size() > 2) computeGoodness_(points, confidence_interval_P);
266  }
267  else
268  {
269  throw Exception::UnableToFit(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
270  "UnableToFit-LinearRegression", String("Could not fit a linear model to the data (") + points.size() + " points).");
271  }
272  }
273 
274  template <typename Iterator>
275  void LinearRegression::computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, bool compute_goodness)
276  {
277  // Compute the weighted linear fit.
278  // Get the intercept and the slope of the regression Y_hat=intercept_+slope_*X
279  // and the value of Chi squared, the covariances of the intercept and the slope
280  std::vector<Wm5::Vector2d> points = iteratorRange2Wm5Vectors(x_begin, x_end, y_begin);
281  // Compute sums for linear system. copy&paste from GeometricTools Wm5ApprLineFit2.cpp
282  // and modified to allow weights
283  int numPoints = static_cast<int>(points.size());
284  double sumX = 0, sumY = 0;
285  double sumXX = 0, sumXY = 0;
286  double sumW = 0;
287  Iterator wIter = w_begin;
288 
289  for (int i = 0; i < numPoints; ++i, ++wIter)
290  {
291  sumX += (*wIter) * points[i].X();
292  sumY += (*wIter) * points[i].Y();
293  sumXX += (*wIter) * points[i].X() * points[i].X();
294  sumXY += (*wIter) * points[i].X() * points[i].Y();
295  sumW += (*wIter);
296  }
297  //create matrices to solve Ax = B
298  double A[2][2] =
299  {
300  {sumXX, sumX},
301  {sumX, sumW}
302  };
303  double B[2] =
304  {
305  sumXY,
306  sumY
307  };
308  double X[2];
309 
310  bool nonsingular = Wm5::LinearSystem<double>().Solve2(A, B, X);
311  if (nonsingular)
312  {
313  slope_ = X[0];
314  intercept_ = X[1];
315  }
316  chi_squared_ = computeWeightedChiSquare(x_begin, x_end, y_begin, w_begin, slope_, intercept_);
317 
318  if (nonsingular)
319  {
320  if (compute_goodness && points.size() > 2) computeGoodness_(points, confidence_interval_P);
321  }
322  else
323  {
324  throw Exception::UnableToFit(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
325  "UnableToFit-LinearRegression", "Could not fit a linear model to the data");
326  }
327  }
328 
329  } // namespace Math
330 } // namespace OpenMS
331 
332 
Exception used if an error occurred while fitting a model to a given dataset.
Definition: Exception.h:684
This class offers functions to perform least-squares fits to a straight line model,...
Definition: LinearRegression.h:70
void computeRegression(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, bool compute_goodness=true)
This function computes the best-fit linear regression coefficients $(\text{intercept}, \text{slope})$ of the model $\hat{Y} = \text{intercept} + \text{slope} \cdot X$ for the dataset $(x, y)$.
Definition: LinearRegression.h:253
double r_squared_
The squared correlation coefficient (Pearson)
Definition: LinearRegression.h:188
double getRSquared() const
Non-mutable access to the squared Pearson coefficient.
void computeGoodness_(const std::vector< Wm5::Vector2d > &points, double confidence_interval_P)
Computes the goodness of the fitted regression line.
double getIntercept() const
Non-mutable access to the y-intercept of the straight line.
double x_intercept_
The intercept of the fitted line with the x-axis.
Definition: LinearRegression.h:180
double getUpper() const
Non-mutable access to the upper border of confidence interval.
LinearRegression()
Constructor.
Definition: LinearRegression.h:74
double lower_
The lower bound of the confidence interval.
Definition: LinearRegression.h:182
virtual ~LinearRegression()=default
Destructor.
double getXIntercept() const
Non-mutable access to the x-intercept of the straight line.
static double computePointY(double x, double slope, double intercept)
given x compute y = slope * x + intercept
Definition: LinearRegression.h:168
double upper_
The upper bound of the confidence interval.
Definition: LinearRegression.h:184
LinearRegression & operator=(const LinearRegression &arg)
Not implemented.
double computeWeightedChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, double slope, double intercept)
Compute the chi squared of a weighted linear fit.
Definition: LinearRegression.h:238
double t_star_
The value of the t-statistic.
Definition: LinearRegression.h:186
double getRSD() const
Non-mutable access to relative standard deviation.
double computeChiSquare(Iterator x_begin, Iterator x_end, Iterator y_begin, double slope, double intercept)
Compute the chi squared of a linear fit.
Definition: LinearRegression.h:223
double getTValue() const
Non-mutable access to the value of the t-distribution.
double getStandErrSlope() const
Non-mutable access to the standard error of the slope.
double getSlope() const
Non-mutable access to the slope of the straight line.
double getChiSquared() const
Non-mutable access to the chi squared value.
double chi_squared_
The value of the Chi Squared statistic.
Definition: LinearRegression.h:196
double intercept_
The intercept of the fitted line with the y-axis.
Definition: LinearRegression.h:176
double slope_
The slope of the fitted line.
Definition: LinearRegression.h:178
double getLower() const
Non-mutable access to the lower border of confidence interval.
double mean_residuals_
Mean of residuals.
Definition: LinearRegression.h:192
double stand_dev_residuals_
The standard deviation of the residuals.
Definition: LinearRegression.h:190
void computeRegressionWeighted(double confidence_interval_P, Iterator x_begin, Iterator x_end, Iterator y_begin, Iterator w_begin, bool compute_goodness=true)
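This function computes the best-fit linear regression coefficients $(\text{intercept}, \text{slope})$ of the model $\hat{Y} = \text{intercept} + \text{slope} \cdot X$ for the weighted dataset $(x, y)$ with weights $w$.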
Definition: LinearRegression.h:275
LinearRegression(const LinearRegression &arg)
Not implemented.
double rsd_
the relative standard deviation
Definition: LinearRegression.h:198
double getMeanRes() const
Non-mutable access to the residual mean.
double stand_error_slope_
The standard error of the slope.
Definition: LinearRegression.h:194
double getStandDevRes() const
Non-mutable access to the standard deviation of the residuals.
A more convenient string class.
Definition: String.h:61
std::vector< Wm5::Vector2d > iteratorRange2Wm5Vectors(Iterator x_begin, Iterator x_end, Iterator y_begin)
Copies the distance(x_begin,x_end) elements starting at x_begin and y_begin into the Wm5::Vector.
Definition: RegressionUtils.h:44
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47