OpenMS
RANSAC.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: George Rosenberger $
6 // $Authors: George Rosenberger, Hannes Roest, Chris Bielow $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
11 #include <OpenMS/config.h>
12 
14 
19 
20 #include <limits> // std::numeric_limits
21 #include <vector> // std::vector
22 #include <sstream> // stringstream
23 
24 namespace OpenMS
25 {
26 
27  namespace Math
28  {
32  struct RANSACParam
33  {
36  : n(0), k(0), t(0), d(0), relative_d(false)
37  {
38  }
40  RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d = false)
41  : n(p_n), k(p_k), t(p_t), d(p_d), relative_d(p_relative_d)
42  {
43  if (relative_d)
44  {
45  if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));
46  }
47  }
48 
49  [[nodiscard]] std::string toString() const
50  {
51  std::stringstream r;
52  r << "RANSAC param:\n n: " << n << "\n k: " << k << " iterations\n t: " << t << " threshold\n d: " << d << " inliers\n\n";
53  return r.str();
54  }
55 
56  size_t n;
57  size_t k;
58  double t;
59  size_t d;
60  bool relative_d;
61  };
62 
68  template<typename TModelType = RansacModelLinear>
69  class RANSAC
70  {
71 public:
72 
73  explicit RANSAC(uint64_t seed = time(nullptr)):
74  shuffler_(seed)
75  {}
76 
77  ~RANSAC() = default;
78 
79 
81  void setSeed(uint64_t seed)
82  {
83  shuffler_.seed(seed);
84  }
85 
87  std::vector<std::pair<double, double> > ransac(
88  const std::vector<std::pair<double, double> >& pairs,
89  const RANSACParam& p)
90  {
91  return ransac(pairs, p.n, p.k, p.t, p.d, p.relative_d);
92  }
93 
123  std::vector<std::pair<double, double> > ransac(
124  const std::vector<std::pair<double, double> >& pairs,
125  size_t n,
126  size_t k,
127  double t,
128  size_t d,
129  bool relative_d = false)
130  {
131  // translate relative percentages into actual numbers
132  if (relative_d)
133  {
134  if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));
135  d = pairs.size() * d / 100;
136  }
137 
138  // implementation of the RANSAC algorithm according to http://wiki.scipy.org/Cookbook/RANSAC.
139 
140  if (pairs.size() <= n)
141  {
142  throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
143  String("RANSAC: Number of total data points (") + String(pairs.size()) + ") must be larger than number of initial points (n=" + String(n) + ").");
144  }
145 
146  TModelType model;
147 
148  std::vector< std::pair<double, double> > alsoinliers, betterdata, bestdata;
149  std::vector<std::pair<double, double> > pairs_shuffled = pairs; // mutable data. will be shuffled in every iteration
150  double besterror = std::numeric_limits<double>::max();
151  typename TModelType::ModelParameters coeff;
152  #ifdef DEBUG_RANSAC
153  std::pair<double, double > bestcoeff;
154  double betterrsq = 0;
155  double bestrsq = 0;
156  #endif
157 
158  for (size_t ransac_int=0; ransac_int<k; ransac_int++)
159  {
160  // check if the model already includes all points
161  if (bestdata.size() == pairs.size()) break;
162 
163  // use portable RNG in test mode
164  shuffler_.portable_random_shuffle(pairs_shuffled.begin(), pairs_shuffled.end());
165 
166  // test 'maybeinliers'
167  try
168  { // fitting might throw UnableToFit if points are 'unfortunate'
169  coeff = model.rm_fit(pairs_shuffled.begin(), pairs_shuffled.begin()+n);
170  }
171  catch (...)
172  {
173  continue;
174  }
175  // apply model to remaining data; pick inliers
176  alsoinliers = model.rm_inliers(pairs_shuffled.begin()+n, pairs_shuffled.end(), coeff, t);
177  // ... and add data
178  if (alsoinliers.size() > d
179  || alsoinliers.size() >= (pairs_shuffled.size()-n)) // maximum number of inliers we can possibly have (i.e. remaining data)
180  {
181  betterdata.clear();
182  std::copy( pairs_shuffled.begin(), pairs_shuffled.begin()+n, back_inserter(betterdata) );
183  betterdata.insert( betterdata.end(), alsoinliers.begin(), alsoinliers.end() );
184  typename TModelType::ModelParameters bettercoeff = model.rm_fit(betterdata.begin(), betterdata.end());
185  double bettererror = model.rm_rss(betterdata.begin(), betterdata.end(), bettercoeff);
186  #ifdef DEBUG_RANSAC
187  betterrsq = model.rm_rsq(betterdata);
188  #endif
189 
190  // If the current model explains more points, we assume its better (these points pass the error threshold 't', so they should be ok);
191  // If the number of points is equal, we trust rss.
192  // E.g. imagine gaining a zillion more points (which pass the threshold!) -- then rss will automatically be worse, no matter how good
193  // these points fit, since its a simple absolute SUM() of residual error over all points.
194  if (betterdata.size() > bestdata.size() || (betterdata.size() == bestdata.size() && (bettererror < besterror)))
195  {
196  besterror = bettererror;
197  bestdata = betterdata;
198  #ifdef DEBUG_RANSAC
199  bestcoeff = bettercoeff;
200  bestrsq = betterrsq;
201  std::cout << "RANSAC " << ransac_int << ": Points: " << betterdata.size() << " RSQ: " << bestrsq << " Error: " << besterror << " c0: " << bestcoeff.first << " c1: " << bestcoeff.second << std::endl;
202  #endif
203  }
204  }
205  }
206 
207  #ifdef DEBUG_RANSAC
208  std::cout << "=======STARTPOINTS=======" << std::endl;
209  for (std::vector<std::pair<double, double> >::iterator it = bestdata.begin(); it != bestdata.end(); ++it)
210  {
211  std::cout << it->first << "\t" << it->second << std::endl;
212  }
213  std::cout << "=======ENDPOINTS=======" << std::endl;
214  #endif
215 
216  return(bestdata);
217  } // ransac()
218 
219  private:
221  }; // class
222 
223  } // namespace Math
224 
225 
226 } // namespace OpenMS
Precondition failed exception.
Definition: Exception.h:128
This class provides a generic implementation of the RANSAC outlier detection algorithm....
Definition: RANSAC.h:70
void setSeed(uint64_t seed)
set seed for random shuffle
Definition: RANSAC.h:81
Math::RandomShuffler shuffler_
Definition: RANSAC.h:220
std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, size_t n, size_t k, double t, size_t d, bool relative_d=false)
This function provides a generic implementation of the RANSAC outlier detection algorithm....
Definition: RANSAC.h:123
std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, const RANSACParam &p)
alias for ransac() with full params
Definition: RANSAC.h:87
RANSAC(uint64_t seed=time(nullptr))
Definition: RANSAC.h:73
Definition: MathFunctions.h:411
void seed(uint64_t val)
Definition: MathFunctions.h:435
void portable_random_shuffle(RandomAccessIterator first, RandomAccessIterator last)
Definition: MathFunctions.h:426
A more convenient string class.
Definition: String.h:34
const double k
Definition: Constants.h:132
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
A simple struct to carry all the parameters required for a RANSAC run.
Definition: RANSAC.h:33
std::string toString() const
Definition: RANSAC.h:49
size_t d
The number of close data values (according to 't') required to assert that a model fits well to data.
Definition: RANSAC.h:59
size_t n
data points: The minimum number of data points required to fit the model
Definition: RANSAC.h:56
double t
Threshold value: for determining when a data point fits a model. Corresponds to the maximal squared d...
Definition: RANSAC.h:58
size_t k
iterations: The maximum number of iterations allowed in the algorithm
Definition: RANSAC.h:57
RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d=false)
Full constructor.
Definition: RANSAC.h:40
bool relative_d
Should 'd' be interpreted as percentages (0-100) of data input size.
Definition: RANSAC.h:60
RANSACParam()
Default constructor.
Definition: RANSAC.h:35