OpenMS
RANSAC.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: George Rosenberger $
6 // $Authors: George Rosenberger, Hannes Roest, Chris Bielow $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
11 #include <OpenMS/config.h>
12 
14 
19 
20 #include <limits> // std::numeric_limits
21 #include <vector> // std::vector
22 #include <sstream> // stringstream
23 
24 namespace OpenMS
25 {
26 
27  namespace Math
28  {
32  struct RANSACParam
33  {
36  : n(0), k(0), t(0), d(0), relative_d(false)
37  {
38  }
40  RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d = false)
41  : n(p_n), k(p_k), t(p_t), d(p_d), relative_d(p_relative_d)
42  {
43  if (relative_d)
44  {
45  if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));
46  }
47  }
48 
49  [[nodiscard]] std::string toString() const
50  {
51  std::stringstream r;
52  r << "RANSAC param:\n n: " << n << "\n k: " << k << " iterations\n t: " << t << " threshold\n d: " << d << " inliers\n\n";
53  return r.str();
54  }
55 
56  size_t n;
57  size_t k;
58  double t;
59  size_t d;
60  bool relative_d;
61  };
62 
68  template<typename TModelType = RansacModelLinear>
69  class RANSAC
70  {
71 public:
72 
73  explicit RANSAC(uint64_t seed = time(nullptr)):
74  shuffler_(seed)
75  {}
76 
77  ~RANSAC() = default;
78 
79 
81  void setSeed(uint64_t seed)
82  {
83  shuffler_.seed(seed);
84  }
85 
87  std::vector<std::pair<double, double> > ransac(
88  const std::vector<std::pair<double, double> >& pairs,
89  const RANSACParam& p)
90  {
91  return ransac(pairs, p.n, p.k, p.t, p.d, p.relative_d);
92  }
93 
124  std::vector<std::pair<double, double> > ransac(
125  const std::vector<std::pair<double, double> >& pairs,
126  size_t n,
127  size_t k,
128  double t,
129  size_t d,
130  bool relative_d = false)
131  {
132  // translate relative percentages into actual numbers
133  if (relative_d)
134  {
135  if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));
136  d = pairs.size() * d / 100;
137  }
138 
139  // implementation of the RANSAC algorithm according to http://wiki.scipy.org/Cookbook/RANSAC.
140 
141  if (pairs.size() <= n)
142  {
143  throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
144  String("RANSAC: Number of total data points (") + String(pairs.size()) + ") must be larger than number of initial points (n=" + String(n) + ").");
145  }
146 
147  TModelType model;
148 
149  std::vector< std::pair<double, double> > alsoinliers, betterdata, bestdata;
150  std::vector<std::pair<double, double> > pairs_shuffled = pairs; // mutable data. will be shuffled in every iteration
151  double besterror = std::numeric_limits<double>::max();
152  typename TModelType::ModelParameters coeff;
153  #ifdef DEBUG_RANSAC
154  std::pair<double, double > bestcoeff;
155  double betterrsq = 0;
156  double bestrsq = 0;
157  #endif
158 
159  for (size_t ransac_int=0; ransac_int<k; ransac_int++)
160  {
161  // check if the model already includes all points
162  if (bestdata.size() == pairs.size()) break;
163 
164  // use portable RNG in test mode
165  shuffler_.portable_random_shuffle(pairs_shuffled.begin(), pairs_shuffled.end());
166 
167  // test 'maybeinliers'
168  try
169  { // fitting might throw UnableToFit if points are 'unfortunate'
170  coeff = model.rm_fit(pairs_shuffled.begin(), pairs_shuffled.begin()+n);
171  }
172  catch (...)
173  {
174  continue;
175  }
176  // apply model to remaining data; pick inliers
177  alsoinliers = model.rm_inliers(pairs_shuffled.begin()+n, pairs_shuffled.end(), coeff, t);
178  // ... and add data
179  if (alsoinliers.size() > d
180  || alsoinliers.size() >= (pairs_shuffled.size()-n)) // maximum number of inliers we can possibly have (i.e. remaining data)
181  {
182  betterdata.clear();
183  std::copy( pairs_shuffled.begin(), pairs_shuffled.begin()+n, back_inserter(betterdata) );
184  betterdata.insert( betterdata.end(), alsoinliers.begin(), alsoinliers.end() );
185  typename TModelType::ModelParameters bettercoeff = model.rm_fit(betterdata.begin(), betterdata.end());
186  double bettererror = model.rm_rss(betterdata.begin(), betterdata.end(), bettercoeff);
187  #ifdef DEBUG_RANSAC
188  betterrsq = model.rm_rsq(betterdata);
189  #endif
190 
191  // If the current model explains more points, we assume its better (these points pass the error threshold 't', so they should be ok);
192  // If the number of points is equal, we trust rss.
193  // E.g. imagine gaining a zillion more points (which pass the threshold!) -- then rss will automatically be worse, no matter how good
194  // these points fit, since its a simple absolute SUM() of residual error over all points.
195  if (betterdata.size() > bestdata.size() || (betterdata.size() == bestdata.size() && (bettererror < besterror)))
196  {
197  besterror = bettererror;
198  bestdata = betterdata;
199  #ifdef DEBUG_RANSAC
200  bestcoeff = bettercoeff;
201  bestrsq = betterrsq;
202  std::cout << "RANSAC " << ransac_int << ": Points: " << betterdata.size() << " RSQ: " << bestrsq << " Error: " << besterror << " c0: " << bestcoeff.first << " c1: " << bestcoeff.second << std::endl;
203  #endif
204  }
205  }
206  }
207 
208  #ifdef DEBUG_RANSAC
209  std::cout << "=======STARTPOINTS=======" << std::endl;
210  for (std::vector<std::pair<double, double> >::iterator it = bestdata.begin(); it != bestdata.end(); ++it)
211  {
212  std::cout << it->first << "\t" << it->second << std::endl;
213  }
214  std::cout << "=======ENDPOINTS=======" << std::endl;
215  #endif
216 
217  return(bestdata);
218  } // ransac()
219 
220  private:
222  }; // class
223 
224  } // namespace Math
225 
226 
227 } // namespace OpenMS
Precondition failed exception.
Definition: Exception.h:133
This class provides a generic implementation of the RANSAC outlier detection algorithm....
Definition: RANSAC.h:70
void setSeed(uint64_t seed)
set seed for random shuffle
Definition: RANSAC.h:81
Math::RandomShuffler shuffler_
Definition: RANSAC.h:221
std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, size_t n, size_t k, double t, size_t d, bool relative_d=false)
This function provides a generic implementation of the RANSAC outlier detection algorithm....
Definition: RANSAC.h:124
std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, const RANSACParam &p)
alias for ransac() with full params
Definition: RANSAC.h:87
RANSAC(uint64_t seed=time(nullptr))
Definition: RANSAC.h:73
Definition: MathFunctions.h:382
void seed(uint64_t val)
Definition: MathFunctions.h:406
void portable_random_shuffle(RandomAccessIterator first, RandomAccessIterator last)
Definition: MathFunctions.h:397
A more convenient string class.
Definition: String.h:34
const double k
Definition: Constants.h:132
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22
A simple struct to carry all the parameters required for a RANSAC run.
Definition: RANSAC.h:33
std::string toString() const
Definition: RANSAC.h:49
size_t d
The number of close data values (according to 't') required to assert that a model fits well to data.
Definition: RANSAC.h:59
size_t n
data points: The minimum number of data points required to fit the model
Definition: RANSAC.h:56
double t
Threshold value: for determining when a data point fits a model. Corresponds to the maximal squared d...
Definition: RANSAC.h:58
size_t k
iterations: The maximum number of iterations allowed in the algorithm
Definition: RANSAC.h:57
RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d=false)
Full constructor.
Definition: RANSAC.h:40
bool relative_d
Should 'd' be interpreted as percentages (0-100) of data input size.
Definition: RANSAC.h:60
RANSACParam()
Default constructor.
Definition: RANSAC.h:35