OpenMS
Loading...
Searching...
No Matches
RANSAC.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: George Rosenberger $
6// $Authors: George Rosenberger, Hannes Roest, Chris Bielow $
7// --------------------------------------------------------------------------
8
9#pragma once
10
11#include <OpenMS/config.h>
12
14
19
20#include <limits> // std::numeric_limits
21#include <vector> // std::vector
22#include <sstream> // stringstream
23
24namespace OpenMS
25{
26
27 namespace Math
28 {
33 {
36 : n(0), k(0), t(0), d(0), relative_d(false)
37 {
38 }
40 RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d = false)
41 : n(p_n), k(p_k), t(p_t), d(p_d), relative_d(p_relative_d)
42 {
43 if (relative_d)
44 {
45 if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));
46 }
47 }
48
49 [[nodiscard]] std::string toString() const
50 {
51 std::stringstream r;
52 r << "RANSAC param:\n n: " << n << "\n k: " << k << " iterations\n t: " << t << " threshold\n d: " << d << " inliers\n\n";
53 return r.str();
54 }
55
56 size_t n;
57 size_t k;
58 double t;
59 size_t d;
61 };
62
68 template<typename TModelType = RansacModelLinear>
69 class RANSAC
70 {
71public:
72
73 explicit RANSAC(uint64_t seed = time(nullptr)):
74 shuffler_(seed)
75 {}
76
77 ~RANSAC() = default;
78
79
81 void setSeed(uint64_t seed)
82 {
83 shuffler_.seed(seed);
84 }
85
87 std::vector<std::pair<double, double> > ransac(
88 const std::vector<std::pair<double, double> >& pairs,
89 const RANSACParam& p)
90 {
91 return ransac(pairs, p.n, p.k, p.t, p.d, p.relative_d);
92 }
93
123 std::vector<std::pair<double, double> > ransac(
124 const std::vector<std::pair<double, double> >& pairs,
125 size_t n,
126 size_t k,
127 double t,
128 size_t d,
129 bool relative_d = false)
130 {
131 // translate relative percentages into actual numbers
132 if (relative_d)
133 {
134 if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));
135 d = pairs.size() * d / 100;
136 }
137
138 // implementation of the RANSAC algorithm according to http://wiki.scipy.org/Cookbook/RANSAC.
139
140 if (pairs.size() <= n)
141 {
142 throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
143 String("RANSAC: Number of total data points (") + String(pairs.size()) + ") must be larger than number of initial points (n=" + String(n) + ").");
144 }
145
146 TModelType model;
147
148 std::vector< std::pair<double, double> > alsoinliers, betterdata, bestdata;
149 std::vector<std::pair<double, double> > pairs_shuffled = pairs; // mutable data. will be shuffled in every iteration
150 double besterror = std::numeric_limits<double>::max();
151 typename TModelType::ModelParameters coeff;
152 #ifdef DEBUG_RANSAC
153 std::pair<double, double > bestcoeff;
154 double betterrsq = 0;
155 double bestrsq = 0;
156 #endif
157
158 for (size_t ransac_int=0; ransac_int<k; ransac_int++)
159 {
160 // check if the model already includes all points
161 if (bestdata.size() == pairs.size()) break;
162
163 // use portable RNG in test mode
164 shuffler_.portable_random_shuffle(pairs_shuffled.begin(), pairs_shuffled.end());
165
166 // test 'maybeinliers'
167 try
168 { // fitting might throw UnableToFit if points are 'unfortunate'
169 coeff = model.rm_fit(pairs_shuffled.begin(), pairs_shuffled.begin()+n);
170 }
171 catch (...)
172 {
173 continue;
174 }
175 // apply model to remaining data; pick inliers
176 alsoinliers = model.rm_inliers(pairs_shuffled.begin()+n, pairs_shuffled.end(), coeff, t);
177 // ... and add data
178 if (alsoinliers.size() > d
179 || alsoinliers.size() >= (pairs_shuffled.size()-n)) // maximum number of inliers we can possibly have (i.e. remaining data)
180 {
181 betterdata.clear();
182 std::copy( pairs_shuffled.begin(), pairs_shuffled.begin()+n, back_inserter(betterdata) );
183 betterdata.insert( betterdata.end(), alsoinliers.begin(), alsoinliers.end() );
184 typename TModelType::ModelParameters bettercoeff = model.rm_fit(betterdata.begin(), betterdata.end());
185 double bettererror = model.rm_rss(betterdata.begin(), betterdata.end(), bettercoeff);
186 #ifdef DEBUG_RANSAC
187 betterrsq = model.rm_rsq(betterdata);
188 #endif
189
190 // If the current model explains more points, we assume its better (these points pass the error threshold 't', so they should be ok);
191 // If the number of points is equal, we trust rss.
192 // E.g. imagine gaining a zillion more points (which pass the threshold!) -- then rss will automatically be worse, no matter how good
193 // these points fit, since its a simple absolute SUM() of residual error over all points.
194 if (betterdata.size() > bestdata.size() || (betterdata.size() == bestdata.size() && (bettererror < besterror)))
195 {
196 besterror = bettererror;
197 bestdata = betterdata;
198 #ifdef DEBUG_RANSAC
199 bestcoeff = bettercoeff;
200 bestrsq = betterrsq;
201 std::cout << "RANSAC " << ransac_int << ": Points: " << betterdata.size() << " RSQ: " << bestrsq << " Error: " << besterror << " c0: " << bestcoeff.first << " c1: " << bestcoeff.second << std::endl;
202 #endif
203 }
204 }
205 }
206
207 #ifdef DEBUG_RANSAC
208 std::cout << "=======STARTPOINTS=======" << std::endl;
209 for (std::vector<std::pair<double, double> >::iterator it = bestdata.begin(); it != bestdata.end(); ++it)
210 {
211 std::cout << it->first << "\t" << it->second << std::endl;
212 }
213 std::cout << "=======ENDPOINTS=======" << std::endl;
214 #endif
215
216 return(bestdata);
217 } // ransac()
218
219 private:
221 }; // class
222
223 } // namespace Math
224
225
226} // namespace OpenMS
Precondition failed exception.
Definition Exception.h:128
This class provides a generic implementation of the RANSAC outlier detection algorithm....
Definition RANSAC.h:70
void setSeed(uint64_t seed)
set seed for random shuffle
Definition RANSAC.h:81
Math::RandomShuffler shuffler_
Definition RANSAC.h:220
std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, const RANSACParam &p)
alias for ransac() with full params
Definition RANSAC.h:87
RANSAC(uint64_t seed=time(nullptr))
Definition RANSAC.h:73
std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, size_t n, size_t k, double t, size_t d, bool relative_d=false)
This function provides a generic implementation of the RANSAC outlier detection algorithm....
Definition RANSAC.h:123
Definition MathFunctions.h:477
void seed(uint64_t val)
Definition MathFunctions.h:501
void portable_random_shuffle(RandomAccessIterator first, RandomAccessIterator last)
Definition MathFunctions.h:492
A more convenient string class.
Definition String.h:34
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
A simple struct to carry all the parameters required for a RANSAC run.
Definition RANSAC.h:33
std::string toString() const
Definition RANSAC.h:49
size_t d
The number of close data values (according to 't') required to assert that a model fits well to data.
Definition RANSAC.h:59
size_t n
data points: The minimum number of data points required to fit the model
Definition RANSAC.h:56
double t
Threshold value: for determining when a data point fits a model. Corresponds to the maximal squared d...
Definition RANSAC.h:58
size_t k
iterations: The maximum number of iterations allowed in the algorithm
Definition RANSAC.h:57
RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d=false)
Full constructor.
Definition RANSAC.h:40
bool relative_d
Should 'd' be interpreted as percentages (0-100) of data input size.
Definition RANSAC.h:60
RANSACParam()
Default constructor.
Definition RANSAC.h:35