nightly/html/RANSAC_8h_source.html

 // Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin

 // SPDX-License-Identifier: BSD-3-Clause

 //

 // --------------------------------------------------------------------------

 // $Maintainer: George Rosenberger $

 // $Authors: George Rosenberger, Hannes Roest, Chris Bielow $

 // --------------------------------------------------------------------------


 #pragma once


 #include <OpenMS/config.h>

 #include <OpenMS/ML/RANSAC/RANSACModel.h>

 #include <OpenMS/CONCEPT/Exception.h>
 #include <OpenMS/DATASTRUCTURES/String.h>
 #include <OpenMS/ML/RANSAC/RANSACModelLinear.h>
 #include <OpenMS/MATH/MathFunctions.h>

 #include <cstdint>      // uint64_t
 #include <ctime>        // std::time
 #include <limits>       // std::numeric_limits
 #include <sstream>      // stringstream
 #include <string>       // std::string
 #include <utility>      // std::pair
 #include <vector>       // std::vector


 namespace OpenMS

 {


   namespace Math

   {

     struct RANSACParam

     {

       RANSACParam()

         : n(0), k(0), t(0), d(0), relative_d(false)

         {

         }

       RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d = false)

         : n(p_n), k(p_k), t(p_t), d(p_d), relative_d(p_relative_d)

       {

         if (relative_d)

         {

           if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));

         }

       }


       [[nodiscard]] std::string toString() const

       {

         std::stringstream r;

         r << "RANSAC param:\n  n: " << n << "\n  k: " << k << " iterations\n  t: " << t << " threshold\n  d: " << d << " inliers\n\n";

         return r.str();

       }


       size_t n;

       size_t k;

       double t;

       size_t d;

       bool relative_d;

     };


     template<typename TModelType = RansacModelLinear>

     class RANSAC

     {

 public:


       explicit RANSAC(uint64_t seed = time(nullptr)):

       shuffler_(seed)

       {}


       ~RANSAC() = default;


       void setSeed(uint64_t seed)

       {

         shuffler_.seed(seed);

       }


       std::vector<std::pair<double, double> > ransac(

         const std::vector<std::pair<double, double> >& pairs,

         const RANSACParam& p)

       {

         return ransac(pairs, p.n, p.k, p.t, p.d, p.relative_d);

       }


       std::vector<std::pair<double, double> > ransac(

           const std::vector<std::pair<double, double> >& pairs,

           size_t n,

           size_t k,

           double t,

           size_t d,

           bool relative_d = false)

       {

         // translate relative percentages into actual numbers

         if (relative_d)

         {

           if (d >= 100) throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("RANSAC: Relative 'd' >= 100% given. Use a lower value; the more outliers you expect, the lower it should be."));

           d = pairs.size() * d / 100;

         }


         // implementation of the RANSAC algorithm according to http://wiki.scipy.org/Cookbook/RANSAC.


         if (pairs.size() <= n)

         {

           throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,

                                         String("RANSAC: Number of total data points (") + String(pairs.size()) + ") must be larger than number of initial points (n=" + String(n) + ").");

         }


         TModelType model;


         std::vector< std::pair<double, double> > alsoinliers, betterdata, bestdata;

         std::vector<std::pair<double, double> > pairs_shuffled = pairs;  // mutable data. will be shuffled in every iteration

         double besterror = std::numeric_limits<double>::max();

         typename TModelType::ModelParameters coeff;

     #ifdef DEBUG_RANSAC

         std::pair<double, double > bestcoeff;

         double betterrsq = 0;

         double bestrsq = 0;

     #endif


         for (size_t ransac_int=0; ransac_int<k; ransac_int++)

         {

           // check if the model already includes all points

           if (bestdata.size() == pairs.size()) break;


           // use portable RNG in test mode

           shuffler_.portable_random_shuffle(pairs_shuffled.begin(), pairs_shuffled.end());


           // test 'maybeinliers'

           try

           { // fitting might throw UnableToFit if points are 'unfortunate'

             coeff = model.rm_fit(pairs_shuffled.begin(), pairs_shuffled.begin()+n);

           }

           catch (...)

           {

             continue;

           }

           // apply model to remaining data; pick inliers

           alsoinliers = model.rm_inliers(pairs_shuffled.begin()+n, pairs_shuffled.end(), coeff, t);

           // ... and add data

           if (alsoinliers.size() > d

               || alsoinliers.size() >= (pairs_shuffled.size()-n)) // maximum number of inliers we can possibly have (i.e. remaining data)

           {

             betterdata.clear();

             std::copy( pairs_shuffled.begin(), pairs_shuffled.begin()+n, back_inserter(betterdata) );

             betterdata.insert( betterdata.end(), alsoinliers.begin(), alsoinliers.end() );

             typename TModelType::ModelParameters bettercoeff = model.rm_fit(betterdata.begin(), betterdata.end());

             double bettererror = model.rm_rss(betterdata.begin(), betterdata.end(), bettercoeff);

     #ifdef DEBUG_RANSAC

             betterrsq = model.rm_rsq(betterdata);

     #endif


             // If the current model explains more points, we assume its better (these points pass the error threshold 't', so they should be ok);

             // If the number of points is equal, we trust rss.

             // E.g. imagine gaining a zillion more points (which pass the threshold!) -- then rss will automatically be worse, no matter how good

             //      these points fit, since its a simple absolute SUM() of residual error over all points.

             if (betterdata.size() > bestdata.size() || (betterdata.size() == bestdata.size() && (bettererror < besterror)))

             {

               besterror = bettererror;

               bestdata = betterdata;

     #ifdef DEBUG_RANSAC

               bestcoeff = bettercoeff;

               bestrsq = betterrsq;

               std::cout << "RANSAC " << ransac_int << ": Points: " << betterdata.size() << " RSQ: " << bestrsq << " Error: " << besterror << " c0: " << bestcoeff.first << " c1: " << bestcoeff.second << std::endl;

     #endif

             }

           }

         }


     #ifdef DEBUG_RANSAC

         std::cout << "=======STARTPOINTS=======" << std::endl;

         for (std::vector<std::pair<double, double> >::iterator it = bestdata.begin(); it != bestdata.end(); ++it)

         {

           std::cout << it->first << "\t" << it->second << std::endl;

         }

         std::cout << "=======ENDPOINTS=======" << std::endl;

     #endif


         return(bestdata);

       } // ransac()


     private:

       Math::RandomShuffler shuffler_{};

     }; // class


   } // namespace Math


 } // namespace OpenMS

Exception.h

MathFunctions.h

RANSACModelLinear.h

RANSACModel.h

String.h

OpenMS::Exception::Precondition
Precondition failed exception.
Definition: Exception.h:128

OpenMS::Math::RANSAC
This class provides a generic implementation of the RANSAC outlier detection algorithm....
Definition: RANSAC.h:70

OpenMS::Math::RANSAC::setSeed
void setSeed(uint64_t seed)
set seed for random shuffle
Definition: RANSAC.h:81

OpenMS::Math::RANSAC::shuffler_
Math::RandomShuffler shuffler_
Definition: RANSAC.h:220

OpenMS::Math::RANSAC::~RANSAC
~RANSAC()=default

OpenMS::Math::RANSAC::ransac
std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, size_t n, size_t k, double t, size_t d, bool relative_d=false)
This function provides a generic implementation of the RANSAC outlier detection algorithm....
Definition: RANSAC.h:123

OpenMS::Math::RANSAC::ransac
std::vector< std::pair< double, double > > ransac(const std::vector< std::pair< double, double > > &pairs, const RANSACParam &p)
alias for ransac() with full params
Definition: RANSAC.h:87

OpenMS::Math::RANSAC::RANSAC
RANSAC(uint64_t seed=time(nullptr))
Definition: RANSAC.h:73

OpenMS::Math::RandomShuffler
Definition: MathFunctions.h:411

OpenMS::Math::RandomShuffler::seed
void seed(uint64_t val)
Definition: MathFunctions.h:435

OpenMS::Math::RandomShuffler::portable_random_shuffle
void portable_random_shuffle(RandomAccessIterator first, RandomAccessIterator last)
Definition: MathFunctions.h:426

OpenMS::String
A more convenient string class.
Definition: String.h:34

OpenMS::Constants::k
const double k
Definition: Constants.h:132

OpenMS
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19

OpenMS::Math::RANSACParam
A simple struct to carry all the parameters required for a RANSAC run.
Definition: RANSAC.h:33

OpenMS::Math::RANSACParam::toString
std::string toString() const
Definition: RANSAC.h:49

OpenMS::Math::RANSACParam::d
size_t d
The number of close data values (according to 't') required to assert that a model fits well to data.
Definition: RANSAC.h:59

OpenMS::Math::RANSACParam::n
size_t n
data points: The minimum number of data points required to fit the model
Definition: RANSAC.h:56

OpenMS::Math::RANSACParam::t
double t
Threshold value: for determining when a data point fits a model. Corresponds to the maximal squared d...
Definition: RANSAC.h:58

OpenMS::Math::RANSACParam::k
size_t k
iterations: The maximum number of iterations allowed in the algorithm
Definition: RANSAC.h:57

OpenMS::Math::RANSACParam::RANSACParam
RANSACParam(size_t p_n, size_t p_k, double p_t, size_t p_d, bool p_relative_d=false)
Full constructor.
Definition: RANSAC.h:40

OpenMS::Math::RANSACParam::relative_d
bool relative_d
Should 'd' be interpreted as percentages (0-100) of data input size.
Definition: RANSAC.h:60

OpenMS::Math::RANSACParam::RANSACParam
RANSACParam()
Default constructor.
Definition: RANSAC.h:35