latest/html/IntegerMassDecomposer_8h_source.html

 // Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin

 // SPDX-License-Identifier: BSD-3-Clause

 //

 // --------------------------------------------------------------------------

 // $Maintainer: Timo Sachsenberg $

 // $Authors: Anton Pervukhin <Anton.Pervukhin@CeBiTec.Uni-Bielefeld.DE> $

 // --------------------------------------------------------------------------

 //


 #pragma once


 #include <vector>

 #include <utility>


 #include <OpenMS/CHEMISTRY/MASSDECOMPOSITION/IMS/Weights.h>

 #include <OpenMS/CHEMISTRY/MASSDECOMPOSITION/IMS/MassDecomposer.h>


 #include <OpenMS/MATH/MathFunctions.h>


 namespace OpenMS

 {


   namespace ims

   {


     /**
       @brief Implementation of the MassDecomposer interface for integer masses.

       The decomposer precomputes, for a fixed alphabet of integer weights, an
       extended residue table plus a few caches (see fillExtendedResidueTable_()).
       The queries exist(), getDecomposition(), getAllDecompositions() and
       getNumberOfDecompositions() are answered from these precomputed tables.

       @tparam ValueType Type of the values (masses) to be decomposed.
       @tparam DecompositionValueType Type of a single entry of a decomposition.
     */
     template <typename ValueType = long unsigned int,
               typename DecompositionValueType = unsigned int>
     class IntegerMassDecomposer :
       public MassDecomposer<ValueType, DecompositionValueType>
     {
     public:
       /// Type of value to be decomposed.
       using value_type = typename MassDecomposer<ValueType, DecompositionValueType>::value_type;

       /// Type of decomposition value.
       using decomposition_value_type = typename MassDecomposer<ValueType, DecompositionValueType>::decomposition_value_type;

       /// Type of decomposition.
       using decomposition_type = typename MassDecomposer<ValueType, DecompositionValueType>::decomposition_type;

       /// Type of container for many decompositions.
       using decompositions_type = typename MassDecomposer<ValueType, DecompositionValueType>::decompositions_type;

       /// Type of decomposition's size.
       using size_type = typename decomposition_type::size_type;

       /// Constructs the decomposer and precomputes all tables for @p alphabet.
       explicit IntegerMassDecomposer(const Weights & alphabet);

       /// Returns true if @p mass can be decomposed over the alphabet.
       bool exist(value_type mass) override;

       /// Returns one decomposition of @p mass (empty if exist() reports none).
       decomposition_type getDecomposition(value_type mass) override;

       /// Returns all decompositions of @p mass.
       decompositions_type getAllDecompositions(value_type mass) override;

       /// Returns the number of decompositions of @p mass.
       decomposition_value_type getNumberOfDecompositions(value_type mass) override;

     private:
       /// Per residue: (index of an alphabet weight, how many times it is used).
       using witness_vector_type = std::vector<std::pair<size_type, decomposition_value_type> >;

       /// One row of the residue table, indexed by residue modulo weight 0.
       using residues_table_row_type = std::vector<value_type>;

       /// Residue table: one row per alphabet weight.
       using residues_table_type = std::vector<residues_table_row_type>;

       /// Copy of the alphabet the tables were built for.
       Weights alphabet_;

       /// Extended residue table; stays empty for alphabets with fewer than two weights.
       residues_table_type ertable_;

       /// Entry i: least common multiple of weight i and weight 0 (entry 0 is unused).
       residues_table_row_type lcms_;

       /// Entry i: weight 0 divided by gcd(weight 0, weight i) (entry 0 is unused).
       residues_table_row_type mass_in_lcms_;

       /// Sentinel stored in the residue table for "no decomposition known".
       value_type infty_;

       /// Witnesses used by getDecomposition() to reconstruct one decomposition.
       witness_vector_type witness_vector_;

       /// Fills the residue table, the lcm caches and the witness vector.
       void fillExtendedResidueTable_(const Weights & _alphabet, residues_table_row_type & _lcms,
                                      residues_table_row_type & _mass_in_lcms, const value_type _infty,
                                      witness_vector_type & _witness_vector, residues_table_type & _ertable);

       /// Recursive worker of getAllDecompositions(); appends results to @p decompositionsStore.
       void collectDecompositionsRecursively_(value_type mass, size_type alphabetMassIndex,
                                              decomposition_type decomposition, decompositions_type & decompositionsStore);
     };


     // Stores a copy of the alphabet, sizes the per-weight caches to one entry
     // per weight, and derives the "infinity" sentinel as the product of the
     // first and the last alphabet weight before the tables are filled.
     // NOTE(review): the first and last weight are read unconditionally, so the
     // alphabet is presumably expected to be non-empty -- confirm with callers.
     template <typename ValueType, typename DecompositionValueType>
     IntegerMassDecomposer<ValueType, DecompositionValueType>::IntegerMassDecomposer(
       const Weights & alphabet) :
       alphabet_(alphabet),
       lcms_(alphabet.size()),
       mass_in_lcms_(alphabet.size()),
       infty_(alphabet.getWeight(0) * alphabet.getWeight(alphabet.size() - 1))
     {
       // residue table and witness vector start out empty and are populated here
       fillExtendedResidueTable_(alphabet, lcms_, mass_in_lcms_, infty_, witness_vector_, ertable_);
     }


     // Fills the extended residue table together with its companion caches.
     //
     // _alphabet      weights to build the table for; weight 0 is used as the
     //                modulus of all residue computations.
     // _lcms          output; entry i (i >= 1) receives
     //                weight(i) * weight(0) / gcd(weight(0), weight(i)).
     //                Must already hold at least _alphabet.size() entries.
     // _mass_in_lcms  output; entry i (i >= 1) receives weight(0) / gcd(weight(0), weight(i)).
     //                Must already hold at least _alphabet.size() entries.
     // _infty         value every table entry is initialised with.
     // _witnessVector output; resized to weight(0) entries, each one a pair
     //                (alphabet index, counter) written while the table is filled.
     // _ertable       output; receives one row per weight with weight(0) entries each.
     //
     // For alphabets with fewer than two weights nothing is written at all,
     // i.e. _ertable and _witnessVector keep their previous (empty) state.
     template <typename ValueType, typename DecompositionValueType>

     void IntegerMassDecomposer<ValueType, DecompositionValueType>::fillExtendedResidueTable_(

       const Weights & _alphabet, residues_table_row_type & _lcms, residues_table_row_type & _mass_in_lcms,

       const value_type _infty, witness_vector_type & _witnessVector, residues_table_type & _ertable)

     {


       // nothing to precompute for alphabets with fewer than two weights
       if (_alphabet.size() < 2)

       {

         return;

       }

       // caches the most often used mass - smallest mass

       value_type smallestMass = _alphabet.getWeight(0), secondMass = _alphabet.getWeight(1);


       // initializes table: infinity everywhere except in the first field of every column

       _ertable.reserve(_alphabet.size());

       _ertable.assign(_alphabet.size(), std::vector<value_type>(smallestMass, _infty));


       // residue 0 is reachable with mass 0 for every prefix of the alphabet
       for (size_type i = 0; i < _alphabet.size(); ++i)

       {

         _ertable[i][0] = 0;

       }


       // initializes witness vector

       _witnessVector.resize(smallestMass);


       // fills second column (the first one is already correct)

       // it_inc is the residue step between consecutive multiples of secondMass
       size_type it_inc = secondMass % smallestMass, witness = 1;

       //typename residues_table_row_type::iterator it = _ertable[1].begin() + it_inc;

       value_type mass = secondMass;

       // initializes counter to create a witness vector

       decomposition_value_type counter = 0;

       size_type it_i = it_inc;

       // walks the residues it_inc, 2*it_inc, ... (mod smallestMass) until residue 0
       // is reached again; if it_inc is 0 the body never runs
       while (it_i != 0)

       {

         _ertable[1][it_i] = mass;

         mass += secondMass;

         ++counter;

         // residue it_i is reached with 'counter' copies of weight 1
         _witnessVector[it_i] = std::make_pair(witness, counter);

         //std::cerr << "BLA: " << counter << " " << &_ertable[1][0] << " " << it - _ertable[1].begin() << " " << _ertable[1].size() << std::endl;

         it_i += it_inc;

         // manual wrap-around instead of a modulo operation
         if (it_i >= _ertable[1].size())

         {

           it_i -= _ertable[1].size();

         }

       }

       // fills cache variables for i==1

       value_type tmp_d = Math::gcd(smallestMass, secondMass);

       _lcms[1] = secondMass * smallestMass / tmp_d;

       _mass_in_lcms[1] = smallestMass / tmp_d;


       // fills remaining table. i is the column index.

       for (size_type i = 2; i < _alphabet.size(); ++i)

       {

         // caches often used i-th alphabet mass

         value_type currentMass = _alphabet.getWeight(i);


         value_type d = Math::gcd(smallestMass, currentMass);


         // fills cache for various variables.

         // note that values for i==0 are never assigned since they're unused anyway.

         _lcms[i] = currentMass * smallestMass / d;

         _mass_in_lcms[i] = smallestMass / d;


         // Nijenhuis' improvement: Is currentMass composable with smaller alphabet?

         // if so, the new weight cannot improve any entry and the previous column is copied
         if (currentMass >= _ertable[i - 1][currentMass % smallestMass])

         {

           _ertable[i] = _ertable[i - 1];

           continue;

         }


         const residues_table_row_type & prev_column = _ertable[i - 1];

         residues_table_row_type & cur_column = _ertable[i];


         if (d == 1)

         {

           // This loop is for the case that the gcd is 1. The optimization used below

           // is not applicable here.


           // p_inc is used to change residue (p) efficiently

           size_type p_inc = currentMass % smallestMass;


           // n is the value that will be written into the table

           value_type n = 0;

           // current residue (in paper variable 'r' is used)

           size_type p = 0;

           // counter for creation of witness vector

           decomposition_value_type local_counter = 0;


           // exactly smallestMass steps: one per residue visited
           for (size_type m = smallestMass; m > 0; --m)

           {

             n += currentMass;

             p += p_inc;

             ++local_counter;

             // manual wrap-around of the residue
             if (p >= smallestMass)

             {

               p -= smallestMass;

             }

             // the previous column already holds a smaller value: keep it and
             // restart counting copies of the current weight from here
             if (n > prev_column[p])

             {

               n = prev_column[p];

               local_counter = 0;

             }

             else

             {

               // the current weight is at least as good: record it as witness
               _witnessVector[p] = std::make_pair(i, local_counter);

             }

             cur_column[p] = n;

           }

         }

         else

         {

           // If we're here, the gcd is not 1. We can use the following cache-optimized

           // version of the algorithm. The trick is to put the iteration over all

           // residue classes into the _inner_ loop.

           //

           // One could see it as going through one column in blocks which are gcd entries long.

           size_type cur = currentMass % smallestMass;

           size_type prev = 0;

           // jump applied after each block of d entries (see "this does" note below)
           size_type p_inc = cur - d;

           // counters for creation of one witness vector

           std::vector<decomposition_value_type> counters(smallestMass);


           // copies first block from prev_column to cur_column

           // (index 0 is skipped; it was set to 0 during initialization)
           for (size_type j = 1; j < d; ++j)

           {

             cur_column[j] = prev_column[j];

           }


           // first loop: goes through all blocks, updating cur_column for the first time.

           for (size_type m = smallestMass / d; m > 1; m--)

           {

             // r: current residue class

             for (size_type r = 0; r < d; r++)

             {


               ++counters[cur];

               // the previous column is strictly better: take its value
               if (cur_column[prev] + currentMass > prev_column[cur])

               {

                 cur_column[cur] = prev_column[cur];

                 counters[cur] = 0;

               }

               else

               {

                 // adding one more copy of the current weight is at least as good
                 cur_column[cur] = cur_column[prev] + currentMass;

                 _witnessVector[cur] = std::make_pair(i, counters[cur]);

               }


               prev++;

               cur++;

             }


             // cur has advanced by d; the block just written becomes the source block
             prev = cur - d;


             // this does: cur = (cur + currentMass) % smallestMass - d;

             cur += p_inc;

             if (cur >= smallestMass)

             {

               cur -= smallestMass;

             }

           }


           // second loop:

           // keeps making passes over further blocks for as long as a pass
           // still lowered at least one entry of cur_column
           bool cont = true;

           while (cont)

           {

             cont = false;

             // the first entry of the block is skipped without being compared
             prev++;

             cur++;

             ++counters[cur];

             for (size_type r = 1; r < d; ++r)

             {

               if (cur_column[prev] + currentMass < cur_column[cur])

               {

                 cur_column[cur] = cur_column[prev] + currentMass;

                 cont = true;

                 _witnessVector[cur] = std::make_pair(i, counters[cur]);

               }

               else

               {

                 counters[cur] = 0;

               }

               prev++;

               cur++;

             }


             prev = cur - d;


             // same block jump with manual wrap-around as in the first loop
             cur += p_inc;

             if (cur >= smallestMass)

             {

               cur -= smallestMass;

             }

           }

         }


       }

     }


     // Returns true if 'mass' can be written as a non-negative integer
     // combination of the alphabet weights.
     //
     // For alphabets with at least two weights the answer is read from the last
     // row of the residue table: the entry for the residue of 'mass' modulo
     // weight 0 must not be the infinity sentinel and must not exceed 'mass'.
     //
     // The residue table is never filled for alphabets with fewer than two
     // weights, so that case is handled explicitly instead of calling back() on
     // an empty table: a single-weight alphabet decomposes exactly the
     // multiples of that weight, anything else is reported as not decomposable.
     template <typename ValueType, typename DecompositionValueType>
     bool IntegerMassDecomposer<ValueType, DecompositionValueType>::
     exist(value_type mass)
     {
       if (ertable_.empty())
       {
         if (alphabet_.size() != 1)
         {
           return false;
         }
         const auto single_weight = alphabet_.getWeight(0);
         // a zero weight cannot serve as modulus
         return single_weight != 0 && mass % single_weight == 0;
       }

       value_type residue = ertable_.back().at(mass % alphabet_.getWeight(0));
       return residue != infty_ && mass >= residue;
     }


     // Returns one decomposition of 'mass' over the alphabet, or an empty
     // decomposition if none exists.
     //
     // The result has one entry per alphabet weight; entry i is the number of
     // times weight i is used. It is reconstructed by following the witness
     // vector from the residue of 'mass' modulo weight 0 down to mass 0.
     //
     // The residue table and the witness vector are never filled for alphabets
     // with fewer than two weights. That case is answered directly, before any
     // table is touched: a single-weight alphabet decomposes exactly the
     // multiples of that weight.
     template <typename ValueType, typename DecompositionValueType>
     typename IntegerMassDecomposer<ValueType, DecompositionValueType>::decomposition_type
     IntegerMassDecomposer<ValueType, DecompositionValueType>::getDecomposition(value_type mass)
     {
       decomposition_type decomposition;

       if (ertable_.empty())
       {
         if (alphabet_.size() == 1)
         {
           const auto single_weight = alphabet_.getWeight(0);
           // a zero weight cannot serve as modulus or divisor
           if (single_weight != 0 && mass % single_weight == 0)
           {
             decomposition.push_back(static_cast<decomposition_value_type>(mass / single_weight));
           }
         }
         return decomposition;
       }

       if (!this->exist(mass))
       {
         return decomposition;
       }

       // one zero-initialised counter per alphabet weight
       decomposition.resize(alphabet_.size());

       // initial mass residue: in FIND-ONE algorithm in paper corresponds variable "r"
       value_type r = mass % alphabet_.getWeight(0);
       value_type m = ertable_.back().at(r);

       // everything above the table entry is covered by copies of weight 0
       decomposition.at(0) = static_cast<decomposition_value_type>
                             ((mass - m) / alphabet_.getWeight(0));

       while (m != 0)
       {
         // witness: j copies of weight i are part of the decomposition of m
         size_type i = witness_vector_.at(r).first;
         decomposition_value_type j = witness_vector_.at(r).second;
         decomposition.at(i) += j;
         // guards the unsigned subtraction below against wrapping around
         if (m < j * alphabet_.getWeight(i))
         {
           break;
         }
         m -= j * alphabet_.getWeight(i);
         r = m % alphabet_.getWeight(0);
       }
       return decomposition;
     }


     // Collects every decomposition of 'mass' over the alphabet. The recursion
     // starts at the last alphabet weight with an all-zero working
     // decomposition and appends each complete decomposition to the result.
     template <typename ValueType, typename DecompositionValueType>
     typename IntegerMassDecomposer<ValueType, DecompositionValueType>::decompositions_type
     IntegerMassDecomposer<ValueType, DecompositionValueType>::getAllDecompositions(value_type mass)
     {
       // working copy with one zero entry per alphabet weight
       decomposition_type scratch(alphabet_.size());
       decompositions_type found;

       const size_type last_index = alphabet_.size() - 1;
       collectDecompositionsRecursively_(mass, last_index, scratch, found);

       return found;
     }


     // Recursive worker of getAllDecompositions().
     //
     // mass                 remaining mass to be decomposed.
     // alphabetMassIndex    index of the weight handled at this recursion level;
     //                      lower levels handle the weights with smaller indices.
     // decomposition        working decomposition, taken by value so that every
     //                      level modifies its own copy.
     // decompositionsStore  receives every complete decomposition that is found.
     template <typename ValueType, typename DecompositionValueType>
     void IntegerMassDecomposer<ValueType, DecompositionValueType>::
     collectDecompositionsRecursively_(value_type mass, size_type alphabetMassIndex,
                                       decomposition_type decomposition, decompositions_type & decompositionsStore)
     {
       const auto weight0 = alphabet_.getWeight(0);

       // base case: only weight 0 is left, so the mass has to be a multiple of it
       if (alphabetMassIndex == 0)
       {
         const value_type copies_of_weight0 = mass / weight0;
         if (copies_of_weight0 * weight0 != mass)
         {
           return;
         }
         decomposition[0] = static_cast<decomposition_value_type>(copies_of_weight0);
         decompositionsStore.push_back(decomposition);
         return;
       }

       const auto weight = alphabet_.getWeight(alphabetMassIndex);

       // cached per-weight values (lcm with weight 0, and weight 0 divided by the gcd)
       const value_type step = lcms_[alphabetMassIndex];
       const value_type period = mass_in_lcms_[alphabetMassIndex];

       const residues_table_row_type & smaller_alphabet_row = ertable_[alphabetMassIndex - 1];

       // residue of the remaining mass modulo weight 0, maintained incrementally
       value_type residue_class = mass % weight0;
       const value_type residue_shift = weight % weight0;

       for (value_type copies = 0; copies < period; ++copies)
       {
         // here is the conversion from value_type to decomposition_value_type
         decomposition[alphabetMassIndex] = static_cast<decomposition_value_type>(copies);

         // unsigned arithmetic: stop before 'mass - used' would wrap around
         const auto used = copies * weight;
         if (mass < used)
         {
           break;
         }

         // smallest mass of this residue class that the smaller alphabet can decompose
         const value_type threshold = smaller_alphabet_row[residue_class];

         if (threshold != infty_)
         {
           value_type rest = mass - used;
           while (rest >= threshold)
           {
             // rest >= threshold together with steps of the lcm keeps rest
             // decomposable, so the recursion yields at least one result
             collectDecompositionsRecursively_(rest, alphabetMassIndex - 1, decomposition, decompositionsStore);
             decomposition[alphabetMassIndex] += period;

             // unsigned arithmetic: leave before 'rest - step' would wrap around
             if (rest < step)
             {
               break;
             }
             rest -= step;
           }
         }

         // moves on to the residue of (mass - (copies + 1) * weight) without a modulo
         if (residue_class >= residue_shift)
         {
           residue_class -= residue_shift;
         }
         else
         {
           residue_class += weight0 - residue_shift;
         }
       }
     }


     // Counts the decompositions of 'mass' by enumerating all of them via
     // getAllDecompositions() and returning the size of the result.
     template <typename ValueType, typename DecompositionValueType>
     typename IntegerMassDecomposer<ValueType, DecompositionValueType>::decomposition_value_type
     IntegerMassDecomposer<ValueType, DecompositionValueType>::getNumberOfDecompositions(value_type mass)
     {
       const decompositions_type all_decompositions = getAllDecompositions(mass);
       return static_cast<decomposition_value_type>(all_decompositions.size());
     }


   } // namespace ims

 } // namespace OpenMS


MassDecomposer.h

MathFunctions.h

Weights.h

OpenMS::ims::IntegerMassDecomposer
Implements MassDecomposer interface using algorithm and data structures described in paper "Efficient...
Definition: IntegerMassDecomposer.h:46

OpenMS::ims::IntegerMassDecomposer::lcms_
residues_table_row_type lcms_
Definition: IntegerMassDecomposer.h:137

OpenMS::ims::IntegerMassDecomposer::alphabet_
Weights alphabet_
Definition: IntegerMassDecomposer.h:124

OpenMS::ims::IntegerMassDecomposer::infty_
value_type infty_
Definition: IntegerMassDecomposer.h:149

OpenMS::ims::IntegerMassDecomposer::IntegerMassDecomposer
IntegerMassDecomposer(const Weights &alphabet)
Definition: IntegerMassDecomposer.h:178

OpenMS::ims::IntegerMassDecomposer::residues_table_type
std::vector< residues_table_row_type > residues_table_type
Definition: IntegerMassDecomposer.h:119

OpenMS::ims::IntegerMassDecomposer::witness_vector_type
std::vector< std::pair< size_type, decomposition_value_type > > witness_vector_type
Definition: IntegerMassDecomposer.h:109

OpenMS::ims::IntegerMassDecomposer::collectDecompositionsRecursively_
void collectDecompositionsRecursively_(value_type mass, size_type alphabetMassIndex, decomposition_type decomposition, decompositions_type &decompositionsStore)
Definition: IntegerMassDecomposer.h:447

OpenMS::ims::IntegerMassDecomposer::getAllDecompositions
decompositions_type getAllDecompositions(value_type mass) override
Definition: IntegerMassDecomposer.h:437

OpenMS::ims::IntegerMassDecomposer::residues_table_row_type
std::vector< value_type > residues_table_row_type
Definition: IntegerMassDecomposer.h:114

OpenMS::ims::IntegerMassDecomposer::mass_in_lcms_
residues_table_row_type mass_in_lcms_
Definition: IntegerMassDecomposer.h:144

OpenMS::ims::IntegerMassDecomposer::getDecomposition
decomposition_type getDecomposition(value_type mass) override
Definition: IntegerMassDecomposer.h:401

OpenMS::ims::IntegerMassDecomposer::decomposition_type
MassDecomposer< ValueType, DecompositionValueType >::decomposition_type decomposition_type
Type of decomposition.
Definition: IntegerMassDecomposer.h:55

OpenMS::ims::IntegerMassDecomposer::size_type
decomposition_type::size_type size_type
Type of decomposition's size.
Definition: IntegerMassDecomposer.h:61

OpenMS::ims::IntegerMassDecomposer::witness_vector_
witness_vector_type witness_vector_
Definition: IntegerMassDecomposer.h:155

OpenMS::ims::IntegerMassDecomposer::exist
bool exist(value_type mass) override
Definition: IntegerMassDecomposer.h:392

OpenMS::ims::IntegerMassDecomposer::decomposition_value_type
MassDecomposer< ValueType, DecompositionValueType >::decomposition_value_type decomposition_value_type
Type of decomposition value.
Definition: IntegerMassDecomposer.h:52

OpenMS::ims::IntegerMassDecomposer::ertable_
residues_table_type ertable_
Definition: IntegerMassDecomposer.h:131

OpenMS::ims::IntegerMassDecomposer::decompositions_type
MassDecomposer< ValueType, DecompositionValueType >::decompositions_type decompositions_type
Type of container for many decompositions.
Definition: IntegerMassDecomposer.h:58

OpenMS::ims::IntegerMassDecomposer::value_type
MassDecomposer< ValueType, DecompositionValueType >::value_type value_type
Type of value to be decomposed.
Definition: IntegerMassDecomposer.h:49

OpenMS::ims::IntegerMassDecomposer::getNumberOfDecompositions
decomposition_value_type getNumberOfDecompositions(value_type mass) override
Definition: IntegerMassDecomposer.h:524

OpenMS::ims::IntegerMassDecomposer::fillExtendedResidueTable_
void fillExtendedResidueTable_(const Weights &_alphabet, residues_table_row_type &_lcms, residues_table_row_type &_mass_in_lcms, const value_type _infty, witness_vector_type &_witness_vector, residues_table_type &_ertable)
Definition: IntegerMassDecomposer.h:193

OpenMS::ims::MassDecomposer
An interface to handle decomposing of integer values/masses over a set of integer weights (alphabet).
Definition: MassDecomposer.h:42

OpenMS::ims::MassDecomposer::decomposition_value_type
DecompositionValueType decomposition_value_type
Definition: MassDecomposer.h:52

OpenMS::ims::MassDecomposer::value_type
ValueType value_type
Definition: MassDecomposer.h:47

OpenMS::ims::MassDecomposer::decompositions_type
std::vector< decomposition_type > decompositions_type
Definition: MassDecomposer.h:62

OpenMS::ims::MassDecomposer::decomposition_type
std::vector< decomposition_value_type > decomposition_type
Definition: MassDecomposer.h:57

OpenMS::ims::Weights
Represents a set of weights (double values and scaled with a certain precision their integer counterp...
Definition: Weights.h:42

OpenMS::ims::Weights::size
size_type size() const
Definition: Weights.h:98

OpenMS::ims::Weights::getWeight
weight_type getWeight(size_type i) const
Definition: Weights.h:109

OpenMS::Math::gcd
T gcd(T a, T b)
Returns the greatest common divisor (gcd) of two numbers by applying the Euclidean algorithm.
Definition: MathFunctions.h:244

OpenMS
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19