OpenMS
IntegerMassDecomposer.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Anton Pervukhin <Anton.Pervukhin@CeBiTec.Uni-Bielefeld.DE> $
33 // --------------------------------------------------------------------------
34 //
35 
36 #pragma once
37 
38 #include <vector>
39 #include <utility>
40 
43 
45 
46 namespace OpenMS
47 {
48 
49  namespace ims
50  {
51 
68  template <typename ValueType = long unsigned int,
69  typename DecompositionValueType = unsigned int>
71  public MassDecomposer<ValueType, DecompositionValueType>
72  {
73 public:
76 
79 
82 
85 
87  typedef typename decomposition_type::size_type size_type;
88 
94  explicit IntegerMassDecomposer(const Weights & alphabet);
95 
102  bool exist(value_type mass) override;
103 
111 
119 
129 
130 private:
131 
135  typedef std::vector<std::pair<size_type, decomposition_value_type> > witness_vector_type;
136 
140  typedef std::vector<value_type> residues_table_row_type;
141 
145  typedef std::vector<residues_table_row_type> residues_table_type;
146 
151 
158 
164 
171 
176 
182 
186  void fillExtendedResidueTable_(const Weights & _alphabet, residues_table_row_type & _lcms,
187  residues_table_row_type & _mass_in_lcms, const value_type _infty,
188  witness_vector_type & _witness_vector, residues_table_type & _ertable);
189 
198  void collectDecompositionsRecursively_(value_type mass, size_type alphabetMassIndex,
199  decomposition_type decomposition, decompositions_type & decompositionsStore);
200  };
201 
202 
203  template <typename ValueType, typename DecompositionValueType>
205  const Weights & alphabet) :
206  alphabet_(alphabet)
207  {
208 
209  lcms_.resize(alphabet.size());
210  mass_in_lcms_.resize(alphabet.size());
211 
212  infty_ = alphabet.getWeight(0) * alphabet.getWeight(alphabet.size() - 1);
213 
215 
216  }
217 
218  template <typename ValueType, typename DecompositionValueType>
220  const Weights & _alphabet, residues_table_row_type & _lcms, residues_table_row_type & _mass_in_lcms,
221  const value_type _infty, witness_vector_type & _witnessVector, residues_table_type & _ertable)
222  {
223 
224  if (_alphabet.size() < 2)
225  {
226  return;
227  }
228  // caches the most often used mass - smallest mass
229  value_type smallestMass = _alphabet.getWeight(0), secondMass = _alphabet.getWeight(1);
230 
231  // initializes table: infinity everywhere except in the first field of every column
232  _ertable.reserve(_alphabet.size());
233  _ertable.assign(_alphabet.size(), std::vector<value_type>(smallestMass, _infty));
234 
235  for (size_type i = 0; i < _alphabet.size(); ++i)
236  {
237  _ertable[i][0] = 0;
238  }
239 
240  // initializes witness vector
241  _witnessVector.resize(smallestMass);
242 
243  // fills second column (the first one is already correct)
244  size_type it_inc = secondMass % smallestMass, witness = 1;
245  //typename residues_table_row_type::iterator it = _ertable[1].begin() + it_inc;
246  value_type mass = secondMass;
247  // initializes counter to create a witness vector
248  decomposition_value_type counter = 0;
249  size_type it_i = it_inc;
250  while (it_i != 0)
251  {
252  _ertable[1][it_i] = mass;
253  mass += secondMass;
254  ++counter;
255  _witnessVector[it_i] = std::make_pair(witness, counter);
256  //std::cerr << "BLA: " << counter << " " << &_ertable[1][0] << " " << it - _ertable[1].begin() << " " << _ertable[1].size() << std::endl;
257  it_i += it_inc;
258  if (it_i >= _ertable[1].size())
259  {
260  it_i -= _ertable[1].size();
261  }
262  }
263  // fills cache variables for i==1
264  value_type tmp_d = Math::gcd(smallestMass, secondMass);
265  _lcms[1] = secondMass * smallestMass / tmp_d;
266  _mass_in_lcms[1] = smallestMass / tmp_d;
267 
268  // fills remaining table. i is the column index.
269  for (size_type i = 2; i < _alphabet.size(); ++i)
270  {
271  // caches often used i-th alphabet mass
272  value_type currentMass = _alphabet.getWeight(i);
273 
274  value_type d = Math::gcd(smallestMass, currentMass);
275 
276  // fills cache for various variables.
277  // note that values for i==0 are never assigned since they're unused anyway.
278  _lcms[i] = currentMass * smallestMass / d;
279  _mass_in_lcms[i] = smallestMass / d;
280 
281  // Nijenhuis' improvement: Is currentMass composable with smaller alphabet?
282  if (currentMass >= _ertable[i - 1][currentMass % smallestMass])
283  {
284  _ertable[i] = _ertable[i - 1];
285  continue;
286  }
287 
288  const residues_table_row_type & prev_column = _ertable[i - 1];
289  residues_table_row_type & cur_column = _ertable[i];
290 
291  if (d == 1)
292  {
293  // This loop is for the case that the gcd is 1. The optimization used below
294  // is not applicable here.
295 
296  // p_inc is used to change residue (p) efficiently
297  size_type p_inc = currentMass % smallestMass;
298 
299  // n is the value that will be written into the table
300  value_type n = 0;
301  // current residue (in paper variable 'r' is used)
302  size_type p = 0;
303  // counter for creation of witness vector
304  decomposition_value_type local_counter = 0;
305 
306  for (size_type m = smallestMass; m > 0; --m)
307  {
308  n += currentMass;
309  p += p_inc;
310  ++local_counter;
311  if (p >= smallestMass)
312  {
313  p -= smallestMass;
314  }
315  if (n > prev_column[p])
316  {
317  n = prev_column[p];
318  local_counter = 0;
319  }
320  else
321  {
322  _witnessVector[p] = std::make_pair(i, local_counter);
323  }
324  cur_column[p] = n;
325  }
326  }
327  else
328  {
329  // If we're here, the gcd is not 1. We can use the following cache-optimized
330  // version of the algorithm. The trick is to put the iteration over all
331  // residue classes into the _inner_ loop.
332  //
333  // One could see it as going through one column in blocks which are gcd entries long.
334  size_type cur = currentMass % smallestMass;
335  size_type prev = 0;
336  size_type p_inc = cur - d;
337  // counters for creation of one witness vector
338  std::vector<decomposition_value_type> counters(smallestMass);
339 
340  // copies first block from prev_column to cur_column
341  for (size_type j = 1; j < d; ++j)
342  {
343  cur_column[j] = prev_column[j];
344  }
345 
346  // first loop: goes through all blocks, updating cur_column for the first time.
347  for (size_type m = smallestMass / d; m > 1; m--)
348  {
349  // r: current residue class
350  for (size_type r = 0; r < d; r++)
351  {
352 
353  ++counters[cur];
354  if (cur_column[prev] + currentMass > prev_column[cur])
355  {
356  cur_column[cur] = prev_column[cur];
357  counters[cur] = 0;
358  }
359  else
360  {
361  cur_column[cur] = cur_column[prev] + currentMass;
362  _witnessVector[cur] = std::make_pair(i, counters[cur]);
363  }
364 
365  prev++;
366  cur++;
367  }
368 
369  prev = cur - d;
370 
371  // this does: cur = (cur + currentMass) % smallestMass - d;
372  cur += p_inc;
373  if (cur >= smallestMass)
374  {
375  cur -= smallestMass;
376  }
377  }
378 
379  // second loop:
380  bool cont = true;
381  while (cont)
382  {
383  cont = false;
384  prev++;
385  cur++;
386  ++counters[cur];
387  for (size_type r = 1; r < d; ++r)
388  {
389  if (cur_column[prev] + currentMass < cur_column[cur])
390  {
391  cur_column[cur] = cur_column[prev] + currentMass;
392  cont = true;
393  _witnessVector[cur] = std::make_pair(i, counters[cur]);
394  }
395  else
396  {
397  counters[cur] = 0;
398  }
399  prev++;
400  cur++;
401  }
402 
403  prev = cur - d;
404 
405  cur += p_inc;
406  if (cur >= smallestMass)
407  {
408  cur -= smallestMass;
409  }
410  }
411  }
412 
413  }
414  }
415 
416  template <typename ValueType, typename DecompositionValueType>
418  exist(value_type mass)
419  {
420 
421  value_type residue = ertable_.back().at(mass % alphabet_.getWeight(0));
422  return residue != infty_ && mass >= residue;
423  }
424 
425  template <typename ValueType, typename DecompositionValueType>
428  {
429 
430  decomposition_type decomposition;
431  if (!this->exist(mass))
432  {
433  return decomposition;
434  }
435 
436  decomposition.reserve(alphabet_.size());
437  decomposition.resize(alphabet_.size());
438 
439  // initial mass residue: in FIND-ONE algorithm in paper corresponds variable "r"
440  value_type r = mass % alphabet_.getWeight(0);
441  value_type m = ertable_.back().at(r);
442 
443  decomposition.at(0) = static_cast<decomposition_value_type>
444  ((mass - m) / alphabet_.getWeight(0));
445 
446  while (m != 0)
447  {
448  size_type i = witness_vector_.at(r).first;
449  decomposition_value_type j = witness_vector_.at(r).second;
450  decomposition.at(i) += j;
451  if (m < j * alphabet_.getWeight(i))
452  {
453  break;
454  }
455  m -= j * alphabet_.getWeight(i);
456  r = m % alphabet_.getWeight(0);
457  }
458  return decomposition;
459  }
460 
461  template <typename ValueType, typename DecompositionValueType>
464  {
465  decompositions_type decompositionsStore;
466  decomposition_type decomposition(alphabet_.size());
467  collectDecompositionsRecursively_(mass, alphabet_.size() - 1, decomposition, decompositionsStore);
468  return decompositionsStore;
469  }
470 
471  template <typename ValueType, typename DecompositionValueType>
474  decomposition_type decomposition, decompositions_type & decompositionsStore)
475  {
476  if (alphabetMassIndex == 0)
477  {
478  value_type numberOfMasses0 = mass / alphabet_.getWeight(0);
479  if (numberOfMasses0 * alphabet_.getWeight(0) == mass)
480  {
481  decomposition[0] = static_cast<decomposition_value_type>(numberOfMasses0);
482  decompositionsStore.push_back(decomposition);
483  }
484  return;
485  }
486 
487  // tested: caching these values gives us 15% better performance, at least
488  // with aminoacid-mono.masses
489  const value_type lcm = lcms_[alphabetMassIndex];
490  const value_type mass_in_lcm = mass_in_lcms_[alphabetMassIndex]; // this is alphabet mass divided by gcd
491 
492  value_type mass_mod_alphabet0 = mass % alphabet_.getWeight(0); // trying to avoid modulo
493  const value_type mass_mod_decrement = alphabet_.getWeight(alphabetMassIndex) % alphabet_.getWeight(0);
494 
495  for (value_type i = 0; i < mass_in_lcm; ++i)
496  {
497  // here is the conversion from value_type to decomposition_value_type
498  decomposition[alphabetMassIndex] = static_cast<decomposition_value_type>(i);
499 
500  // this check is needed because mass could have unsigned type and after reduction on i*alphabetMass will be still be positive but huge
501  // and that will end up in infinite loop
502  if (mass < i * alphabet_.getWeight(alphabetMassIndex))
503  {
504  break;
505  }
506 
507  // r: current residue class. will stay the same in the following loop
508  value_type r = ertable_[alphabetMassIndex - 1][mass_mod_alphabet0];
509 
510  // TODO: if infty was std::numeric_limits<...>... the following 'if' would not be necessary
511  if (r != infty_)
512  {
513  for (value_type m = mass - i * alphabet_.getWeight(alphabetMassIndex); m >= r; m -= lcm)
514  {
515  // the condition of the 'for' loop (m >= r) and decrementing the mass
516  // in steps of the lcm ensures that m is decomposable. Therefore
517  // the recursion will result in at least one witness.
518  collectDecompositionsRecursively_(m, alphabetMassIndex - 1, decomposition, decompositionsStore);
519  decomposition[alphabetMassIndex] += mass_in_lcm;
520  // this check is needed because mass could have unsigned type and after reduction on i*alphabetMass will be still be positive but huge
521  // and that will end up in infinite loop
522  if (m < lcm)
523  {
524  break;
525  }
526  }
527  }
528  // subtle way of changing the modulo, instead of plain calculation it from (mass - i*currentAlphabetMass) % alphabetMass0 every time
529  if (mass_mod_alphabet0 < mass_mod_decrement)
530  {
531  mass_mod_alphabet0 += alphabet_.getWeight(0) - mass_mod_decrement;
532  }
533  else
534  {
535  mass_mod_alphabet0 -= mass_mod_decrement;
536  }
537  }
538 
539  }
540 
549  template <typename ValueType, typename DecompositionValueType>
551  DecompositionValueType>::getNumberOfDecompositions(value_type mass)
552  {
553  return static_cast<typename IntegerMassDecomposer<ValueType, DecompositionValueType>::decomposition_value_type>(getAllDecompositions(mass).size());
554  }
555 
556  } // namespace ims
557 } // namespace OpenMS
558 
Implements MassDecomposer interface using algorithm and data structures described in paper "Efficient...
Definition: IntegerMassDecomposer.h:72
residues_table_row_type lcms_
Definition: IntegerMassDecomposer.h:163
Weights alphabet_
Definition: IntegerMassDecomposer.h:150
value_type infty_
Definition: IntegerMassDecomposer.h:175
IntegerMassDecomposer(const Weights &alphabet)
Definition: IntegerMassDecomposer.h:204
std::vector< residues_table_row_type > residues_table_type
Definition: IntegerMassDecomposer.h:145
std::vector< std::pair< size_type, decomposition_value_type > > witness_vector_type
Definition: IntegerMassDecomposer.h:135
void collectDecompositionsRecursively_(value_type mass, size_type alphabetMassIndex, decomposition_type decomposition, decompositions_type &decompositionsStore)
Definition: IntegerMassDecomposer.h:473
decompositions_type getAllDecompositions(value_type mass) override
Definition: IntegerMassDecomposer.h:463
std::vector< value_type > residues_table_row_type
Definition: IntegerMassDecomposer.h:140
residues_table_row_type mass_in_lcms_
Definition: IntegerMassDecomposer.h:170
decomposition_type getDecomposition(value_type mass) override
Definition: IntegerMassDecomposer.h:427
MassDecomposer< ValueType, DecompositionValueType >::decomposition_type decomposition_type
Type of decomposition.
Definition: IntegerMassDecomposer.h:81
decomposition_type::size_type size_type
Type of decomposition's size.
Definition: IntegerMassDecomposer.h:87
witness_vector_type witness_vector_
Definition: IntegerMassDecomposer.h:181
bool exist(value_type mass) override
Definition: IntegerMassDecomposer.h:418
MassDecomposer< ValueType, DecompositionValueType >::decomposition_value_type decomposition_value_type
Type of decomposition value.
Definition: IntegerMassDecomposer.h:78
residues_table_type ertable_
Definition: IntegerMassDecomposer.h:157
MassDecomposer< ValueType, DecompositionValueType >::decompositions_type decompositions_type
Type of container for many decompositions.
Definition: IntegerMassDecomposer.h:84
MassDecomposer< ValueType, DecompositionValueType >::value_type value_type
Type of value to be decomposed.
Definition: IntegerMassDecomposer.h:75
decomposition_value_type getNumberOfDecompositions(value_type mass) override
Definition: IntegerMassDecomposer.h:551
void fillExtendedResidueTable_(const Weights &_alphabet, residues_table_row_type &_lcms, residues_table_row_type &_mass_in_lcms, const value_type _infty, witness_vector_type &_witness_vector, residues_table_type &_ertable)
Definition: IntegerMassDecomposer.h:219
An interface to handle decomposing of integer values/masses over a set of integer weights (alphabet).
Definition: MassDecomposer.h:68
DecompositionValueType decomposition_value_type
Definition: MassDecomposer.h:78
ValueType value_type
Definition: MassDecomposer.h:73
std::vector< decomposition_type > decompositions_type
Definition: MassDecomposer.h:88
std::vector< decomposition_value_type > decomposition_type
Definition: MassDecomposer.h:83
Represents a set of weights (double values and scaled with a certain precision their integer counterp...
Definition: Weights.h:68
size_type size() const
Definition: Weights.h:124
weight_type getWeight(size_type i) const
Definition: Weights.h:135
T gcd(T a, T b)
Returns the greatest common divisor (gcd) of two numbers by applying the Euclidean algorithm.
Definition: MathFunctions.h:241
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48