OpenMS
IntegerMassDecomposer.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Anton Pervukhin <Anton.Pervukhin@CeBiTec.Uni-Bielefeld.DE> $
7 // --------------------------------------------------------------------------
8 //
9 
10 #pragma once
11 
12 #include <vector>
13 #include <utility>
14 
17 
19 
20 namespace OpenMS
21 {
22 
23  namespace ims
24  {
25 
42  template <typename ValueType = long unsigned int,
43  typename DecompositionValueType = unsigned int>
45  public MassDecomposer<ValueType, DecompositionValueType>
46  {
47 public:
50 
53 
56 
59 
61  typedef typename decomposition_type::size_type size_type;
62 
68  explicit IntegerMassDecomposer(const Weights & alphabet);
69 
76  bool exist(value_type mass) override;
77 
85 
93 
103 
104 private:
105 
109  typedef std::vector<std::pair<size_type, decomposition_value_type> > witness_vector_type;
110 
114  typedef std::vector<value_type> residues_table_row_type;
115 
119  typedef std::vector<residues_table_row_type> residues_table_type;
120 
125 
132 
138 
145 
150 
156 
160  void fillExtendedResidueTable_(const Weights & _alphabet, residues_table_row_type & _lcms,
161  residues_table_row_type & _mass_in_lcms, const value_type _infty,
162  witness_vector_type & _witness_vector, residues_table_type & _ertable);
163 
172  void collectDecompositionsRecursively_(value_type mass, size_type alphabetMassIndex,
173  decomposition_type decomposition, decompositions_type & decompositionsStore);
174  };
175 
176 
177  template <typename ValueType, typename DecompositionValueType>
179  const Weights & alphabet) :
180  alphabet_(alphabet)
181  {
182 
183  lcms_.resize(alphabet.size());
184  mass_in_lcms_.resize(alphabet.size());
185 
186  infty_ = alphabet.getWeight(0) * alphabet.getWeight(alphabet.size() - 1);
187 
189 
190  }
191 
192  template <typename ValueType, typename DecompositionValueType>
194  const Weights & _alphabet, residues_table_row_type & _lcms, residues_table_row_type & _mass_in_lcms,
195  const value_type _infty, witness_vector_type & _witnessVector, residues_table_type & _ertable)
196  {
197 
198  if (_alphabet.size() < 2)
199  {
200  return;
201  }
202  // caches the most often used mass - smallest mass
203  value_type smallestMass = _alphabet.getWeight(0), secondMass = _alphabet.getWeight(1);
204 
205  // initializes table: infinity everywhere except in the first field of every column
206  _ertable.reserve(_alphabet.size());
207  _ertable.assign(_alphabet.size(), std::vector<value_type>(smallestMass, _infty));
208 
209  for (size_type i = 0; i < _alphabet.size(); ++i)
210  {
211  _ertable[i][0] = 0;
212  }
213 
214  // initializes witness vector
215  _witnessVector.resize(smallestMass);
216 
217  // fills second column (the first one is already correct)
218  size_type it_inc = secondMass % smallestMass, witness = 1;
219  //typename residues_table_row_type::iterator it = _ertable[1].begin() + it_inc;
220  value_type mass = secondMass;
221  // initializes counter to create a witness vector
222  decomposition_value_type counter = 0;
223  size_type it_i = it_inc;
224  while (it_i != 0)
225  {
226  _ertable[1][it_i] = mass;
227  mass += secondMass;
228  ++counter;
229  _witnessVector[it_i] = std::make_pair(witness, counter);
230  //std::cerr << "BLA: " << counter << " " << &_ertable[1][0] << " " << it - _ertable[1].begin() << " " << _ertable[1].size() << std::endl;
231  it_i += it_inc;
232  if (it_i >= _ertable[1].size())
233  {
234  it_i -= _ertable[1].size();
235  }
236  }
237  // fills cache variables for i==1
238  value_type tmp_d = Math::gcd(smallestMass, secondMass);
239  _lcms[1] = secondMass * smallestMass / tmp_d;
240  _mass_in_lcms[1] = smallestMass / tmp_d;
241 
242  // fills remaining table. i is the column index.
243  for (size_type i = 2; i < _alphabet.size(); ++i)
244  {
245  // caches often used i-th alphabet mass
246  value_type currentMass = _alphabet.getWeight(i);
247 
248  value_type d = Math::gcd(smallestMass, currentMass);
249 
250  // fills cache for various variables.
251  // note that values for i==0 are never assigned since they're unused anyway.
252  _lcms[i] = currentMass * smallestMass / d;
253  _mass_in_lcms[i] = smallestMass / d;
254 
255  // Nijenhuis' improvement: Is currentMass composable with smaller alphabet?
256  if (currentMass >= _ertable[i - 1][currentMass % smallestMass])
257  {
258  _ertable[i] = _ertable[i - 1];
259  continue;
260  }
261 
262  const residues_table_row_type & prev_column = _ertable[i - 1];
263  residues_table_row_type & cur_column = _ertable[i];
264 
265  if (d == 1)
266  {
267  // This loop is for the case that the gcd is 1. The optimization used below
268  // is not applicable here.
269 
270  // p_inc is used to change residue (p) efficiently
271  size_type p_inc = currentMass % smallestMass;
272 
273  // n is the value that will be written into the table
274  value_type n = 0;
275  // current residue (in paper variable 'r' is used)
276  size_type p = 0;
277  // counter for creation of witness vector
278  decomposition_value_type local_counter = 0;
279 
280  for (size_type m = smallestMass; m > 0; --m)
281  {
282  n += currentMass;
283  p += p_inc;
284  ++local_counter;
285  if (p >= smallestMass)
286  {
287  p -= smallestMass;
288  }
289  if (n > prev_column[p])
290  {
291  n = prev_column[p];
292  local_counter = 0;
293  }
294  else
295  {
296  _witnessVector[p] = std::make_pair(i, local_counter);
297  }
298  cur_column[p] = n;
299  }
300  }
301  else
302  {
303  // If we're here, the gcd is not 1. We can use the following cache-optimized
304  // version of the algorithm. The trick is to put the iteration over all
305  // residue classes into the _inner_ loop.
306  //
307  // One could see it as going through one column in blocks which are gcd entries long.
308  size_type cur = currentMass % smallestMass;
309  size_type prev = 0;
310  size_type p_inc = cur - d;
311  // counters for creation of one witness vector
312  std::vector<decomposition_value_type> counters(smallestMass);
313 
314  // copies first block from prev_column to cur_column
315  for (size_type j = 1; j < d; ++j)
316  {
317  cur_column[j] = prev_column[j];
318  }
319 
320  // first loop: goes through all blocks, updating cur_column for the first time.
321  for (size_type m = smallestMass / d; m > 1; m--)
322  {
323  // r: current residue class
324  for (size_type r = 0; r < d; r++)
325  {
326 
327  ++counters[cur];
328  if (cur_column[prev] + currentMass > prev_column[cur])
329  {
330  cur_column[cur] = prev_column[cur];
331  counters[cur] = 0;
332  }
333  else
334  {
335  cur_column[cur] = cur_column[prev] + currentMass;
336  _witnessVector[cur] = std::make_pair(i, counters[cur]);
337  }
338 
339  prev++;
340  cur++;
341  }
342 
343  prev = cur - d;
344 
345  // this does: cur = (cur + currentMass) % smallestMass - d;
346  cur += p_inc;
347  if (cur >= smallestMass)
348  {
349  cur -= smallestMass;
350  }
351  }
352 
353  // second loop:
354  bool cont = true;
355  while (cont)
356  {
357  cont = false;
358  prev++;
359  cur++;
360  ++counters[cur];
361  for (size_type r = 1; r < d; ++r)
362  {
363  if (cur_column[prev] + currentMass < cur_column[cur])
364  {
365  cur_column[cur] = cur_column[prev] + currentMass;
366  cont = true;
367  _witnessVector[cur] = std::make_pair(i, counters[cur]);
368  }
369  else
370  {
371  counters[cur] = 0;
372  }
373  prev++;
374  cur++;
375  }
376 
377  prev = cur - d;
378 
379  cur += p_inc;
380  if (cur >= smallestMass)
381  {
382  cur -= smallestMass;
383  }
384  }
385  }
386 
387  }
388  }
389 
390  template <typename ValueType, typename DecompositionValueType>
392  exist(value_type mass)
393  {
394 
395  value_type residue = ertable_.back().at(mass % alphabet_.getWeight(0));
396  return residue != infty_ && mass >= residue;
397  }
398 
399  template <typename ValueType, typename DecompositionValueType>
402  {
403 
404  decomposition_type decomposition;
405  if (!this->exist(mass))
406  {
407  return decomposition;
408  }
409 
410  decomposition.reserve(alphabet_.size());
411  decomposition.resize(alphabet_.size());
412 
413  // initial mass residue: in FIND-ONE algorithm in paper corresponds variable "r"
414  value_type r = mass % alphabet_.getWeight(0);
415  value_type m = ertable_.back().at(r);
416 
417  decomposition.at(0) = static_cast<decomposition_value_type>
418  ((mass - m) / alphabet_.getWeight(0));
419 
420  while (m != 0)
421  {
422  size_type i = witness_vector_.at(r).first;
423  decomposition_value_type j = witness_vector_.at(r).second;
424  decomposition.at(i) += j;
425  if (m < j * alphabet_.getWeight(i))
426  {
427  break;
428  }
429  m -= j * alphabet_.getWeight(i);
430  r = m % alphabet_.getWeight(0);
431  }
432  return decomposition;
433  }
434 
435  template <typename ValueType, typename DecompositionValueType>
438  {
439  decompositions_type decompositionsStore;
440  decomposition_type decomposition(alphabet_.size());
441  collectDecompositionsRecursively_(mass, alphabet_.size() - 1, decomposition, decompositionsStore);
442  return decompositionsStore;
443  }
444 
445  template <typename ValueType, typename DecompositionValueType>
448  decomposition_type decomposition, decompositions_type & decompositionsStore)
449  {
450  if (alphabetMassIndex == 0)
451  {
452  value_type numberOfMasses0 = mass / alphabet_.getWeight(0);
453  if (numberOfMasses0 * alphabet_.getWeight(0) == mass)
454  {
455  decomposition[0] = static_cast<decomposition_value_type>(numberOfMasses0);
456  decompositionsStore.push_back(decomposition);
457  }
458  return;
459  }
460 
461  // tested: caching these values gives us 15% better performance, at least
462  // with aminoacid-mono.masses
463  const value_type lcm = lcms_[alphabetMassIndex];
464  const value_type mass_in_lcm = mass_in_lcms_[alphabetMassIndex]; // this is alphabet mass divided by gcd
465 
466  value_type mass_mod_alphabet0 = mass % alphabet_.getWeight(0); // trying to avoid modulo
467  const value_type mass_mod_decrement = alphabet_.getWeight(alphabetMassIndex) % alphabet_.getWeight(0);
468 
469  for (value_type i = 0; i < mass_in_lcm; ++i)
470  {
471  // here is the conversion from value_type to decomposition_value_type
472  decomposition[alphabetMassIndex] = static_cast<decomposition_value_type>(i);
473 
474  // this check is needed because mass could have unsigned type and after reduction on i*alphabetMass will be still be positive but huge
475  // and that will end up in infinite loop
476  if (mass < i * alphabet_.getWeight(alphabetMassIndex))
477  {
478  break;
479  }
480 
481  // r: current residue class. will stay the same in the following loop
482  value_type r = ertable_[alphabetMassIndex - 1][mass_mod_alphabet0];
483 
484  // TODO: if infty was std::numeric_limits<...>... the following 'if' would not be necessary
485  if (r != infty_)
486  {
487  for (value_type m = mass - i * alphabet_.getWeight(alphabetMassIndex); m >= r; m -= lcm)
488  {
489  // the condition of the 'for' loop (m >= r) and decrementing the mass
490  // in steps of the lcm ensures that m is decomposable. Therefore
491  // the recursion will result in at least one witness.
492  collectDecompositionsRecursively_(m, alphabetMassIndex - 1, decomposition, decompositionsStore);
493  decomposition[alphabetMassIndex] += mass_in_lcm;
494  // this check is needed because mass could have unsigned type and after reduction on i*alphabetMass will be still be positive but huge
495  // and that will end up in infinite loop
496  if (m < lcm)
497  {
498  break;
499  }
500  }
501  }
502  // subtle way of changing the modulo, instead of plain calculation it from (mass - i*currentAlphabetMass) % alphabetMass0 every time
503  if (mass_mod_alphabet0 < mass_mod_decrement)
504  {
505  mass_mod_alphabet0 += alphabet_.getWeight(0) - mass_mod_decrement;
506  }
507  else
508  {
509  mass_mod_alphabet0 -= mass_mod_decrement;
510  }
511  }
512 
513  }
514 
523  template <typename ValueType, typename DecompositionValueType>
525  DecompositionValueType>::getNumberOfDecompositions(value_type mass)
526  {
527  return static_cast<typename IntegerMassDecomposer<ValueType, DecompositionValueType>::decomposition_value_type>(getAllDecompositions(mass).size());
528  }
529 
530  } // namespace ims
531 } // namespace OpenMS
532 
Implements MassDecomposer interface using algorithm and data structures described in paper "Efficient...
Definition: IntegerMassDecomposer.h:46
residues_table_row_type lcms_
Definition: IntegerMassDecomposer.h:137
Weights alphabet_
Definition: IntegerMassDecomposer.h:124
value_type infty_
Definition: IntegerMassDecomposer.h:149
IntegerMassDecomposer(const Weights &alphabet)
Definition: IntegerMassDecomposer.h:178
std::vector< residues_table_row_type > residues_table_type
Definition: IntegerMassDecomposer.h:119
std::vector< std::pair< size_type, decomposition_value_type > > witness_vector_type
Definition: IntegerMassDecomposer.h:109
void collectDecompositionsRecursively_(value_type mass, size_type alphabetMassIndex, decomposition_type decomposition, decompositions_type &decompositionsStore)
Definition: IntegerMassDecomposer.h:447
decompositions_type getAllDecompositions(value_type mass) override
Definition: IntegerMassDecomposer.h:437
std::vector< value_type > residues_table_row_type
Definition: IntegerMassDecomposer.h:114
residues_table_row_type mass_in_lcms_
Definition: IntegerMassDecomposer.h:144
decomposition_type getDecomposition(value_type mass) override
Definition: IntegerMassDecomposer.h:401
MassDecomposer< ValueType, DecompositionValueType >::decomposition_type decomposition_type
Type of decomposition.
Definition: IntegerMassDecomposer.h:55
decomposition_type::size_type size_type
Type of decomposition's size.
Definition: IntegerMassDecomposer.h:61
witness_vector_type witness_vector_
Definition: IntegerMassDecomposer.h:155
bool exist(value_type mass) override
Definition: IntegerMassDecomposer.h:392
MassDecomposer< ValueType, DecompositionValueType >::decomposition_value_type decomposition_value_type
Type of decomposition value.
Definition: IntegerMassDecomposer.h:52
residues_table_type ertable_
Definition: IntegerMassDecomposer.h:131
MassDecomposer< ValueType, DecompositionValueType >::decompositions_type decompositions_type
Type of container for many decompositions.
Definition: IntegerMassDecomposer.h:58
MassDecomposer< ValueType, DecompositionValueType >::value_type value_type
Type of value to be decomposed.
Definition: IntegerMassDecomposer.h:49
decomposition_value_type getNumberOfDecompositions(value_type mass) override
Definition: IntegerMassDecomposer.h:525
void fillExtendedResidueTable_(const Weights &_alphabet, residues_table_row_type &_lcms, residues_table_row_type &_mass_in_lcms, const value_type _infty, witness_vector_type &_witness_vector, residues_table_type &_ertable)
Definition: IntegerMassDecomposer.h:193
An interface to handle decomposing of integer values/masses over a set of integer weights (alphabet).
Definition: MassDecomposer.h:42
DecompositionValueType decomposition_value_type
Definition: MassDecomposer.h:52
ValueType value_type
Definition: MassDecomposer.h:47
std::vector< decomposition_type > decompositions_type
Definition: MassDecomposer.h:62
std::vector< decomposition_value_type > decomposition_type
Definition: MassDecomposer.h:57
Represents a set of weights (double values and scaled with a certain precision their integer counterp...
Definition: Weights.h:42
size_type size() const
Definition: Weights.h:98
weight_type getWeight(size_type i) const
Definition: Weights.h:109
T gcd(T a, T b)
Returns the greatest common divisor (gcd) of two numbers by applying the Euclidean algorithm.
Definition: MathFunctions.h:215
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22