OpenMS  2.8.0
NASequence.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Samuel Wein $
32 // $Authors: Samuel Wein, Timo Sachsenberg, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 #include <OpenMS/CONCEPT/Types.h>
42 
43 #include <vector>
44 #include <iosfwd>
45 
46 namespace OpenMS
47 {
62  class OPENMS_DLLAPI NASequence
63  {
64 
69  public:
71  { //< NB: Not all fragment types are valid for all residue types; this class should probably be split
72  Full = 0,
76  AIon,
77  BIon,
78  CIon,
79  XIon,
80  YIon,
81  ZIon,
89  WIon,
91  DIon,
92  SizeOfNASFragmentType
93  };
94 
96 
97  class Iterator;
98 
104  class OPENMS_DLLAPI ConstIterator
105  {
106  public:
108  typedef const value_type& const_reference;
110  typedef const value_type* const_pointer;
111  typedef std::vector<const value_type*>::difference_type difference_type;
112  typedef const value_type* pointer;
113  typedef std::random_access_iterator_tag iterator_category;
114 
119  ConstIterator() = default;
120 
122  ConstIterator(const std::vector<const Ribonucleotide*>* vec_ptr, // detailed constructor: pointer to the vector to iterate over ...
123  difference_type position) // ... and the offset position inside that vector
124  {
125  vector_ = vec_ptr; // only the pointer is stored; the vector itself is not copied
126  position_ = position;
127  }
128 
131  vector_(rhs.vector_),
132  position_(rhs.position_)
133  {
134  }
135 
138  vector_(rhs.vector_),
139  position_(rhs.position_)
140  {
141  }
142 
144  virtual ~ConstIterator() {}
145 
147 
150  {
151  if (this != &rhs)
152  {
153  position_ = rhs.position_;
154  vector_ = rhs.vector_;
155  }
156  return *this;
157  }
158 
164  {
165  return *(*vector_)[position_];
166  }
167 
170  {
171  return (*vector_)[position_];
172  }
173 
176  {
177  return ConstIterator(vector_, position_ + diff);
178  }
179 
181  {
182  return position_ - rhs.position_;
183  }
184 
187  {
188  return ConstIterator(vector_, position_ - diff);
189  }
190 
192  bool operator==(const ConstIterator& rhs) const // equality comparator
193  {
194  return (std::tie(vector_, position_) == // equal only if both iterators hold the same vector pointer
195  std::tie(rhs.vector_, rhs.position_)); // and the same position; the vector contents are not compared
196  }
197 
199  bool operator!=(const ConstIterator& rhs) const // inequality operator
200  {
201  return !(operator==(rhs)); // defined as the negation of operator== so the two cannot disagree
202  }
203 
206  {
207  ++position_;
208  return *this;
209  }
210 
213  {
214  --position_;
215  return *this;
216  }
217 
219 
220  protected:
221 
222  // pointer to the vector
223  const std::vector<const Ribonucleotide*>* vector_;
224 
225  // position in the vector
227  };
228 
229 
235  class OPENMS_DLLAPI Iterator
236  {
237  public:
238 
240 
242  typedef const value_type& const_reference;
244  typedef const value_type* const_pointer;
245  typedef const value_type* pointer;
246  typedef std::vector<const value_type*>::difference_type difference_type;
247 
251  Iterator() = default;
252 
254  Iterator(std::vector<const Ribonucleotide*>* vec_ptr, // detailed constructor: pointer to the (non-const) vector to iterate over ...
255  difference_type position) // ... and the offset position inside that vector
256  {
257  vector_ = vec_ptr; // only the pointer is stored; the vector itself is not copied
258  position_ = position;
259  }
260 
262  Iterator(const Iterator& rhs) : // copy constructor
263  vector_(rhs.vector_), // copies the pointer, so both iterators refer to the same vector
264  position_(rhs.position_)
265  {
266  }
267 
269  virtual ~Iterator() {}
270 
272 
275  {
276  if (this != &rhs)
277  {
278  position_ = rhs.position_;
279  vector_ = rhs.vector_;
280  }
281  return *this;
282  }
283 
289  {
290  return *(*vector_)[position_];
291  }
292 
295  {
296  return (*vector_)[position_];
297  }
298 
301  {
302  return (*vector_)[position_];
303  }
304 
307  {
308  return Iterator(vector_, position_ + diff);
309  }
310 
312  {
313  return position_ - rhs.position_;
314  }
315 
318  {
319  return Iterator(vector_, position_ - diff);
320  }
321 
323  bool operator==(const Iterator& rhs) const // equality comparator
324  {
325  return (std::tie(vector_,position_) == // equal only if both iterators hold the same vector pointer
326  std::tie(rhs.vector_, rhs.position_)); // and the same position; the vector contents are not compared
327  }
328 
330  bool operator!=(const Iterator& rhs) const // inequality operator
331  {
332  return !this->operator==(rhs); // defined as the negation of operator== so the two cannot disagree
333  }
334 
337  {
338  ++position_;
339  return *this;
340  }
341 
344  {
345  --position_;
346  return *this;
347  }
348 
350 
351  protected:
352 
353  std::vector<const Ribonucleotide*>* vector_;
354 
355  // position in the vector
357  };
358 
359  public:
360  /*
361  * Default constructors and assignment operators.
362  */
363  NASequence() = default;
364  NASequence(const NASequence&) = default;
365  NASequence(NASequence&&) = default;
366  NASequence& operator=(const NASequence&) & = default;
367  NASequence& operator=(NASequence&&) & = default;
368 
370  NASequence(std::vector<const Ribonucleotide*> s,
371  const RibonucleotideChainEnd* five_prime,
372  const RibonucleotideChainEnd* three_prime);
373 
374  virtual ~NASequence() = default;
375 
376  bool operator==(const NASequence& rhs) const;
377  bool operator!=(const NASequence& rhs) const;
378  bool operator<(const NASequence& rhs) const;
379 
381  void setSequence(const std::vector<const Ribonucleotide*>& seq);
382 
383  const std::vector<const Ribonucleotide*>& getSequence() const // read-only access to the stored sequence
384  {
385  return seq_; // returned by reference; no copy is made
386  }
387 
388  std::vector<const Ribonucleotide*>& getSequence() // mutable access to the stored sequence
389  {
390  return seq_; // returned by reference, so callers can modify the member vector directly
391  }
392 
394  void set(size_t index, const Ribonucleotide* r);
395 
396  const Ribonucleotide* get(size_t index) // returns the pointer stored at position index; NOTE(review): not const-qualified, so it cannot be called on a const NASequence
397  {
398  return seq_[index]; // std::vector::operator[] performs no range check; index must be < seq_.size()
399  }
400 
402  inline const Ribonucleotide*& operator[](size_t index) // container-style element access; the returned reference allows replacing the stored pointer
403  {
404  return seq_[index]; // std::vector::operator[] performs no range check; index must be < seq_.size()
405  }
406 
407  inline const Ribonucleotide* const& operator[](size_t index) const // container-style element access for const objects; the stored pointer cannot be replaced through the result
408  {
409  return seq_[index]; // std::vector::operator[] performs no range check; index must be < seq_.size()
410  }
411 
412  bool empty() const;
413  size_t size() const;
414  void clear();
415 
417  bool hasFivePrimeMod() const;
420  bool hasThreePrimeMod() const;
423 
425  inline Iterator begin() // mutable iterator positioned at the first element
426  {
427  return Iterator(&seq_, 0); // the iterator keeps a pointer to this object's seq_ member
428  }
429 
430  inline ConstIterator begin() const // const iterator positioned at the first element
431  {
432  return ConstIterator(&seq_, 0); // the iterator keeps a pointer to this object's seq_ member
433  }
434 
435  inline Iterator end() // mutable iterator positioned one past the last element
436  {
437  return Iterator(&seq_, (Int) seq_.size()); // the size is cast to Int before being passed as the position
438  }
439 
440  inline ConstIterator end() const // const iterator positioned one past the last element
441  {
442  return ConstIterator(&seq_, (Int) seq_.size()); // the size is cast to Int before being passed as the position
443  }
444 
445  inline ConstIterator cbegin() const // const iterator positioned at the first element; same result as begin() const
446  {
447  return ConstIterator(&seq_, 0); // the iterator keeps a pointer to this object's seq_ member
448  }
449 
450  inline ConstIterator cend() const // const iterator positioned one past the last element; same result as end() const
451  {
452  return ConstIterator(&seq_, (Int) seq_.size()); // the size is cast to Int before being passed as the position
453  }
454 
456  double getMonoWeight(NASFragmentType type = Full, Int charge = 0) const;
457  double getAverageWeight(NASFragmentType type = Full, Int charge = 0) const;
458  EmpiricalFormula getFormula(NASFragmentType type = Full, Int charge = 0) const;
459 
461  NASequence getPrefix(Size length) const;
462 
464  NASequence getSuffix(Size length) const;
465 
467  NASequence getSubsequence(Size start = 0, Size length = Size(-1)) const;
468 
476  static NASequence fromString(const String& s);
477 
481  friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os,
482  const NASequence& seq);
483 
491  static NASequence fromString(const char* s);
492 
493  std::string toString() const ;
494 
495  private:
496  //TODO: query RNA / DNA depending on type
497  static void parseString_(const String& s, NASequence& nas);
498 
508  //TODO: query RNA / DNA depending on type
510  const String& str, NASequence& nas);
511 
512  std::vector<const Ribonucleotide*> seq_;
513 
514  const RibonucleotideChainEnd* five_prime_ = nullptr;
515  const RibonucleotideChainEnd* three_prime_ = nullptr;
516  };
517 
518 }
Representation of an empirical formula.
Definition: EmpiricalFormula.h:82
ConstIterator of NASequence class.
Definition: NASequence.h:105
ConstIterator(const std::vector< const Ribonucleotide * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition: NASequence.h:122
const_pointer operator->() const
dereference operator
Definition: NASequence.h:169
const value_type * const_pointer
Definition: NASequence.h:110
const ConstIterator operator-(difference_type diff) const
backward jump operator
Definition: NASequence.h:186
ConstIterator & operator=(const ConstIterator &rhs)
assignment operator
Definition: NASequence.h:149
bool operator!=(const ConstIterator &rhs) const
inequality operator
Definition: NASequence.h:199
const ConstIterator operator+(difference_type diff) const
forward jump operator
Definition: NASequence.h:175
ConstIterator()=default
default constructor
ConstIterator & operator--()
decrement operator
Definition: NASequence.h:212
std::random_access_iterator_tag iterator_category
Definition: NASequence.h:113
bool operator==(const ConstIterator &rhs) const
equality comparator
Definition: NASequence.h:192
Ribonucleotide value_type
Definition: NASequence.h:107
ConstIterator & operator++()
increment operator
Definition: NASequence.h:205
ConstIterator(const NASequence::Iterator &rhs)
copy constructor from Iterator
Definition: NASequence.h:137
difference_type position_
Definition: NASequence.h:226
std::vector< const value_type * >::difference_type difference_type
Definition: NASequence.h:111
ConstIterator(const ConstIterator &rhs)
copy constructor
Definition: NASequence.h:130
value_type & reference
Definition: NASequence.h:109
const std::vector< const Ribonucleotide * > * vector_
Definition: NASequence.h:223
const value_type * pointer
Definition: NASequence.h:112
const_reference operator*() const
dereference operator
Definition: NASequence.h:163
virtual ~ConstIterator()
destructor
Definition: NASequence.h:144
difference_type operator-(ConstIterator rhs) const
Definition: NASequence.h:180
const value_type & const_reference
Definition: NASequence.h:108
Iterator of NASequence class.
Definition: NASequence.h:236
const_pointer operator->() const
dereference operator
Definition: NASequence.h:294
const value_type * const_pointer
Definition: NASequence.h:244
pointer operator->()
mutable dereference operator
Definition: NASequence.h:300
const Iterator operator+(difference_type diff) const
forward jump operator
Definition: NASequence.h:306
Iterator & operator--()
decrement operator
Definition: NASequence.h:343
virtual ~Iterator()
destructor
Definition: NASequence.h:269
Iterator(std::vector< const Ribonucleotide * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition: NASequence.h:254
difference_type operator-(Iterator rhs) const
Definition: NASequence.h:311
Iterator & operator=(const Iterator &rhs)
assignment operator
Definition: NASequence.h:274
Ribonucleotide value_type
Definition: NASequence.h:241
bool operator==(const Iterator &rhs) const
equality comparator
Definition: NASequence.h:323
difference_type position_
Definition: NASequence.h:356
std::vector< const value_type * >::difference_type difference_type
Definition: NASequence.h:246
value_type & reference
Definition: NASequence.h:243
Iterator(const Iterator &rhs)
copy constructor
Definition: NASequence.h:262
const value_type * pointer
Definition: NASequence.h:245
const_reference operator*() const
dereference operator
Definition: NASequence.h:288
bool operator!=(const Iterator &rhs) const
inequality operator
Definition: NASequence.h:330
const Iterator operator-(difference_type diff) const
backward jump operator
Definition: NASequence.h:317
Iterator & operator++()
increment operator
Definition: NASequence.h:336
std::vector< const Ribonucleotide * > * vector_
Definition: NASequence.h:353
const value_type & const_reference
Definition: NASequence.h:242
Representation of a nucleic acid sequence.
Definition: NASequence.h:63
ConstIterator cend() const
Definition: NASequence.h:450
virtual ~NASequence()=default
bool operator<(const NASequence &rhs) const
less operator
bool hasThreePrimeMod() const
NASequence getSuffix(Size length) const
Return sequence suffix of the given length (not start index!)
void setSequence(const std::vector< const Ribonucleotide * > &seq)
getter / setter for sequence
std::string toString() const
Iterator begin()
iterators
Definition: NASequence.h:425
size_t size() const
bool operator==(const NASequence &rhs) const
equality operator
bool hasFivePrimeMod() const
5' and 3' modifications
const std::vector< const Ribonucleotide * > & getSequence() const
Definition: NASequence.h:383
double getMonoWeight(NASFragmentType type=Full, Int charge=0) const
utility functions
friend std::ostream & operator<<(std::ostream &os, const NASequence &seq)
void setThreePrimeMod(const RibonucleotideChainEnd *r)
NASequence getPrefix(Size length) const
Return sequence prefix of the given length (not end index!)
static void parseString_(const String &s, NASequence &nas)
NASequence()=default
bool empty() const
ConstIterator end() const
Definition: NASequence.h:440
const RibonucleotideChainEnd * getThreePrimeMod() const
bool operator!=(const NASequence &rhs) const
inequality operator
NASequence(std::vector< const Ribonucleotide * > s, const RibonucleotideChainEnd *five_prime, const RibonucleotideChainEnd *three_prime)
full constructor
void set(size_t index, const Ribonucleotide *r)
getter / setter for ribonucleotide elements (easily wrapped using pyOpenMS)
static NASequence fromString(const char *s)
create NASequence object by parsing a C string (character array)
EmpiricalFormula getFormula(NASFragmentType type=Full, Int charge=0) const
NASFragmentType
an enum of all possible fragment ion types
Definition: NASequence.h:71
@ AminusB
A ion with base loss, added for nucleic acid support.
Definition: NASequence.h:90
@ YIon
MS:1001220 peptide bond up to the C-terminus.
Definition: NASequence.h:80
@ XIon
MS:1001228 amide/C-alpha bond up to the C-terminus.
Definition: NASequence.h:79
@ ZIon
MS:1001230 C-alpha/carbonyl carbon bond.
Definition: NASequence.h:81
@ WIon
W ion, added for nucleic acid support.
Definition: NASequence.h:89
@ BIonMinusH20
MS:1001222 b ion without water.
Definition: NASequence.h:83
@ BIonMinusNH3
MS:1001232 b ion without ammonia.
Definition: NASequence.h:85
@ AIon
MS:1001229 N-terminus up to the C-alpha/carbonyl carbon bond.
Definition: NASequence.h:76
@ Precursor
MS:1001523 Precursor ion.
Definition: NASequence.h:82
@ YIonMinusH20
MS:1001223 y ion without water.
Definition: NASequence.h:84
@ NonIdentified
MS:1001240 Non-identified ion.
Definition: NASequence.h:87
@ BIon
MS:1001224 N-terminus up to the peptide bond.
Definition: NASequence.h:77
@ ThreePrime
only 3' terminus
Definition: NASequence.h:75
@ CIon
MS:1001231 N-terminus up to the amide/C-alpha bond.
Definition: NASequence.h:78
@ YIonMinusNH3
MS:1001233 y ion without ammonia.
Definition: NASequence.h:86
@ Internal
internal, without any termini
Definition: NASequence.h:73
@ Unannotated
no stored annotation
Definition: NASequence.h:88
@ FivePrime
only 5' terminus
Definition: NASequence.h:74
@ DIon
D ion, added for nucleic acid support.
Definition: NASequence.h:91
static NASequence fromString(const String &s)
create NASequence object by parsing an OpenMS string
const Ribonucleotide * get(size_t index)
Definition: NASequence.h:396
static String::ConstIterator parseMod_(const String::ConstIterator str_it, const String &str, NASequence &nas)
Parses modifications in square brackets.
NASequence & operator=(NASequence &&) &=default
Move assignment operator.
ConstIterator cbegin() const
Definition: NASequence.h:445
double getAverageWeight(NASFragmentType type=Full, Int charge=0) const
Iterator end()
Definition: NASequence.h:435
std::vector< const Ribonucleotide * > & getSequence()
Definition: NASequence.h:388
NASequence getSubsequence(Size start=0, Size length=Size(-1)) const
Return subsequence with given starting position and length.
const Ribonucleotide *& operator[](size_t index)
getter / setter for sequence elements (C++ container style)
Definition: NASequence.h:402
const RibonucleotideChainEnd * getFivePrimeMod() const
std::vector< const Ribonucleotide * > seq_
Definition: NASequence.h:512
void setFivePrimeMod(const RibonucleotideChainEnd *r)
ConstIterator begin() const
Definition: NASequence.h:430
const Ribonucleotide *const & operator[](size_t index) const
Definition: NASequence.h:407
NASequence & operator=(const NASequence &) &=default
Copy assignment operator.
NASequence(NASequence &&)=default
Move constructor.
NASequence(const NASequence &)=default
Copy constructor.
Representation of a ribonucleotide (modified or unmodified)
Definition: Ribonucleotide.h:52
A more convenient string class.
Definition: String.h:60
const_iterator ConstIterator
Const Iterator.
Definition: String.h:72
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47