OpenMS
NASequence.h
Go to the documentation of this file.
1 // Copyright (c) 2002-2023, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Samuel Wein $
6 // $Authors: Samuel Wein, Timo Sachsenberg, Hendrik Weisser $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
13 #include <OpenMS/CONCEPT/Types.h>
15 #include <iosfwd>
16 #include <vector>
17 
18 namespace OpenMS
19 {
33  class OPENMS_DLLAPI NASequence
34  {
39  public:
41  { //< NB: Not all fragments types are valid for all residue types, this class should probably get split
42  Full = 0,
46  AIon,
47  BIon,
48  CIon,
49  XIon,
50  YIon,
51  ZIon,
59  WIon,
61  DIon,
62  SizeOfNASFragmentType
63  };
64 
66 
67  class Iterator;
68 
74  class OPENMS_DLLAPI ConstIterator
75  {
76  public:
78  typedef const value_type& const_reference;
80  typedef const value_type* const_pointer;
81  typedef std::vector<const value_type*>::difference_type difference_type;
82  typedef const value_type* pointer;
83  typedef std::random_access_iterator_tag iterator_category;
84 
89  ConstIterator() = default;
90 
// Detailed constructor: stores the given pointer to the underlying vector and
// the offset position within it. Only the pointer is kept (no elements are
// copied) and the position is taken as passed, without any range check.
92  ConstIterator(const std::vector<const Ribonucleotide*>* vec_ptr, difference_type position)
93  {
94  vector_ = vec_ptr;
95  position_ = position;
96  }
97 
// Copy constructor: takes over the vector pointer and the position of rhs.
// The new iterator refers to the same vector as rhs.
99  ConstIterator(const ConstIterator& rhs) : vector_(rhs.vector_), position_(rhs.position_)
100  {
101  }
102 
// Copy constructor from a (mutable) Iterator: takes over its vector pointer and
// position. The constructor is not marked explicit, so it also acts as an
// implicit Iterator -> ConstIterator conversion.
104  ConstIterator(const NASequence::Iterator& rhs) : vector_(rhs.vector_), position_(rhs.position_)
105  {
106  }
107 
// Destructor (virtual). The body is empty: the vector that vector_ points to
// is not deleted here.
109  virtual ~ConstIterator()
110  {
111  }
112 
114 
117  {
118  if (this != &rhs)
119  {
120  position_ = rhs.position_;
121  vector_ = rhs.vector_;
122  }
123  return *this;
124  }
125 
131  {
132  return *(*vector_)[position_];
133  }
134 
137  {
138  return (*vector_)[position_];
139  }
140 
143  {
144  return ConstIterator(vector_, position_ + diff);
145  }
146 
148  {
149  return position_ - rhs.position_;
150  }
151 
154  {
155  return ConstIterator(vector_, position_ - diff);
156  }
157 
// Equality comparator: true when both iterators hold the same vector pointer
// AND the same position (member-wise comparison via std::tie).
// NOTE(review): std::tie is declared in <tuple>; no such include is visible in
// this listing -- confirm it is pulled in by one of the included headers.
159  bool operator==(const ConstIterator& rhs) const
160  {
161  return (std::tie(vector_, position_) == std::tie(rhs.vector_, rhs.position_));
162  }
163 
// Inequality operator: the exact negation of operator== (differs in vector
// pointer or in position).
165  bool operator!=(const ConstIterator& rhs) const
166  {
167  return !(operator==(rhs));
168  }
169 
172  {
173  ++position_;
174  return *this;
175  }
176 
179  {
180  --position_;
181  return *this;
182  }
183 
185 
186  protected:
187  // pointer to the vector
188  const std::vector<const Ribonucleotide*>* vector_;
189 
190  // position in the vector
192  };
193 
194 
200  class OPENMS_DLLAPI Iterator
201  {
202  public:
204 
206  typedef const value_type& const_reference;
208  typedef const value_type* const_pointer;
209  typedef const value_type* pointer;
210  typedef std::vector<const value_type*>::difference_type difference_type;
211 
215  Iterator() = default;
216 
// Detailed constructor: stores the given pointer to the underlying (mutable)
// vector and the offset position within it. Only the pointer is kept (no
// elements are copied) and the position is taken as passed, without any range
// check.
218  Iterator(std::vector<const Ribonucleotide*>* vec_ptr, difference_type position)
219  {
220  vector_ = vec_ptr;
221  position_ = position;
222  }
223 
// Copy constructor: takes over the vector pointer and the position of rhs.
// The new iterator refers to the same vector as rhs.
225  Iterator(const Iterator& rhs) : vector_(rhs.vector_), position_(rhs.position_)
226  {
227  }
228 
// Destructor (virtual). The body is empty: the vector that vector_ points to
// is not deleted here.
230  virtual ~Iterator()
231  {
232  }
233 
235 
238  {
239  if (this != &rhs)
240  {
241  position_ = rhs.position_;
242  vector_ = rhs.vector_;
243  }
244  return *this;
245  }
246 
252  {
253  return *(*vector_)[position_];
254  }
255 
258  {
259  return (*vector_)[position_];
260  }
261 
264  {
265  return (*vector_)[position_];
266  }
267 
270  {
271  return Iterator(vector_, position_ + diff);
272  }
273 
275  {
276  return position_ - rhs.position_;
277  }
278 
281  {
282  return Iterator(vector_, position_ - diff);
283  }
284 
// Equality comparator: true when both iterators hold the same vector pointer
// AND the same position (member-wise comparison via std::tie).
286  bool operator==(const Iterator& rhs) const
287  {
288  return (std::tie(vector_, position_) == std::tie(rhs.vector_, rhs.position_));
289  }
290 
// Inequality operator: the exact negation of operator== (differs in vector
// pointer or in position).
292  bool operator!=(const Iterator& rhs) const
293  {
294  return !this->operator==(rhs);
295  }
296 
299  {
300  ++position_;
301  return *this;
302  }
303 
306  {
307  --position_;
308  return *this;
309  }
310 
312 
313  protected:
314  std::vector<const Ribonucleotide*>* vector_;
315 
316  // position in the vector
318  };
319 
320  public:
321  /*
322  * Default constructors and assignment operators.
323  */
324  NASequence() = default;
325  NASequence(const NASequence&) = default;
326  NASequence(NASequence&&) = default;
327  NASequence& operator=(const NASequence&) & = default;
328  NASequence& operator=(NASequence&&) & = default;
329 
331  NASequence(std::vector<const Ribonucleotide*> s, const RibonucleotideChainEnd* five_prime, const RibonucleotideChainEnd* three_prime);
332 
333  virtual ~NASequence() = default;
334 
335  bool operator==(const NASequence& rhs) const;
336  bool operator!=(const NASequence& rhs) const;
337  bool operator<(const NASequence& rhs) const;
338 
340  void setSequence(const std::vector<const Ribonucleotide*>& seq);
341 
// Read-only getter for the sequence: returns the stored vector of
// ribonucleotide pointers (seq_) by const reference, i.e. without copying it.
342  const std::vector<const Ribonucleotide*>& getSequence() const
343  {
344  return seq_;
345  }
346 
// Mutable getter for the sequence: returns a non-const reference to the stored
// vector (seq_), so callers can change the sequence in place through it.
347  std::vector<const Ribonucleotide*>& getSequence()
348  {
349  return seq_;
350  }
351 
353  void set(size_t index, const Ribonucleotide* r);
354 
// Getter for a single ribonucleotide element: returns the pointer stored at
// the given index of seq_. Uses std::vector::operator[], so the index is not
// range-checked.
// NOTE(review): the body only reads seq_, yet the method is not declared
// const, so it cannot be called on a const NASequence -- confirm whether that
// is intended.
355  const Ribonucleotide* get(size_t index)
356  {
357  return seq_[index];
358  }
359 
// Getter / setter for sequence elements (C++ container style): returns a
// reference to the pointer stored at the given index, so the element can be
// replaced by assigning through it. The index is not range-checked
// (std::vector::operator[]).
361  inline const Ribonucleotide*& operator[](size_t index)
362  {
363  return seq_[index];
364  }
365 
// Read-only element access (C++ container style): returns a const reference to
// the pointer stored at the given index; the element cannot be replaced through
// it. The index is not range-checked (std::vector::operator[]).
366  inline const Ribonucleotide* const& operator[](size_t index) const
367  {
368  return seq_[index];
369  }
370 
371  bool empty() const;
372  size_t size() const;
373  void clear();
374 
376  bool hasFivePrimeMod() const;
379  bool hasThreePrimeMod() const;
382 
// Returns a mutable Iterator on seq_ at position 0 (the first element).
384  inline Iterator begin()
385  {
386  return Iterator(&seq_, 0);
387  }
388 
// Const overload: returns a ConstIterator on seq_ at position 0 (the first
// element).
389  inline ConstIterator begin() const
390  {
391  return ConstIterator(&seq_, 0);
392  }
393 
// Returns a mutable Iterator on seq_ positioned at seq_.size(), i.e. one past
// the last element.
// NOTE(review): the size is converted with a C-style cast to Int (a signed
// int) before being passed as the position -- a narrowing conversion from
// size_t.
394  inline Iterator end()
395  {
396  return Iterator(&seq_, (Int)seq_.size());
397  }
398 
// Const overload: returns a ConstIterator on seq_ positioned at seq_.size(),
// i.e. one past the last element. The size is cast to Int (signed int) with a
// C-style cast before being passed as the position.
399  inline ConstIterator end() const
400  {
401  return ConstIterator(&seq_, (Int)seq_.size());
402  }
403 
// Returns a ConstIterator on seq_ at position 0; same body as the const
// overload of begin().
404  inline ConstIterator cbegin() const
405  {
406  return ConstIterator(&seq_, 0);
407  }
408 
// Returns a ConstIterator on seq_ positioned at seq_.size() (one past the last
// element); same body as the const overload of end().
409  inline ConstIterator cend() const
410  {
411  return ConstIterator(&seq_, (Int)seq_.size());
412  }
413 
415 
424  double getMonoWeight(NASFragmentType type = Full, Int charge = 0) const;
425 
434  double getAverageWeight(NASFragmentType type = Full, Int charge = 0) const;
435 
444  EmpiricalFormula getFormula(NASFragmentType type = Full, Int charge = 0) const;
445 
453  NASequence getPrefix(Size length) const;
454 
462  NASequence getSuffix(Size length) const;
463 
472  NASequence getSubsequence(Size start = 0, Size length = Size(-1)) const;
473 
481  static NASequence fromString(const String& s);
482 
486  friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const NASequence& seq);
487 
495  static NASequence fromString(const char* s);
496 
497  std::string toString() const;
498 
499  private:
500  // TODO: query RNA / DNA depending on type
501  static void parseString_(const String& s, NASequence& nas);
502 
512  // TODO: query RNA / DNA depending on type
514 
515  std::vector<const Ribonucleotide*> seq_;
516 
517  const RibonucleotideChainEnd* five_prime_ = nullptr;
518  const RibonucleotideChainEnd* three_prime_ = nullptr;
519  };
520 
521 } // namespace OpenMS
Representation of an empirical formula.
Definition: EmpiricalFormula.h:59
ConstIterator of NASequence class.
Definition: NASequence.h:75
ConstIterator(const std::vector< const Ribonucleotide * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition: NASequence.h:92
const_pointer operator->() const
dereference operator
Definition: NASequence.h:136
const value_type * const_pointer
Definition: NASequence.h:80
const ConstIterator operator-(difference_type diff) const
backward jump operator
Definition: NASequence.h:153
ConstIterator & operator=(const ConstIterator &rhs)
assignment operator
Definition: NASequence.h:116
bool operator!=(const ConstIterator &rhs) const
inequality operator
Definition: NASequence.h:165
const ConstIterator operator+(difference_type diff) const
forward jump operator
Definition: NASequence.h:142
ConstIterator()=default
default constructor
ConstIterator & operator--()
decrement operator
Definition: NASequence.h:178
std::random_access_iterator_tag iterator_category
Definition: NASequence.h:83
bool operator==(const ConstIterator &rhs) const
equality comparator
Definition: NASequence.h:159
Ribonucleotide value_type
Definition: NASequence.h:77
ConstIterator & operator++()
increment operator
Definition: NASequence.h:171
ConstIterator(const NASequence::Iterator &rhs)
copy constructor from Iterator
Definition: NASequence.h:104
difference_type position_
Definition: NASequence.h:191
std::vector< const value_type * >::difference_type difference_type
Definition: NASequence.h:81
ConstIterator(const ConstIterator &rhs)
copy constructor
Definition: NASequence.h:99
value_type & reference
Definition: NASequence.h:79
const std::vector< const Ribonucleotide * > * vector_
Definition: NASequence.h:188
const value_type * pointer
Definition: NASequence.h:82
const_reference operator*() const
dereference operator
Definition: NASequence.h:130
virtual ~ConstIterator()
destructor
Definition: NASequence.h:109
difference_type operator-(ConstIterator rhs) const
Definition: NASequence.h:147
const value_type & const_reference
Definition: NASequence.h:78
Iterator of NASequence class.
Definition: NASequence.h:201
const_pointer operator->() const
dereference operator
Definition: NASequence.h:257
const value_type * const_pointer
Definition: NASequence.h:208
pointer operator->()
mutable dereference operator
Definition: NASequence.h:263
const Iterator operator+(difference_type diff) const
forward jump operator
Definition: NASequence.h:269
Iterator & operator--()
decrement operator
Definition: NASequence.h:305
virtual ~Iterator()
destructor
Definition: NASequence.h:230
Iterator(std::vector< const Ribonucleotide * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition: NASequence.h:218
difference_type operator-(Iterator rhs) const
Definition: NASequence.h:274
Iterator & operator=(const Iterator &rhs)
assignment operator
Definition: NASequence.h:237
Ribonucleotide value_type
Definition: NASequence.h:205
bool operator==(const Iterator &rhs) const
equality comparator
Definition: NASequence.h:286
difference_type position_
Definition: NASequence.h:317
std::vector< const value_type * >::difference_type difference_type
Definition: NASequence.h:210
value_type & reference
Definition: NASequence.h:207
Iterator(const Iterator &rhs)
copy constructor
Definition: NASequence.h:225
const value_type * pointer
Definition: NASequence.h:209
const_reference operator*() const
dereference operator
Definition: NASequence.h:251
bool operator!=(const Iterator &rhs) const
inequality operator
Definition: NASequence.h:292
const Iterator operator-(difference_type diff) const
backward jump operator
Definition: NASequence.h:280
Iterator & operator++()
increment operator
Definition: NASequence.h:298
std::vector< const Ribonucleotide * > * vector_
Definition: NASequence.h:314
const value_type & const_reference
Definition: NASequence.h:206
Representation of a nucleic acid sequence.
Definition: NASequence.h:34
ConstIterator cend() const
Definition: NASequence.h:409
virtual ~NASequence()=default
bool operator<(const NASequence &rhs) const
less operator
bool hasThreePrimeMod() const
NASequence getSuffix(Size length) const
Return sequence suffix of the given length (not start index!)
void setSequence(const std::vector< const Ribonucleotide * > &seq)
getter / setter for sequence
std::string toString() const
Iterator begin()
iterators
Definition: NASequence.h:384
size_t size() const
bool operator==(const NASequence &rhs) const
equality operator
bool hasFivePrimeMod() const
5' and 3' modifications
const std::vector< const Ribonucleotide * > & getSequence() const
Definition: NASequence.h:342
double getMonoWeight(NASFragmentType type=Full, Int charge=0) const
utility functions
friend std::ostream & operator<<(std::ostream &os, const NASequence &seq)
void setThreePrimeMod(const RibonucleotideChainEnd *r)
NASequence getPrefix(Size length) const
Return sequence prefix of the given length (not end index!)
static void parseString_(const String &s, NASequence &nas)
NASequence()=default
bool empty() const
ConstIterator end() const
Definition: NASequence.h:399
const RibonucleotideChainEnd * getThreePrimeMod() const
bool operator!=(const NASequence &rhs) const
inequality operator
NASequence(std::vector< const Ribonucleotide * > s, const RibonucleotideChainEnd *five_prime, const RibonucleotideChainEnd *three_prime)
full constructor
void set(size_t index, const Ribonucleotide *r)
getter / setter for ribonucleotide elements (easily wrapped using pyOpenMS)
static NASequence fromString(const char *s)
create NASequence object by parsing a C string (character array)
EmpiricalFormula getFormula(NASFragmentType type=Full, Int charge=0) const
Get the formula for a NASequence.
NASFragmentType
an enum of all possible fragment ion types
Definition: NASequence.h:41
@ AminusB
A ion with base loss, added for nucleic acid support.
Definition: NASequence.h:60
@ YIon
MS:1001220 peptide bond up to the C-terminus.
Definition: NASequence.h:50
@ XIon
MS:1001228 amide/C-alpha bond up to the C-terminus.
Definition: NASequence.h:49
@ ZIon
MS:1001230 C-alpha/carbonyl carbon bond.
Definition: NASequence.h:51
@ WIon
W ion, added for nucleic acid support.
Definition: NASequence.h:59
@ BIonMinusH20
MS:1001222 b ion without water.
Definition: NASequence.h:53
@ BIonMinusNH3
MS:1001232 b ion without ammonia.
Definition: NASequence.h:55
@ AIon
MS:1001229 N-terminus up to the C-alpha/carbonyl carbon bond.
Definition: NASequence.h:46
@ Precursor
MS:1001523 Precursor ion.
Definition: NASequence.h:52
@ YIonMinusH20
MS:1001223 y ion without water.
Definition: NASequence.h:54
@ NonIdentified
MS:1001240 Non-identified ion.
Definition: NASequence.h:57
@ BIon
MS:1001224 N-terminus up to the peptide bond.
Definition: NASequence.h:47
@ ThreePrime
only 3' terminus
Definition: NASequence.h:45
@ CIon
MS:1001231 N-terminus up to the amide/C-alpha bond.
Definition: NASequence.h:48
@ YIonMinusNH3
MS:1001233 y ion without ammonia.
Definition: NASequence.h:56
@ Internal
internal, without any termini
Definition: NASequence.h:43
@ Unannotated
no stored annotation
Definition: NASequence.h:58
@ FivePrime
only 5' terminus
Definition: NASequence.h:44
@ DIon
D ion, added for nucleic acid support.
Definition: NASequence.h:61
static NASequence fromString(const String &s)
create NASequence object by parsing an OpenMS string
const Ribonucleotide * get(size_t index)
Definition: NASequence.h:355
static String::ConstIterator parseMod_(const String::ConstIterator str_it, const String &str, NASequence &nas)
Parses modifications in square brackets.
NASequence & operator=(NASequence &&) &=default
Move assignment operator.
ConstIterator cbegin() const
Definition: NASequence.h:404
double getAverageWeight(NASFragmentType type=Full, Int charge=0) const
Get the Average Weight of a NASequence. NB returns the uncharged mass + or - proton masses to match t...
Iterator end()
Definition: NASequence.h:394
std::vector< const Ribonucleotide * > & getSequence()
Definition: NASequence.h:347
NASequence getSubsequence(Size start=0, Size length=Size(-1)) const
Return subsequence with given starting position and length.
const Ribonucleotide *& operator[](size_t index)
getter / setter for sequence elements (C++ container style)
Definition: NASequence.h:361
const RibonucleotideChainEnd * getFivePrimeMod() const
std::vector< const Ribonucleotide * > seq_
Definition: NASequence.h:515
void setFivePrimeMod(const RibonucleotideChainEnd *r)
ConstIterator begin() const
Definition: NASequence.h:389
const Ribonucleotide *const & operator[](size_t index) const
Definition: NASequence.h:366
NASequence & operator=(const NASequence &) &=default
Copy assignment operator.
NASequence(NASequence &&)=default
Move constructor.
NASequence(const NASequence &)=default
Copy constructor.
Representation of a ribonucleotide (modified or unmodified)
Definition: Ribonucleotide.h:26
A more convenient string class.
Definition: String.h:34
const_iterator ConstIterator
Const Iterator.
Definition: String.h:46
int Int
Signed integer type.
Definition: Types.h:76
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:101
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:22