OpenMS
AASequence.h
Go to the documentation of this file.
1 // Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
2 // SPDX-License-Identifier: BSD-3-Clause
3 //
4 // --------------------------------------------------------------------------
5 // $Maintainer: Timo Sachsenberg $
6 // $Authors: Andreas Bertsch, Timo Sachsenberg $
7 // --------------------------------------------------------------------------
8 
9 #pragma once
10 
13 #include <OpenMS/CONCEPT/Types.h>
16 
17 #include <vector>
18 #include <iosfwd>
19 #include <map>
20 
21 namespace OpenMS
22 {
23 
85  class OPENMS_DLLAPI AASequence final
86  {
87 public:
88 
89  class Iterator;
90 
95  class OPENMS_DLLAPI ConstIterator final
96  {
97  public:
98  // NOTE: the converting constructor from Iterator is defined below (after the copy constructor); the former TODO for it is resolved
99 
100  typedef const Residue& const_reference;
101  typedef Residue& reference;
102  typedef const Residue* const_pointer;
103  typedef std::vector<const Residue*>::difference_type difference_type;
105  typedef const Residue* pointer;
106  typedef std::random_access_iterator_tag iterator_category;
107 
112  ConstIterator() = default;
113 
114  /// Detailed constructor: binds the iterator to the residue vector @p vec_ptr at offset @p position.
115  ConstIterator(const std::vector<const Residue*>* vec_ptr, difference_type position)
116  : vector_(vec_ptr),
117  position_(position)
118  {
119  }
120 
122  ConstIterator(const ConstIterator& rhs) = default;
123 
126  vector_(rhs.vector_),
127  position_(rhs.position_)
128  {
129  }
130 
132  ~ConstIterator() = default;
133 
135 
137  ConstIterator& operator=(const ConstIterator& rhs) = default;
138 
144  {
145  return *(*vector_)[position_];
146  }
147 
150  {
151  return (*vector_)[position_];
152  }
153 
156  {
157  return ConstIterator(vector_, position_ + diff);
158  }
159 
161  {
162  return position_ - rhs.position_;
163  }
164 
167  {
168  return ConstIterator(vector_, position_ - diff);
169  }
170 
171  /// Equality comparator: true only if both iterators refer to the same vector and the same position.
172  bool operator==(const ConstIterator& rhs) const
173  {
174  return !(vector_ != rhs.vector_ || position_ != rhs.position_);
175  }
176 
177  /// Inequality comparator: true if the iterators differ in vector or in position.
178  bool operator!=(const ConstIterator& rhs) const
179  {
180  return !(vector_ == rhs.vector_ && position_ == rhs.position_);
181  }
182 
185  {
186  ++position_;
187  return *this;
188  }
189 
192  {
193  --position_;
194  return *this;
195  }
196 
198 
199 protected:
200 
201  // pointer to the AASequence vector
202  const std::vector<const Residue*>* vector_ {};
203 
204  // position in the AASequence vector
205  difference_type position_ {};
206  };
207 
208 
213  class OPENMS_DLLAPI Iterator final
214  {
215 public:
216 
218 
219  typedef const Residue& const_reference;
220  typedef Residue& reference;
221  typedef const Residue* const_pointer;
222  typedef const Residue* pointer;
223  typedef std::vector<const Residue*>::difference_type difference_type;
224 
229  Iterator() = default;
230 
231  /// Detailed constructor: binds the iterator to the residue vector @p vec_ptr at offset @p position.
232  Iterator(std::vector<const Residue*>* vec_ptr, difference_type position)
233  : vector_(vec_ptr),
234  position_(position)
235  {
236  }
237 
239  Iterator(const Iterator& rhs) = default;
240 
242  ~Iterator() = default;
243 
245 
248  {
249  if (this != &rhs)
250  {
251  position_ = rhs.position_;
252  vector_ = rhs.vector_;
253  }
254  return *this;
255  }
256 
262  {
263  return *(*vector_)[position_];
264  }
265 
268  {
269  return (*vector_)[position_];
270  }
271 
274  {
275  return (*vector_)[position_];
276  }
277 
280  {
281  return Iterator(vector_, position_ + diff);
282  }
283 
285  {
286  return position_ - rhs.position_;
287  }
288 
291  {
292  return Iterator(vector_, position_ - diff);
293  }
294 
295  /// Equality comparator: true only if both iterators refer to the same vector and the same position.
296  bool operator==(const Iterator& rhs) const
297  {
298  return !(vector_ != rhs.vector_ || position_ != rhs.position_);
299  }
300 
301  /// Inequality comparator: true if the iterators differ in vector or in position.
302  bool operator!=(const Iterator& rhs) const
303  {
304  return !(vector_ == rhs.vector_ && position_ == rhs.position_);
305  }
306 
309  {
310  ++position_;
311  return *this;
312  }
313 
316  {
317  --position_;
318  return *this;
319  }
320 
322 
323 protected:
324 
325  // pointer to the AASequence vector
326  std::vector<const Residue*>* vector_ {};
327 
328  // position in the AASequence vector
329  difference_type position_ {};
330  };
331 
335 
337  AASequence() = default;
338 
340  AASequence(const AASequence&) = default;
341 
343  AASequence(AASequence&&) = default;
344 
346  ~AASequence() = default;
348 
350  AASequence& operator=(const AASequence&) = default;
351 
354 
356  bool empty() const;
357 
361 
373  String toString() const;
374 
377 
387 
406  String toBracketString(bool integer_mass = true,
407  bool mass_delta = false,
408  const std::vector<String> & fixed_modifications = std::vector<String>()) const;
409 
412  void setModification(Size index, const String& modification);
413 
415  void setModification(Size index, const Residue* modification);
416 
418  void setModification(Size index, const ResidueModification* modification);
419 
422  void setModification(Size index, const ResidueModification& modification);
423 
425  void setModificationByDiffMonoMass(Size index, double diffMonoMass);
426 
429  void setNTerminalModification(const String& modification);
430 
433 
436 
438  void setNTerminalModificationByDiffMonoMass(double diffMonoMass, bool protein_term);
439 
442 
445 
448  void setCTerminalModification(const String& modification);
449 
452 
455 
457  void setCTerminalModificationByDiffMonoMass(double diffMonoMass, bool protein_term);
458 
461 
464 
466  const Residue& getResidue(Size index) const;
467 
470 
472  double getAverageWeight(Residue::ResidueType type = Residue::Full, Int charge = 0) const;
473 
478  double getMonoWeight(Residue::ResidueType type = Residue::Full, Int charge = 0) const;
479 
485  double getMZ(Int charge, Residue::ResidueType type = Residue::Full) const;
486 
488  const Residue& operator[](Size index) const;
489 
491  AASequence operator+(const AASequence& peptide) const;
492 
495 
497  AASequence operator+(const Residue* residue) const;
498 
501 
503  Size size() const;
504 
506  AASequence getPrefix(Size index) const;
507 
509  AASequence getSuffix(Size index) const;
510 
513 
515  void getAAFrequencies(std::map<String, Size>& frequency_table) const;
516 
518 
523  bool has(const Residue& residue) const;
524 
527  bool hasSubsequence(const AASequence& peptide) const;
528 
531  bool hasPrefix(const AASequence& peptide) const;
532 
535  bool hasSuffix(const AASequence& peptide) const;
536 
539 
542 
544  bool isModified() const;
545 
547  bool operator==(const AASequence& rhs) const;
548 
550  bool operator<(const AASequence& rhs) const;
551 
553  bool operator!=(const AASequence& rhs) const;
555 
558  /// Mutable iterator at offset 0 of peptide_.
559  inline Iterator begin() { return {&peptide_, 0}; }
560 
561  inline ConstIterator begin() const { return {&peptide_, 0}; } // read-only iterator at offset 0 of peptide_
562 
563  inline Iterator end() { return Iterator(&peptide_, static_cast<Iterator::difference_type>(peptide_.size())); } // past-the-end position; size is converted directly to difference_type instead of being narrowed through Int
564 
565  inline ConstIterator end() const { return ConstIterator(&peptide_, static_cast<ConstIterator::difference_type>(peptide_.size())); } // past-the-end position; size is converted directly to difference_type instead of being narrowed through Int
567 
572  friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AASequence& peptide);
573 
575  friend OPENMS_DLLAPI std::istream& operator>>(std::istream& is, const AASequence& peptide); // NOTE(review): peptide is taken by const reference, so this extraction operator cannot assign what it reads through it - confirm intent against the definition
577 
586  static AASequence fromString(const String& s,
587  bool permissive = true);
588 
597  static AASequence fromString(const char* s,
598  bool permissive = true);
599 
602  explicit AASequence(const String& s);
603 
606  explicit AASequence(const char* s);
607 
611  explicit AASequence(const String& s, bool permissive);
612 
616  explicit AASequence(const char* s, bool permissive);
617 
618  protected:
619 
620  std::vector<const Residue*> peptide_;
621 
622  const ResidueModification* n_term_mod_ = nullptr;
623 
624  const ResidueModification* c_term_mod_ = nullptr;
625 
640  const String& str,
641  AASequence& aas,
642  const ResidueModification::TermSpecificity& specificity);
643 
658  const String& str,
659  AASequence& aas,
660  const ResidueModification::TermSpecificity& specificity);
661 
662  static void parseString_(const String& peptide, AASequence& aas,
663  bool permissive = true);
664  };
665 
666  OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AASequence& peptide);
667 
668  OPENMS_DLLAPI std::istream& operator>>(std::istream& os, const AASequence& peptide); // NOTE(review): the input stream parameter is named `os` here but `is` in the friend declaration, and peptide is const - confirm against the definition
669 
670 } // namespace OpenMS
671 
672 
ConstIterator for AASequence.
Definition: AASequence.h:96
const_pointer operator->() const
dereference operator
Definition: AASequence.h:149
ConstIterator(const ConstIterator &rhs)=default
copy constructor
const ConstIterator operator-(difference_type diff) const
backward jump operator
Definition: AASequence.h:166
Residue value_type
Definition: AASequence.h:104
ConstIterator(const std::vector< const Residue * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition: AASequence.h:115
bool operator!=(const ConstIterator &rhs) const
inequality operator
Definition: AASequence.h:178
ConstIterator(const AASequence::Iterator &rhs)
copy constructor from Iterator
Definition: AASequence.h:125
const ConstIterator operator+(difference_type diff) const
forward jump operator
Definition: AASequence.h:155
~ConstIterator()=default
destructor
ConstIterator()=default
default constructor
ConstIterator & operator--()
decrement operator
Definition: AASequence.h:191
std::random_access_iterator_tag iterator_category
Definition: AASequence.h:106
bool operator==(const ConstIterator &rhs) const
equality comparator
Definition: AASequence.h:172
const std::vector< const Residue * > * vector_
Definition: AASequence.h:202
ConstIterator & operator++()
increment operator
Definition: AASequence.h:184
const Residue * const_pointer
Definition: AASequence.h:102
std::vector< const Residue * >::difference_type difference_type
Definition: AASequence.h:103
difference_type position_
Definition: AASequence.h:205
const Residue * pointer
Definition: AASequence.h:105
Residue & reference
Definition: AASequence.h:101
const Residue & const_reference
Definition: AASequence.h:100
const_reference operator*() const
dereference operator
Definition: AASequence.h:143
ConstIterator & operator=(const ConstIterator &rhs)=default
assignment operator
difference_type operator-(ConstIterator rhs) const
Definition: AASequence.h:160
Iterator class for AASequence.
Definition: AASequence.h:214
const_pointer operator->() const
dereference operator
Definition: AASequence.h:267
Iterator()=default
default constructor
pointer operator->()
mutable dereference operator
Definition: AASequence.h:273
const Iterator operator+(difference_type diff) const
forward jump operator
Definition: AASequence.h:279
Iterator & operator--()
decrement operator
Definition: AASequence.h:315
Iterator(const Iterator &rhs)=default
copy constructor
std::vector< const Residue * > * vector_
Definition: AASequence.h:326
difference_type operator-(Iterator rhs) const
Definition: AASequence.h:284
~Iterator()=default
destructor
Iterator & operator=(const Iterator &rhs)
assignment operator
Definition: AASequence.h:247
Iterator(std::vector< const Residue * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition: AASequence.h:232
bool operator==(const Iterator &rhs) const
equality comparator
Definition: AASequence.h:296
const Residue * const_pointer
Definition: AASequence.h:221
std::vector< const Residue * >::difference_type difference_type
Definition: AASequence.h:223
difference_type position_
Definition: AASequence.h:329
const Residue * pointer
Definition: AASequence.h:222
Residue & reference
Definition: AASequence.h:220
const Residue & const_reference
Definition: AASequence.h:219
const_reference operator*() const
dereference operator
Definition: AASequence.h:261
bool operator!=(const Iterator &rhs) const
inequality operator
Definition: AASequence.h:302
const Iterator operator-(difference_type diff) const
backward jump operator
Definition: AASequence.h:290
Iterator & operator++()
increment operator
Definition: AASequence.h:308
Representation of a peptide/protein sequence.
Definition: AASequence.h:86
void setModification(Size index, const Residue *modification)
sets the modification of the amino acid at index by providing an already (potentially) modified residue
const ResidueModification * getNTerminalModification() const
returns a pointer to the N-terminal modification, or nullptr if none is set
void setModificationByDiffMonoMass(Size index, double diffMonoMass)
modifies the residue at index in the sequence and potentially in the ResidueDB
bool hasNTerminalModification() const
predicate which is true if the peptide is N-term modified
double getMZ(Int charge, Residue::ResidueType type=Residue::Full) const
Iterator begin()
Definition: AASequence.h:559
String toString() const
returns the peptide as string with modifications embedded in brackets
EmpiricalFormula getFormula(Residue::ResidueType type=Residue::Full, Int charge=0) const
returns the formula of the peptide
std::vector< const Residue * > peptide_
Definition: AASequence.h:620
void setNTerminalModificationByDiffMonoMass(double diffMonoMass, bool protein_term)
sets the N-terminal modification by the monoisotopic mass difference it introduces (creates a "user-d...
void setModification(Size index, const String &modification)
AASequence(AASequence &&)=default
Move constructor.
double getAverageWeight(Residue::ResidueType type=Residue::Full, Int charge=0) const
returns the average weight of the peptide
const Residue & operator[](Size index) const
returns a const reference to the residue at the given position
AASequence(const AASequence &)=default
Copy constructor.
void setModification(Size index, const ResidueModification *modification)
sets the modification of AA at index by providing a pointer to a ResidueModification object found in ...
const String & getCTerminalModificationName() const
returns the name (ID) of the C-terminal modification, or an empty string if none is set
AASequence(const char *s)
constructor from C string
AASequence getPrefix(Size index) const
returns a peptide sequence of the first index residues
bool empty() const
check if sequence is empty
AASequence()=default
Default constructor.
static String::ConstIterator parseModSquareBrackets_(const String::ConstIterator str_it, const String &str, AASequence &aas, const ResidueModification::TermSpecificity &specificity)
Parses modifications in square brackets (a mass)
void setModification(Size index, const ResidueModification &modification)
ConstIterator end() const
Definition: AASequence.h:565
bool operator==(const AASequence &rhs) const
equality operator. Two sequences are equal iff all amino acids including PTMs are equal
void setCTerminalModificationByDiffMonoMass(double diffMonoMass, bool protein_term)
sets the C-terminal modification by the monoisotopic mass difference it introduces (creates a "user-d...
bool isModified() const
returns true if any of the residues or termini are modified
double getMonoWeight(Residue::ResidueType type=Residue::Full, Int charge=0) const
AASequence & operator+=(const Residue *)
adds the given residue to this sequence
void setCTerminalModification(const ResidueModification &mod)
sets the C-terminal modification (copies and adds to database if not present)
AASequence & operator=(AASequence &&)=default
Move assignment operator.
static AASequence fromString(const char *s, bool permissive=true)
create AASequence object by parsing a C string (character array)
static String::ConstIterator parseModRoundBrackets_(const String::ConstIterator str_it, const String &str, AASequence &aas, const ResidueModification::TermSpecificity &specificity)
Parses modifications in round brackets (an identifier)
String toBracketString(bool integer_mass=true, bool mass_delta=false, const std::vector< String > &fixed_modifications=std::vector< String >()) const
create a TPP compatible string of the modified sequence using bracket notation.
static void parseString_(const String &peptide, AASequence &aas, bool permissive=true)
const Residue & getResidue(Size index) const
returns a const reference to the residue at position index
void setCTerminalModification(const ResidueModification *modification)
sets the C-terminal modification (must be present in the database)
friend std::ostream & operator<<(std::ostream &os, const AASequence &peptide)
writes a peptide to an output stream
~AASequence()=default
Destructor.
AASequence operator+(const AASequence &peptide) const
adds the residues of the peptide
void getAAFrequencies(std::map< String, Size > &frequency_table) const
compute frequency table of amino acids
bool hasCTerminalModification() const
predicate which is true if the peptide is C-term modified
bool has(const Residue &residue) const
returns true if the peptide contains the given residue
AASequence getSubsequence(Size index, UInt number) const
returns a peptide sequence of number residues, beginning at position index
const ResidueModification * getCTerminalModification() const
returns a pointer to the C-terminal modification, or nullptr if none is set
bool hasSubsequence(const AASequence &peptide) const
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
bool operator!=(const AASequence &rhs) const
inequality operator. Complement of equality operator.
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
void setNTerminalModification(const String &modification)
AASequence operator+(const Residue *residue) const
returns a new sequence with the given residue added
AASequence getSuffix(Size index) const
returns a peptide sequence of the last index residues
AASequence & operator=(const AASequence &)=default
Assignment operator.
Iterator end()
Definition: AASequence.h:563
void setNTerminalModification(const ResidueModification *modification)
sets the N-terminal modification
AASequence(const char *s, bool permissive)
constructor from C string
bool hasPrefix(const AASequence &peptide) const
bool operator<(const AASequence &rhs) const
less-than operator which compares the C-term mods, sequence including PTMs and N-term mods; can be ...
Size size() const
returns the number of residues
String toUniModString() const
returns the peptide as string with UniMod-style modifications embedded in brackets
ConstIterator begin() const
Definition: AASequence.h:561
AASequence & operator+=(const AASequence &)
adds the residues of a peptide
const String & getNTerminalModificationName() const
returns the name (ID) of the N-terminal modification, or an empty string if none is set
void setNTerminalModification(const ResidueModification &mod)
sets the N-terminal modification (copies and adds to database if not present)
void setCTerminalModification(const String &modification)
bool hasSuffix(const AASequence &peptide) const
AASequence(const String &s, bool permissive)
constructor from String
AASequence(const String &s)
constructor from String
friend std::istream & operator>>(std::istream &is, const AASequence &peptide)
reads a peptide from an input stream
Representation of an empirical formula.
Definition: EmpiricalFormula.h:59
Representation of a modification on an amino acid residue.
Definition: ResidueModification.h:53
TermSpecificity
Position where the modification is allowed to occur.
Definition: ResidueModification.h:72
Representation of an amino acid residue.
Definition: Residue.h:40
ResidueType
Definition: Residue.h:152
@ Full
with N-terminus and C-terminus
Definition: Residue.h:153
A more convenient string class.
Definition: String.h:34
const_iterator ConstIterator
Const Iterator.
Definition: String.h:46
int Int
Signed integer type.
Definition: Types.h:72
unsigned int UInt
Unsigned integer type.
Definition: Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:97
static String number(double d, UInt n)
Definition: StringUtils.h:191
Main OpenMS namespace.
Definition: openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
std::istream & operator>>(std::istream &os, const AASequence &peptide)