OpenMS  2.7.0
AASequence.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Andreas Bertsch, Timo Sachsenberg $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 #include <OpenMS/CONCEPT/Types.h>
43 
44 #include <vector>
45 #include <iosfwd>
46 
47 namespace OpenMS
48 {
49 
111  class OPENMS_DLLAPI AASequence
112  {
113 public:
114 
115  class Iterator;
116 
121  class OPENMS_DLLAPI ConstIterator
122  {
123 public:
124 
125  // TODO Iterator constructor for ConstIterator
126 
// Iterator trait typedefs for ConstIterator.
// Both pointer and const_pointer are pointer-to-const Residue; the category
// advertised to the standard library is random access.
127  typedef const Residue& const_reference;
128  typedef Residue& reference;
129  typedef const Residue* const_pointer;
// Signed offset type, taken from the underlying residue-pointer vector.
130  typedef std::vector<const Residue*>::difference_type difference_type;
132  typedef const Residue* pointer;
133  typedef std::random_access_iterator_tag iterator_category;
134 
140  {
141  }
142 
/// Detailed constructor: binds the iterator to the residue vector @p vec_ptr at offset @p position.
/// The vector is only referenced, never owned; members are initialised in declaration order.
144  ConstIterator(const std::vector<const Residue*>* vec_ptr, difference_type position) :
145  vector_(vec_ptr),
146  position_(position)
147  {
148  }
149 
152  vector_(rhs.vector_),
153  position_(rhs.position_)
154  {
155  }
156 
159  vector_(rhs.vector_),
160  position_(rhs.position_)
161  {
162  }
163 
/// Virtual destructor; there is nothing to release because the iterator only points at the vector.
165  virtual ~ConstIterator() = default;
168 
170 
173  {
174  if (this != &rhs)
175  {
176  position_ = rhs.position_;
177  vector_ = rhs.vector_;
178  }
179  return *this;
180  }
181 
187  {
188  return *(*vector_)[position_];
189  }
190 
193  {
194  return (*vector_)[position_];
195  }
196 
199  {
200  return ConstIterator(vector_, position_ + diff);
201  }
202 
204  {
205  return position_ - rhs.position_;
206  }
207 
210  {
211  return ConstIterator(vector_, position_ - diff);
212  }
213 
/// Equality: two iterators match when they sit at the same offset of the same vector.
215  bool operator==(const ConstIterator& rhs) const
216  {
217  return (position_ == rhs.position_) && (vector_ == rhs.vector_);
218  }
219 
/// Inequality: true when the vector or the offset differs, i.e. the exact complement of operator==.
221  bool operator!=(const ConstIterator& rhs) const
222  {
223  return !(*this == rhs);
224  }
225 
228  {
229  ++position_;
230  return *this;
231  }
232 
235  {
236  --position_;
237  return *this;
238  }
239 
241 
242 protected:
243 
244  // pointer to the AASequence vector
245  const std::vector<const Residue*>* vector_;
246 
247  // position in the AASequence vector
249  };
250 
251 
256  class OPENMS_DLLAPI Iterator
257  {
258 public:
259 
261 
// Iterator trait typedefs for the mutable Iterator.
// pointer is still pointer-to-const Residue, so the residues themselves cannot be
// modified through these typedefs.
262  typedef const Residue& const_reference;
263  typedef Residue& reference;
264  typedef const Residue* const_pointer;
265  typedef const Residue* pointer;
// Signed offset type, taken from the underlying residue-pointer vector.
266  typedef std::vector<const Residue*>::difference_type difference_type;
267 
273  {
274  }
275 
/// Detailed constructor: binds the iterator to the residue vector @p vec_ptr at offset @p position.
/// The vector is only referenced, never owned; members are initialised in declaration order.
277  Iterator(std::vector<const Residue*>* vec_ptr, difference_type position) :
278  vector_(vec_ptr),
279  position_(position)
280  {
281  }
282 
284  Iterator(const Iterator& rhs) :
285  vector_(rhs.vector_),
286  position_(rhs.position_)
287  {
288  }
289 
/// Virtual destructor; there is nothing to release because the iterator only points at the vector.
291  virtual ~Iterator() = default;
294 
296 
299  {
300  if (this != &rhs)
301  {
302  position_ = rhs.position_;
303  vector_ = rhs.vector_;
304  }
305  return *this;
306  }
307 
313  {
314  return *(*vector_)[position_];
315  }
316 
319  {
320  return (*vector_)[position_];
321  }
322 
325  {
326  return (*vector_)[position_];
327  }
328 
331  {
332  return Iterator(vector_, position_ + diff);
333  }
334 
336  {
337  return position_ - rhs.position_;
338  }
339 
342  {
343  return Iterator(vector_, position_ - diff);
344  }
345 
/// Equality: two iterators match when they sit at the same offset of the same vector.
347  bool operator==(const Iterator& rhs) const
348  {
349  return (position_ == rhs.position_) && (vector_ == rhs.vector_);
350  }
351 
/// Inequality: true when the vector or the offset differs, i.e. the exact complement of operator==.
353  bool operator!=(const Iterator& rhs) const
354  {
355  return !(*this == rhs);
356  }
357 
360  {
361  ++position_;
362  return *this;
363  }
364 
367  {
368  --position_;
369  return *this;
370  }
371 
373 
374 protected:
375 
376  // pointer to the AASequence vector
377  std::vector<const Residue*>* vector_;
378 
379  // position in the AASequence vector
381  };
382 
386 
389 
/// Copy constructor (compiler-generated member-wise copy).
391  AASequence(const AASequence&) = default;
392 
/// Move constructor (compiler-generated, declared noexcept).
394  AASequence(AASequence&&) noexcept = default;
395 
/// Destructor; virtual and defined out of line.
397  virtual ~AASequence();
399 
/// Copy assignment (compiler-generated).
401  AASequence& operator=(const AASequence&) = default;
402 
/// Move assignment (compiler-generated); unlike the move constructor it is not yet marked noexcept.
404  AASequence& operator=(AASequence&&) = default; // TODO: add noexcept (gcc 4.8 bug)
405 
// Emptiness check; presumably true when the sequence holds no residues
// (implementation is not part of this listing -- TODO confirm).
407  bool empty() const;
408 
412 
// String conversions of the sequence. The exact output formats are defined by the
// out-of-line implementations, which are not visible here.
424  String toString() const;
425 
// Presumably the sequence without modification annotations -- TODO confirm.
427  String toUnmodifiedString() const;
428 
// Presumably the sequence with modifications written as UniMod identifiers -- TODO confirm.
437  String toUniModString() const;
438 
// Bracket-notation string. All three parameters are optional: integer_mass defaults to
// true, mass_delta to false, and fixed_modifications to an empty list.
457  String toBracketString(bool integer_mass = true,
458  bool mass_delta = false,
459  const std::vector<String> & fixed_modifications = std::vector<String>()) const;
460 
// Sets a modification, given as a string, on the residue at position index
// (defined out of line; how the string is resolved is not visible here).
463  void setModification(Size index, const String& modification);
464 
465  // sets the (potentially modified) residue
// Replaces the residue pointer stored at position index. The index is used unchecked
// (plain vector operator[]), so callers must ensure index is within the sequence.
466  void setModification(Size index, const Residue* modification) { peptide_[index] = modification; }
467 
// Sets the N-terminal modification from a string
// (presumably a modification name; the lookup is not visible here -- TODO confirm).
470  void setNTerminalModification(const String& modification);
471 
474 
477 
480 
// Sets the C-terminal modification from a string
// (presumably a modification name; the lookup is not visible here -- TODO confirm).
483  void setCTerminalModification(const String& modification);
484 
487 
490 
493 
/// Returns a const reference to the residue at position index.
495  const Residue& getResidue(Size index) const;
496 
499 
/// Returns the average weight of the peptide; type defaults to Residue::Full and charge to 0.
501  double getAverageWeight(Residue::ResidueType type = Residue::Full, Int charge = 0) const;
502 
// Presumably the monoisotopic counterpart of getAverageWeight (same defaults) -- TODO confirm.
507  double getMonoWeight(Residue::ResidueType type = Residue::Full, Int charge = 0) const;
508 
// Presumably the mass-to-charge ratio for the given charge; note that charge has no
// default here and comes first, unlike in the weight getters -- TODO confirm semantics.
514  double getMZ(Int charge, Residue::ResidueType type = Residue::Full) const;
515 
/// Returns a const reference to the residue at the given position.
517  const Residue& operator[](Size index) const;
518 
/// Returns a new sequence with the residues of peptide added; *this is left unchanged (const).
520  AASequence operator+(const AASequence& peptide) const;
521 
524 
/// Returns a new sequence with the single given residue added; *this is left unchanged (const).
526  AASequence operator+(const Residue* residue) const;
527 
530 
/// Returns the number of residues.
532  Size size() const;
533 
/// Returns a peptide sequence of the first index residues.
535  AASequence getPrefix(Size index) const;
536 
/// Returns a peptide sequence of the last index residues.
538  AASequence getSuffix(Size index) const;
539 
/// Returns a peptide sequence of number residues, beginning at position index.
541  AASequence getSubsequence(Size index, UInt number) const;
542 
/// Computes the frequency table of amino acids and writes it into frequency_table.
544  void getAAFrequencies(Map<String, Size>& frequency_table) const;
545 
547 
/// Returns true if the peptide contains the given residue.
552  bool has(const Residue& residue) const;
553 
// Presumably true if peptide occurs anywhere inside this sequence -- TODO confirm.
556  bool hasSubsequence(const AASequence& peptide) const;
557 
// Presumably true if this sequence starts with peptide -- TODO confirm.
560  bool hasPrefix(const AASequence& peptide) const;
561 
// Presumably true if this sequence ends with peptide -- TODO confirm.
564  bool hasSuffix(const AASequence& peptide) const;
565 
568 
571 
/// Returns true if any of the residues or termini are modified.
573  bool isModified() const;
574 
/// Equality operator. Two sequences are equal iff all amino acids including PTMs are equal.
576  bool operator==(const AASequence& rhs) const;
577 
/// Less-than operator which compares the C-term mods, the sequence including PTMs, and the N-term mods.
579  bool operator<(const AASequence& rhs) const;
580 
/// Inequality operator. Complement of the equality operator.
582  bool operator!=(const AASequence& rhs) const;
584 
/// Returns a mutable iterator positioned at the first residue.
588  inline Iterator begin() { return Iterator(&peptide_, 0); }
589 
/// Returns a const iterator positioned at the first residue.
590  inline ConstIterator begin() const { return ConstIterator(&peptide_, 0); }
591 
/// Returns a mutable iterator positioned one past the last residue.
// The size is converted straight to the iterator's difference_type instead of being
// narrowed through Int (a plain int) first, which would truncate very large sizes.
592  inline Iterator end() { return Iterator(&peptide_, static_cast<Iterator::difference_type>(peptide_.size())); }
593 
/// Returns a const iterator positioned one past the last residue.
594  inline ConstIterator end() const { return ConstIterator(&peptide_, static_cast<ConstIterator::difference_type>(peptide_.size())); }
596 
/// Writes a peptide to an output stream.
601  friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AASequence& peptide);
602 
/// Reads a peptide from an input stream.
// NOTE(review): peptide is taken by const reference, although an extraction operator
// normally has to modify its target -- confirm against the implementation whether this
// should be a non-const AASequence&.
604  friend OPENMS_DLLAPI std::istream& operator>>(std::istream& is, const AASequence& peptide);
606 
/// Creates an AASequence object by parsing an OpenMS string.
/// permissive defaults to true; what it relaxes is decided by the parser, which is
/// not visible here.
615  static AASequence fromString(const String& s,
616  bool permissive = true);
617 
/// Creates an AASequence object by parsing a C string (character array).
/// permissive defaults to true, as in the String overload.
626  static AASequence fromString(const char* s,
627  bool permissive = true);
628 
629  protected:
630 
631  std::vector<const Residue*> peptide_;
632 
634 
636 
652  const String& str,
653  AASequence& aas,
654  const ResidueModification::TermSpecificity& specificity);
655 
670  const String& str,
671  AASequence& aas,
672  const ResidueModification::TermSpecificity& specificity);
673 
// Internal parser: reads the textual peptide and stores the result in the out-parameter aas.
// permissive defaults to true, matching the fromString overloads.
674  static void parseString_(const String& peptide, AASequence& aas,
675  bool permissive = true);
676  };
677 
/// Writes a peptide to an output stream (namespace-scope declaration of the friend operator).
678  OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AASequence& peptide);
679 
/// Reads a peptide from an input stream (namespace-scope declaration of the friend operator).
// NOTE(review): the input stream parameter is named `os` here but `is` in the friend
// declaration inside the class, and peptide is const although it is the extraction target.
680  OPENMS_DLLAPI std::istream& operator>>(std::istream& os, const AASequence& peptide);
681 
682 } // namespace OpenMS
683 
684 
ConstIterator for AASequence.
Definition: AASequence.h:122
const_pointer operator->() const
dereference operator
Definition: AASequence.h:192
const ConstIterator operator-(difference_type diff) const
backward jump operator
Definition: AASequence.h:209
Residue value_type
Definition: AASequence.h:131
ConstIterator & operator=(const ConstIterator &rhs)
assignment operator
Definition: AASequence.h:172
ConstIterator(const std::vector< const Residue * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition: AASequence.h:144
bool operator!=(const ConstIterator &rhs) const
inequality operator
Definition: AASequence.h:221
ConstIterator(const AASequence::Iterator &rhs)
copy constructor from Iterator
Definition: AASequence.h:158
const ConstIterator operator+(difference_type diff) const
forward jump operator
Definition: AASequence.h:198
ConstIterator & operator--()
decrement operator
Definition: AASequence.h:234
std::random_access_iterator_tag iterator_category
Definition: AASequence.h:133
bool operator==(const ConstIterator &rhs) const
equality comparator
Definition: AASequence.h:215
const std::vector< const Residue * > * vector_
Definition: AASequence.h:245
ConstIterator & operator++()
increment operator
Definition: AASequence.h:227
const Residue * const_pointer
Definition: AASequence.h:129
std::vector< const Residue * >::difference_type difference_type
Definition: AASequence.h:130
difference_type position_
Definition: AASequence.h:248
const Residue * pointer
Definition: AASequence.h:132
ConstIterator(const ConstIterator &rhs)
copy constructor
Definition: AASequence.h:151
Residue & reference
Definition: AASequence.h:128
const Residue & const_reference
Definition: AASequence.h:127
const_reference operator*() const
dereference operator
Definition: AASequence.h:186
virtual ~ConstIterator()
destructor
Definition: AASequence.h:165
ConstIterator()
default constructor
Definition: AASequence.h:139
difference_type operator-(ConstIterator rhs) const
Definition: AASequence.h:203
Iterator class for AASequence.
Definition: AASequence.h:257
const_pointer operator->() const
dereference operator
Definition: AASequence.h:318
pointer operator->()
mutable dereference operator
Definition: AASequence.h:324
Iterator()
default constructor
Definition: AASequence.h:272
const Iterator operator+(difference_type diff) const
forward jump operator
Definition: AASequence.h:330
Iterator & operator--()
decrement operator
Definition: AASequence.h:366
virtual ~Iterator()
destructor
Definition: AASequence.h:291
std::vector< const Residue * > * vector_
Definition: AASequence.h:377
difference_type operator-(Iterator rhs) const
Definition: AASequence.h:335
Iterator & operator=(const Iterator &rhs)
assignment operator
Definition: AASequence.h:298
Iterator(std::vector< const Residue * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition: AASequence.h:277
bool operator==(const Iterator &rhs) const
equality comparator
Definition: AASequence.h:347
const Residue * const_pointer
Definition: AASequence.h:264
std::vector< const Residue * >::difference_type difference_type
Definition: AASequence.h:266
difference_type position_
Definition: AASequence.h:380
const Residue * pointer
Definition: AASequence.h:265
Residue & reference
Definition: AASequence.h:263
const Residue & const_reference
Definition: AASequence.h:262
Iterator(const Iterator &rhs)
copy constructor
Definition: AASequence.h:284
const_reference operator*() const
dereference operator
Definition: AASequence.h:312
bool operator!=(const Iterator &rhs) const
inequality operator
Definition: AASequence.h:353
const Iterator operator-(difference_type diff) const
backward jump operator
Definition: AASequence.h:341
Iterator & operator++()
increment operator
Definition: AASequence.h:359
Representation of a peptide/protein sequence.
Definition: AASequence.h:112
const ResidueModification * getNTerminalModification() const
returns a pointer to the N-terminal modification, or zero if none is set
bool hasNTerminalModification() const
predicate which is true if the peptide is N-term modified
double getMZ(Int charge, Residue::ResidueType type=Residue::Full) const
Iterator begin()
Definition: AASequence.h:588
EmpiricalFormula getFormula(Residue::ResidueType type=Residue::Full, Int charge=0) const
returns the formula of the peptide
std::vector< const Residue * > peptide_
Definition: AASequence.h:631
double getAverageWeight(Residue::ResidueType type=Residue::Full, Int charge=0) const
returns the average weight of the peptide
const Residue & operator[](Size index) const
returns a reference to the residue at the given position
AASequence(const AASequence &)=default
Copy constructor.
const String & getCTerminalModificationName() const
returns the name (ID) of the C-terminal modification, or an empty string if none is set
AASequence getPrefix(Size index) const
returns a peptide sequence of the first index residues
static String::ConstIterator parseModSquareBrackets_(const String::ConstIterator str_it, const String &str, AASequence &aas, const ResidueModification::TermSpecificity &specificity)
Parses modifications in square brackets (a mass)
ConstIterator end() const
Definition: AASequence.h:594
bool operator==(const AASequence &rhs) const
equality operator. Two sequences are equal iff all amino acids including PTMs are equal
bool isModified() const
returns true if any of the residues or termini are modified
double getMonoWeight(Residue::ResidueType type=Residue::Full, Int charge=0) const
AASequence & operator+=(const Residue *)
adds the given residue to the peptide
static AASequence fromString(const char *s, bool permissive=true)
create AASequence object by parsing a C string (character array)
static String::ConstIterator parseModRoundBrackets_(const String::ConstIterator str_it, const String &str, AASequence &aas, const ResidueModification::TermSpecificity &specificity)
Parses modifications in round brackets (an identifier)
static void parseString_(const String &peptide, AASequence &aas, bool permissive=true)
const Residue & getResidue(Size index) const
returns a reference to the residue at position index
void setCTerminalModification(const ResidueModification *modification)
sets the C-terminal modification
friend std::ostream & operator<<(std::ostream &os, const AASequence &peptide)
writes a peptide to an output stream
AASequence operator+(const AASequence &peptide) const
adds the residues of the peptide
bool hasCTerminalModification() const
predicate which is true if the peptide is C-term modified
bool has(const Residue &residue) const
returns true if the peptide contains the given residue
AASequence getSubsequence(Size index, UInt number) const
returns a peptide sequence of number residues, beginning at position index
const ResidueModification * getCTerminalModification() const
returns a pointer to the C-terminal modification, or zero if none is set
bool hasSubsequence(const AASequence &peptide) const
bool operator!=(const AASequence &rhs) const
inequality operator. Complement of equality operator.
const ResidueModification * c_term_mod_
Definition: AASequence.h:635
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
void setNTerminalModification(const String &modification)
AASequence operator+(const Residue *residue) const
adds the given residue to the peptide
AASequence getSuffix(Size index) const
returns a peptide sequence of the last index residues
Iterator end()
Definition: AASequence.h:592
AASequence(AASequence &&) noexcept=default
Move constructor.
void setNTerminalModification(const ResidueModification *modification)
sets the N-terminal modification
const ResidueModification * n_term_mod_
Definition: AASequence.h:633
bool hasPrefix(const AASequence &peptide) const
bool operator<(const AASequence &rhs) const
less-than operator which compares the C-term mods, sequence including PTMs and N-term mods; can be ...
Size size() const
returns the number of residues
ConstIterator begin() const
Definition: AASequence.h:590
AASequence & operator+=(const AASequence &)
adds the residues of a peptide
AASequence()
Default constructor.
const String & getNTerminalModificationName() const
returns the name (ID) of the N-terminal modification, or an empty string if none is set
void setCTerminalModification(const String &modification)
bool hasSuffix(const AASequence &peptide) const
void getAAFrequencies(Map< String, Size > &frequency_table) const
compute frequency table of amino acids
friend std::istream & operator>>(std::istream &is, const AASequence &peptide)
reads a peptide from an input stream
Representation of an empirical formula.
Definition: EmpiricalFormula.h:83
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:52
Representation of a modification.
Definition: ResidueModification.h:77
TermSpecificity
Position where the modification is allowed to occur.
Definition: ResidueModification.h:96
Representation of a residue.
Definition: Residue.h:63
ResidueType
Definition: Residue.h:152
@ Full
with N-terminus and C-terminus
Definition: Residue.h:153
A more convenient string class.
Definition: String.h:61
const_iterator ConstIterator
Const Iterator.
Definition: String.h:73
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
void setModification(int location, int max_size, String modification, OpenMS::AASequence &aas)
helper function that sets a modification on an AASequence object
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
std::istream & operator>>(std::istream &os, const AASequence &peptide)
const std::string & toString(const DriftTimeUnit value)
Size< TNeedle >::Type position(const PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:563