OpenMS
Loading...
Searching...
No Matches
AASequence.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Andreas Bertsch, Timo Sachsenberg $
7// --------------------------------------------------------------------------
8
9#pragma once
10
17
18#include <vector>
19#include <iosfwd>
20#include <map>
21#include <functional>
22
23namespace OpenMS
24{
25
87 class OPENMS_DLLAPI AASequence final
88 {
89public:
90
91 class Iterator;
92
97 class OPENMS_DLLAPI ConstIterator final
98 {
99 public:
100 // TODO Iterator constructor for ConstIterator
101
102 typedef const Residue& const_reference;
104 typedef const Residue* const_pointer;
105 typedef std::vector<const Residue*>::difference_type difference_type;
107 typedef const Residue* pointer;
108 typedef std::random_access_iterator_tag iterator_category;
109
114 ConstIterator() = default;
115
/// Detailed constructor with pointer to the vector and offset position.
/// @param vec_ptr  pointer to the vector of residue pointers to iterate over; only the pointer is stored
/// @param position offset into that vector at which the iterator starts
117 ConstIterator(const std::vector<const Residue*>* vec_ptr, difference_type position)
118 : vector_{vec_ptr},
119 position_{position}
120 {
121 }
122
124 ConstIterator(const ConstIterator& rhs) = default;
125
128 vector_(rhs.vector_),
129 position_(rhs.position_)
130 {
131 }
132
134 ~ConstIterator() = default;
135
137
139 ConstIterator& operator=(const ConstIterator& rhs) = default;
140
146 {
147 return *(*vector_)[position_];
148 }
149
152 {
153 return (*vector_)[position_];
154 }
155
158 {
159 return ConstIterator(vector_, position_ + diff);
160 }
161
163 {
164 return position_ - rhs.position_;
165 }
166
169 {
170 return ConstIterator(vector_, position_ - diff);
171 }
172
/// Equality comparator: true only if both iterators refer to the same vector and hold the same position.
174 bool operator==(const ConstIterator& rhs) const
175 {
176 return vector_ == rhs.vector_ && position_ == rhs.position_;
177 }
178
/// Inequality operator: true if the iterators differ in the vector they refer to or in their position.
180 bool operator!=(const ConstIterator& rhs) const
181 {
182 return vector_ != rhs.vector_ || position_ != rhs.position_;
183 }
184
187 {
188 ++position_;
189 return *this;
190 }
191
194 {
195 --position_;
196 return *this;
197 }
198
200
201protected:
202
203 // pointer to the AASequence vector
204 const std::vector<const Residue*>* vector_ {};
205
206 // position in the AASequence vector
207 difference_type position_ {};
208 };
209
210
215 class OPENMS_DLLAPI Iterator final
216 {
217public:
218
220
221 typedef const Residue& const_reference;
223 typedef const Residue* const_pointer;
224 typedef const Residue* pointer;
225 typedef std::vector<const Residue*>::difference_type difference_type;
226
231 Iterator() = default;
232
/// Detailed constructor with pointer to the vector and offset position.
/// @param vec_ptr  pointer to the (non-const) vector of residue pointers to iterate over; only the pointer is stored
/// @param position offset into that vector at which the iterator starts
234 Iterator(std::vector<const Residue*>* vec_ptr, difference_type position)
235 : vector_ {vec_ptr},
236 position_{position}
237 {
238 }
239
241 Iterator(const Iterator& rhs) = default;
242
244 ~Iterator() = default;
245
247
250 {
251 if (this != &rhs)
252 {
253 position_ = rhs.position_;
254 vector_ = rhs.vector_;
255 }
256 return *this;
257 }
258
264 {
265 return *(*vector_)[position_];
266 }
267
270 {
271 return (*vector_)[position_];
272 }
273
276 {
277 return (*vector_)[position_];
278 }
279
282 {
283 return Iterator(vector_, position_ + diff);
284 }
285
287 {
288 return position_ - rhs.position_;
289 }
290
293 {
294 return Iterator(vector_, position_ - diff);
295 }
296
/// Equality comparator: true only if both iterators refer to the same vector and hold the same position.
298 bool operator==(const Iterator& rhs) const
299 {
300 return vector_ == rhs.vector_ && position_ == rhs.position_;
301 }
302
/// Inequality operator: true if the iterators differ in the vector they refer to or in their position.
304 bool operator!=(const Iterator& rhs) const
305 {
306 return vector_ != rhs.vector_ || position_ != rhs.position_;
307 }
308
311 {
312 ++position_;
313 return *this;
314 }
315
318 {
319 --position_;
320 return *this;
321 }
322
324
325protected:
326
327 // pointer to the AASequence vector
328 std::vector<const Residue*>* vector_ {};
329
330 // position in the AASequence vector
331 difference_type position_ {};
332 };
333
337
339 AASequence() = default;
340
342 AASequence(const AASequence&) = default;
343
345 AASequence(AASequence&&) = default;
346
348 ~AASequence() = default;
350
352 AASequence& operator=(const AASequence&) = default;
353
356
358 bool empty() const;
359
363
376
379
389
408 String toBracketString(bool integer_mass = true,
409 bool mass_delta = false,
410 const std::vector<String> & fixed_modifications = std::vector<String>()) const;
411
414 void setModification(Size index, const String& modification);
415
417 void setModification(Size index, const Residue* modification);
418
420 void setModification(Size index, const ResidueModification* modification);
421
424 void setModification(Size index, const ResidueModification& modification);
425
427 void setModificationByDiffMonoMass(Size index, double diffMonoMass);
428
431 void setNTerminalModification(const String& modification);
432
435
438
440 void setNTerminalModificationByDiffMonoMass(double diffMonoMass, bool protein_term);
441
444
447
450 void setCTerminalModification(const String& modification);
451
454
457
459 void setCTerminalModificationByDiffMonoMass(double diffMonoMass, bool protein_term);
460
463
466
468 const Residue& getResidue(Size index) const;
469
471 EmpiricalFormula getFormula(Residue::ResidueType type = Residue::Full, Int charge = 0) const;
472
474 double getAverageWeight(Residue::ResidueType type = Residue::Full, Int charge = 0) const;
475
480 double getMonoWeight(Residue::ResidueType type = Residue::Full, Int charge = 0) const;
481
487 double getMZ(Int charge, Residue::ResidueType type = Residue::Full) const;
488
490 const Residue& operator[](Size index) const;
491
493 AASequence operator+(const AASequence& peptide) const;
494
497
499 AASequence operator+(const Residue* residue) const;
500
503
505 Size size() const;
506
509
512
514 AASequence getSubsequence(Size index, UInt number) const;
515
517 void getAAFrequencies(std::map<String, Size>& frequency_table) const;
518
520
525 bool has(const Residue& residue) const;
526
529 bool hasSubsequence(const AASequence& peptide) const;
530
533 bool hasPrefix(const AASequence& peptide) const;
534
537 bool hasSuffix(const AASequence& peptide) const;
538
541
544
546 bool isModified() const;
547
549 bool operator==(const AASequence& rhs) const;
550
552 bool operator<(const AASequence& rhs) const;
553
555 bool operator!=(const AASequence& rhs) const;
557
/// Mutable iterator at offset 0 of peptide_; it stores the address of this object's residue vector.
561 inline Iterator begin() { return Iterator(&peptide_, 0); }
562
/// Const iterator at offset 0 of peptide_.
563 inline ConstIterator begin() const { return ConstIterator(&peptide_, 0); }
564
/// Mutable iterator at offset peptide_.size(), i.e. one past the last residue.
/// NOTE(review): the size is cast to Int (a signed int) before being passed as difference_type -- confirm this is wide enough for every sequence length in use.
565 inline Iterator end() { return Iterator(&peptide_, (Int) peptide_.size()); }
566
/// Const iterator at offset peptide_.size(), i.e. one past the last residue (same Int cast as the mutable overload).
567 inline ConstIterator end() const { return ConstIterator(&peptide_, (Int) peptide_.size()); }
569
574 friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AASequence& peptide);
575
577 friend OPENMS_DLLAPI std::istream& operator>>(std::istream& is, const AASequence& peptide);
579
588 static AASequence fromString(const String& s,
589 bool permissive = true);
590
599 static AASequence fromString(const char* s,
600 bool permissive = true);
601
604 explicit AASequence(const String& s);
605
608 explicit AASequence(const char* s);
609
613 explicit AASequence(const String& s, bool permissive);
614
618 explicit AASequence(const char* s, bool permissive);
619
620 protected:
621
622 std::vector<const Residue*> peptide_;
623
624 const ResidueModification* n_term_mod_ = nullptr;
625
626 const ResidueModification* c_term_mod_ = nullptr;
627
642 const String& str,
643 AASequence& aas,
644 const ResidueModification::TermSpecificity& specificity);
645
660 const String& str,
661 AASequence& aas,
662 const ResidueModification::TermSpecificity& specificity);
663
664 static void parseString_(const String& peptide, AASequence& aas,
665 bool permissive = true);
666 };
667
668 OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AASequence& peptide);
669
670 OPENMS_DLLAPI std::istream& operator>>(std::istream& os, const AASequence& peptide);
671
672} // namespace OpenMS
673
674// Hash function specialization for AASequence
675// Placed in std namespace to allow use with std::unordered_map/set
676namespace std
677{
692 template<>
693 struct hash<OpenMS::AASequence>
694 {
695 std::size_t operator()(const OpenMS::AASequence& seq) const noexcept
696 {
697 std::size_t seed = 0;
698
699 // Hash each residue
700 for (const auto& residue : seq)
701 {
702 // Hash one-letter code (single character, fast)
703 const OpenMS::String& olc = residue.getOneLetterCode();
704 if (!olc.empty())
705 {
707 }
708
709 // Hash modification if present
710 const OpenMS::ResidueModification* mod = residue.getModification();
711 if (mod != nullptr)
712 {
713 // Use full ID for portability (e.g., "Oxidation (M)")
714 // String inherits from std::string, no copy needed
716 }
717 }
718
719 // Hash N-terminal modification if present
720 const OpenMS::ResidueModification* n_mod = seq.getNTerminalModification();
721 if (n_mod != nullptr)
722 {
723 // Use a different seed offset for N-term to distinguish from C-term
724 std::size_t n_hash = OpenMS::fnv1a_hash_string(n_mod->getFullId());
725 OpenMS::hash_combine(seed, n_hash ^ 0x4e5445524dULL); // "NTERM" in hex-like
726 }
727
728 // Hash C-terminal modification if present
729 const OpenMS::ResidueModification* c_mod = seq.getCTerminalModification();
730 if (c_mod != nullptr)
731 {
732 // Use a different seed offset for C-term
733 std::size_t c_hash = OpenMS::fnv1a_hash_string(c_mod->getFullId());
734 OpenMS::hash_combine(seed, c_hash ^ 0x435445524dULL); // "CTERM" in hex-like
735 }
736
737 return seed;
738 }
739 };
740} // namespace std
741
742
ConstIterator for AASequence.
Definition AASequence.h:98
const_pointer operator->() const
dereference operator
Definition AASequence.h:151
ConstIterator(const ConstIterator &rhs)=default
copy constructor
const ConstIterator operator-(difference_type diff) const
backward jump operator
Definition AASequence.h:168
Residue value_type
Definition AASequence.h:106
ConstIterator(const std::vector< const Residue * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition AASequence.h:117
bool operator!=(const ConstIterator &rhs) const
inequality operator
Definition AASequence.h:180
ConstIterator(const AASequence::Iterator &rhs)
copy constructor from Iterator
Definition AASequence.h:127
const ConstIterator operator+(difference_type diff) const
forward jump operator
Definition AASequence.h:157
~ConstIterator()=default
destructor
ConstIterator()=default
default constructor
ConstIterator & operator--()
decrement operator
Definition AASequence.h:193
std::random_access_iterator_tag iterator_category
Definition AASequence.h:108
bool operator==(const ConstIterator &rhs) const
equality comparator
Definition AASequence.h:174
const std::vector< const Residue * > * vector_
Definition AASequence.h:204
const Residue * const_pointer
Definition AASequence.h:104
difference_type position_
Definition AASequence.h:207
std::vector< const Residue * >::difference_type difference_type
Definition AASequence.h:105
const Residue * pointer
Definition AASequence.h:107
Residue & reference
Definition AASequence.h:103
const Residue & const_reference
Definition AASequence.h:102
const_reference operator*() const
dereference operator
Definition AASequence.h:145
ConstIterator & operator++()
increment operator
Definition AASequence.h:186
ConstIterator & operator=(const ConstIterator &rhs)=default
assignment operator
difference_type operator-(ConstIterator rhs) const
Definition AASequence.h:162
Iterator class for AASequence.
Definition AASequence.h:216
const_pointer operator->() const
dereference operator
Definition AASequence.h:269
Iterator()=default
default constructor
pointer operator->()
mutable dereference operator
Definition AASequence.h:275
const Iterator operator+(difference_type diff) const
forward jump operator
Definition AASequence.h:281
Iterator & operator=(const Iterator &rhs)
assignment operator
Definition AASequence.h:249
Iterator(const Iterator &rhs)=default
copy constructor
std::vector< const Residue * > * vector_
Definition AASequence.h:328
difference_type operator-(Iterator rhs) const
Definition AASequence.h:286
~Iterator()=default
destructor
Iterator(std::vector< const Residue * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition AASequence.h:234
bool operator==(const Iterator &rhs) const
equality comparator
Definition AASequence.h:298
const Residue * const_pointer
Definition AASequence.h:223
difference_type position_
Definition AASequence.h:331
std::vector< const Residue * >::difference_type difference_type
Definition AASequence.h:225
const Residue * pointer
Definition AASequence.h:224
Residue & reference
Definition AASequence.h:222
const Residue & const_reference
Definition AASequence.h:221
const_reference operator*() const
dereference operator
Definition AASequence.h:263
bool operator!=(const Iterator &rhs) const
inequality operator
Definition AASequence.h:304
Iterator & operator++()
increment operator
Definition AASequence.h:310
Iterator & operator--()
decrement operator
Definition AASequence.h:317
const Iterator operator-(difference_type diff) const
backward jump operator
Definition AASequence.h:292
Representation of a peptide/protein sequence.
Definition AASequence.h:88
void setModification(Size index, const Residue *modification)
sets the modification of the AA at index by providing an already (potentially) modified residue
void setModificationByDiffMonoMass(Size index, double diffMonoMass)
modifies the residue at index in the sequence and potentially in the ResidueDB
const ResidueModification * getCTerminalModification() const
returns a pointer to the C-terminal modification, or zero if none is set
bool hasNTerminalModification() const
predicate which is true if the peptide is N-term modified
const Residue & operator[](Size index) const
returns a reference to the residue at the given position
AASequence & operator+=(const Residue *)
adds a residue to the peptide
double getMZ(Int charge, Residue::ResidueType type=Residue::Full) const
Iterator begin()
Definition AASequence.h:561
friend std::ostream & operator<<(std::ostream &os, const AASequence &peptide)
writes a peptide to an output stream
String toString() const
returns the peptide as string with modifications embedded in brackets
EmpiricalFormula getFormula(Residue::ResidueType type=Residue::Full, Int charge=0) const
returns the formula of the peptide
const Residue & getResidue(Size index) const
returns a reference to the residue at position index
std::vector< const Residue * > peptide_
Definition AASequence.h:622
void setNTerminalModificationByDiffMonoMass(double diffMonoMass, bool protein_term)
sets the N-terminal modification by the monoisotopic mass difference it introduces (creates a "user-d...
void setModification(Size index, const String &modification)
AASequence(AASequence &&)=default
Move constructor.
double getAverageWeight(Residue::ResidueType type=Residue::Full, Int charge=0) const
returns the average weight of the peptide
AASequence(const AASequence &)=default
Copy constructor.
void setModification(Size index, const ResidueModification *modification)
sets the modification of AA at index by providing a pointer to a ResidueModification object found in ...
AASequence(const char *s)
constructor from C string
AASequence getPrefix(Size index) const
returns a peptide sequence of the first index residues
bool empty() const
check if sequence is empty
AASequence()=default
Default constructor.
static String::ConstIterator parseModSquareBrackets_(const String::ConstIterator str_it, const String &str, AASequence &aas, const ResidueModification::TermSpecificity &specificity)
Parses modifications in square brackets (a mass)
void setModification(Size index, const ResidueModification &modification)
ConstIterator end() const
Definition AASequence.h:567
bool operator==(const AASequence &rhs) const
equality operator. Two sequences are equal iff all amino acids including PTMs are equal
void setCTerminalModificationByDiffMonoMass(double diffMonoMass, bool protein_term)
sets the C-terminal modification by the monoisotopic mass difference it introduces (creates a "user-d...
bool isModified() const
returns true if any of the residues or termini are modified
double getMonoWeight(Residue::ResidueType type=Residue::Full, Int charge=0) const
void setCTerminalModification(const ResidueModification &mod)
sets the C-terminal modification (copies and adds to database if not present)
static AASequence fromString(const char *s, bool permissive=true)
create AASequence object by parsing a C string (character array)
static String::ConstIterator parseModRoundBrackets_(const String::ConstIterator str_it, const String &str, AASequence &aas, const ResidueModification::TermSpecificity &specificity)
Parses modifications in round brackets (an identifier)
String toBracketString(bool integer_mass=true, bool mass_delta=false, const std::vector< String > &fixed_modifications=std::vector< String >()) const
create a TPP compatible string of the modified sequence using bracket notation.
static void parseString_(const String &peptide, AASequence &aas, bool permissive=true)
AASequence & operator=(const AASequence &)=default
Assignment operator.
void setCTerminalModification(const ResidueModification *modification)
sets the C-terminal modification (must be present in the database)
~AASequence()=default
Destructor.
AASequence operator+(const AASequence &peptide) const
adds the residues of the peptide
AASequence & operator=(AASequence &&)=default
Move assignment operator.
void getAAFrequencies(std::map< String, Size > &frequency_table) const
compute frequency table of amino acids
bool hasCTerminalModification() const
predicate which is true if the peptide is C-term modified
bool has(const Residue &residue) const
returns true if the peptide contains the given residue
AASequence getSubsequence(Size index, UInt number) const
returns a peptide sequence of number residues, beginning at position index
bool hasSubsequence(const AASequence &peptide) const
String toUnmodifiedString() const
returns the peptide as string without any modifications (e.g., "PEPTIDER")
bool operator!=(const AASequence &rhs) const
inequality operator. Complement of equality operator.
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
void setNTerminalModification(const String &modification)
AASequence operator+(const Residue *residue) const
adds a residue to the peptide
AASequence getSuffix(Size index) const
returns a peptide sequence of the last index residues
Iterator end()
Definition AASequence.h:565
const String & getCTerminalModificationName() const
returns the name (ID) of the C-terminal modification, or an empty string if none is set
void setNTerminalModification(const ResidueModification *modification)
sets the N-terminal modification
AASequence(const char *s, bool permissive)
constructor from C string
bool hasPrefix(const AASequence &peptide) const
const String & getNTerminalModificationName() const
returns the name (ID) of the N-terminal modification, or an empty string if none is set
bool operator<(const AASequence &rhs) const
less-than operator which compares the C-term mods, sequence including PTMs and N-term mods; can be ...
Size size() const
returns the number of residues
const ResidueModification * getNTerminalModification() const
returns a pointer to the N-terminal modification, or zero if none is set
String toUniModString() const
returns the peptide as string with UniMod-style modifications embedded in brackets
ConstIterator begin() const
Definition AASequence.h:563
friend std::istream & operator>>(std::istream &is, const AASequence &peptide)
reads a peptide from an input stream
AASequence & operator+=(const AASequence &)
adds the residues of a peptide
void setNTerminalModification(const ResidueModification &mod)
sets the N-terminal modification (copies and adds to database if not present)
void setCTerminalModification(const String &modification)
bool hasSuffix(const AASequence &peptide) const
AASequence(const String &s, bool permissive)
constructor from String
AASequence(const String &s)
constructor from String
Representation of an empirical formula.
Definition EmpiricalFormula.h:63
Representation of a modification on an amino acid residue.
Definition ResidueModification.h:55
const String & getFullId() const
returns the full identifier of the mod (Unimod accession + origin, if available)
TermSpecificity
Position where the modification is allowed to occur.
Definition ResidueModification.h:74
Representation of an amino acid residue.
Definition Residue.h:40
ResidueType
Definition Residue.h:152
A more convenient string class.
Definition String.h:34
const_iterator ConstIterator
Const Iterator.
Definition String.h:46
int Int
Signed integer type.
Definition Types.h:72
unsigned int UInt
Unsigned integer type.
Definition Types.h:64
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
void hash_combine(std::size_t &seed, std::size_t value) noexcept
Combine a hash value with additional data using golden ratio mixing.
Definition HashUtils.h:87
std::size_t hash_char(char c) noexcept
Hash for a character.
Definition HashUtils.h:119
std::size_t fnv1a_hash_string(const std::string &s) noexcept
FNV-1a hash for a string.
Definition HashUtils.h:70
std::istream & operator>>(std::istream &os, const AASequence &peptide)
STL namespace.
std::size_t operator()(const OpenMS::AASequence &seq) const noexcept
Definition AASequence.h:695