OpenMS
Loading...
Searching...
No Matches
Residue.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Andreas Bertsch, Jang Jang Jin$
7// --------------------------------------------------------------------------
8//
9
10#pragma once
11
16
17#include <array>
18#include <functional>
19#include <iosfwd>
20#include <set>
21#include <vector>
22
23namespace OpenMS
24{
25 class ResidueModification;
26
39 class OPENMS_DLLAPI Residue
40 {
41 friend class ResidueDB;
42
43public:
44
61
/// Returns the formula "H2O"; going by the function name, this is the delta that turns an
/// internal residue into a full residue.
/// The function-local static is constructed once, on first call, and the same object is
/// handed out by const reference on every call.
62 inline static const EmpiricalFormula& getInternalToFull()
63 {
64 static const EmpiricalFormula to_full = EmpiricalFormula("H2O");
65 return to_full;
66 }
67
/// Returns the formula "H"; going by the function name, this is the delta that turns an
/// internal residue into its N-terminal form.
/// The function-local static is constructed once, on first call.
68 inline static const EmpiricalFormula& getInternalToNTerm()
69 {
// NOTE(review): the local is called to_full although it holds the N-terminal delta;
// it is only the cached return value.
70 static const EmpiricalFormula to_full = EmpiricalFormula("H");
71 return to_full;
72 }
73
/// Returns the formula "OH"; going by the function name, this is the delta that turns an
/// internal residue into its C-terminal form.
/// The function-local static is constructed once, on first call.
74 inline static const EmpiricalFormula& getInternalToCTerm()
75 {
76 static const EmpiricalFormula to_full = EmpiricalFormula("OH");
77 return to_full;
78 }
79
/// Returns the internal -> a-ion delta, computed once as getInternalToNTerm() minus "CHO".
80 inline static const EmpiricalFormula& getInternalToAIon()
81 {
82 // Mind the "-"
83 static const EmpiricalFormula to_full =
84 getInternalToNTerm() - EmpiricalFormula("CHO");
85 return to_full;
86 }
87
/// Returns the internal -> b-ion delta, computed once as getInternalToNTerm() minus "H".
88 inline static const EmpiricalFormula& getInternalToBIon()
89 {
90 // Mind the "-"
91 static const EmpiricalFormula to_full =
92 getInternalToNTerm() - EmpiricalFormula("H");
93 return to_full;
94 }
95
/// Returns the internal -> c-ion delta, computed once as getInternalToNTerm() plus "NH2".
96 inline static const EmpiricalFormula& getInternalToCIon()
97 {
98 static const EmpiricalFormula to_full =
99 getInternalToNTerm() + EmpiricalFormula("NH2");
100 return to_full;
101 }
102
/// Returns the internal -> x-ion delta, computed once as getInternalToCTerm() plus "CO"
/// minus "H".
103 inline static const EmpiricalFormula& getInternalToXIon()
104 {
105 // Mind the "-"
106 static const EmpiricalFormula to_full =
107 getInternalToCTerm() + EmpiricalFormula("CO") - EmpiricalFormula("H");
108 return to_full;
109 }
110
/// Returns the internal -> y-ion delta, computed once as getInternalToCTerm() plus "H".
111 inline static const EmpiricalFormula& getInternalToYIon()
112 {
113 static const EmpiricalFormula to_full =
114 getInternalToCTerm() + EmpiricalFormula("H");
115 return to_full;
116 }
117
/// Returns the internal -> z-ion delta, computed once as getInternalToCTerm() minus "NH2".
118 inline static const EmpiricalFormula& getInternalToZIon()
119 {
120 // Mind the "-"
121 static const EmpiricalFormula to_full =
122 getInternalToCTerm() - EmpiricalFormula("NH2");
123 return to_full;
124 }
125
// NOTE(review): the signature line (listing line 126) is not shown; presumably this is the
// body of getInternalToZp1Ion() -- confirm against the real header.
// Returns, computed once, getInternalToCTerm() minus "NH".
127 {
128 // Mind the "-"
129 static const EmpiricalFormula to_full =
130 getInternalToCTerm() - EmpiricalFormula("NH");
131 return to_full;
132 }
133
// NOTE(review): the signature line (listing line 134) is not shown; presumably this is the
// body of getInternalToZp2Ion() -- confirm against the real header.
// Returns, computed once, getInternalToCTerm() minus "N".
135 {
136 // Mind the "-"
137 static const EmpiricalFormula to_full =
138 getInternalToCTerm() - EmpiricalFormula("N");
139 return to_full;
140 }
141
143
147
175
/// Human-readable names of the residue types; the array length is
/// ResidueType::SizeOfResidueType.
/// Presumably indexed by the ResidueType value ("full" first, "unannotated" last) -- the
/// enum itself is not shown here, so confirm the order against it.
// NOTE(review): declared `static inline` without const/constexpr, so the table is mutable
// from any code that can see it; consider making it constant.
// NOTE(review): std::string_view needs <string_view>, which is not among the visible
// #include lines (listing lines 12-15 are not shown) -- confirm.
177 static inline std::array<std::string_view, Residue::ResidueType::SizeOfResidueType> names_of_residuetype {
178 "full",
179 "internal",
180 "N-terminal",
181 "C-terminal",
182 "a-ion",
183 "b-ion",
184 "c-ion",
185 "x-ion",
186 "y-ion",
187 "z-ion",
188 "z+1-ion",
189 "z+2-ion",
190 "precursor-ion",
191 "b-H2O-ion",
192 "y-H2O-ion",
193 "b-NH3-ion",
194 "y-NH3-ion",
195 "Non-identified ion",
196 "unannotated"
197 };
198
/// Returns the ion name for the given residue type.
200 static String getResidueTypeName(const ResidueType res_type);
201
205
208
/// Copy constructor (compiler-generated).
210 Residue(const Residue&) = default;
211
/// Move constructor (compiler-generated).
213 Residue(Residue&&) = default;
214
215 // Detailed constructor
/// Builds a residue from its name, three/one letter codes and empirical formula.
/// Optional arguments default to: pka = 0, pkb = 0, pkc = -1, gb_sc = 0, gb_bb_l = 0,
/// gb_bb_r = 0 and an empty synonym set.
216 Residue(const String& name,
217 const String& three_letter_code,
218 const String& one_letter_code,
219 const EmpiricalFormula& formula,
220 double pka = 0,
221 double pkb = 0,
222 double pkc = -1,
223 double gb_sc = 0,
224 double gb_bb_l = 0,
225 double gb_bb_r = 0,
226 const std::set<String>& synonyms = std::set<String>());
227
/// Destructor (virtual).
229 virtual ~Residue();
231
235
/// Copy assignment operator (compiler-generated).
237 Residue& operator=(const Residue&) = default;
238
/// Move assignment operator (compiler-generated); the trailing `&` restricts it to
/// lvalue targets.
240 Residue& operator=(Residue&&) & = default;
242
/// Sets the name of the residue.
247 void setName(const String& name);
248
/// Returns the name of the residue.
250 const String& getName() const;
251
/// Sets the synonyms.
253 void setSynonyms(const std::set<String>& synonyms);
254
/// Adds a synonym.
256 void addSynonym(const String& synonym);
257
/// Returns the synonyms.
259 const std::set<String>& getSynonyms() const;
260
/// Sets the name of the residue as three letter code (String of size 3).
262 void setThreeLetterCode(const String& three_letter_code);
263
266
/// Sets the name as one letter code (String of size 1).
268 void setOneLetterCode(const String& one_letter_code);
269
/// Returns the name as one letter code (String of size 1).
271 const String& getOneLetterCode() const;
272
275
/// Sets the neutral loss formulas.
277 void setLossFormulas(const std::vector<EmpiricalFormula>&);
278
281
/// Sets the N-terminal loss formulas.
283 void setNTermLossFormulas(const std::vector<EmpiricalFormula>&);
284
/// Returns the neutral loss formulas.
286 const std::vector<EmpiricalFormula>& getLossFormulas() const;
287
/// Returns the N-terminal loss formulas.
289 const std::vector<EmpiricalFormula>& getNTermLossFormulas() const;
290
/// Sets the neutral loss molecule names.
292 void setLossNames(const std::vector<String>& name);
293
/// Sets the N-terminal loss names.
295 void setNTermLossNames(const std::vector<String>& name);
296
/// Adds a neutral loss molecule name.
298 void addLossName(const String& name);
299
/// Adds an N-terminal loss name.
301 void addNTermLossName(const String& name);
302
/// Returns the neutral loss names.
304 const std::vector<String>& getLossNames() const;
305
/// Returns the N-terminal loss names.
307 const std::vector<String>& getNTermLossNames() const;
308
/// Sets the empirical formula of the residue (must be full, with N and C-terminus).
310 void setFormula(const EmpiricalFormula& formula);
311
314
/// Sets the average weight of the residue (must be full, with N and C-terminus).
316 void setAverageWeight(double weight);
317
/// Returns the average weight of the residue for the requested residue type (default: Full).
319 double getAverageWeight(ResidueType res_type = Full) const;
320
/// Sets the monoisotopic weight of the residue (must be full, with N and C-terminus).
322 void setMonoWeight(double weight);
323
/// Returns the monoisotopic weight of the residue for the requested residue type (default: Full).
325 double getMonoWeight(ResidueType res_type = Full) const;
326
329
/// Sets the modification by name; the mod should be present in ModificationsDB.
331 void setModification(const String& name);
332
335
339
/// Presumably sets the modification from a monoisotopic mass difference -- no description
/// is visible for this overload; confirm against the implementation.
342 void setModificationByDiffMonoMass(double diffMonoMass);
343
346
/// Sets the low mass marker ions as a vector of formulas.
348 void setLowMassIons(const std::vector<EmpiricalFormula>& low_mass_ions);
349
/// Returns a vector of formulas with the low mass markers of the residue.
351 const std::vector<EmpiricalFormula>& getLowMassIons() const;
352
/// Sets the residue sets the amino acid is contained in (e.g. Natural20).
354 void setResidueSets(const std::set<String>& residues_sets);
355
/// Adds a residue set to the residue sets (e.g. Natural20).
357 void addResidueSet(const String& residue_sets);
358
/// Returns the residue sets this residue is contained in (e.g. Natural20).
360 const std::set<String>& getResidueSets() const;
361
/// Returns the pka of the residue.
363 double getPka() const;
364
/// Returns the pkb of the residue.
366 double getPkb() const;
367
/// Returns the pkc of the residue if it exists, otherwise -1.
369 double getPkc() const;
370
/// Calculates the isoelectric point using the pk* values.
372 double getPiValue() const;
373
/// Sets the pka of the residue.
375 void setPka(double value);
376
/// Sets the pkb of the residue.
378 void setPkb(double value);
379
/// Sets the pkc of the residue.
381 void setPkc(double value);
382
/// Returns the side chain basicity.
384 double getSideChainBasicity() const;
385
/// Sets the side chain basicity.
387 void setSideChainBasicity(double gb_sc);
388
391
/// Sets the N-terminal direction backbone basicity.
393 void setBackboneBasicityLeft(double gb_bb_l);
394
397
/// Sets the C-terminal direction backbone basicity.
399 void setBackboneBasicityRight(double gb_bb_r);
401
/// True if the residue has a neutral loss.
406 bool hasNeutralLoss() const;
407
410
/// Equality operator.
412 bool operator==(const Residue& residue) const;
413
/// Inequality operator.
415 bool operator!=(const Residue& residue) const;
416
/// Equality operator for one letter code.
418 bool operator==(char one_letter_code) const;
419
/// Inequality operator for one letter code.
421 bool operator!=(char one_letter_code) const;
422
/// True if the residue is a modified one.
424 bool isModified() const;
425
/// True if the residue is contained in the set.
// NOTE(review): unlike the other predicates this one is not declared const, so it cannot
// be called on a const Residue; looks unintentional -- confirm before changing.
427 bool isInResidueSet(const String& residue_set);
429
/// Helper for mapping residue types to letters for text annotations and labels.
431 static std::string residueTypeToIonLetter(const ResidueType& res_type);
432
436
/// Stream insertion operator that writes the residue to a stream.
438 friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const Residue& residue);
439
// Data members. Every non-static member below has an in-class default initializer or is a
// default-constructed container.
// NOTE(review): listing lines 447-453 (three_letter_code_, one_letter_code_, formula_,
// internal_formula_, per the tooltip index) are not shown in this listing.
440protected:
441
// residue name; "unknown" until set
443 String name_ = "unknown";
444
// alternative names of the residue
445 std::set<String> synonyms_;
446
448
450
452
454
455 double average_weight_ = 0;
456
457 double mono_weight_ = 0;
458
// pointer to the modification; nullptr when none is set
460 const ResidueModification* modification_ = nullptr;
461
462 // loss
463 std::vector<String> loss_names_;
464
465 std::vector<EmpiricalFormula> loss_formulas_;
466
467 std::vector<String> NTerm_loss_names_;
468
469 std::vector<EmpiricalFormula> NTerm_loss_formulas_;
470
// low mass markers like immonium ions
472 std::vector<EmpiricalFormula> low_mass_ions_;
473
474 // pka values
475 double pka_ = 0;
476
477 // pkb values
478 double pkb_ = 0;
479
480 // pkc values
// -1 is the "no pkc" value (getPkc() is documented to return -1 when it does not exist)
481 double pkc_ = -1.0;
482
// side chain basicity (name matches the setSideChainBasicity parameter)
484 double gb_sc_ = 0;
485
// backbone basicity, N-terminal direction (name matches the setBackboneBasicityLeft parameter)
487 double gb_bb_l_ = 0;
488
// backbone basicity, C-terminal direction (name matches the setBackboneBasicityRight parameter)
490 double gb_bb_r_ = 0;
491
// residue sets this amino acid is contained in
493 std::set<String> residue_sets_;
494
495 // pre-calculated residue type delta weights for more efficient weight calculation
// Declared here only; the values are not visible in this header.
496 static const double internal_to_full_monoweight_;
497 static const double internal_to_nterm_monoweight_;
498 static const double internal_to_cterm_monoweight_;
499 static const double internal_to_a_monoweight_;
500 static const double internal_to_b_monoweight_;
501 static const double internal_to_c_monoweight_;
502 static const double internal_to_x_monoweight_;
503 static const double internal_to_y_monoweight_;
504 static const double internal_to_z_monoweight_;
505 static const double internal_to_zp1_monoweight_;
506 static const double internal_to_zp2_monoweight_;
507 };
508
// Namespace-scope declaration of the stream insertion operator that is also declared as a
// friend inside Residue.
509 // write 'name threelettercode onelettercode formula'
510 OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const Residue& residue);
511
512} // namespace OpenMS
513
514namespace std
515{
// std::hash specialization for OpenMS::Residue.
// Starts from seed 0 and folds values read through the residue's public getters into it
// with OpenMS::hash_combine, in the fixed order below. Getters that take a ResidueType
// (getFormula, getAverageWeight, getMonoWeight) are called with their default argument.
// NOTE(review): this listing omits lines 526, 531, 555, 567 and 603, so the name hash and
// the bodies of the String loops are not visible here; they are left exactly as shown.
// NOTE(review): std::uintptr_t needs <cstdint>, which is not among the visible #include
// lines -- confirm it is pulled in by one of the includes not shown.
518 template<>
519 struct hash<OpenMS::Residue>
520 {
521 std::size_t operator()(const OpenMS::Residue& r) const noexcept
522 {
523 std::size_t seed = 0;
524
525 // Hash name_
527
528 // Hash synonyms_ (std::set<String>)
529 for (const auto& syn : r.getSynonyms())
530 {
532 }
533
534 // Hash three_letter_code_
535 OpenMS::hash_combine(seed, OpenMS::fnv1a_hash_string(r.getThreeLetterCode()));
536
537 // Hash one_letter_code_
538 OpenMS::hash_combine(seed, OpenMS::fnv1a_hash_string(r.getOneLetterCode()));
539
540 // Hash formula_
541 OpenMS::hash_combine(seed, std::hash<OpenMS::EmpiricalFormula>{}(r.getFormula()));
542
543 // Hash average_weight_
544 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getAverageWeight()));
545
546 // Hash mono_weight_
547 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getMonoWeight()));
548
549 // Hash modification_ (pointer comparison in operator==)
// The pointer value itself is hashed, not the modification it points to.
550 OpenMS::hash_combine(seed, OpenMS::hash_int(reinterpret_cast<std::uintptr_t>(r.getModification())));
551
552 // Hash loss_names_ (std::vector<String>)
553 for (const auto& name : r.getLossNames())
554 {
556 }
557
558 // Hash loss_formulas_ (std::vector<EmpiricalFormula>)
559 for (const auto& formula : r.getLossFormulas())
560 {
561 OpenMS::hash_combine(seed, std::hash<OpenMS::EmpiricalFormula>{}(formula));
562 }
563
564 // Hash NTerm_loss_names_ (std::vector<String>)
565 for (const auto& name : r.getNTermLossNames())
566 {
568 }
569
570 // Hash NTerm_loss_formulas_ (std::vector<EmpiricalFormula>)
571 for (const auto& formula : r.getNTermLossFormulas())
572 {
573 OpenMS::hash_combine(seed, std::hash<OpenMS::EmpiricalFormula>{}(formula));
574 }
575
576 // Hash low_mass_ions_ (std::vector<EmpiricalFormula>)
577 for (const auto& formula : r.getLowMassIons())
578 {
579 OpenMS::hash_combine(seed, std::hash<OpenMS::EmpiricalFormula>{}(formula));
580 }
581
582 // Hash pka_
583 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getPka()));
584
585 // Hash pkb_
586 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getPkb()));
587
588 // Hash pkc_
589 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getPkc()));
590
591 // Hash gb_sc_
592 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getSideChainBasicity()));
593
594 // Hash gb_bb_l_
595 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getBackboneBasicityLeft()));
596
597 // Hash gb_bb_r_
598 OpenMS::hash_combine(seed, OpenMS::hash_float(r.getBackboneBasicityRight()));
599
600 // Hash residue_sets_ (std::set<String>)
601 for (const auto& rs : r.getResidueSets())
602 {
604 }
605
606 return seed;
607 }
608 };
609} // namespace std
Representation of an empirical formula.
Definition EmpiricalFormula.h:63
OpenMS stores a central database of all residues in the ResidueDB. All (unmodified) residues are adde...
Definition ResidueDB.h:32
Representation of a modification on an amino acid residue.
Definition ResidueModification.h:55
Representation of an amino acid residue.
Definition Residue.h:40
static const double internal_to_b_monoweight_
Definition Residue.h:500
static const double internal_to_x_monoweight_
Definition Residue.h:502
const String & getOneLetterCode() const
returns the name as one letter code (String of size 1)
void setPkb(double value)
sets the pkb of the residue
bool isInResidueSet(const String &residue_set)
true if the residue is contained in the set
void addLossName(const String &name)
adds a neutral loss molecule name
static const EmpiricalFormula & getInternalToZp2Ion()
Definition Residue.h:134
bool hasNTermNeutralLosses() const
true if N-terminal neutral losses are set
static const double internal_to_z_monoweight_
Definition Residue.h:504
double getBackboneBasicityRight() const
returns the C-terminal direction backbone basicity
const std::vector< String > & getNTermLossNames() const
returns the N-terminal loss names
static const double internal_to_a_monoweight_
Definition Residue.h:499
static const double internal_to_zp1_monoweight_
Definition Residue.h:505
String toString() const
std::vector< String > loss_names_
Definition Residue.h:463
const std::vector< EmpiricalFormula > & getLowMassIons() const
returns a vector of formulas with the low mass markers of the residue
Residue()
Default constructor (needed by pyOpenMS)
std::vector< EmpiricalFormula > NTerm_loss_formulas_
Definition Residue.h:469
bool operator==(char one_letter_code) const
equality operator for one letter code
static const double internal_to_nterm_monoweight_
Definition Residue.h:497
static const double internal_to_zp2_monoweight_
Definition Residue.h:506
bool hasNeutralLoss() const
true if the residue has neutral loss
void setNTermLossNames(const std::vector< String > &name)
sets the N-terminal loss names
EmpiricalFormula getFormula(ResidueType res_type=Full) const
returns the empirical formula of the residue
const std::set< String > & getResidueSets() const
returns the residue sets this residue is contained in (e.g. Natural20)
double getPiValue() const
calculates the isoelectric point using the pk* values
const ResidueModification * getModification() const
returns a pointer to the modification, or a null pointer if none is set
void setPkc(double value)
sets the pkc of the residue
static String getResidueTypeName(const ResidueType res_type)
returns the ion name given as a residue type
static const double internal_to_full_monoweight_
Definition Residue.h:496
static const double internal_to_y_monoweight_
Definition Residue.h:503
Residue & operator=(const Residue &)=default
Assignment operator.
static const double internal_to_cterm_monoweight_
Definition Residue.h:498
void setSynonyms(const std::set< String > &synonyms)
sets the synonyms
const std::vector< EmpiricalFormula > & getLossFormulas() const
returns the neutral loss formulas
bool operator==(const Residue &residue) const
equality operator
void setLossNames(const std::vector< String > &name)
sets the neutral loss molecule names
bool isModified() const
true if the residue is a modified one
void setModification(const String &name)
sets the modification by name; the mod should be present in ModificationsDB
void setPka(double value)
sets the pka of the residue
std::vector< EmpiricalFormula > loss_formulas_
Definition Residue.h:465
double getPkb() const
returns the pkb of the residue
ResidueType
Definition Residue.h:152
@ CTerminal
only C-terminus
Definition Residue.h:156
@ YIon
MS:1001220 peptide bond up to the C-terminus.
Definition Residue.h:161
@ XIon
MS:1001228 amide/C-alpha bond up to the C-terminus.
Definition Residue.h:160
@ Zp2Ion
MS:1001230 C-alpha/carbonyl carbon bond (free radical, z+2 "ion" with additional abstracted hydrogen)...
Definition Residue.h:164
@ ZIon
MS:1001230 C-alpha/carbonyl carbon bond [CID fragment].
Definition Residue.h:162
@ BIonMinusH20
MS:1001222 b ion without water.
Definition Residue.h:166
@ NTerminal
only N-terminus
Definition Residue.h:155
@ BIonMinusNH3
MS:1001232 b ion without ammonia.
Definition Residue.h:168
@ AIon
MS:1001229 N-terminus up to the C-alpha/carbonyl carbon bond.
Definition Residue.h:157
@ Precursor
MS:1001523 Precursor ion.
Definition Residue.h:165
@ YIonMinusH20
MS:1001223 y ion without water.
Definition Residue.h:167
@ NonIdentified
MS:1001240 Non-identified ion.
Definition Residue.h:170
@ BIon
MS:1001224 N-terminus up to the peptide bond.
Definition Residue.h:158
@ Zp1Ion
MS:1001230 C-alpha/carbonyl carbon bond (free radical, z+1 "ion") [main EAD fragment].
Definition Residue.h:163
@ CIon
MS:1001231 N-terminus up to the amide/C-alpha bond.
Definition Residue.h:159
@ YIonMinusNH3
MS:1001233 y ion without ammonia.
Definition Residue.h:169
@ Internal
internal residue, without any termini
Definition Residue.h:154
@ Unannotated
no stored annotation
Definition Residue.h:171
const std::set< String > & getSynonyms() const
returns the synonyms
void setBackboneBasicityRight(double gb_bb_r)
sets the C-terminal direction backbone basicity
void setName(const String &name)
sets the name of the residue
friend std::ostream & operator<<(std::ostream &os, const Residue &residue)
stream insertion operator that writes the residue to a stream
void setSideChainBasicity(double gb_sc)
sets the side chain basicity
Residue & operator=(Residue &&) &=default
Move assignment operator.
std::vector< EmpiricalFormula > low_mass_ions_
low mass markers like immonium ions
Definition Residue.h:472
void setModification(const ResidueModification &mod)
void setLowMassIons(const std::vector< EmpiricalFormula > &low_mass_ions)
sets the low mass marker ions as a vector of formulas
static const EmpiricalFormula & getInternalToZp1Ion()
Definition Residue.h:126
double getSideChainBasicity() const
returns the side chain basicity
static std::string residueTypeToIonLetter(const ResidueType &res_type)
helper for mapping residue types to letters for Text annotations and labels
Residue(Residue &&)=default
Move constructor.
const String & getThreeLetterCode() const
returns the name of the residue as three letter code (String of size 3)
const String & getModificationName() const
returns the name (ID) of the modification, or an empty string if none is set
static const double internal_to_c_monoweight_
Definition Residue.h:501
virtual ~Residue()
Destructor.
const std::vector< EmpiricalFormula > & getNTermLossFormulas() const
returns N-terminal loss formulas
void addNTermLossName(const String &name)
adds an N-terminal loss name
const String & getName() const
returns the name of the residue
static const EmpiricalFormula & getInternalToYIon()
Definition Residue.h:111
std::vector< String > NTerm_loss_names_
Definition Residue.h:467
void setResidueSets(const std::set< String > &residues_sets)
sets the residue sets the amino acid is contained in (e.g. Natural20)
void setOneLetterCode(const String &one_letter_code)
sets the name as one letter code (String of size 1)
static const EmpiricalFormula & getInternalToAIon()
Definition Residue.h:80
double getBackboneBasicityLeft() const
returns the backbone basicity if located in N-terminal direction
void setModification(const ResidueModification *mod)
sets the modification by existing ResMod (make sure it exists in ModificationsDB)
String three_letter_code_
Definition Residue.h:447
static const EmpiricalFormula & getInternalToZIon()
Definition Residue.h:118
void setModificationByDiffMonoMass(double diffMonoMass)
String one_letter_code_
Definition Residue.h:449
void setAverageWeight(double weight)
sets average weight of the residue (must be full, with N and C-terminus)
std::set< String > synonyms_
Definition Residue.h:445
static const EmpiricalFormula & getInternalToNTerm()
Definition Residue.h:68
double getPka() const
returns the pka of the residue
void setThreeLetterCode(const String &three_letter_code)
sets the name of the residue as three letter code (String of size 3)
static const EmpiricalFormula & getInternalToFull()
Definition Residue.h:62
static const EmpiricalFormula & getInternalToCIon()
Definition Residue.h:96
bool operator!=(char one_letter_code) const
inequality operator for one letter code
void addNTermLossFormula(const EmpiricalFormula &)
adds N-terminal losses
void addLossFormula(const EmpiricalFormula &)
adds a neutral loss formula
double getAverageWeight(ResidueType res_type=Full) const
returns average weight of the residue
void addSynonym(const String &synonym)
adds a synonym
Residue(const Residue &)=default
Copy constructor.
EmpiricalFormula internal_formula_
Definition Residue.h:453
void addResidueSet(const String &residue_sets)
adds a residue set to the residue sets (e.g. Natural20)
void setNTermLossFormulas(const std::vector< EmpiricalFormula > &)
sets the N-terminal losses
std::set< String > residue_sets_
residue sets this amino acid is contained in
Definition Residue.h:493
EmpiricalFormula formula_
Definition Residue.h:451
void setBackboneBasicityLeft(double gb_bb_l)
sets the N-terminal direction backbone basicity
static const EmpiricalFormula & getInternalToCTerm()
Definition Residue.h:74
static const EmpiricalFormula & getInternalToBIon()
Definition Residue.h:88
Residue(const String &name, const String &three_letter_code, const String &one_letter_code, const EmpiricalFormula &formula, double pka=0, double pkb=0, double pkc=-1, double gb_sc=0, double gb_bb_l=0, double gb_bb_r=0, const std::set< String > &synonyms=std::set< String >())
double getPkc() const
returns the pkc of the residue if it exists otherwise -1
void setFormula(const EmpiricalFormula &formula)
sets the empirical formula of the residue (must be full, with N and C-terminus)
double getMonoWeight(ResidueType res_type=Full) const
returns monoisotopic weight of the residue
const std::vector< String > & getLossNames() const
returns the neutral loss names (an empty vector if there are none)
void setLossFormulas(const std::vector< EmpiricalFormula > &)
sets the neutral loss formulas
void setMonoWeight(double weight)
sets monoisotopic weight of the residue (must be full, with N and C-terminus)
bool operator!=(const Residue &residue) const
inequality operator
static const EmpiricalFormula & getInternalToXIon()
Definition Residue.h:103
A more convenient string class.
Definition String.h:34
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
std::size_t hash_int(T value) noexcept
Hash for an integer type.
Definition HashUtils.h:107
void hash_combine(std::size_t &seed, std::size_t value) noexcept
Combine a hash value with additional data using golden ratio mixing.
Definition HashUtils.h:87
std::size_t hash_float(T value) noexcept
Hash for a floating point type (float or double).
Definition HashUtils.h:142
std::size_t fnv1a_hash_string(const std::string &s) noexcept
FNV-1a hash for a string.
Definition HashUtils.h:70
STL namespace.
std::size_t operator()(const OpenMS::Residue &r) const noexcept
Definition Residue.h:521