OpenMS
Loading...
Searching...
No Matches
NASequence.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Samuel Wein $
6// $Authors: Samuel Wein, Timo Sachsenberg, Hendrik Weisser $
7// --------------------------------------------------------------------------
8
9#pragma once
10
16#include <functional>
17#include <iosfwd>
18#include <vector>
19
20namespace OpenMS
21{
35 class OPENMS_DLLAPI NASequence
36 {
41 public:
43 { //< NB: Not all fragments types are valid for all residue types, this class should probably get split
44 Full = 0,
64 SizeOfNASFragmentType
65 };
66
68
69 class Iterator;
70
76 class OPENMS_DLLAPI ConstIterator
77 {
78 public:
82 typedef const value_type* const_pointer;
83 typedef std::vector<const value_type*>::difference_type difference_type;
84 typedef const value_type* pointer;
85 typedef std::random_access_iterator_tag iterator_category;
86
91 ConstIterator() = default;
92
/// Detailed constructor: stores the pointer to the ribonucleotide vector and the offset position within it.
/// @param vec_ptr pointer to the vector this iterator walks over (stored as-is, not copied)
/// @param position offset into that vector
94 ConstIterator(const std::vector<const Ribonucleotide*>* vec_ptr, difference_type position)
95 {
96 vector_ = vec_ptr;
97 position_ = position;
98 }
99
/// Copy constructor: takes over the vector pointer and the position of @p rhs.
101 ConstIterator(const ConstIterator& rhs) : vector_(rhs.vector_), position_(rhs.position_)
102 {
103 }
104
/// Copy constructor from a mutable Iterator: takes over its vector pointer and position.
/// Not marked explicit, so an Iterator converts implicitly to a ConstIterator.
106 ConstIterator(const NASequence::Iterator& rhs) : vector_(rhs.vector_), position_(rhs.position_)
107 {
108 }
109
112 {
113 }
114
116
119 {
120 if (this != &rhs)
121 {
122 position_ = rhs.position_;
123 vector_ = rhs.vector_;
124 }
125 return *this;
126 }
127
133 {
134 return *(*vector_)[position_];
135 }
136
139 {
140 return (*vector_)[position_];
141 }
142
145 {
146 return ConstIterator(vector_, position_ + diff);
147 }
148
150 {
151 return position_ - rhs.position_;
152 }
153
156 {
157 return ConstIterator(vector_, position_ - diff);
158 }
159
/// Equality comparator: true only when both iterators hold the same vector pointer and the same position.
161 bool operator==(const ConstIterator& rhs) const
162 {
163 return (std::tie(vector_, position_) == std::tie(rhs.vector_, rhs.position_));
164 }
165
/// Inequality operator: the negation of operator==.
167 bool operator!=(const ConstIterator& rhs) const
168 {
169 return !(operator==(rhs));
170 }
171
174 {
175 ++position_;
176 return *this;
177 }
178
181 {
182 --position_;
183 return *this;
184 }
185
187
188 protected:
189 // pointer to the vector
190 const std::vector<const Ribonucleotide*>* vector_;
191
192 // position in the vector
194 };
195
196
202 class OPENMS_DLLAPI Iterator
203 {
204 public:
206
210 typedef const value_type* const_pointer;
211 typedef const value_type* pointer;
212 typedef std::vector<const value_type*>::difference_type difference_type;
213
217 Iterator() = default;
218
/// Detailed constructor: stores the pointer to the ribonucleotide vector and the offset position within it.
/// @param vec_ptr pointer to the (non-const) vector this iterator walks over (stored as-is, not copied)
/// @param position offset into that vector
220 Iterator(std::vector<const Ribonucleotide*>* vec_ptr, difference_type position)
221 {
222 vector_ = vec_ptr;
223 position_ = position;
224 }
225
/// Copy constructor: takes over the vector pointer and the position of @p rhs.
227 Iterator(const Iterator& rhs) : vector_(rhs.vector_), position_(rhs.position_)
228 {
229 }
230
/// Destructor; declared virtual, with an empty body.
232 virtual ~Iterator()
233 {
234 }
235
237
240 {
241 if (this != &rhs)
242 {
243 position_ = rhs.position_;
244 vector_ = rhs.vector_;
245 }
246 return *this;
247 }
248
254 {
255 return *(*vector_)[position_];
256 }
257
260 {
261 return (*vector_)[position_];
262 }
263
266 {
267 return (*vector_)[position_];
268 }
269
272 {
273 return Iterator(vector_, position_ + diff);
274 }
275
277 {
278 return position_ - rhs.position_;
279 }
280
283 {
284 return Iterator(vector_, position_ - diff);
285 }
286
/// Equality comparator: true only when both iterators hold the same vector pointer and the same position.
288 bool operator==(const Iterator& rhs) const
289 {
290 return (std::tie(vector_, position_) == std::tie(rhs.vector_, rhs.position_));
291 }
292
/// Inequality operator: the negation of operator==.
294 bool operator!=(const Iterator& rhs) const
295 {
296 return !this->operator==(rhs);
297 }
298
301 {
302 ++position_;
303 return *this;
304 }
305
308 {
309 --position_;
310 return *this;
311 }
312
314
315 protected:
316 std::vector<const Ribonucleotide*>* vector_;
317
318 // position in the vector
320 };
321
322 public:
323 /*
324 * Default constructors and assignment operators.
325 */
326 NASequence() = default;
327 NASequence(const NASequence&) = default;
328 NASequence(NASequence&&) = default;
329 NASequence& operator=(const NASequence&) & = default;
331
333 NASequence(std::vector<const Ribonucleotide*> s, const RibonucleotideChainEnd* five_prime, const RibonucleotideChainEnd* three_prime);
334
335 virtual ~NASequence() = default;
336
337 bool operator==(const NASequence& rhs) const;
338 bool operator!=(const NASequence& rhs) const;
339 bool operator<(const NASequence& rhs) const;
340
342 void setSequence(const std::vector<const Ribonucleotide*>& seq);
343
/// Read-only access to the underlying vector of ribonucleotide pointers (seq_).
344 const std::vector<const Ribonucleotide*>& getSequence() const
345 {
346 return seq_;
347 }
348
/// Mutable access to the underlying vector of ribonucleotide pointers (seq_);
/// the caller can modify the sequence through the returned reference.
349 std::vector<const Ribonucleotide*>& getSequence()
350 {
351 return seq_;
352 }
353
355 void set(size_t index, const Ribonucleotide* r);
356
/// Returns the ribonucleotide pointer stored at position @p index of seq_.
/// Uses plain vector indexing, so @p index is not range-checked here.
/// NOTE(review): not const-qualified although the body only reads seq_ -- confirm whether that is intentional.
357 const Ribonucleotide* get(size_t index)
358 {
359 return seq_[index];
360 }
361
/// Container-style element access: returns a reference to the pointer stored at @p index,
/// so the caller can re-point that position to another ribonucleotide.
/// Uses plain vector indexing, so @p index is not range-checked here.
363 inline const Ribonucleotide*& operator[](size_t index)
364 {
365 return seq_[index];
366 }
367
/// Container-style element access on a const sequence: returns a const reference to the pointer stored at @p index.
/// Uses plain vector indexing, so @p index is not range-checked here.
368 inline const Ribonucleotide* const& operator[](size_t index) const
369 {
370 return seq_[index];
371 }
372
373 bool empty() const;
374 size_t size() const;
375 void clear();
376
378 bool hasFivePrimeMod() const;
381 bool hasThreePrimeMod() const;
384
387 {
388 return Iterator(&seq_, 0);
389 }
390
/// ConstIterator over seq_ positioned at offset 0.
391 inline ConstIterator begin() const
392 {
393 return ConstIterator(&seq_, 0);
394 }
395
/// Iterator over seq_ positioned at offset seq_.size(), i.e. one past the last element.
/// NOTE(review): the C-style (Int) cast passes the size through Int before it reaches difference_type;
/// a static_cast to difference_type would presumably be the cleaner form -- confirm.
396 inline Iterator end()
397 {
398 return Iterator(&seq_, (Int)seq_.size());
399 }
400
/// ConstIterator over seq_ positioned at offset seq_.size(), i.e. one past the last element.
/// NOTE(review): the C-style (Int) cast passes the size through Int before it reaches difference_type;
/// a static_cast to difference_type would presumably be the cleaner form -- confirm.
401 inline ConstIterator end() const
402 {
403 return ConstIterator(&seq_, (Int)seq_.size());
404 }
405
/// ConstIterator over seq_ positioned at offset 0 (same construction as the const begin()).
406 inline ConstIterator cbegin() const
407 {
408 return ConstIterator(&seq_, 0);
409 }
410
/// ConstIterator over seq_ positioned at offset seq_.size(), i.e. one past the last element.
/// NOTE(review): the C-style (Int) cast passes the size through Int before it reaches difference_type;
/// a static_cast to difference_type would presumably be the cleaner form -- confirm.
411 inline ConstIterator cend() const
412 {
413 return ConstIterator(&seq_, (Int)seq_.size());
414 }
415
417
426 double getMonoWeight(NASFragmentType type = Full, Int charge = 0) const;
427
436 double getAverageWeight(NASFragmentType type = Full, Int charge = 0) const;
437
446 EmpiricalFormula getFormula(NASFragmentType type = Full, Int charge = 0) const;
447
455 NASequence getPrefix(Size length) const;
456
464 NASequence getSuffix(Size length) const;
465
474 NASequence getSubsequence(Size start = 0, Size length = Size(-1)) const;
475
483 static NASequence fromString(const String& s);
484
488 friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const NASequence& seq);
489
497 static NASequence fromString(const char* s);
498
499 std::string toString() const;
500
501 private:
502 // TODO: query RNA / DNA depending on type
503 static void parseString_(const String& s, NASequence& nas);
504
514 // TODO: query RNA / DNA depending on type
516
517 std::vector<const Ribonucleotide*> seq_;
518
519 const RibonucleotideChainEnd* five_prime_ = nullptr;
520 const RibonucleotideChainEnd* three_prime_ = nullptr;
521 };
522
523} // namespace OpenMS
524
525// Hash function specialization for NASequence
526// Placed in std namespace to allow use with std::unordered_map/set
527namespace std
528{
/// std::hash specialization for OpenMS::NASequence.
/// Starts from a zero seed, visits every ribonucleotide of the sequence, and then mixes in the
/// codes of the 5' and 3' terminal modifications (each only when its pointer is non-null).
547 template<>
548 struct hash<OpenMS::NASequence>
549 {
/// @param seq the sequence to hash
/// @return the accumulated seed value
550 std::size_t operator()(const OpenMS::NASequence& seq) const noexcept
551 {
552 std::size_t seed = 0;
553
554 // Visit each ribonucleotide in the sequence
555 for (const auto& ribo : seq)
556 {
557 // Fetch the code (stable identifier) of this ribonucleotide
558 const OpenMS::String& code = ribo.getCode();
// NOTE(review): listing line 559 is not visible in this view; presumably it folds `code` into `seed` -- confirm against the header.
560 }
561
562 // Mix in the 5' terminal modification, if one is set
563 const OpenMS::RibonucleotideChainEnd* five_prime = seq.getFivePrimeMod();
564 if (five_prime != nullptr)
565 {
566 // XOR with a 5'-specific tag so that the same code contributes differently on the 5' and the 3' end
567 std::size_t five_hash = OpenMS::fnv1a_hash_string(five_prime->getCode());
568 OpenMS::hash_combine(seed, five_hash ^ 0x355052494dULL); // tag = ASCII bytes of "5PRIM"
569 }
570
571 // Mix in the 3' terminal modification, if one is set
572 const OpenMS::RibonucleotideChainEnd* three_prime = seq.getThreePrimeMod();
573 if (three_prime != nullptr)
574 {
575 // XOR with a 3'-specific tag (differs from the 5' tag in its first byte)
576 std::size_t three_hash = OpenMS::fnv1a_hash_string(three_prime->getCode());
577 OpenMS::hash_combine(seed, three_hash ^ 0x335052494dULL); // tag = ASCII bytes of "3PRIM"
578 }
579
580 return seed;
581 }
582 };
583} // namespace std
Representation of an empirical formula.
Definition EmpiricalFormula.h:63
ConstIterator of NASequence class.
Definition NASequence.h:77
ConstIterator(const std::vector< const Ribonucleotide * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition NASequence.h:94
const_pointer operator->() const
dereference operator
Definition NASequence.h:138
const value_type * const_pointer
Definition NASequence.h:82
const ConstIterator operator-(difference_type diff) const
backward jump operator
Definition NASequence.h:155
bool operator!=(const ConstIterator &rhs) const
inequality operator
Definition NASequence.h:167
const ConstIterator operator+(difference_type diff) const
forward jump operator
Definition NASequence.h:144
ConstIterator()=default
default constructor
ConstIterator & operator--()
decrement operator
Definition NASequence.h:180
std::random_access_iterator_tag iterator_category
Definition NASequence.h:85
bool operator==(const ConstIterator &rhs) const
equality comparator
Definition NASequence.h:161
Ribonucleotide value_type
Definition NASequence.h:79
std::vector< const value_type * >::difference_type difference_type
Definition NASequence.h:83
ConstIterator(const NASequence::Iterator &rhs)
copy constructor from Iterator
Definition NASequence.h:106
difference_type position_
Definition NASequence.h:193
ConstIterator(const ConstIterator &rhs)
copy constructor
Definition NASequence.h:101
value_type & reference
Definition NASequence.h:81
const std::vector< const Ribonucleotide * > * vector_
Definition NASequence.h:190
const value_type * pointer
Definition NASequence.h:84
const_reference operator*() const
dereference operator
Definition NASequence.h:132
virtual ~ConstIterator()
destructor
Definition NASequence.h:111
ConstIterator & operator++()
increment operator
Definition NASequence.h:173
ConstIterator & operator=(const ConstIterator &rhs)
assignment operator
Definition NASequence.h:118
difference_type operator-(ConstIterator rhs) const
Definition NASequence.h:149
const value_type & const_reference
Definition NASequence.h:80
Iterator of NASequence class.
Definition NASequence.h:203
const_pointer operator->() const
dereference operator
Definition NASequence.h:259
const value_type * const_pointer
Definition NASequence.h:210
pointer operator->()
mutable dereference operator
Definition NASequence.h:265
const Iterator operator+(difference_type diff) const
forward jump operator
Definition NASequence.h:271
Iterator & operator=(const Iterator &rhs)
assignment operator
Definition NASequence.h:239
virtual ~Iterator()
destructor
Definition NASequence.h:232
Iterator(std::vector< const Ribonucleotide * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition NASequence.h:220
difference_type operator-(Iterator rhs) const
Definition NASequence.h:276
Ribonucleotide value_type
Definition NASequence.h:207
bool operator==(const Iterator &rhs) const
equality comparator
Definition NASequence.h:288
std::vector< const value_type * >::difference_type difference_type
Definition NASequence.h:212
difference_type position_
Definition NASequence.h:319
value_type & reference
Definition NASequence.h:209
Iterator(const Iterator &rhs)
copy constructor
Definition NASequence.h:227
const value_type * pointer
Definition NASequence.h:211
const_reference operator*() const
dereference operator
Definition NASequence.h:253
bool operator!=(const Iterator &rhs) const
inequality operator
Definition NASequence.h:294
Iterator & operator++()
increment operator
Definition NASequence.h:300
Iterator & operator--()
decrement operator
Definition NASequence.h:307
const Iterator operator-(difference_type diff) const
backward jump operator
Definition NASequence.h:282
std::vector< const Ribonucleotide * > * vector_
Definition NASequence.h:316
const value_type & const_reference
Definition NASequence.h:208
Representation of a nucleic acid sequence.
Definition NASequence.h:36
const Ribonucleotide * get(size_t index)
Definition NASequence.h:357
ConstIterator cend() const
Definition NASequence.h:411
virtual ~NASequence()=default
NASequence & operator=(NASequence &&) &=default
Move assignment operator.
bool operator<(const NASequence &rhs) const
less operator
bool hasThreePrimeMod() const
NASequence getSuffix(Size length) const
Return sequence suffix of the given length (not start index!)
void setSequence(const std::vector< const Ribonucleotide * > &seq)
getter / setter for sequence
std::string toString() const
Iterator begin()
iterators
Definition NASequence.h:386
size_t size() const
bool operator==(const NASequence &rhs) const
equality operator
bool hasFivePrimeMod() const
5' and 3' modifications
double getMonoWeight(NASFragmentType type=Full, Int charge=0) const
utility functions
void setThreePrimeMod(const RibonucleotideChainEnd *r)
NASequence getPrefix(Size length) const
Return sequence prefix of the given length (not end index!)
std::vector< const Ribonucleotide * > & getSequence()
Definition NASequence.h:349
const Ribonucleotide *const & operator[](size_t index) const
Definition NASequence.h:368
static void parseString_(const String &s, NASequence &nas)
friend std::ostream & operator<<(std::ostream &os, const NASequence &seq)
NASequence()=default
const std::vector< const Ribonucleotide * > & getSequence() const
Definition NASequence.h:344
bool empty() const
ConstIterator end() const
Definition NASequence.h:401
bool operator!=(const NASequence &rhs) const
inequality operator
NASequence(std::vector< const Ribonucleotide * > s, const RibonucleotideChainEnd *five_prime, const RibonucleotideChainEnd *three_prime)
full constructor
void set(size_t index, const Ribonucleotide *r)
getter / setter for ribonucleotide elements (easily wrapped using pyOpenMS)
static NASequence fromString(const char *s)
create NASequence object by parsing a C string (character array)
const Ribonucleotide *& operator[](size_t index)
getter / setter for sequence elements (C++ container style)
Definition NASequence.h:363
EmpiricalFormula getFormula(NASFragmentType type=Full, Int charge=0) const
Get the formula for a NASequence.
NASFragmentType
an enum of all possible fragment ion types
Definition NASequence.h:43
@ AminusB
A ion with base loss, added for nucleic acid support.
Definition NASequence.h:62
@ YIon
MS:1001220 peptide bond up to the C-terminus.
Definition NASequence.h:52
@ XIon
MS:1001228 amide/C-alpha bond up to the C-terminus.
Definition NASequence.h:51
@ ZIon
MS:1001230 C-alpha/carbonyl carbon bond.
Definition NASequence.h:53
@ WIon
W ion, added for nucleic acid support.
Definition NASequence.h:61
@ BIonMinusH20
MS:1001222 b ion without water.
Definition NASequence.h:55
@ BIonMinusNH3
MS:1001232 b ion without ammonia.
Definition NASequence.h:57
@ AIon
MS:1001229 N-terminus up to the C-alpha/carbonyl carbon bond.
Definition NASequence.h:48
@ Precursor
MS:1001523 Precursor ion.
Definition NASequence.h:54
@ YIonMinusH20
MS:1001223 y ion without water.
Definition NASequence.h:56
@ NonIdentified
MS:1001240 Non-identified ion.
Definition NASequence.h:59
@ BIon
MS:1001224 N-terminus up to the peptide bond.
Definition NASequence.h:49
@ ThreePrime
only 3' terminus
Definition NASequence.h:47
@ CIon
MS:1001231 N-terminus up to the amide/C-alpha bond.
Definition NASequence.h:50
@ YIonMinusNH3
MS:1001233 y ion without ammonia.
Definition NASequence.h:58
@ Internal
internal, without any termini
Definition NASequence.h:45
@ Unannotated
no stored annotation
Definition NASequence.h:60
@ FivePrime
only 5' terminus
Definition NASequence.h:46
@ DIon
D ion, added for nucleic acid support.
Definition NASequence.h:63
static NASequence fromString(const String &s)
create NASequence object by parsing an OpenMS string
NASequence & operator=(const NASequence &) &=default
Copy assignment operator.
static String::ConstIterator parseMod_(const String::ConstIterator str_it, const String &str, NASequence &nas)
Parses modifications in square brackets.
ConstIterator cbegin() const
Definition NASequence.h:406
double getAverageWeight(NASFragmentType type=Full, Int charge=0) const
Get the Average Weight of a NASequence. NB returns the uncharged mass + or - proton masses to match t...
Iterator end()
Definition NASequence.h:396
NASequence getSubsequence(Size start=0, Size length=Size(-1)) const
Return subsequence with given starting position and length.
std::vector< const Ribonucleotide * > seq_
Definition NASequence.h:517
void setFivePrimeMod(const RibonucleotideChainEnd *r)
ConstIterator begin() const
Definition NASequence.h:391
const RibonucleotideChainEnd * getThreePrimeMod() const
const RibonucleotideChainEnd * getFivePrimeMod() const
NASequence(NASequence &&)=default
Move constructor.
NASequence(const NASequence &)=default
Copy constructor.
Representation of a ribonucleotide (modified or unmodified)
Definition Ribonucleotide.h:28
const String getCode() const
Return the short name.
A more convenient string class.
Definition String.h:34
const_iterator ConstIterator
Const Iterator.
Definition String.h:46
int Int
Signed integer type.
Definition Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
void hash_combine(std::size_t &seed, std::size_t value) noexcept
Combine a hash value with additional data using golden ratio mixing.
Definition HashUtils.h:87
std::size_t fnv1a_hash_string(const std::string &s) noexcept
FNV-1a hash for a string.
Definition HashUtils.h:70
STL namespace.
std::size_t operator()(const OpenMS::NASequence &seq) const noexcept
Definition NASequence.h:550