OpenMS
Loading...
Searching...
No Matches
EnzymaticDigestion.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Chris Bielow, Xiao Liang $
6// $Authors: Marc Sturm, Chris Bielow, Jeremi Maciejewski $
7// --------------------------------------------------------------------------
8
9#pragma once
10
13#include <boost/regex_fwd.hpp> // forward declaration of boost::regex
14#include <functional> // for std::function
15#include <memory> // unique_ptr
16#include <string>
17#include <vector>
18
19namespace OpenMS
20{
21 class StringView;
22
/// Class for the enzymatic digestion of sequences.
///
/// NOTE(review): the number at the start of each line is the header's own line number. The numbering has
/// gaps, i.e. this listing does not show every line of the header, so several declarations of this class
/// (constructors, getters, data members) are not visible here -- confirm against the real header.
37 class OPENMS_DLLAPI EnzymaticDigestion
38 {
39 public:
   // Body of an enumeration whose 'enum' header line is not shown in this listing
   // (presumably 'Specificity', the type used for the specificity setting -- TODO confirm).
42 { // note: the values of the first three items are important, since some engines just report the number of required termini (0, 1, 2)
43 SPEC_NONE = 0, // value matches "0 required termini" (see note above)
44 SPEC_SEMI = 1, // value matches "1 required terminus" (see note above)
45 SPEC_FULL = 2, // value matches "2 required termini" (see note above)
46 SPEC_UNKNOWN = 3,
47 SPEC_NOCTERM = 8,
48 SPEC_NONTERM = 9,
49 SIZE_OF_SPECIFICITY = 10 // used below as the length of NamesOfSpecificity
50 };
   /// String table with SIZE_OF_SPECIFICITY entries
   /// (presumably one name per enumerator, indexed by its value -- TODO confirm).
52 static const std::string NamesOfSpecificity[SIZE_OF_SPECIFICITY];
53
   /// Name for no cleavage.
55 static const std::string NoCleavage;
56
   /// Name for unspecific cleavage.
58 static const std::string UnspecificCleavage;
59
   // NOTE(review): the next numbered lines carry no visible declaration in this listing.
62
65
68
71
74
   /// Sets the number of missed cleavages for the digestion (default is 0).
76 void setMissedCleavages(Size missed_cleavages);
77
80
   /// Sets the enzyme for the digestion. Virtual, so derived classes may override it.
82 virtual void setEnzyme(const DigestionEnzyme* enzyme);
83
86
89
93
   /// Performs the enzymatic digestion of an unmodified sequence.
   /// Results are written to @p output as StringView objects.
   /// NOTE(review): the meaning of the returned Size and of max_length == 0 is not visible here -- confirm.
105 Size digestUnmodified(const StringView& sequence, std::vector<StringView>& output, Size min_length = 1, Size max_length = 0) const;
106
   /// Performs the enzymatic digestion of an unmodified sequence.
   /// Same as the overload above, but results are written to @p output as pairs of Size
   /// (presumably positions within @p sequence -- TODO confirm which two quantities).
120 Size digestUnmodified(const StringView& sequence, std::vector<std::pair<Size, Size>>& output, Size min_length = 1, Size max_length = 0) const;
121
   /// Tests the peptide fragment of @p protein that starts at position @p pep_pos and has length @p pep_length
   /// (presumably: whether it is a valid digestion product of the current enzyme -- TODO confirm).
   /// @p ignore_missed_cleavages defaults to true.
133 bool isValidProduct(const String& protein, int pep_pos, int pep_length, bool ignore_missed_cleavages = true) const;
134
   /// Counts the number of internal cleavage sites (missed cleavages) in a protein sequence.
140 Size countInternalCleavageSites(const String& sequence) const;
141
   /// Filter based on the number of missed cleavages.
   /// @p filter is a predicate taking an Int (presumably the missed-cleavage count of @p sequence -- TODO confirm).
149 bool filterByMissedCleavages(const String& sequence, const std::function<bool(const Int)>& filter) const;
150
151
152 protected:
   /// Extended variant of isValidProduct(); it also supports functionality needed by ProteaseDigestion,
   /// which is why it takes the two additional 'allow_*' flags.
159 bool isValidProduct_(const String& sequence,
160 int pos,
161 int length,
162 bool ignore_missed_cleavages,
163 bool allow_nterm_protein_cleavage,
164 bool allow_random_asp_pro_cleavage) const;
   /// Digests the sequence using the enzyme's regular expression and returns positions as ints.
   /// NOTE(review): end == -1 presumably means "up to the end of the sequence" -- confirm.
180 std::vector<int> tokenize_(const String& sequence, int start = 0, int end = -1) const;
181
   /// Generates semi-specific digestion products from @p cleavage_positions into @p output.
   /// Size is an unsigned type (size_t), so the default max_length = -1 converts to the largest
   /// representable value (presumably meaning "no upper limit" -- TODO confirm).
199 Size semiSpecificDigestion_(const std::vector<int>& cleavage_positions, std::vector<std::pair<Size, Size>>& output, Size min_length = 0, Size max_length = -1) const;
200
   /// Helper function for digestUnmodified(); one overload per output representation
   /// (StringView objects or pairs of Size). As above, max_length = -1 converts to the largest Size value.
209 Size digestAfterTokenize_(const std::vector<int>& fragment_positions, const StringView& sequence, std::vector<StringView>& output, Size min_length = 0, Size max_length = -1) const;
210 Size digestAfterTokenize_(const std::vector<int>& fragment_positions, const StringView& sequence, std::vector<std::pair<Size, Size>>& output, Size min_length = 0, Size max_length = -1) const;
211
   /// Counts the number of missed cleavages in a sequence fragment given by @p seq_start and @p seq_end.
220 Size countMissedCleavages_(const std::vector<int>& cleavage_positions, Size seq_start, Size seq_end) const;
221
   // NOTE(review): data member declarations between the numbered lines here are not shown in this listing.
224
   /// Regex for tokenizing, owned through a unique_ptr; only the forward declaration of boost::regex is included.
228 std::unique_ptr<boost::regex> re_; // use PImpl, since #include cost is huge
229
232 };
233
234} // namespace OpenMS
Base class for digestion enzymes.
Definition DigestionEnzyme.h:29
Class for the enzymatic digestion of sequences.
Definition EnzymaticDigestion.h:38
bool isValidProduct(const String &protein, int pep_pos, int pep_length, bool ignore_missed_cleavages=true) const
Is the peptide fragment starting at position pep_pos with length pep_length within the sequence prote...
bool isValidProduct_(const String &sequence, int pos, int length, bool ignore_missed_cleavages, bool allow_nterm_protein_cleavage, bool allow_random_asp_pro_cleavage) const
Supports functionality for ProteaseDigestion as well (which is deeply woven into the function). To av...
Specificity specificity_
specificity of enzyme
Definition EnzymaticDigestion.h:231
Size digestAfterTokenize_(const std::vector< int > &fragment_positions, const StringView &sequence, std::vector< std::pair< Size, Size > > &output, Size min_length=0, Size max_length=-1) const
Specificity
when querying for valid digestion products, this determines if the specificity of the two peptide end...
Definition EnzymaticDigestion.h:42
Specificity getSpecificity() const
Returns the specificity for the digestion.
std::vector< int > tokenize_(const String &sequence, int start=0, int end=-1) const
Digests the sequence using the enzyme's regular expression.
static Specificity getSpecificityByName(const String &name)
Size semiSpecificDigestion_(const std::vector< int > &cleavage_positions, std::vector< std::pair< Size, Size > > &output, Size min_length=0, Size max_length=-1) const
Generates semi-specific digestion products.
Size digestUnmodified(const StringView &sequence, std::vector< std::pair< Size, Size > > &output, Size min_length=1, Size max_length=0) const
Performs the enzymatic digestion of an unmodified sequence.
Size missed_cleavages_
Number of missed cleavages.
Definition EnzymaticDigestion.h:223
Size countInternalCleavageSites(const String &sequence) const
Counts the number of internal cleavage sites (missed cleavages) in a protein sequence.
Size countMissedCleavages_(const std::vector< int > &cleavage_positions, Size seq_start, Size seq_end) const
Counts the number of missed cleavages in a sequence fragment.
void setMissedCleavages(Size missed_cleavages)
Sets the number of missed cleavages for the digestion (default is 0). This setting is ignored when lo...
Size digestAfterTokenize_(const std::vector< int > &fragment_positions, const StringView &sequence, std::vector< StringView > &output, Size min_length=0, Size max_length=-1) const
Helper function for digestUnmodified()
virtual ~EnzymaticDigestion()
Destructor.
EnzymaticDigestion()
Default constructor.
static const std::string UnspecificCleavage
Name for unspecific cleavage.
Definition EnzymaticDigestion.h:58
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
String getEnzymeName() const
Returns the name of the enzyme used for the digestion.
std::unique_ptr< boost::regex > re_
Regex for tokenizing (huge speedup by making this a member instead of stack object in tokenize_())
Definition EnzymaticDigestion.h:228
const DigestionEnzyme * enzyme_
Used enzyme.
Definition EnzymaticDigestion.h:226
static const std::string NoCleavage
Name for no cleavage.
Definition EnzymaticDigestion.h:55
virtual void setEnzyme(const DigestionEnzyme *enzyme)
Sets the enzyme for the digestion.
void setSpecificity(Specificity spec)
Sets the specificity for the digestion (default is SPEC_FULL).
EnzymaticDigestion(const EnzymaticDigestion &rhs)
Copy constructor.
Size digestUnmodified(const StringView &sequence, std::vector< StringView > &output, Size min_length=1, Size max_length=0) const
Performs the enzymatic digestion of an unmodified sequence.
Size getMissedCleavages() const
Returns the number of missed cleavages for the digestion.
EnzymaticDigestion & operator=(const EnzymaticDigestion &rhs)
Assignment operator.
StringView provides a non-owning view on an existing string.
Definition StringView.h:30
A more convenient string class.
Definition String.h:34
int Int
Signed integer type.
Definition Types.h:72
size_t Size
Size type, e.g. used for variables that hold the result of size()
Definition Types.h:97
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19