40 #include <boost/regex.hpp>
74 SIZE_OF_SPECIFICITY = 10
77 static const std::string NamesOfSpecificity[SIZE_OF_SPECIFICITY];
174 bool ignore_missed_cleavages,
175 bool allow_nterm_protein_cleavage,
176 bool allow_random_asp_pro_cleavage)
const;
192 std::vector<int>
tokenize_(
const String& sequence,
int start = 0,
int end = -1)
const;
Base class for digestion enzymes.
Definition: DigestionEnzyme.h:53
Class for the enzymatic digestion of sequences.
Definition: EnzymaticDigestion.h:63
bool isValidProduct_(const String &sequence, int pos, int length, bool ignore_missed_cleavages, bool allow_nterm_protein_cleavage, bool allow_random_asp_pro_cleavage) const
Supports functionality for ProteaseDigestion as well (which is deeply woven into the function). To av...
Specificity specificity_
Specificity of the enzyme.
Definition: EnzymaticDigestion.h:224
Specificity
When querying for valid digestion products, this determines whether the specificity of the two peptide end...
Definition: EnzymaticDigestion.h:67
Size digestAfterTokenize_(const std::vector< int > &fragment_positions, const StringView &sequence, std::vector< std::pair< Size, Size >> &output, Size min_length=0, Size max_length=-1) const
boost::regex re_
Regex for tokenizing (huge speedup by making this a member instead of stack object in tokenize_())
Definition: EnzymaticDigestion.h:221
Specificity getSpecificity() const
Returns the specificity for the digestion.
static Specificity getSpecificityByName(const String &name)
Size digestUnmodified(const StringView &sequence, std::vector< std::pair< Size, Size >> &output, Size min_length=1, Size max_length=0) const
Performs the enzymatic digestion of an unmodified sequence.
Size missed_cleavages_
Number of missed cleavages.
Definition: EnzymaticDigestion.h:216
Size countMissedCleavages_(const std::vector< int > &cleavage_positions, Size seq_start, Size seq_end) const
Counts the number of missed cleavages in a sequence fragment.
void setMissedCleavages(Size missed_cleavages)
Sets the number of missed cleavages for the digestion (default is 0). This setting is ignored when lo...
Size digestAfterTokenize_(const std::vector< int > &fragment_positions, const StringView &sequence, std::vector< StringView > &output, Size min_length=0, Size max_length=-1) const
Helper function for digestUnmodified()
virtual ~EnzymaticDigestion()
Destructor.
EnzymaticDigestion()
Default constructor.
bool isValidProduct(const String &sequence, int pos, int length, bool ignore_missed_cleavages=true) const
Is the peptide fragment starting at position pos with length length within the sequence sequence gene...
static const std::string UnspecificCleavage
Name for unspecific cleavage.
Definition: EnzymaticDigestion.h:83
bool filterByMissedCleavages(const String &sequence, const std::function< bool(const Int)> &filter) const
Filter based on the number of missed cleavages.
String getEnzymeName() const
Returns the enzyme for the digestion.
std::vector< int > tokenize_(const String &sequence, int start=0, int end=-1) const
Digests the sequence using the enzyme's regular expression.
const DigestionEnzyme * enzyme_
Used enzyme.
Definition: EnzymaticDigestion.h:219
static const std::string NoCleavage
Name for no cleavage.
Definition: EnzymaticDigestion.h:80
virtual void setEnzyme(const DigestionEnzyme *enzyme)
Sets the enzyme for the digestion.
void setSpecificity(Specificity spec)
Sets the specificity for the digestion (default is SPEC_FULL).
Size digestUnmodified(const StringView &sequence, std::vector< StringView > &output, Size min_length=1, Size max_length=0) const
Performs the enzymatic digestion of an unmodified sequence.
Size getMissedCleavages() const
Returns the number of missed cleavages for the digestion.
StringView provides a non-owning view on an existing string.
Definition: String.h:490
A more convenient string class.
Definition: String.h:61
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type, e.g. used for a variable that holds the result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47