OpenMS  2.7.0
EnzymaticDigestionLogModel.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow, Xiao Liang $
32 // $Authors: Marc Sturm, Chris Bielow, Xiao Liang $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Types.h>
40 
41 #include <string>
42 #include <vector>
43 
44 namespace OpenMS
45 {
// Class for the Log L model of enzymatic digestion of proteins.
//
// NOTE(review): this text is a line-numbered listing and the embedded numbering has gaps
// (60-72, 92-95, 114, 119, 131-137). Declarations on the missing lines are not visible
// here; the notes below mark where something appears to be elided. Confirm against the
// real header before relying on any of them.
57  class OPENMS_DLLAPI EnzymaticDigestionLogModel
58  {
59 public:
62  // NOTE(review): lines 60-72 presumably hold the default constructor, copy constructor,
65  // assignment operator and getEnzymeName() (all four are members of this class but are
68  // not shown in this listing) -- exact line of each is not established here; confirm.
71 
    // Sets the enzyme for the digestion.
    // NOTE(review): `name` is taken by const value, so the argument is copied on every call.
73  void setEnzyme(const String name);
74 
    // Performs the enzymatic digestion of a protein; results are delivered through `output`.
    // Whether `output` is cleared first is not visible from the declaration -- confirm.
76  void digest(const AASequence& protein, std::vector<AASequence>& output) const;
77 
    // Returns the number of peptides a digestion of `protein` would yield under the
    // current enzyme. Unlike digest(), this member is not declared const.
79  Size peptideCount(const AASequence& protein);
80 
    // Returns the threshold which needs to be exceeded to call a cleavage
    // (only relevant for the trained cleavage model).
82  double getLogThreshold() const;
83 
    // Sets the log threshold; presumably the counterpart of getLogThreshold() -- confirm.
86  void setLogThreshold(double threshold);
87 
88 protected:
89  // Defines a binding site by position and amino acid (AA) name.
90  struct BindingSite_
91  {
    // NOTE(review): the members `Size position` and `String AAname` (lines 92-93) and the
    // head of the default constructor `BindingSite_() :` (line 95) are elided from this
    // listing; the next visible line is only that constructor's initializer list.
94 
96  position(), AAname() {}  // value-initializes both members
97 
    // Constructs a binding site from position `p` and amino-acid name `name`.
98  BindingSite_(const Size& p, const String& name) :
99  position(p), AAname(name) {}
100 
    // Orders binding sites by position first; ties are broken by AAname.
101  bool operator<(const BindingSite_& rhs) const
102  {
103  return (position < rhs.position) || ((position == rhs.position) && (AAname < rhs.AAname));
104  }
105 
    // Two binding sites are equal when both position and AAname match.
106  bool operator==(const BindingSite_& rhs) const
107  {
108  return position == rhs.position && AAname == rhs.AAname;
109  }
110 
111  };
112 
113  // Defines the log likelihood for the missed and the cleavage model.
    // NOTE(review): the struct head `struct CleavageModel_` (line 114) is elided from this
    // listing; the brace below opens that struct.
115  {
116  double p_cleave;  // presumably the log-likelihood term for "cleaved" -- confirm
117  double p_miss;    // presumably the log-likelihood term for "missed" -- confirm
118 
    // NOTE(review): the head of the default constructor `CleavageModel_() :` (line 119) is
    // elided; the next line is its initializer list, which zeroes both values.
120  p_cleave(0), p_miss(0) {}
    // Constructs a model entry from explicit cleave (`p_c`) and miss (`p_m`) values.
121  CleavageModel_(const double& p_c, const double& p_m) :
122  p_cleave(p_c), p_miss(p_m) {}
123  };
124 
    // Moves the iterator `p` behind (i.e., C-term of) the next cleavage site of `sequence`.
126  void nextCleavageSite_(const AASequence& sequence, AASequence::ConstIterator& p) const;
127 
    // Tests if the position pointed to by `p` (N-term side) is a valid cleavage site.
129  bool isCleavageSite_(const AASequence& sequence, const AASequence::ConstIterator& p) const;
130 
    // NOTE(review): the data members are elided from this listing:
    //   line 132: const DigestionEnzyme* enzyme_               -- used enzyme
    //   line 135: double log_model_threshold_                  -- threshold to decide if a position is cleaved or missed
    //   line 137: Map<BindingSite_, CleavageModel_> model_data_ -- holds the cleavage model
133 
138  };
139 
140 } // namespace OpenMS
141 
ConstIterator for AASequence.
Definition: AASequence.h:122
Representation of a peptide/protein sequence.
Definition: AASequence.h:112
Base class for digestion enzymes.
Definition: DigestionEnzyme.h:53
Class for the Log L model of enzymatic digestion of proteins.
Definition: EnzymaticDigestionLogModel.h:58
void setLogThreshold(double threshold)
double getLogThreshold() const
Returns the threshold which needs to be exceeded to call a cleavage (only for the trained cleavage model).
EnzymaticDigestionLogModel()
Default constructor.
void digest(const AASequence &protein, std::vector< AASequence > &output) const
Performs the enzymatic digestion of a protein.
EnzymaticDigestionLogModel(const EnzymaticDigestionLogModel &rhs)
Copy constructor.
void nextCleavageSite_(const AASequence &sequence, AASequence::ConstIterator &p) const
Moves the iterator p behind (i.e., C-term) the next cleavage site of the sequence.
bool isCleavageSite_(const AASequence &sequence, const AASequence::ConstIterator &p) const
Tests if position pointed to by p (N-term side) is a valid cleavage site.
double log_model_threshold_
Threshold to decide if position is cleaved or missed (only for the model)
Definition: EnzymaticDigestionLogModel.h:135
String getEnzymeName() const
Returns the enzyme for the digestion.
void setEnzyme(const String name)
Sets the enzyme for the digestion.
const DigestionEnzyme * enzyme_
Used enzyme.
Definition: EnzymaticDigestionLogModel.h:132
EnzymaticDigestionLogModel & operator=(const EnzymaticDigestionLogModel &rhs)
Assignment operator.
Map< BindingSite_, CleavageModel_ > model_data_
Holds the cleavage model.
Definition: EnzymaticDigestionLogModel.h:137
Size peptideCount(const AASequence &protein)
Returns the number of peptides a digestion of protein would yield under the current enzyme and missed cleavage settings.
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:52
A more convenient string class.
Definition: String.h:61
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Size< TNeedle >::Type position(const PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:563
Definition: EnzymaticDigestionLogModel.h:91
BindingSite_(const Size &p, const String &name)
Definition: EnzymaticDigestionLogModel.h:98
bool operator==(const BindingSite_ &rhs) const
Definition: EnzymaticDigestionLogModel.h:106
BindingSite_()
Definition: EnzymaticDigestionLogModel.h:95
String AAname
Definition: EnzymaticDigestionLogModel.h:93
bool operator<(const BindingSite_ &rhs) const
Definition: EnzymaticDigestionLogModel.h:101
Size position
Definition: EnzymaticDigestionLogModel.h:92
Definition: EnzymaticDigestionLogModel.h:115
double p_cleave
Definition: EnzymaticDigestionLogModel.h:116
CleavageModel_()
Definition: EnzymaticDigestionLogModel.h:119
double p_miss
Definition: EnzymaticDigestionLogModel.h:117
CleavageModel_(const double &p_c, const double &p_m)
Definition: EnzymaticDigestionLogModel.h:121