OpenMS
EnzymaticDigestionLogModel.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2023.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow, Xiao Liang $
32 // $Authors: Marc Sturm, Chris Bielow, Xiao Liang $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Types.h>
40 
41 #include <string>
42 #include <vector>
43 #include <map>
44 
45 namespace OpenMS
46 {
57  /// Class for the Log L model of enzymatic digestion of proteins.
58  class OPENMS_DLLAPI EnzymaticDigestionLogModel
59  {
60 public:
61  /// Default constructor
62  EnzymaticDigestionLogModel();
63
64  /// Copy constructor
65  EnzymaticDigestionLogModel(const EnzymaticDigestionLogModel& rhs);
66
67  /// Assignment operator
68  EnzymaticDigestionLogModel& operator=(const EnzymaticDigestionLogModel& rhs);
69
70  /// Returns the enzyme for the digestion
71  String getEnzymeName() const;
72
73  /// Sets the enzyme for the digestion (looked up by @p name)
74  void setEnzyme(const String& name);
75
76  /// Performs the enzymatic digestion of a protein; the resulting peptides are written to @p output
77  void digest(const AASequence& protein, std::vector<AASequence>& output) const;
78
79  /// Returns the number of peptides a digestion of @p protein would yield under the current enzyme and missed cleavage settings
80  Size peptideCount(const AASequence& protein);
81
82  /// Returns the threshold which needs to be exceeded to call a cleavage (only for the trained cleavage model on real data)
83  double getLogThreshold() const;
84
85  /// Sets the threshold used by the log model (presumably stored in log_model_threshold_; the implementation is not part of this header).
86  /// NOTE(review): the original two-line comment was not preserved in this listing -- confirm wording against the upstream header.
87  void setLogThreshold(double threshold);
88
89 protected:
90  // define a binding site by position and AA
91  struct BindingSite_
92  {
93  Size position;
94  String AAname;
95
96  BindingSite_() :
97  position(), AAname() {}
98
99  BindingSite_(const Size& p, const String& name) :
100  position(p), AAname(name) {}
101
102  // strict weak ordering (position first, then AAname) so BindingSite_ can serve as the key of model_data_
103  bool operator<(const BindingSite_& rhs) const
104  {
105  return (position < rhs.position) || ((position == rhs.position) && (AAname < rhs.AAname));
106  }
107
108  bool operator==(const BindingSite_& rhs) const
109  {
110  return position == rhs.position && AAname == rhs.AAname;
111  }
112
113  };
114
115  // define the log likelihood for missed and cleavage model
116  struct CleavageModel_
117  {
118  double p_cleave;
119  double p_miss;
120
121  CleavageModel_() :
122  p_cleave(0), p_miss(0) {}
123  CleavageModel_(const double& p_c, const double& p_m) :
124  p_cleave(p_c), p_miss(p_m) {}
125  };
126
127  /// Moves the iterator @p p behind (i.e., C-term) the next cleavage site of the @p sequence
128  void nextCleavageSite_(const AASequence& sequence, AASequence::ConstIterator& p) const;
129
130  /// Tests if position pointed to by @p p (N-term side) is a valid cleavage site
131  bool isCleavageSite_(const AASequence& sequence, const AASequence::ConstIterator& p) const;
132
133  /// Used enzyme
134  const DigestionEnzyme* enzyme_;
135
136  /// Threshold to decide if position is cleaved or missed (only for the model)
137  double log_model_threshold_;
138  /// Holds the cleavage model
139  std::map<BindingSite_, CleavageModel_> model_data_;
140  };
140 
141 } // namespace OpenMS
142 
ConstIterator for AASequence.
Definition: AASequence.h:122
Representation of a peptide/protein sequence.
Definition: AASequence.h:112
Base class for digestion enzymes.
Definition: DigestionEnzyme.h:53
Class for the Log L model of enzymatic digestion of proteins.
Definition: EnzymaticDigestionLogModel.h:59
void setLogThreshold(double threshold)
double getLogThreshold() const
Returns the threshold which needs to be exceeded to call a cleavage (only for the trained cleavage model on real data)
EnzymaticDigestionLogModel()
Default constructor.
void digest(const AASequence &protein, std::vector< AASequence > &output) const
Performs the enzymatic digestion of a protein.
std::map< BindingSite_, CleavageModel_ > model_data_
Holds the cleavage model.
Definition: EnzymaticDigestionLogModel.h:138
EnzymaticDigestionLogModel(const EnzymaticDigestionLogModel &rhs)
Copy constructor.
void nextCleavageSite_(const AASequence &sequence, AASequence::ConstIterator &p) const
Moves the iterator p behind (i.e., C-term) the next cleavage site of the sequence.
bool isCleavageSite_(const AASequence &sequence, const AASequence::ConstIterator &p) const
Tests if position pointed to by p (N-term side) is a valid cleavage site.
double log_model_threshold_
Threshold to decide if position is cleaved or missed (only for the model)
Definition: EnzymaticDigestionLogModel.h:136
String getEnzymeName() const
Returns the enzyme for the digestion.
const DigestionEnzyme * enzyme_
Used enzyme.
Definition: EnzymaticDigestionLogModel.h:133
EnzymaticDigestionLogModel & operator=(const EnzymaticDigestionLogModel &rhs)
Assignment operator.
Size peptideCount(const AASequence &protein)
Returns the number of peptides a digestion of protein would yield under the current enzyme and missed cleavage settings.
void setEnzyme(const String &name)
Sets the enzyme for the digestion.
A more convenient string class.
Definition: String.h:60
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:48
OpenMS::EnzymaticDigestionLogModel::BindingSite_
Definition: EnzymaticDigestionLogModel.h:92
BindingSite_(const Size &p, const String &name)
Definition: EnzymaticDigestionLogModel.h:99
bool operator==(const BindingSite_ &rhs) const
Definition: EnzymaticDigestionLogModel.h:107
BindingSite_()
Definition: EnzymaticDigestionLogModel.h:96
String AAname
Definition: EnzymaticDigestionLogModel.h:94
bool operator<(const BindingSite_ &rhs) const
Definition: EnzymaticDigestionLogModel.h:102
Size position
Definition: EnzymaticDigestionLogModel.h:93
OpenMS::EnzymaticDigestionLogModel::CleavageModel_
Definition: EnzymaticDigestionLogModel.h:116
double p_cleave
Definition: EnzymaticDigestionLogModel.h:117
CleavageModel_()
Definition: EnzymaticDigestionLogModel.h:120
double p_miss
Definition: EnzymaticDigestionLogModel.h:118
CleavageModel_(const double &p_c, const double &p_m)
Definition: EnzymaticDigestionLogModel.h:122