OpenMS  2.5.0
ConsensusIDAlgorithmPEPMatrix.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Andreas Bertsch, Marc Sturm, Sven Nahnsen, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
39 
40 // Extend SeqAn with user-defined scoring matrices.
41 namespace seqan
42 {
43 
44  // We have to create new specializations of the ScoringMatrixData_ class
45  // template for amino acids. For this, we first create one tag per matrix.
// Empty tag type; used as the third template argument of the
// ScoringMatrixData_ specialization that holds the PAM30MS scores.
46  struct PAM30MS {}; // PAM30MS matrix
// Empty tag type; used as the third template argument of the
// ScoringMatrixData_ specialization that holds the adapted identity scores.
47  struct AdaptedIdentity {}; // identity matrix adapted for I/L, Q/K ambiguity
48 
49  // Then, we specialize ScoringMatrixData_ for each tag.
// Specialization of ScoringMatrixData_ providing integer PAM30MS substitution
// scores for the AminoAcid alphabet (selected via the PAM30MS tag).
50  template <>
51  struct ScoringMatrixData_<int, AminoAcid, PAM30MS>
52  {
53  enum
54  {
// VALUE_SIZE: number of symbols in the AminoAcid alphabet.
// TAB_SIZE: number of entries in the flattened square score table.
55  VALUE_SIZE = ValueSize<AminoAcid>::VALUE,
56  TAB_SIZE = VALUE_SIZE * VALUE_SIZE
57  };
// Returns a pointer to the first element of the function-local static score
// table (an int array of TAB_SIZE elements).
58  static inline const int* getData()
59  {
60  // Rant: I cannot find a primary source for the PAM30MS scoring matrix!
61  // It seems to have been first published in Huang et al., JBC 2001
62  // (http://www.jbc.org/content/276/30/28327), but the paper does not show
63  // the actual matrix (gah!).
64  // The matrix here comes from old OpenMS code and also matches this one:
65  // http://proteomics.fiocruz.br/supplementaryfiles/pepexplorer/BeforeRevision/PFUGridResults/PFUGridSearch/pam30ms.txt
66 
// The initializer below lists 24 rows of 24 values each, one row per symbol in
// the order given by the column header comment.
// 'X' scores 0 against every symbol except '*'; '*' scores -17 against
// every other symbol and 1 against itself.
// NOTE(review): the row/column layout assumes ValueSize<AminoAcid>::VALUE is
// 24 - confirm against the SeqAn version in use.
67  static const int _data[TAB_SIZE] =
68  {
69  // A R N D C Q E G H I L K M F P S T W Y V B Z X *
70  /* A */ 6, -7, -4, -3, -6, -4, -2, -2, -7, -5, -6, -7, -5, -8, -2, 0, -1,-13, -8, -2, -7, -6, 0,-17,
71  /* R */ -7, 8, -6,-10, -8, -2, -9, -9, -2, -5, -7, 0, -4, -9, -4, -3, -6, -2,-10, -8, 5, -1, 0,-17,
72  /* N */ -4, -6, 8, 2,-11, -3, -2, -3, 0, -5, -6, -1, -9, -9, -6, 0, -2, -8, -4, -8, -4, -2, 0,-17,
73  /* D */ -3,-10, 2, 8,-14, -2, 2, -3, -4, -7,-10, -4,-11,-15, -8, -4, -5,-15,-11, -8, -7, -3, 0,-17,
74  /* C */ -6, -8,-11,-14, 10,-14,-14, -9, -7, -6,-11,-14,-13,-13, -8, -3, -8,-15, -4, -6,-11,-14, 0,-17,
75  /* Q */ -4, -2, -3, -2,-14, 8, 1, -7, 1, -8, -7, -3, -4,-13, -3, -5, -5,-13,-12, -7, -3, 4, 0,-17,
76  /* E */ -2, -9, -2, 2,-14, 1, 8, -4, -5, -5, -7, -4, -7,-14, -5, -4, -6,-17, -8, -6, -7, -2, 0,-17,
77  /* G */ -2, -9, -3, -3, -9, -7, -4, 6, -9,-11,-11, -7, -8, -9, -6, -2, -6,-15,-14, -5, -8, -7, 0,-17,
78  /* H */ -7, -2, 0, -4, -7, 1, -5, -9, 9, -9, -8, -6,-10, -6, -4, -6, -7, -7, -3, -6, -4, -3, 0,-17,
79  /* I */ -5, -5, -5, -7, -6, -8, -5,-11, -9, 8, 5, -6, -1, -2, -8, -7, -2,-14, -6, 2, -6, -7, 0,-17,
80  /* L */ -6, -7, -6,-10,-11, -7, -7,-11, -8, 5, 5, -7, 0, -3, -8, -8, -5,-10, -7, 0, -7, -7, 0,-17,
81  /* K */ -7, 0, -1, -4,-14, -3, -4, -7, -6, -6, -7, 7, -2,-14, -6, -4, -3,-12, -9, -9, 5, 4, 0,-17,
82  /* M */ -5, -4, -9,-11,-13, -4, -7, -8,-10, -1, 0, -2, 11, -4, -8, -5, -4,-13,-11, -1, -3, -3, 0,-17,
83  /* F */ -8, -9, -9,-15,-13,-13,-14, -9, -6, -2, -3,-14, -4, 9,-10, -6, -9, -4, 2, -8,-12,-14, 0,-17,
84  /* P */ -2, -4, -6, -8, -8, -3, -5, -6, -4, -8, -8, -6, -8,-10, 8, -2, -4,-14,-13, -6, -5, -5, 0,-17,
85  /* S */ 0, -3, 0, -4, -3, -5, -4, -2, -6, -7, -8, -4, -5, -6, -2, 6, 0, -5, -7, -6, -4, -5, 0,-17,
86  /* T */ -1, -6, -2, -5, -8, -5, -6, -6, -7, -2, -5, -3, -4, -9, -4, 0, 7,-13, -6, -3, -5, -4, 0,-17,
87  /* W */ -13, -2, -8,-15,-15,-13,-17,-15, -7,-14,-10,-12,-13, -4,-14, -5,-13, 13, -5,-15, -7,-13, 0,-17,
88  /* Y */ -8,-10, -4,-11, -4,-12, -8,-14, -3, -6, -7, -9,-11, 2,-13, -7, -6, -5, 10, -7,-10,-11, 0,-17,
89  /* V */ -2, -8, -8, -8, -6, -7, -6, -5, -6, 2, 0, -9, -1, -8, -6, -6, -3,-15, -7, 7, -9, -8, 0,-17,
90  /* B */ -7, 5, -4, -7,-11, -3, -7, -8, -4, -6, -7, 5, -3,-12, -5, -4, -5, -7,-10, -9, 5, 1, 0,-17,
91  /* Z */ -6, -1, -2, -3,-14, 4, -2, -7, -3, -7, -7, 4, -3,-14, -5, -5, -4,-13,-11, -8, 1, 4, 0,-17,
92  /* X */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-17,
93  /* * */ -17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17, 1
94  };
95 
96  return _data;
97  }
98  };
99 
// Specialization of ScoringMatrixData_ providing an integer identity-style
// score table for the AminoAcid alphabet (selected via the AdaptedIdentity
// tag), in which I/L and Q/K are scored like identical residues.
100  template <>
101  struct ScoringMatrixData_<int, AminoAcid, AdaptedIdentity>
102  {
103  enum
104  {
// VALUE_SIZE: number of symbols in the AminoAcid alphabet.
// TAB_SIZE: number of entries in the flattened square score table.
105  VALUE_SIZE = ValueSize<AminoAcid>::VALUE,
106  TAB_SIZE = VALUE_SIZE * VALUE_SIZE
107  };
// Returns a pointer to the first element of the function-local static score
// table (an int array of TAB_SIZE elements).
108  static inline const int* getData()
109  {
// The initializer below lists 24 rows of 24 values each, one row per symbol in
// the order given by the column header comment.
// Scores: 1 on the diagonal (except X/X, which is 0) and for the pairs I/L
// and Q/K; 0 for all other residue pairs; '*' scores -17 against every other
// symbol and 1 against itself.
// NOTE(review): the row/column layout assumes ValueSize<AminoAcid>::VALUE is
// 24 - confirm against the SeqAn version in use.
110  static const int _data[TAB_SIZE] =
111  {
112  // A R N D C Q E G H I L K M F P S T W Y V B Z X *
113  /* A */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
114  /* R */ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
115  /* N */ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
116  /* D */ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
117  /* C */ 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
118  /* Q */ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
119  /* E */ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
120  /* G */ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
121  /* H */ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
122  /* I */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
123  /* L */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
124  /* K */ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
125  /* M */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
126  /* F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
127  /* P */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, -17,
128  /* S */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, -17,
129  /* T */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, -17,
130  /* W */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, -17,
131  /* Y */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, -17,
132  /* V */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, -17,
133  /* B */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, -17,
134  /* Z */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, -17,
135  /* X */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -17,
136  /* * */ -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, 1
137  };
138 
139  return _data;
140  }
141  };
142 
143 } // namespace seqan
144 
145 
146 namespace OpenMS
147 {
// Calculates a consensus from multiple ID runs based on PEPs and sequence
// similarities.
// NOTE(review): this listing skips several numbered source lines inside the
// class (e.g. 158, 161-162, 171-172), so the base-class list and some members
// are not visible here - consult the original header before editing.
157  class OPENMS_DLLAPI ConsensusIDAlgorithmPEPMatrix :
159  {
160  public:
163 
164  private:
// SeqAn similarity scoring type: integer scores from an amino acid score matrix.
166  typedef ::seqan::Score<int, ::seqan::ScoreMatrix< ::seqan::AminoAcid, ::seqan::Default> > SeqAnScore;
167 
// SeqAn amino acid sequence type.
169  typedef ::seqan::String< ::seqan::AminoAcid> SeqAnSequence;
170 
173 
// Alignment data structure (SeqAn Align over SeqAnSequence with ArrayGaps).
175  ::seqan::Align<SeqAnSequence, ::seqan::ArrayGaps> alignment_;
176 
179 
182 
// Overrides a base-class virtual; takes no arguments and returns nothing.
184  void updateMembers_() override;
185 
// Overrides a base-class virtual; receives both sequences by value and
// returns a double.
// NOTE(review): presumably the similarity of seq1 and seq2 - confirm the
// value range against the implementation.
187  double getSimilarity_(AASequence seq1, AASequence seq2) override;
188 
189  };
190 
191 } // namespace OpenMS
192 
OpenMS::ConsensusIDAlgorithmPEPMatrix::SeqAnScore
::seqan::Score< int, ::seqan::ScoreMatrix< ::seqan::AminoAcid, ::seqan::Default > > SeqAnScore
SeqAn similarity scoring.
Definition: ConsensusIDAlgorithmPEPMatrix.h:166
int
ConsensusIDAlgorithmSimilarity.h
seqan::ScoringMatrixData_< int, AminoAcid, AdaptedIdentity >::getData
static const int * getData()
Definition: ConsensusIDAlgorithmPEPMatrix.h:108
SeqanIncludeWrapper.h
OpenMS::AASequence
Representation of a peptide/protein sequence.
Definition: AASequence.h:113
OpenMS::ConsensusIDAlgorithmPEPMatrix::scoring_method_
SeqAnScore scoring_method_
Similarity scoring method.
Definition: ConsensusIDAlgorithmPEPMatrix.h:172
OpenMS::ConsensusIDAlgorithmPEPMatrix::SeqAnSequence
::seqan::String< ::seqan::AminoAcid > SeqAnSequence
SeqAn amino acid sequence.
Definition: ConsensusIDAlgorithmPEPMatrix.h:169
seqan::ScoringMatrixData_< int, AminoAcid, PAM30MS >::getData
static const int * getData()
Definition: ConsensusIDAlgorithmPEPMatrix.h:58
seqan
Definition: AhoCorasickAmbiguous.h:51
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OpenMS::ConsensusIDAlgorithmSimilarity
Abstract base class for ConsensusID algorithms that take peptide similarity into account.
Definition: ConsensusIDAlgorithmSimilarity.h:54
OpenMS::ConsensusIDAlgorithmPEPMatrix::alignment_
::seqan::Align< SeqAnSequence, ::seqan::ArrayGaps > alignment_
Alignment data structure.
Definition: ConsensusIDAlgorithmPEPMatrix.h:175
OpenMS::ConsensusIDAlgorithmPEPMatrix
Calculates a consensus from multiple ID runs based on PEPs and sequence similarities.
Definition: ConsensusIDAlgorithmPEPMatrix.h:157
seqan::PAM30MS
Definition: ConsensusIDAlgorithmPEPMatrix.h:46
seqan::AdaptedIdentity
Definition: ConsensusIDAlgorithmPEPMatrix.h:47