OpenMS  2.7.0
ProteinResolver.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: David Wojnar $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
43 
44 namespace OpenMS
45 {
/// Helper class for peptide and protein quantification based on feature data
/// annotated with IDs.
///
/// Declares the graph node types (ProteinEntry, PeptideEntry), the group types
/// (ISDGroup, MSDGroup) and the per-input result record (ResolverResult), plus
/// the entry points resolveConsensus() / resolveID().
///
/// NOTE(review): this listing is lossy -- several numbered source lines are
/// absent (e.g. 63, 66, 69, 86, 92, 95, 103-106, 109, 111, 119-123, 131, 135,
/// 137, 146). The notes below name the declarations the documentation index
/// attributes to those lines; verify against the real header before relying on
/// this listing.
56  class OPENMS_DLLAPI ProteinResolver :
57  public DefaultParamHandler
58  {
59 
60 public:
61 
62  //default constructor
    // NOTE(review): the declaration itself (source line 63) is missing from this listing.
64 
65  //copy constructor
    // NOTE(review): declaration missing here (source line 66); the index lists
    // `ProteinResolver(const ProteinResolver &rhs)`.
67 
68  //assignment operator
    // NOTE(review): declaration missing here (source line 69); the index lists
    // `ProteinResolver & operator=(const ProteinResolver &rhs)`.
70 
71  //destructor
72  ~ProteinResolver() override;
73 
74 
    // Forward declarations: the structs below hold pointers to each other.
75  struct ProteinEntry;
76  struct PeptideEntry;
77  struct ISDGroup;
78  struct MSDGroup;
79  struct ResolverResult;
80 
    /// represents a protein from FASTA file
    // NOTE(review): the index also lists `FASTAFile::FASTAEntry * fasta_entry` (line 86),
    // `Size index` (line 92) and `Size number_of_experimental_peptides` (line 95),
    // which do not appear in this listing.
82  struct ProteinEntry
83  {
84  std::list<PeptideEntry *> peptides;
85  bool traversed;
87  enum type {primary, secondary, primary_indistinguishable, secondary_indistinguishable} protein_type;
88  double weight; //monoisotopic
89  float coverage; //in percent
90  //if Protein is indistinguishable all his fellows are in the list indis
91  std::list<ProteinEntry *> indis;
93  Size msd_group; //index
94  Size isd_group; //index
96  };
97 
    /// represents a peptide. First in silico. If experimental is set to true it is MS/MS derived.
    // NOTE(review): the index also lists `String sequence` (103),
    // `Size peptide_identification` (104), `Size peptide_hit` (105), `Size index` (106),
    // `bool experimental` (109) and `String origin` (111), which are missing here.
99  struct PeptideEntry
100  {
101  std::list<ProteinEntry *> proteins;
102  bool traversed;
107  Size msd_group; //index
108  Size isd_group; //index
110  float intensity;
112  };
113 
    /// representation of an msd group. Contains peptides, proteins and a pointer to its ISD group
    // NOTE(review): the index also lists `ISDGroup * isd_group` (120), `Size number_of_decoy` (121),
    // `Size number_of_target` (122) and `Size number_of_target_plus_decoy` (123); source
    // line 119 is absent as well and its content cannot be determined from this listing.
115  struct MSDGroup
116  {
117  std::list<ProteinEntry *> proteins;
118  std::list<PeptideEntry *> peptides;
124  float intensity;
125  };
126 
    // ISD group: lists of member proteins and peptides plus a list of Size values
    // named msd_groups (presumably indices of the MSD groups it contains -- TODO confirm).
    // NOTE(review): source line 131 is absent from this listing.
127  struct ISDGroup
128  {
129  std::list<ProteinEntry *> proteins;
130  std::list<PeptideEntry *> peptides;
132  std::list<Size> msd_groups;
133  };
134 
    // NOTE(review): the opening `struct ResolverResult` line (source line 135) is missing
    // from this listing; the brace below presumably opens that struct, since the index
    // files these members under ProteinResolver.h:136-146. The index also lists
    // `String identifier` (137) and `ConsensusMap * consensus_map` (146).
    // All containers are held by raw pointer; ownership is not visible here.
136  {
138  std::vector<ISDGroup> * isds;
139  std::vector<MSDGroup> * msds;
140  std::vector<ProteinEntry> * protein_entries;
141  std::vector<PeptideEntry> * peptide_entries;
142  std::vector<Size> * reindexed_peptides;
143  std::vector<Size> * reindexed_proteins;
144  enum type {PeptideIdent, Consensus} input_type;
145  std::vector<PeptideIdentification> * peptide_identification;
147  };
148 
    /// Computing protein groups from peptide identifications OR consensus map.
    /// This overload takes a consensus map.
156  void resolveConsensus(ConsensusMap & consensus);
157 
    /// Computing protein groups from peptide identifications OR consensus map.
    /// This overload takes a vector of peptide identifications.
165  void resolveID(std::vector<PeptideIdentification> & peptide_identifications);
166 
    // The commented-out declarations below are marked "NOT IMPLEMENTED YET" /
    // "not implemented" by the original authors and are kept for reference only.
167  // /**
168  // @brief NOT IMPLEMENTED YET
169 
170  // @param protein_nodes
171  // @param peptide_nodes
172  // @param reindexed_proteins
173  // @param reindexed_peptides
174  // @param peptide_identifications
175  // @param output
176  // */
177  // void writeProteinsAndPeptidesmzTab(std::vector<ProteinEntry>& protein_nodes, std::vector<PeptideEntry>& peptide_nodes, std::vector<Size>& reindexed_proteins, std::vector<Size>& reindexed_peptides, std::vector<PeptideIdentification>& peptide_identifications, String& output );
178  // /**
179  // @brief Writing peptide table into text file
180 
181  // @param peptides
182  // @param reindexed_peptides
183  // @param identifications
184  // @param output_file
185  // */
186  // void writePeptideTable(std::vector<PeptideEntry> & peptides, std::vector<Size> & reindexed_peptides, std::vector<PeptideIdentification> & identifications, String & output_file); // not implemented
187  // /**
188  // @brief Writing peptide table into text file
189 
190  // @param peptides
191  // @param reindexed_peptides
192  // @param consensus
193  // @param output
194  // */
195  // void writePeptideTable(std::vector<PeptideEntry> & peptides, std::vector<Size> & reindexed_peptides, ConsensusMap & consensus, String & output_file); // not implemented
196  // /**
197  // @brief Writing protein table into text file
198 
199  // @param proteins
200  // @param reindexed_proteins
201  // @param output_file
202  // */
203  // void writeProteinTable(std::vector<ProteinEntry> & proteins, std::vector<Size> & reindexed_proteins, String & output_file); // not implemented
204  // /**
205  // @brief Writing protein groups into text file
206 
207  // @param isd_groups ISD groups
208  // @param msd_groups MSD groups
209  // @param output_file Path of output file
210  // */
211  // void writeProteinGroups(std::vector<ISDGroup> & isd_groups, std::vector<MSDGroup> & msd_groups, String & output_file); // not implemented
212 
    // No description is available for the two countTargetDecoy overloads.
    // NOTE(review): presumably they fill the target/decoy counters of each MSD group
    // from the given consensus map -- TODO confirm against the implementation.
219  void countTargetDecoy(std::vector<MSDGroup> & msd_groups, ConsensusMap & consensus);
220 
    // Same as above, but takes a vector of peptide identifications instead of a consensus map.
227  void countTargetDecoy(std::vector<MSDGroup> & msd_groups, std::vector<PeptideIdentification> & peptide_nodes);
228 
    // NOTE(review): presumably clears the stored resolver results -- confirm in the .cpp.
229  void clearResult();
230 
    // NOTE(review): presumably stores the FASTA entries used by later resolve calls
    // (cf. the protein_data_ member) -- confirm in the .cpp.
231  void setProteinData(std::vector<FASTAFile::FASTAEntry> & protein_data);
232 
    // Returns a const reference to a vector of ResolverResult
    // (presumably the resolver_result_ member -- confirm in the .cpp).
233  const std::vector<ResolverResult> & getResults();
234 
    /// overloaded functions -- return a const reference to a PeptideIdentification object
    /// or a PeptideHit for the given peptide entry, looked up either in a consensus map or
    /// in a vector of peptide identifications.
236  static const PeptideIdentification & getPeptideIdentification(const ConsensusMap & consensus, const PeptideEntry * peptide);
237  static const PeptideHit & getPeptideHit(const ConsensusMap & consensus, const PeptideEntry * peptide);
238  static const PeptideIdentification & getPeptideIdentification(const std::vector<PeptideIdentification> & peptide_nodes, const PeptideEntry * peptide);
239  static const PeptideHit & getPeptideHit(const std::vector<PeptideIdentification> & peptide_nodes, const PeptideEntry * peptide);
240 
241 private:
242 
    // Results exposed through getResults() and protein data given to setProteinData()
    // (association inferred from names/types -- TODO confirm).
243  std::vector<ResolverResult> resolver_result_;
244  std::vector<FASTAFile::FASTAEntry> protein_data_;
245 
    // NOTE(review): presumably fills MSDGroup::intensity for each group -- confirm in the .cpp.
246  void computeIntensityOfMSD_(std::vector<MSDGroup> & msd_groups);
247 
    /// traverse protein and peptide nodes for building MSD groups
249  void traverseProtein_(ProteinEntry * prot_node, MSDGroup & group);
250  void traversePeptide_(PeptideEntry * pep_node, MSDGroup & group);
    /// searches given sequence in all nodes and returns its index or nodes.size() if not found.
252  Size findPeptideEntry_(String seq, std::vector<PeptideEntry> & nodes);
    /// helper function for findPeptideEntry.
254  Size binarySearchNodes_(String & seq, std::vector<PeptideEntry> & nodes, Size start, Size end);
    /// includes all MS/MS derived peptides into the graph -- idXML
256  Size includeMSMSPeptides_(std::vector<PeptideIdentification> & peptide_identifications, std::vector<PeptideEntry> & peptide_nodes);
    // Consensus-map counterpart of the overload above.
259  Size includeMSMSPeptides_(ConsensusMap & consensus, std::vector<PeptideEntry> & peptide_nodes);
    /// Proteins and Peptides get reindexed, based on whether they belong to msd groups or not.
261  void reindexingNodes_(std::vector<MSDGroup> & msd_groups, std::vector<Size> & reindexed_proteins, std::vector<Size> & reindexed_peptides);
    /// marks Proteins which have a unique peptide as primary. Uses the reindexed vector
    /// (presumably reindexingNodes_ must run first -- the source description is truncated).
263  void primaryProteins_(std::vector<PeptideEntry> & peptide_nodes, std::vector<Size> & reindexed_peptides);
    // Group construction helpers; no description is available for these two.
264  void buildingMSDGroups_(std::vector<MSDGroup> & msd_groups, std::vector<ISDGroup> & isd_groups);
265  void buildingISDGroups_(std::vector<ProteinEntry> & protein_nodes, std::vector<PeptideEntry> & peptide_nodes,
266  std::vector<ISDGroup> & isd_groups);
267  // disabled/buggy
268  //ProteinResolver::indistinguishableProteins(vector<MSDGroup>& msd_groups);
269 
270  }; // class
271 
272 } // namespace
273 
A container for consensus elements.
Definition: ConsensusMap.h:88
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
Representation of a peptide hit.
Definition: PeptideHit.h:57
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
Helper class for peptide and protein quantification based on feature data annotated with IDs.
Definition: ProteinResolver.h:58
static const PeptideHit & getPeptideHit(const std::vector< PeptideIdentification > &peptide_nodes, const PeptideEntry *peptide)
Size peptide_hit
Definition: ProteinResolver.h:105
float intensity
Definition: ProteinResolver.h:110
void resolveID(std::vector< PeptideIdentification > &peptide_identifications)
Computing protein groups from peptide identifications OR consensus map.
String sequence
Definition: ProteinResolver.h:103
std::list< Size > msd_groups
Definition: ProteinResolver.h:132
std::vector< ResolverResult > resolver_result_
Definition: ProteinResolver.h:243
void buildingISDGroups_(std::vector< ProteinEntry > &protein_nodes, std::vector< PeptideEntry > &peptide_nodes, std::vector< ISDGroup > &isd_groups)
void countTargetDecoy(std::vector< MSDGroup > &msd_groups, std::vector< PeptideIdentification > &peptide_nodes)
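(No description provided in the source documentation; presumably counts target and decoy peptides per MSD group, cf. number_of_target / number_of_decoy.)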
bool traversed
Definition: ProteinResolver.h:102
std::list< PeptideEntry * > peptides
Definition: ProteinResolver.h:118
void resolveConsensus(ConsensusMap &consensus)
Computing protein groups from peptide identifications OR consensus map.
Size number_of_decoy
Definition: ProteinResolver.h:121
Size includeMSMSPeptides_(std::vector< PeptideIdentification > &peptide_identifications, std::vector< PeptideEntry > &peptide_nodes)
includes all MS/MS derived peptides into the graph –idXML
String origin
Definition: ProteinResolver.h:111
void traversePeptide_(PeptideEntry *pep_node, MSDGroup &group)
bool experimental
Definition: ProteinResolver.h:109
void primaryProteins_(std::vector< PeptideEntry > &peptide_nodes, std::vector< Size > &reindexed_peptides)
marks Proteins which have a unique peptide as primary. Uses reindexed vector, thus reindexingNodes ha...
Size number_of_target_plus_decoy
Definition: ProteinResolver.h:123
static const PeptideIdentification & getPeptideIdentification(const std::vector< PeptideIdentification > &peptide_nodes, const PeptideEntry *peptide)
const std::vector< ResolverResult > & getResults()
ISDGroup * isd_group
Definition: ProteinResolver.h:120
Size number_of_target
Definition: ProteinResolver.h:122
Size includeMSMSPeptides_(ConsensusMap &consensus, std::vector< PeptideEntry > &peptide_nodes)
void traverseProtein_(ProteinEntry *prot_node, MSDGroup &group)
traverse protein and peptide nodes for building MSD groups
void buildingMSDGroups_(std::vector< MSDGroup > &msd_groups, std::vector< ISDGroup > &isd_groups)
ProteinResolver(const ProteinResolver &rhs)
static const PeptideIdentification & getPeptideIdentification(const ConsensusMap &consensus, const PeptideEntry *peptide)
overloaded functions – return a const reference to a PeptideIdentification object or a peptideHit eit...
Size msd_group
Definition: ProteinResolver.h:107
Size findPeptideEntry_(String seq, std::vector< PeptideEntry > &nodes)
searches given sequence in all nodes and returns its index or nodes.size() if not found.
void countTargetDecoy(std::vector< MSDGroup > &msd_groups, ConsensusMap &consensus)
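(No description provided in the source documentation; presumably counts target and decoy peptides per MSD group, cf. number_of_target / number_of_decoy.)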
std::list< ProteinEntry * > proteins
Definition: ProteinResolver.h:101
Size binarySearchNodes_(String &seq, std::vector< PeptideEntry > &nodes, Size start, Size end)
helper function for findPeptideEntry.
void setProteinData(std::vector< FASTAFile::FASTAEntry > &protein_data)
std::vector< FASTAFile::FASTAEntry > protein_data_
Definition: ProteinResolver.h:244
static const PeptideHit & getPeptideHit(const ConsensusMap &consensus, const PeptideEntry *peptide)
void reindexingNodes_(std::vector< MSDGroup > &msd_groups, std::vector< Size > &reindexed_proteins, std::vector< Size > &reindexed_peptides)
Proteins and Peptides get reindexed, based on whether they belong to msd groups or not....
Size peptide_identification
Definition: ProteinResolver.h:104
Size index
Definition: ProteinResolver.h:106
ProteinResolver & operator=(const ProteinResolver &rhs)
void computeIntensityOfMSD_(std::vector< MSDGroup > &msd_groups)
Size isd_group
Definition: ProteinResolver.h:108
Definition: ProteinResolver.h:128
representation of an msd group. Contains peptides, proteins and a pointer to its ISD group
Definition: ProteinResolver.h:116
represents a peptide. First in silico. If experimental is set to true it is MS/MS derived.
Definition: ProteinResolver.h:100
A more convenient string class.
Definition: String.h:61
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
FASTA entry type (identifier, description and sequence) The first String corresponds to the identifie...
Definition: FASTAFile.h:72
represents a protein from FASTA file
Definition: ProteinResolver.h:83
float coverage
Definition: ProteinResolver.h:89
std::list< ProteinEntry * > indis
Definition: ProteinResolver.h:91
bool traversed
Definition: ProteinResolver.h:85
std::list< PeptideEntry * > peptides
Definition: ProteinResolver.h:84
type
Definition: ProteinResolver.h:87
@ primary
Definition: ProteinResolver.h:87
double weight
Definition: ProteinResolver.h:88
Size number_of_experimental_peptides
Definition: ProteinResolver.h:95
Size msd_group
Definition: ProteinResolver.h:93
FASTAFile::FASTAEntry * fasta_entry
Definition: ProteinResolver.h:86
Size index
Definition: ProteinResolver.h:92
Size isd_group
Definition: ProteinResolver.h:94
Definition: ProteinResolver.h:136
std::vector< MSDGroup > * msds
Definition: ProteinResolver.h:139
std::vector< ISDGroup > * isds
Definition: ProteinResolver.h:138
std::vector< PeptideIdentification > * peptide_identification
Definition: ProteinResolver.h:145
type
Definition: ProteinResolver.h:144
std::vector< Size > * reindexed_proteins
Definition: ProteinResolver.h:143
std::vector< PeptideEntry > * peptide_entries
Definition: ProteinResolver.h:141
std::vector< Size > * reindexed_peptides
Definition: ProteinResolver.h:142
std::vector< ProteinEntry > * protein_entries
Definition: ProteinResolver.h:140
ConsensusMap * consensus_map
Definition: ProteinResolver.h:146
String identifier
Definition: ProteinResolver.h:137