OpenMS  2.7.0
XFDRAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Eugen Netz $
32 // $Authors: Lukas Zimmermann, Eugen Netz $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
44 
45 namespace OpenMS
46 {
47 
48  //-------------------------------------------------------------
49  // Doxygen docu
50  //-------------------------------------------------------------
51 
65  class OPENMS_DLLAPI XFDRAlgorithm : // Calculates false discovery rate estimates on crosslink identifications.
66  public DefaultParamHandler, public ProgressLogger
67  {
     // NOTE: this is a rendered source listing. The leading number on each line is the
     // line number inside XFDRAlgorithm.h. Gaps in that numbering are lines the rendering
     // left out (presumably documentation comments, plus the declarations named in the
     // notes below, which are taken from the member index that follows the listing).
68 
69  public:
70 
     // Exit codes.
     // Only UNEXPECTED_RESULT is shown here; the member index also places
     // EXECUTION_OK (line 74) and ILLEGAL_PARAMETERS (line 75) in this enum.
72  enum ExitCodes
73  {
76  UNEXPECTED_RESULT
77  };
78 
     // The member index also documents a default constructor, XFDRAlgorithm(),
     // which is not shown in this listing.
81 
     // Default destructor.
83  ~XFDRAlgorithm() override;
84 
     // Performs the main function of this class, the FDR estimation for
     // cross-linked peptide experiments.
     // Both arguments are passed by non-const reference.
91  ExitCodes run(std::vector<PeptideIdentification>& peptide_ids, ProteinIdentification& protein_id);
92 
     // The member index also documents `ExitCodes validateClassArguments() const`
     // ("Checks whether the parameters of the object are valid."), which is not
     // shown in this listing.
98 
99 private:
     // Used to update extra member variables at the end of the setParameters() method.
100  void updateMembers_() override;
101 
     // Prepares the vector of PeptideIdentification such that it can be processed
     // downstream (the description in the member index is truncated after this point).
111  void initDataStructures_(std::vector<PeptideIdentification>& peptide_ids, ProteinIdentification& protein_id);
112 
     // Assigns to `types` all cross-link types that the given identification belongs to
     // (paraphrased from the member index, whose description is truncated).
118  static void assignTypes_(PeptideHit& ph, StringList& types);
119 
     // xprophet method for target hits counting as implemented in xProphet.
     // NOTE(review): `fdr` is a non-const reference and is presumably the output — confirm
     // against the implementation file.
125  void fdr_xprophet_(std::map< String, Math::Histogram<> >& cum_histograms,
126  const String& targetclass, const String& decoyclass, const String& fulldecoyclass,
127  std::vector< double >& fdr, bool mono);
128 
     // Calculates the qFDR values for the provided FDR values. The FDRs are assumed to be
     // sorted by score (the member index truncates the description before the sort direction).
134  static void calc_qfdr_(const std::vector< double >& fdr, std::vector< double >& qfdr);
135 
     // The member index gives no description for the next three helpers.
136  void findTopUniqueHits_(std::vector<PeptideIdentification>& peptide_ids);
137 
138  void writeArgumentsLog_() const;
139 
140  String getId_(const PeptideHit& ph) const;
141 
     // NOTE: the member index places `static Size getMinIonsMatched_(const PeptideHit &ph)`
     // at line 142. That signature line is missing from this listing, so the body below
     // appears without it.
     // The body returns the smaller of the alpha and beta ion counts. Each count is the sum
     // of the "matched_linear_*" and "matched_xlink_*" meta values, each converted to Size.
143  {
144  Size alpha_ions = Size(ph.getMetaValue("matched_linear_alpha")) + Size(ph.getMetaValue("matched_xlink_alpha"));
145  Size beta_ions = Size(ph.getMetaValue("matched_linear_beta")) + Size(ph.getMetaValue("matched_xlink_beta"));
146  return std::min(alpha_ions, beta_ions);
147  }
148 
     // Stores `value` on the hit under the meta key "XFDR:is_intraprotein".
     // The flag is written as the string "true" or "false", not as a boolean.
149  inline static void setIntraProtein_(PeptideHit& ph, const bool value)
150  {
151  ph.setMetaValue("XFDR:is_intraprotein", DataValue(value ? "true" : "false"));
152  }
153 
     // Stores `value` on the hit under the meta key "XFDR:is_interprotein".
     // The flag is written as the string "true" or "false", not as a boolean.
154  inline static void setInterProtein_(PeptideHit& ph, const bool value)
155  {
156  ph.setMetaValue("XFDR:is_interprotein", DataValue(value ? "true" : "false"));
157  }
158 
     // Determines whether the Peptide Evidences belong to the same protein, modulo decoy.
     // prot1 and prot2 are taken by value, so the substitute() calls below only modify
     // local copies. Returns true when the two copies compare equal afterwards.
162  static bool isSameProtein_(
163  String prot1,
164  String prot2,
165  const String &decoy_string)
166  {
     // NOTE(review): presumably this removes every occurrence of decoy_string from the
     // name — confirm against the String::substitute overload taking two strings.
167  prot1.substitute(decoy_string, "");
168  prot2.substitute(decoy_string, "");
     // Expected postcondition: neither name still contains decoy_string.
     // The checks are plain assert() calls, so they are compiled out when NDEBUG is defined.
169  assert( ! prot1.hasSubstring(decoy_string));
170  assert( ! prot2.hasSubstring(decoy_string));
171  return prot1 == prot2;
172  }
173 
174  // Score range used by the tool. The member index places Int min_score_ (line 175) and Int max_score_ (line 176) here; both are omitted from this listing.
177 
178  // unique top hits
179  std::vector<String> unique_ids_;
180  std::vector<double> unique_id_scores_;
181 
182  // maps the index of a peptide id in all_pep_ids_ (stored as a String key) to the vector of its cross-link classes
183  std::map<String, std::vector<String>> cross_link_classes_;
184 
185  // Program arguments
     // Only arg_binsize_ is shown. The member index also lists, at lines 186-193:
     // String decoy_string_, double arg_mindeltas_, double arg_minborder_,
     // double arg_maxborder_, Int arg_minionsmatched_, double arg_minscore_,
     // bool arg_uniquex_ and bool arg_no_qvalues_.
194  double arg_binsize_;
195 
196  // Names of the class parameters
     // The member index also lists param_decoy_string_ (line 197) and
     // param_minionsmatched_ (line 201), which are omitted from this listing.
198  static const String param_minborder_;
199  static const String param_maxborder_;
200  static const String param_mindeltas_;
202  static const String param_uniquexl_;
203  static const String param_no_qvalues_;
204  static const String param_minscore_;
205  static const String param_binsize_;
206 
207  // Constants related to particular crosslink classes
     // None of these constants is shown. The member index lists twelve
     // `static const String crosslink_class_*` members at lines 208-219
     // (intradecoys, fulldecoysintralinks, interdecoys, fulldecoysinterlinks, monodecoys,
     // intralinks, interlinks, monolinks, decoys, targets, hybriddecoysintralinks,
     // hybriddecoysinterlinks).
220  };
221 }
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:59
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
Representation of a histogram.
Definition: Histogram.h:64
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
const DataValue & getMetaValue(const String &name, const DataValue &default_value=DataValue::EMPTY) const
Returns the value corresponding to a string, or a default value (default: DataValue::EMPTY) if not fo...
Representation of a peptide hit.
Definition: PeptideHit.h:57
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
Representation of a protein identification run.
Definition: ProteinIdentification.h:72
A more convenient string class.
Definition: String.h:61
bool hasSubstring(const String &string) const
true if String contains the string, false otherwise
String & substitute(char from, char to)
Replaces all occurrences of the character from by the character to.
Calculates false discovery rate estimates on crosslink identifications.
Definition: XFDRAlgorithm.h:67
static const String crosslink_class_intralinks_
Definition: XFDRAlgorithm.h:213
ExitCodes validateClassArguments() const
Checks whether the parameters of the object are valid.
bool arg_no_qvalues_
Definition: XFDRAlgorithm.h:193
static const String param_binsize_
Definition: XFDRAlgorithm.h:205
static const String crosslink_class_intradecoys_
Definition: XFDRAlgorithm.h:208
static const String crosslink_class_decoys_
Definition: XFDRAlgorithm.h:216
ExitCodes run(std::vector< PeptideIdentification > &peptide_ids, ProteinIdentification &protein_id)
Performs the main function of this class, the FDR estimation for cross-linked peptide experiments.
std::map< String, std::vector< String > > cross_link_classes_
Definition: XFDRAlgorithm.h:183
static void setIntraProtein_(PeptideHit &ph, const bool value)
Definition: XFDRAlgorithm.h:149
void fdr_xprophet_(std::map< String, Math::Histogram<> > &cum_histograms, const String &targetclass, const String &decoyclass, const String &fulldecoyclass, std::vector< double > &fdr, bool mono)
xprophet method for target hits counting as implemented in xProphet
static const String crosslink_class_interdecoys_
Definition: XFDRAlgorithm.h:210
static void calc_qfdr_(const std::vector< double > &fdr, std::vector< double > &qfdr)
Calculates the qFDR values for the provided FDR values, assuming that the FDRs are sorted by score in...
static const String param_no_qvalues_
Definition: XFDRAlgorithm.h:203
String decoy_string_
Definition: XFDRAlgorithm.h:186
static const String crosslink_class_targets_
Definition: XFDRAlgorithm.h:217
bool arg_uniquex_
Definition: XFDRAlgorithm.h:192
double arg_minscore_
Definition: XFDRAlgorithm.h:191
XFDRAlgorithm()
Default constructor.
static bool isSameProtein_(String prot1, String prot2, const String &decoy_string)
Determines whether the Peptide Evidences belong to the same protein, modulo decoy.
Definition: XFDRAlgorithm.h:162
double arg_mindeltas_
Definition: XFDRAlgorithm.h:187
static const String crosslink_class_fulldecoysinterlinks_
Definition: XFDRAlgorithm.h:211
static const String crosslink_class_hybriddecoysinterlinks_
Definition: XFDRAlgorithm.h:219
static const String param_mindeltas_
Definition: XFDRAlgorithm.h:200
String getId_(const PeptideHit &ph) const
void initDataStructures_(std::vector< PeptideIdentification > &peptide_ids, ProteinIdentification &protein_id)
Prepares vector of PeptideIdentification such that it can be processed downstream....
Int min_score_
Definition: XFDRAlgorithm.h:175
void findTopUniqueHits_(std::vector< PeptideIdentification > &peptide_ids)
static const String crosslink_class_monolinks_
Definition: XFDRAlgorithm.h:215
static const String crosslink_class_hybriddecoysintralinks_
Definition: XFDRAlgorithm.h:218
std::vector< double > unique_id_scores_
Definition: XFDRAlgorithm.h:180
void writeArgumentsLog_() const
std::vector< String > unique_ids_
Definition: XFDRAlgorithm.h:179
static const String param_decoy_string_
Definition: XFDRAlgorithm.h:197
static Size getMinIonsMatched_(const PeptideHit &ph)
Definition: XFDRAlgorithm.h:142
static const String param_maxborder_
Definition: XFDRAlgorithm.h:199
static const String param_uniquexl_
Definition: XFDRAlgorithm.h:202
Int max_score_
Definition: XFDRAlgorithm.h:176
static void setInterProtein_(PeptideHit &ph, const bool value)
Definition: XFDRAlgorithm.h:154
static const String param_minborder_
Definition: XFDRAlgorithm.h:198
static const String param_minscore_
Definition: XFDRAlgorithm.h:204
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
static const String crosslink_class_fulldecoysintralinks_
Definition: XFDRAlgorithm.h:209
ExitCodes
Exit codes.
Definition: XFDRAlgorithm.h:73
@ ILLEGAL_PARAMETERS
Definition: XFDRAlgorithm.h:75
@ EXECUTION_OK
Definition: XFDRAlgorithm.h:74
static const String crosslink_class_monodecoys_
Definition: XFDRAlgorithm.h:212
double arg_minborder_
Definition: XFDRAlgorithm.h:188
double arg_maxborder_
Definition: XFDRAlgorithm.h:189
Int arg_minionsmatched_
Definition: XFDRAlgorithm.h:190
static const String crosslink_class_interlinks_
Definition: XFDRAlgorithm.h:214
static void assignTypes_(PeptideHit &ph, StringList &types)
Inspects PeptideIdentification pep_id and assigns all cross-link types that this identification belon...
double arg_binsize_
Definition: XFDRAlgorithm.h:194
~XFDRAlgorithm() override
Default destructor.
static const String param_minionsmatched_
Definition: XFDRAlgorithm.h:201
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47