OpenMS  2.7.0
FalseDiscoveryRate.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow $
32 // $Authors: Andreas Bertsch, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
42 
43 #include <boost/unordered_map.hpp>
44 
45 #include <vector>
46 #include <unordered_set>
47 
48 namespace OpenMS
49 {
50 
// Forward declaration only: every declaration visible in this listing takes
// ScoreToTgtDecLabelPairs by reference, so the full definition is not needed here.
// NOTE(review): the reference index at the end of this page ends with an unnamed
// "Definition: IDScoreGetterSetter.h:55" entry - presumably this struct; confirm.
51  struct ScoreToTgtDecLabelPairs;
52 
/// Calculates false discovery rates (FDR) from identifications.
///
/// Derives from DefaultParamHandler (a base class for all classes handling
/// default parameters).
///
/// NOTE(review): this is a rendered source listing; the leading numbers are the
/// listing's own line numbers and several original lines (doc comments and some
/// declarations) are not shown. Gaps are pointed out where they occur below.
77  class OPENMS_DLLAPI FalseDiscoveryRate :
78  public DefaultParamHandler
79  {
80 public:
    // NOTE(review): listing lines 81-82 are not shown. The reference index lists a
    // default constructor FalseDiscoveryRate() - presumably declared here; confirm.
83 

    /// Calculates the FDR of two runs, a forward run and a decoy run on peptide level.
90  void apply(std::vector<PeptideIdentification>& fwd_ids, std::vector<PeptideIdentification>& rev_ids) const;
91 

    /// Calculates the FDR of one run from a concatenated sequence DB search.
97  void apply(std::vector<PeptideIdentification>& id) const;
98 

    /// Calculates the FDR of two runs, a forward run and a decoy run on protein level.
105  void apply(std::vector<ProteinIdentification>& fwd_ids, std::vector<ProteinIdentification>& rev_ids) const;
106 

    /// Calculates the FDR of one run from a concatenated sequence DB search (protein level).
112  void apply(std::vector<ProteinIdentification>& ids) const;
113 

    /// Calculates the FDR based on PEPs or PPs (if present) and modifies the IDs in place.
119  void applyEstimated(std::vector<ProteinIdentification>& ids) const;
120 

    /// Calculates a linear combination of the area of the difference between
    /// estimated and empirical (TD) FDR and a second term.
    /// NOTE(review): the description of the second term is truncated in this
    /// listing - presumably the rocN value, combined via diffWeight; confirm.
    ///
    /// The three overloads differ only in their input: a vector of protein
    /// identification runs, a single run, or score/label pairs. All share the
    /// defaults pepCutoff = 1.0, fpCutoff = 50 and diffWeight = 0.2.
130  double applyEvaluateProteinIDs(const std::vector<ProteinIdentification>& ids, double pepCutoff = 1.0, UInt fpCutoff = 50, double diffWeight = 0.2);
131  double applyEvaluateProteinIDs(const ProteinIdentification& ids, double pepCutoff = 1.0, UInt fpCutoff = 50, double diffWeight = 0.2);
132  double applyEvaluateProteinIDs(ScoreToTgtDecLabelPairs& score_to_tgt_dec_fraction_pairs, double pepCutoff = 1.0, UInt fpCutoff = 50, double diffWeight = 0.2);
133 

    /// Simpler reimplementation of the apply function above.
135  void applyBasic(std::vector<PeptideIdentification> & ids);
    /// Simpler reimplementation of the apply function above for peptides in ConsensusMaps.
137  void applyBasic(ConsensusMap & cmap, bool use_unassigned_peptides = true);
    /// Simpler reimplementation of the apply function above for proteins.
139  void applyBasic(ProteinIdentification & id, bool groups_too = true);
140 

    // NOTE(review): the listing carries no description for the rocN overloads
    // below. Presumably they compute a ROC-N style score limited to fp_cutoff
    // false positives, optionally restricted to one identifier; confirm against
    // the implementation.
143  double rocN(const std::vector<PeptideIdentification>& ids, Size fp_cutoff) const;
144 
147  double rocN(const std::vector<PeptideIdentification>& ids, Size fp_cutoff, const String& identifier) const;
148 
151  double rocN(const ConsensusMap& ids, Size fp_cutoff) const;
152 
155  double rocN(const ConsensusMap& ids, Size fp_cutoff, const String& identifier) const;
156 
157  //TODO the next two methods could potentially be merged for speed (they iterate over the same structure)
158  //But since they have different cutoff types and it is more generic, I leave it like this.

    /// Calculates the area of the difference between estimated and empirical FDR on the fly.
    /// NOTE(review): the remainder of this description is truncated in the listing.
160  double diffEstimatedEmpirical(const ScoreToTgtDecLabelPairs& scores_labels, double pepCutoff = 1.0) const;
161 

    // rocN overload operating directly on score/label pairs; fpCutoff defaults to 50.
164  double rocN(const ScoreToTgtDecLabelPairs& scores_labels, Size fpCutoff = 50) const;
165 
    // NOTE(review): listing lines 166-174 are not shown. The reference index lists
    // applyToQueryMatches(IdentificationData&, IdentificationData::ScoreTypeRef) const
    // ("Calculate FDR on the level of molecule-query matches ...") - presumably
    // declared in this gap; confirm.
175 
176 
177 private:
178 
    // NOTE(review): listing lines 179-180 and 182-183 are not shown. The reference
    // index lists a copy constructor and operator=, both marked "Not implemented" -
    // presumably declared here; confirm.
181 
184 

    /// Calculates the FDR, given two vectors of scores.
186  void calculateFDRs_(std::map<double, double>& score_to_fdr, std::vector<double>& target_scores, std::vector<double>& decoy_scores, bool q_value, bool higher_score_better) const;
187 

    // NOTE(review): the next four lines are only the TAIL of a parameter list; the
    // head of the declaration (listing lines 188-191) is not shown. According to
    // the reference index it belongs to
    //   void handleQueryMatch_(IdentificationData::QueryMatchRef match_ref,
    //                          IdentificationData::ScoreTypeRef score_ref, ...) const
    // described there as "Helper function for applyToQueryMatches()".
192  std::vector<double>& target_scores,
193  std::vector<double>& decoy_scores,
194  std::map<IdentificationData::IdentifiedMoleculeRef, bool>& molecule_to_decoy,
195  std::map<IdentificationData::QueryMatchRef, double>& match_to_score) const;
196 

    // NOTE(review): no description in the listing. Presumably fills scores_to_FDR
    // with q-values estimated from scores_labels; confirm.
199  void calculateEstimatedQVal_(std::map<double, double> &scores_to_FDR,
200  ScoreToTgtDecLabelPairs &scores_labels,
201  bool higher_score_better) const;
202 

    // NOTE(review): no description in the listing. Presumably the FDR/q-value
    // computation used by the applyBasic overloads; confirm.
208  void calculateFDRBasic_(std::map<double,double>& scores_to_FDR, ScoreToTgtDecLabelPairs& scores_labels, bool qvalue, bool higher_score_better) const;
209 

    // NOTE(review): no description in the listing. Presumably a trapezoidal area
    // measured relative to the x = y line (expected vs. actual values); confirm.
212  double trapezoidal_area_xEqy(double exp1, double exp2, double act1, double act2) const;
213 

    /// Calculates the trapezoidal area for a trapezoid with a flat horizontal base, e.g. for an AUC.
215  double trapezoidal_area(double x1, double x2, double y1, double y2) const;
216 
217  };
218 
219 } // namespace OpenMS
220 
A container for consensus elements.
Definition: ConsensusMap.h:88
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
Calculates false discovery rates (FDR) from identifications.
Definition: FalseDiscoveryRate.h:79
void applyBasic(ConsensusMap &cmap, bool use_unassigned_peptides=true)
simpler reimplementation of the apply function above for peptides in ConsensusMaps.
void apply(std::vector< PeptideIdentification > &id) const
Calculates the FDR of one run from a concatenated sequence DB search.
FalseDiscoveryRate & operator=(const FalseDiscoveryRate &)
Not implemented.
void apply(std::vector< PeptideIdentification > &fwd_ids, std::vector< PeptideIdentification > &rev_ids) const
Calculates the FDR of two runs, a forward run and a decoy run on peptide level.
double applyEvaluateProteinIDs(const std::vector< ProteinIdentification > &ids, double pepCutoff=1.0, UInt fpCutoff=50, double diffWeight=0.2)
Calculate a linear combination of the area of the difference in estimated vs. empirical (TD) FDR and ...
IdentificationData::ScoreTypeRef applyToQueryMatches(IdentificationData &id_data, IdentificationData::ScoreTypeRef score_ref) const
Calculate FDR on the level of molecule-query matches (e.g. peptide-spectrum matches) for "general" id...
double rocN(const std::vector< PeptideIdentification > &ids, Size fp_cutoff) const
double rocN(const ConsensusMap &ids, Size fp_cutoff, const String &identifier) const
double rocN(const ConsensusMap &ids, Size fp_cutoff) const
void applyBasic(std::vector< PeptideIdentification > &ids)
simpler reimplementation of the apply function above.
void apply(std::vector< ProteinIdentification > &fwd_ids, std::vector< ProteinIdentification > &rev_ids) const
Calculates the FDR of two runs, a forward run and a decoy run on protein level.
FalseDiscoveryRate()
Default constructor.
double applyEvaluateProteinIDs(const ProteinIdentification &ids, double pepCutoff=1.0, UInt fpCutoff=50, double diffWeight=0.2)
double diffEstimatedEmpirical(const ScoreToTgtDecLabelPairs &scores_labels, double pepCutoff=1.0) const
calculates the area of the difference between estimated and empirical FDR on the fly....
void applyEstimated(std::vector< ProteinIdentification > &ids) const
Calculate the FDR based on PEPs or PPs (if present) and modifies the IDs inplace.
void calculateFDRBasic_(std::map< double, double > &scores_to_FDR, ScoreToTgtDecLabelPairs &scores_labels, bool qvalue, bool higher_score_better) const
FalseDiscoveryRate(const FalseDiscoveryRate &)
Not implemented.
void apply(std::vector< ProteinIdentification > &ids) const
Calculates the FDR of one run from a concatenated sequence DB search.
double rocN(const std::vector< PeptideIdentification > &ids, Size fp_cutoff, const String &identifier) const
double trapezoidal_area_xEqy(double exp1, double exp2, double act1, double act2) const
void calculateFDRs_(std::map< double, double > &score_to_fdr, std::vector< double > &target_scores, std::vector< double > &decoy_scores, bool q_value, bool higher_score_better) const
calculates the FDR, given two vectors of scores
double trapezoidal_area(double x1, double x2, double y1, double y2) const
calculates the trapezoidal area for a trapezoid with a flat horizontal base e.g. for an AUC
void applyBasic(ProteinIdentification &id, bool groups_too=true)
simpler reimplementation of the apply function above for proteins.
void handleQueryMatch_(IdentificationData::QueryMatchRef match_ref, IdentificationData::ScoreTypeRef score_ref, std::vector< double > &target_scores, std::vector< double > &decoy_scores, std::map< IdentificationData::IdentifiedMoleculeRef, bool > &molecule_to_decoy, std::map< IdentificationData::QueryMatchRef, double > &match_to_score) const
Helper function for applyToQueryMatches()
double applyEvaluateProteinIDs(ScoreToTgtDecLabelPairs &score_to_tgt_dec_fraction_pairs, double pepCutoff=1.0, UInt fpCutoff=50, double diffWeight=0.2)
double rocN(const ScoreToTgtDecLabelPairs &scores_labels, Size fpCutoff=50) const
void calculateEstimatedQVal_(std::map< double, double > &scores_to_FDR, ScoreToTgtDecLabelPairs &scores_labels, bool higher_score_better) const
Representation of spectrum identification results and associated data.
Definition: IdentificationData.h:90
Representation of a protein identification run.
Definition: ProteinIdentification.h:72
A more convenient string class.
Definition: String.h:61
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Wrapper that adds operator< to iterators, so they can be used as (part of) keys in maps/sets or multi...
Definition: MetaData.h:44
Definition: IDScoreGetterSetter.h:55