OpenMS  2.4.0
PosteriorErrorProbabilityModel.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: David Wojnar $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
41 
42 #include <vector>
43 #include <map>
44 
45 namespace OpenMS
46 {
47  class String;
48  class TextFile;
49  class PeptideIdentification;
50  class ProteinIdentification;
51  class PeptideHit;
52  namespace Math
53  {
54 
55 
67  class OPENMS_DLLAPI PosteriorErrorProbabilityModel :
68  public DefaultParamHandler
69  {
70 public:
71 
74 
77 
89  static std::map<String, std::vector<std::vector<double>>> extractAndTransformScores(
90  const std::vector<ProteinIdentification> & protein_ids,
91  const std::vector<PeptideIdentification> & peptide_ids,
92  const bool split_charge,
93  const bool top_hits_only,
94  const bool target_decoy_available,
95  const double fdr_for_targets_smaller);
96 
110  static void updateScores(
111  const PosteriorErrorProbabilityModel & PEP_model,
112  const String & search_engine,
113  const Int charge,
114  const bool prob_correct,
115  const bool split_charge,
116  std::vector<ProteinIdentification> & protein_ids,
117  std::vector<PeptideIdentification> & peptide_ids,
118  bool & unable_to_fit_data,
119  bool & data_might_not_be_well_fit);
120 
127  bool fit(std::vector<double> & search_engine_scores);
128 
136  bool fit(std::vector<double> & search_engine_scores, std::vector<double> & probabilities);
137 
139  void fillDensities(std::vector<double> & x_scores, std::vector<double> & incorrect_density, std::vector<double> & correct_density);
141  double computeMaxLikelihood(std::vector<double> & incorrect_density, std::vector<double> & correct_density);
143  double one_minus_sum_post(std::vector<double> & incorrect_density, std::vector<double> & correct_density);
145  double sum_post(std::vector<double> & incorrect_density, std::vector<double> & correct_density);
147  double sum_pos_x0(std::vector<double> & x_scores, std::vector<double> & incorrect_density, std::vector<double> & correct_density);
149  double sum_neg_x0(std::vector<double> & x_scores, std::vector<double> & incorrect_density, std::vector<double> & correct_density);
151  double sum_pos_sigma(std::vector<double> & x_scores, std::vector<double> & incorrect_density, std::vector<double> & correct_density, double positive_mean);
153  double sum_neg_sigma(std::vector<double> & x_scores, std::vector<double> & incorrect_density, std::vector<double> & correct_density, double positive_mean);
154 
155 
158  {
159  return correctly_assigned_fit_param_;
160  }
161 
164  {
165  return incorrectly_assigned_fit_param_;
166  }
167 
169  double getNegativePrior() const
170  {
171  return negative_prior_;
172  }
173 
175  static double getGumbel_(double x, const GaussFitter::GaussFitResult & params)
176  {
177  double z = exp((params.x0 - x) / params.sigma);
178  return (z * exp(-1 * z)) / params.sigma;
179  }
180 
185  double computeProbability(double score) const;
186 
188  TextFile initPlots(std::vector<double> & x_scores);
189 
191  const String getGumbelGnuplotFormula(const GaussFitter::GaussFitResult & params) const;
192 
194  const String getGaussGnuplotFormula(const GaussFitter::GaussFitResult & params) const;
195 
197  const String getBothGnuplotFormula(const GaussFitter::GaussFitResult & incorrect, const GaussFitter::GaussFitResult & correct) const;
198 
200  void plotTargetDecoyEstimation(std::vector<double> & target, std::vector<double> & decoy);
201 
203  inline double getSmallestScore()
204  {
205  return smallest_score_;
206  }
207 
209  void tryGnuplot(const String& gp_file);
210 
211 private:
213  static double transformScore_(const String & engine, const PeptideHit & hit);
214 
232  const String (PosteriorErrorProbabilityModel::* getNegativeGnuplotFormula_)(const GaussFitter::GaussFitResult & params) const;
234  const String (PosteriorErrorProbabilityModel::* getPositiveGnuplotFormula_)(const GaussFitter::GaussFitResult & params) const;
235  };
236  }
237 }
238 
A more convenient string class.
Definition: String.h:57
double max_incorrectly_
peak of the incorrectly assigned sequences distribution
Definition: PosteriorErrorProbabilityModel.h:226
double x0
parameter x0 of Gaussian distribution (center position)
Definition: GaussFitter.h:75
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
double sigma
parameter sigma of Gaussian distribution (width)
Definition: GaussFitter.h:78
GaussFitter::GaussFitResult incorrectly_assigned_fit_param_
stores parameters for incorrectly assigned sequences. If gumbel fit was used, A can be ignored...
Definition: PosteriorErrorProbabilityModel.h:220
double getNegativePrior() const
returns the estimated negative prior probability.
Definition: PosteriorErrorProbabilityModel.h:169
double getSmallestScore()
returns the smallest score used in the last fit
Definition: PosteriorErrorProbabilityModel.h:203
double max_correctly_
peak of the gauss distribution (correctly assigned sequences)
Definition: PosteriorErrorProbabilityModel.h:228
static double getGumbel_(double x, const GaussFitter::GaussFitResult &params)
computes the gumbel density at position x with parameters params.
Definition: PosteriorErrorProbabilityModel.h:175
Representation of a peptide hit.
Definition: PeptideHit.h:54
GaussFitter::GaussFitResult getIncorrectlyAssignedFitResult() const
returns estimated parameters for incorrectly assigned sequences. Fit should be used before...
Definition: PosteriorErrorProbabilityModel.h:163
GaussFitter::GaussFitResult getCorrectlyAssignedFitResult() const
returns estimated parameters for correctly assigned sequences. Fit should be used before...
Definition: PosteriorErrorProbabilityModel.h:157
GaussFitter::GaussFitResult correctly_assigned_fit_param_
stores gauss parameters
Definition: PosteriorErrorProbabilityModel.h:222
struct of parameters of a Gaussian distribution
Definition: GaussFitter.h:63
Implements a mixture model of the inverse gumbel and the gauss distribution or a gaussian mixture...
Definition: PosteriorErrorProbabilityModel.h:67
double negative_prior_
stores final prior probability for negative peptides
Definition: PosteriorErrorProbabilityModel.h:224
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
double smallest_score_
smallest score which was used for fitting the model
Definition: PosteriorErrorProbabilityModel.h:230
int Int
Signed integer type.
Definition: Types.h:102
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46