OpenMS  2.5.0
ConfidenceScoring.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Hannes Roest, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <cmath> // for "exp"
38 #include <ctime> // for "time" (random number seed)
39 #include <limits> // for "infinity"
40 #include <boost/bimap.hpp>
41 #include <boost/bimap/multiset_of.hpp>
42 #include <boost/random/uniform_int.hpp>
43 #include <boost/random/mersenne_twister.hpp>
44 #include <boost/random/variate_generator.hpp>
45 
50 
54 
55 namespace OpenMS
56 {
57 
58  class OPENMS_DLLAPI ConfidenceScoring :
59  public ProgressLogger
60  {
61  public:
62 
64  explicit ConfidenceScoring(bool test_mode_ = false) :
65  generator_(), rand_gen_(generator_, boost::uniform_int<>())
66  {
67  if (!test_mode_) rand_gen_.engine().seed(time(nullptr)); // seed with current time
68  }
69 
70  virtual ~ConfidenceScoring() {}
71 
72  protected:
73 
75  typedef boost::bimap<double, boost::bimaps::multiset_of<double> >
77 
/// Binomial GLM
struct GLM_
{
  double intercept; ///< constant term of the linear predictor
  double rt_coef;   ///< coefficient applied to the squared RT difference
  double int_coef;  ///< coefficient applied to the intensity distance

  /**
    @brief Evaluates the model for one observation.

    The linear predictor is
    <tt>intercept + rt_coef * diff_rt^2 + int_coef * dist_int</tt>;
    the result is its logistic transform.

    @param diff_rt RT difference (enters the predictor squared)
    @param dist_int Intensity distance (enters the predictor linearly)
    @return <tt>1 / (1 + exp(-predictor))</tt>
  */
  double operator()(double diff_rt, double dist_int) const
  {
    const double lm = intercept + rt_coef * diff_rt * diff_rt +
                      int_coef * dist_int;
    return 1.0 / (1.0 + std::exp(-lm));
  }
} glm_;
92 
/// Helper for RT normalization (range 0-100)
struct RTNorm_
{
  double min_rt; ///< RT that is mapped to 0
  double max_rt; ///< RT that is mapped to 100

  /**
    @brief Linearly rescales @p rt so that min_rt -> 0 and max_rt -> 100.

    Values outside [min_rt, max_rt] are not clamped. There is no guard for
    <tt>max_rt == min_rt</tt>: the division then yields inf or NaN.

    @param rt Retention time to normalize
    @return <tt>(rt - min_rt) / (max_rt - min_rt) * 100</tt>
  */
  double operator()(double rt) const
  {
    return (rt - min_rt) / (max_rt - min_rt) * 100;
  }
} rt_norm_;
104 
106 
108 
110 
112 
114 
117 
118  boost::mt19937 generator_;
119 
121  boost::variate_generator<boost::mt19937&, boost::uniform_int<> > rand_gen_;
122 
124  void chooseDecoys_();
125 
127  double manhattanDist_(DoubleList x, DoubleList y);
128 
130  double getAssayRT_(const TargetedExperiment::Peptide& assay);
131 
134  void extractIntensities_(BimapType& intensity_map, Size n_transitions,
135  DoubleList& intensities);
136 
140  double scoreAssay_(const TargetedExperiment::Peptide& assay,
141  double feature_rt, DoubleList& feature_intensities,
142  const std::set<String>& transition_ids = std::set<String>());
143 
145  void scoreFeature_(Feature& feature);
146 
147  public:
148 
149  void initialize(const TargetedExperiment& library, const Size n_decoys, const Size n_transitions, const TransformationDescription& rt_trafo)
150  {
151  library_ = library;
152  n_decoys_ = n_decoys;
153  n_transitions_ = n_transitions;
154  rt_trafo_ = rt_trafo;
155  }
156 
157  void initializeGlm(double intercept, double rt_coef, double int_coef)
158  {
159  glm_.intercept = intercept;
160  glm_.rt_coef = rt_coef;
161  glm_.int_coef = int_coef;
162  }
163 
176  void scoreMap(FeatureMap & features)
177  {
178  // are there enough assays in the library?
179  Size n_assays = library_.getPeptides().size();
180  if (n_assays < 2)
181  {
182  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
183  "There need to be at least 2 assays in the library for ConfidenceScoring.");
184 
185  }
186  if (n_assays - 1 < n_decoys_)
187  {
188  OPENMS_LOG_WARN << "Warning: Parameter 'decoys' (" << n_decoys_
189  << ") is higher than the number of unrelated assays in the "
190  << "library (" << n_assays - 1 << "). "
191  << "Using all unrelated assays as decoys." << std::endl;
192  }
193  if (n_assays - 1 <= n_decoys_) n_decoys_ = 0; // use all available assays
194 
195  decoy_index_.resize(n_assays);
196  for (Size i = 0; i < n_assays; ++i) decoy_index_[i] = boost::numeric_cast<Int>(i);
197 
198  // build mapping between assays and transitions:
199  OPENMS_LOG_DEBUG << "Building transition map..." << std::endl;
200  for (Size i = 0; i < library_.getTransitions().size(); ++i)
201  {
202  const String& ref = library_.getTransitions()[i].getPeptideRef();
203  transition_map_[ref].push_back(boost::numeric_cast<Int>(i));
204  }
205  // find min./max. RT in the library:
206  OPENMS_LOG_DEBUG << "Determining retention time range..." << std::endl;
207  rt_norm_.min_rt = std::numeric_limits<double>::infinity();
208  rt_norm_.max_rt = -std::numeric_limits<double>::infinity();
209  for (std::vector<TargetedExperiment::Peptide>::const_iterator it =
210  library_.getPeptides().begin(); it != library_.getPeptides().end();
211  ++it)
212  {
213  double current_rt = getAssayRT_(*it);
214  if (current_rt == -1.0) continue; // indicates a missing value
215  rt_norm_.min_rt = std::min(rt_norm_.min_rt, current_rt);
216  rt_norm_.max_rt = std::max(rt_norm_.max_rt, current_rt);
217  }
218 
219  // log scoring progress:
220  OPENMS_LOG_DEBUG << "Scoring features..." << std::endl;
221  startProgress(0, features.size(), "scoring features");
222 
223  for (FeatureMap::Iterator feat_it = features.begin();
224  feat_it != features.end(); ++feat_it)
225  {
226  OPENMS_LOG_DEBUG << "Feature " << feat_it - features.begin() + 1
227  << " (ID '" << feat_it->getUniqueId() << "')"<< std::endl;
228  scoreFeature_(*feat_it);
229  setProgress(feat_it - features.begin());
230  }
231  endProgress();
232 
233  }
234 
235  };
236 
237 }
238 
OpenMS::ConfidenceScoring::rt_trafo_
TransformationDescription rt_trafo_
RT transformation to map measured RTs to assay RTs.
Definition: ConfidenceScoring.h:116
OpenMS::ConfidenceScoring::library_
TargetedExperiment library_
assay library
Definition: ConfidenceScoring.h:105
TransformationXMLFile.h
TargetedExperiment.h
OpenMS::TargetedExperiment::getPeptides
const std::vector< Peptide > & getPeptides() const
OpenMS::ConfidenceScoring::BimapType
boost::bimap< double, boost::bimaps::multiset_of< double > > BimapType
Mapping: Q3 m/z <-> transition intensity (maybe not unique!)
Definition: ConfidenceScoring.h:76
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
OpenMS::ConfidenceScoring::GLM_::rt_coef
double rt_coef
Definition: ConfidenceScoring.h:82
OpenMS::Exception::IllegalArgument
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
OpenMS::ConfidenceScoring::GLM_::int_coef
double int_coef
Definition: ConfidenceScoring.h:83
OpenMS::ConfidenceScoring::GLM_
Binomial GLM.
Definition: ConfidenceScoring.h:79
OpenMS::ConfidenceScoring::generator_
boost::mt19937 generator_
random number generation engine
Definition: ConfidenceScoring.h:118
OpenMS::ConfidenceScoring::ConfidenceScoring
ConfidenceScoring(bool test_mode_=false)
Constructor.
Definition: ConfidenceScoring.h:64
OpenMS::ConfidenceScoring::scoreMap
void scoreMap(FeatureMap &features)
Score a feature map -> make sure the class is properly initialized.
Definition: ConfidenceScoring.h:176
OpenMS::FeatureMap::Iterator
Base::iterator Iterator
Definition: FeatureMap.h:139
OpenMS::TransformationDescription
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:61
OpenMS::ConfidenceScoring::transition_map_
Map< String, IntList > transition_map_
assay (ID) -> transitions (indexes)
Definition: ConfidenceScoring.h:111
OpenMS::Map
Map class based on the STL map (containing several convenience functions)
Definition: Map.h:50
OpenMS::FeatureMap
A container for features.
Definition: FeatureMap.h:95
OpenMS::ConfidenceScoring::GLM_::operator()
double operator()(double diff_rt, double dist_int)
Definition: ConfidenceScoring.h:85
OpenMS::ConfidenceScoring::initialize
void initialize(const TargetedExperiment &library, const Size n_decoys, const Size n_transitions, const TransformationDescription &rt_trafo)
Definition: ConfidenceScoring.h:149
FeatureMap.h
OpenMS::ConfidenceScoring::rand_gen_
boost::variate_generator< boost::mt19937 &, boost::uniform_int<> > rand_gen_
Random number generator (must be initialized in the constructor's initializer list!)
Definition: ConfidenceScoring.h:121
OpenMS::Feature
An LC-MS feature.
Definition: Feature.h:70
Scoring.h
OpenMS::ProgressLogger
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
OpenMS::TargetedExperiment::getTransitions
const std::vector< ReactionMonitoringTransition > & getTransitions() const
returns the transition list
OpenMS::UniqueIdInterface::getUniqueId
UInt64 getUniqueId() const
Non-mutable access to unique id - returns the unique id.
Definition: UniqueIdInterface.h:105
OpenMS::ConfidenceScoring::decoy_index_
IntList decoy_index_
indexes of assays to use as decoys
Definition: ConfidenceScoring.h:107
FeatureXMLFile.h
OpenMS::ConfidenceScoring::RTNorm_::min_rt
double min_rt
Definition: ConfidenceScoring.h:96
OpenMS::ConfidenceScoring::RTNorm_::operator()
double operator()(double rt)
Definition: ConfidenceScoring.h:99
OpenMS::ConfidenceScoring::n_transitions_
Size n_transitions_
number of transitions to consider
Definition: ConfidenceScoring.h:113
OpenMS::ConfidenceScoring::RTNorm_::max_rt
double max_rt
Definition: ConfidenceScoring.h:97
TraMLFile.h
OpenMS::ConfidenceScoring::RTNorm_
Helper for RT normalization (range 0-100)
Definition: ConfidenceScoring.h:94
OPENMS_LOG_DEBUG
#define OPENMS_LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:470
OpenMS::ConfidenceScoring
Definition: ConfidenceScoring.h:58
OpenMS::ConfidenceScoring::n_decoys_
Size n_decoys_
number of decoys to use (per feature/true assay)
Definition: ConfidenceScoring.h:109
TransformationDescription.h
OpenMS::ConfidenceScoring::~ConfidenceScoring
virtual ~ConfidenceScoring()
Definition: ConfidenceScoring.h:70
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
OPENMS_LOG_WARN
#define OPENMS_LOG_WARN
Macro for logging a warning, i.e. a piece of information that should be read by the user.
Definition: LogStream.h:460
OpenMS::DoubleList
std::vector< double > DoubleList
Vector of double precision real types.
Definition: ListUtils.h:62
OpenMS::IntList
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
OpenMS::ConfidenceScoring::initializeGlm
void initializeGlm(double intercept, double rt_coef, double int_coef)
Definition: ConfidenceScoring.h:157
OpenMS::TargetedExperiment
A description of a targeted experiment containing precursor and product ions.
Definition: TargetedExperiment.h:64
OpenMS::ConfidenceScoring::GLM_::intercept
double intercept
Definition: ConfidenceScoring.h:81
OpenMS::TargetedExperimentHelper::Peptide
Represents a peptide (amino acid sequence)
Definition: TargetedExperimentHelper.h:370