OpenMS  2.4.0
FeatureFinderIdentificationAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H
36 #define OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H
37 
45 
46 #include <vector>
47 #include <fstream>
48 #include <map>
49 
50 namespace OpenMS
51 {
52  class IsotopeDistribution;
53 
55  public DefaultParamHandler
56 {
57 public:
60 
/// Main entry point (declaration only; the definition is not part of this header).
/// The four ID vectors are taken by value, so the callee receives its own objects
/// (copied or moved from the caller's arguments).
/// NOTE(review): presumably @p peptides / @p proteins are the "internal" IDs and
/// @p peptides_ext / @p proteins_ext the "external" ones (cf. n_internal_peps_ /
/// n_external_peps_) - confirm against the implementation.
/// @param features non-const reference; presumably receives the resulting features - confirm
/// @param seeds optional argument; defaults to an empty FeatureMap
64  void run(
65  std::vector<PeptideIdentification> peptides,
66  std::vector<ProteinIdentification> proteins,
67  std::vector<PeptideIdentification> peptides_ext,
68  std::vector<ProteinIdentification> proteins_ext,
69  FeatureMap& features,
70  const FeatureMap& seeds = FeatureMap()
71  );
72 
73  void runOnCandidates(FeatureMap& features); // NOTE(review): declaration only; presumably works in place on an existing candidate feature map - confirm
74 
75  PeakMap& getMSData() { return ms_data_; } ///< mutable access to ms_data_ (input LC-MS data)
76  const PeakMap& getMSData() const { return ms_data_; } ///< read-only access to ms_data_
77 
78  PeakMap& getChromatograms() { return chrom_data_; } ///< mutable access to chrom_data_ (accumulated chromatograms)
79  const PeakMap& getChromatograms() const { return chrom_data_; } ///< read-only access to chrom_data_
80 
81  ProgressLogger& getProgressLogger() { return prog_log_; } ///< mutable access to prog_log_
82  const ProgressLogger& getProgressLogger() const { return prog_log_; } ///< read-only access to prog_log_
83 
84  TargetedExperiment& getLibrary() { return library_; } ///< mutable access to library_ (accumulated assays for peptides)
85  const TargetedExperiment& getLibrary() const { return library_; } ///< read-only access to library_
86 
87 protected:
90 
92  using RTMap = std::multimap<double, PeptideIdentification*>; ///< mapping: RT (not necessarily unique) -> pointer to peptide
94  using ChargeMap = std::map<Int, std::pair<RTMap, RTMap>>; ///< mapping: charge -> internal/external: (RT -> pointer to peptide)
96  using PeptideMap = std::map<AASequence, ChargeMap>; ///< mapping: sequence -> charge -> internal/external ID information
98  using PeptideRefRTMap = std::map<String, std::pair<RTMap, RTMap>>; ///< mapping: peptide ref. -> int./ext.: (RT -> pointer to peptide)
99 
101 
102  Size n_internal_peps_; ///< number of internal peptides
103  Size n_external_peps_; ///< number of external peptides
104 
105  double rt_window_; ///< RT window width
106  double mz_window_; ///< m/z window width
107  bool mz_window_ppm_; ///< m/z window width is given in PPM (not Da)?
108 
109  double mapping_tolerance_; ///< RT tolerance for mapping IDs to features
110 
111  double isotope_pmin_; ///< min. isotope probability for peptide assay
112  Size n_isotopes_; ///< number of isotopes for peptide assay
113 
114  double rt_quantile_;
115 
116  double peak_width_;
119 
121 
122  // SVM related parameters
127  Size svm_n_parts_; ///< number of partitions for SVM cross-validation
128  Size svm_n_samples_; ///< number of samples for SVM training
129 
130  // output file (before filtering)
132 
134 
135  void updateMembers_() override; // overrides a base-class virtual (DefaultParamHandler is the visible base); NOTE(review): presumably refreshes the parameter members above - confirm
136 
/// region in RT in which a peptide elutes
138  struct RTRegion
139  {
140  double start, end; ///< bounds of the region
141  ChargeMap ids; ///< internal/external peptide IDs (per charge) in this region
142  };
143 
146  {
/// Predicate for filtering features by overall quality: returns true when the
/// feature's overall quality equals exactly 0.0 (exact floating-point comparison,
/// kept as in the original).
/// Declared const: the call only reads its argument and touches no functor state,
/// so it can also be invoked through a const functor object or reference.
147  bool operator()(const Feature& feature) const
148  {
149  return feature.getOverallQuality() == 0.0;
150  }
151  } feature_filter_quality_;
152 
155  {
/// Predicate for filtering features by assigned peptides: returns true when the
/// feature has no peptide identifications attached.
/// Declared const: the call only reads its argument and touches no functor state,
/// so it can also be invoked through a const functor object or reference.
156  bool operator()(const Feature& feature) const
157  {
158  return feature.getPeptideIdentifications().empty();
159  }
160  } feature_filter_peptides_;
161 
164  {
166  const PeptideIdentification& p2)
167  {
// Ordering used below: sequence string of the first hit, then charge of the
// first hit, then RT of the identification.
// Only hit [0] is inspected, and getHits() is indexed without an emptiness
// check - both IDs must therefore carry at least one hit.
168  const String& seq1 = p1.getHits()[0].getSequence().toString();
169  const String& seq2 = p2.getHits()[0].getSequence().toString();
170  if (seq1 == seq2)
171  {
// same sequence: fall back to the charge state
172  Int charge1 = p1.getHits()[0].getCharge();
173  Int charge2 = p2.getHits()[0].getCharge();
174  if (charge1 == charge2)
175  {
// same sequence and charge: RT is the final tie-breaker
176  return p1.getRT() < p2.getRT();
177  }
178  return charge1 < charge2;
179  }
180  return seq1 < seq2;
181  }
182  } peptide_compare_;
183 
186  {
/// Comparison functor for features: orders by the "PeptideRef" meta value first
/// and, for equal references, by RT.
/// Declared const: the call only reads its two arguments and touches no functor
/// state, so it can also be invoked through a const functor object or reference.
/// @return true if @p f1 sorts before @p f2
187  bool operator()(const Feature& f1, const Feature& f2) const
188  {
189  const String& ref1 = f1.getMetaValue("PeptideRef");
190  const String& ref2 = f2.getMetaValue("PeptideRef");
191  if (ref1 == ref2)
192  {
// same peptide reference: RT decides
193  return f1.getRT() < f2.getRT();
194  }
195  return ref1 < ref2;
196  }
197  } feature_compare_;
198 
199  PeakMap ms_data_; ///< input LC-MS data
200  PeakMap chrom_data_; ///< accumulated chromatograms (XICs)
201  TargetedExperiment library_; ///< accumulated assays for peptides
202 
204  std::map<double, std::pair<Size, Size> > svm_probs_internal_; ///< SVM probability -> number of pos./neg. features (for FDR calculation)
// NOTE(review): std::multiset needs <set>, which is not among the visible
// #include lines - presumably pulled in transitively; confirm.
206  std::multiset<double> svm_probs_external_; ///< SVM probabilities for "external" features (for FDR calculation)
207  Size n_internal_features_; ///< internal feature counter (for FDR calculation)
208  Size n_external_features_; ///< external feature counter (for FDR calculation)
210  TransformationDescription trafo_external_; ///< transform. to external RT scale
211  std::map<String, double> isotope_probs_; ///< isotope probabilities of transitions
212  MRMFeatureFinderScoring feat_finder_; ///< OpenSWATH feature finder
213 
215 
// Internal helper declarations. Only the signatures are visible in this header;
// the definitions (and therefore the exact behavior) live elsewhere. The notes
// below are limited to what the signatures show; anything else is marked for review.
217  void generateTransitions_(const String& peptide_id, double mz, Int charge,
218  const IsotopeDistribution& iso_dist);
219 
220  void addPeptideRT_(TargetedExperiment::Peptide& peptide, double rt) const;
221 
// const member; both arguments are non-const references
223  void getRTRegions_(ChargeMap& peptide_data, std::vector<RTRegion>& rt_regions) const;
224 
225  void annotateFeaturesFinalizeAssay_(
226  FeatureMap& features,
227  std::map<Size, std::vector<PeptideIdentification*> >& feat_ids,
228  RTMap& rt_internal);
229 
231  void annotateFeatures_(FeatureMap& features, PeptideRefRTMap& ref_rt_map);
232 
233  void ensureConvexHulls_(Feature& feature);
234 
235  void postProcess_(FeatureMap& features, bool with_external_ids);
236 
// const member taking a const reference
238  void statistics_(const FeatureMap& features) const;
239 
240  void createAssayLibrary_(PeptideMap& peptide_map, PeptideRefRTMap& ref_rt_map);
241 
// `external` defaults to false. NOTE(review): a PeptideMap entry holds a pair
// of RTMaps (see ChargeMap); presumably this flag selects which one - confirm.
242  void addPeptideToMap_(PeptideIdentification& peptide,
243  PeptideMap& peptide_map,
244  bool external = false) const;
245 
// `note` defaults to the empty string
246  void checkNumObservations_(Size n_pos, Size n_neg, const String& note = "") const;
247 
248  void getUnbiasedSample_(const std::multimap<double, std::pair<Size, bool> >& valid_obs,
249  std::map<Size, Int>& training_labels);
250 
251  void getRandomSample_(std::map<Size, Int>& training_labels);
252 
253  void classifyFeatures_(FeatureMap& features);
254 
// the top-level `const` on quality_cutoff does not affect the function's signature
255  void filterFeaturesFinalizeAssay_(Feature& best_feature, double best_quality,
256  const double quality_cutoff);
257 
258  void filterFeatures_(FeatureMap& features, bool classified);
259 
// NOTE(review): presumably consumes svm_probs_internal_ / svm_probs_external_ and
// the n_*_features_ counters, which are commented "for FDR calculation" - confirm.
260  void calculateFDR_(FeatureMap& features);
261 
262  };
263 
264 } // namespace OpenMS
265 
266 #endif
267 
QualityType getOverallQuality() const
Non-mutable access to the overall quality.
TargetedExperiment & getLibrary()
Definition: FeatureFinderIdentificationAlgorithm.h:84
double svm_quality_cutoff
Definition: FeatureFinderIdentificationAlgorithm.h:126
std::multimap< double, PeptideIdentification * > RTMap
mapping: RT (not necessarily unique) -> pointer to peptide
Definition: FeatureFinderIdentificationAlgorithm.h:92
A more convenient string class.
Definition: String.h:58
bool operator()(const PeptideIdentification &p1, const PeptideIdentification &p2)
Definition: FeatureFinderIdentificationAlgorithm.h:165
TransformationDescription trafo_external_
TransformationDescription trafo_; // RT transformation (to range 0-1)
Definition: FeatureFinderIdentificationAlgorithm.h:210
double mapping_tolerance_
Definition: FeatureFinderIdentificationAlgorithm.h:109
ChargeMap ids
Definition: FeatureFinderIdentificationAlgorithm.h:141
Size n_internal_features_
Definition: FeatureFinderIdentificationAlgorithm.h:207
bool operator()(const Feature &f1, const Feature &f2)
Definition: FeatureFinderIdentificationAlgorithm.h:187
double start
Definition: FeatureFinderIdentificationAlgorithm.h:140
Helper struct for a collection of mass traces used in FeatureFinderAlgorithmPicked.
Definition: FeatureFinderAlgorithmPickedHelperStructs.h:109
A container for features.
Definition: FeatureMap.h:93
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
Definition: FeatureFinderIdentificationAlgorithm.h:54
The MRMFeatureFinder finds and scores peaks of transitions that co-elute.
Definition: MRMFeatureFinderScoring.h:93
const PeakMap & getChromatograms() const
Definition: FeatureFinderIdentificationAlgorithm.h:79
Definition: IsotopeDistribution.h:72
PeakMap chrom_data_
Definition: FeatureFinderIdentificationAlgorithm.h:200
FeatureFinderAlgorithmPickedHelperStructs::MassTraces MassTraces
Definition: FeatureFinderIdentificationAlgorithm.h:89
StringList svm_predictor_names_
Definition: FeatureFinderIdentificationAlgorithm.h:124
const std::vector< PeptideIdentification > & getPeptideIdentifications() const
returns a const reference to the PeptideIdentification vector
region in RT in which a peptide elutes:
Definition: FeatureFinderIdentificationAlgorithm.h:138
double min_peak_width_
Definition: FeatureFinderIdentificationAlgorithm.h:117
Size n_internal_peps_
Definition: FeatureFinderIdentificationAlgorithm.h:102
bool operator()(const Feature &feature)
Definition: FeatureFinderIdentificationAlgorithm.h:156
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
Size n_isotopes_
Definition: FeatureFinderIdentificationAlgorithm.h:112
String elution_model_
Definition: FeatureFinderIdentificationAlgorithm.h:120
Size n_external_features_
Definition: FeatureFinderIdentificationAlgorithm.h:208
predicate for filtering features by assigned peptides:
Definition: FeatureFinderIdentificationAlgorithm.h:154
const DataValue & getMetaValue(const String &name) const
Returns the value corresponding to a string (or DataValue::EMPTY if not found)
bool mz_window_ppm_
Definition: FeatureFinderIdentificationAlgorithm.h:107
PeakMap ms_data_
Definition: FeatureFinderIdentificationAlgorithm.h:199
std::map< AASequence, ChargeMap > PeptideMap
mapping: sequence -> charge -> internal/external ID information
Definition: FeatureFinderIdentificationAlgorithm.h:96
double rt_quantile_
Definition: FeatureFinderIdentificationAlgorithm.h:114
String svm_xval_out_
Definition: FeatureFinderIdentificationAlgorithm.h:125
const PeakMap & getMSData() const
Definition: FeatureFinderIdentificationAlgorithm.h:76
double isotope_pmin_
Definition: FeatureFinderIdentificationAlgorithm.h:111
double rt_window_
Definition: FeatureFinderIdentificationAlgorithm.h:105
double getRT() const
returns the RT of the MS2 spectrum where the identification occurred
PeakMap & getMSData()
Definition: FeatureFinderIdentificationAlgorithm.h:75
std::map< double, std::pair< Size, Size > > svm_probs_internal_
SVM probability -> number of pos./neg. features (for FDR calculation):
Definition: FeatureFinderIdentificationAlgorithm.h:204
MRMFeatureFinderScoring feat_finder_
Definition: FeatureFinderIdentificationAlgorithm.h:212
ProgressLogger prog_log_
Definition: FeatureFinderIdentificationAlgorithm.h:214
Size svm_n_parts_
Definition: FeatureFinderIdentificationAlgorithm.h:127
const TargetedExperiment & getLibrary() const
Definition: FeatureFinderIdentificationAlgorithm.h:85
std::map< String, double > isotope_probs_
Definition: FeatureFinderIdentificationAlgorithm.h:211
const ProgressLogger & getProgressLogger() const
Definition: FeatureFinderIdentificationAlgorithm.h:82
predicate for filtering features by overall quality:
Definition: FeatureFinderIdentificationAlgorithm.h:145
An LC-MS feature.
Definition: Feature.h:70
double peak_width_
Definition: FeatureFinderIdentificationAlgorithm.h:116
String candidates_out_
Definition: FeatureFinderIdentificationAlgorithm.h:131
ProgressLogger & getProgressLogger()
Definition: FeatureFinderIdentificationAlgorithm.h:81
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:73
TargetedExperiment library_
Definition: FeatureFinderIdentificationAlgorithm.h:201
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:208
PeakMap & getChromatograms()
Definition: FeatureFinderIdentificationAlgorithm.h:78
PeptideMap peptide_map_
Definition: FeatureFinderIdentificationAlgorithm.h:100
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
FeatureFinderAlgorithmPickedHelperStructs::MassTrace MassTrace
Definition: FeatureFinderIdentificationAlgorithm.h:88
comparison functor for (unassigned) peptide IDs
Definition: FeatureFinderIdentificationAlgorithm.h:163
comparison functor for features
Definition: FeatureFinderIdentificationAlgorithm.h:185
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
bool operator()(const Feature &feature)
Definition: FeatureFinderIdentificationAlgorithm.h:147
A description of a targeted experiment containing precursor and production ions.
Definition: TargetedExperiment.h:64
Size n_external_peps_
Definition: FeatureFinderIdentificationAlgorithm.h:103
double signal_to_noise_
Definition: FeatureFinderIdentificationAlgorithm.h:118
double svm_min_prob_
Definition: FeatureFinderIdentificationAlgorithm.h:123
Size svm_n_samples_
Definition: FeatureFinderIdentificationAlgorithm.h:128
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:61
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
std::map< Int, std::pair< RTMap, RTMap > > ChargeMap
mapping: charge -> internal/external: (RT -> pointer to peptide)
Definition: FeatureFinderIdentificationAlgorithm.h:94
std::map< String, std::pair< RTMap, RTMap > > PeptideRefRTMap
mapping: peptide ref. -> int./ext.: (RT -> pointer to peptide)
Definition: FeatureFinderIdentificationAlgorithm.h:98
int Int
Signed integer type.
Definition: Types.h:102
double mz_window_
Definition: FeatureFinderIdentificationAlgorithm.h:106
std::multiset< double > svm_probs_external_
SVM probabilities for "external" features (for FDR calculation):
Definition: FeatureFinderIdentificationAlgorithm.h:206
Size debug_level_
Definition: FeatureFinderIdentificationAlgorithm.h:133
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
Helper struct for mass traces used in FeatureFinderAlgorithmPicked.
Definition: FeatureFinderAlgorithmPickedHelperStructs.h:79
Represents a peptide (amino acid sequence)
Definition: TargetedExperimentHelper.h:451