Home  · Classes  · Annotated Classes  · Modules  · Members  · Namespaces  · Related Pages
FeatureFinderIdentificationAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2017.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H
36 #define OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H
37 
51 
52 #include <vector>
53 #include <fstream>
54 #include <map>
55 
56 namespace OpenMS
57 {
58 
60  public DefaultParamHandler
61 {
62 public:
65 
69  void run(
70  std::vector<PeptideIdentification> peptides,
71  std::vector<ProteinIdentification> proteins,
72  std::vector<PeptideIdentification> peptides_ext, // presumably the "external" IDs (cf. n_external_peps_) — TODO confirm
73  std::vector<ProteinIdentification> proteins_ext,
74  FeatureMap& features // non-const reference, so the callee may modify the caller's map
75  ); // NOTE(review): all four ID vectors are taken by value and are copied on every call — confirm this is intended
76 
77  void runOnCandidates(FeatureMap& features);
78 
79  PeakMap& getMSData() { return ms_data_; } // mutable access to the input LC-MS data
80  const PeakMap& getMSData() const { return ms_data_; } // read-only access to the input LC-MS data
81 
82  PeakMap& getChromatograms() { return chrom_data_; } // mutable access to the accumulated chromatograms (XICs)
83  const PeakMap& getChromatograms() const { return chrom_data_; } // read-only access to the accumulated chromatograms (XICs)
84 
85  ProgressLogger& getProgressLogger() { return prog_log_; } // mutable access to the progress logger member
86  const ProgressLogger& getProgressLogger() const { return prog_log_; } // read-only access to the progress logger member
87 
88  TargetedExperiment& getLibrary() { return library_; } // mutable access to the accumulated peptide assays
89  const TargetedExperiment& getLibrary() const { return library_; } // read-only access to the accumulated peptide assays
90 
91 protected:
94 
96  using RTMap = std::multimap<double, PeptideIdentification*>; ///< mapping: RT (not necessarily unique) -> pointer to peptide
98  using ChargeMap = std::map<Int, std::pair<RTMap, RTMap> >; ///< mapping: charge -> internal/external: (RT -> pointer to peptide)
100  using PeptideMap = std::map<AASequence, ChargeMap>; ///< mapping: sequence -> charge -> internal/external ID information
102  using PeptideRefRTMap = std::map<String, std::pair<RTMap, RTMap> >; ///< mapping: peptide ref. -> int./ext.: (RT -> pointer to peptide)
103 
105 
106  Size n_internal_peps_; ///< number of internal peptides
107  Size n_external_peps_; ///< number of external peptides
108 
109  double rt_window_; ///< RT window width
110  double mz_window_; ///< m/z window width
111  bool mz_window_ppm_; ///< m/z window width is given in PPM (not Da)?
112 
113  double mapping_tolerance_; ///< RT tolerance for mapping IDs to features
114 
115  double isotope_pmin_; ///< min. isotope probability for peptide assay
116  Size n_isotopes_; ///< number of isotopes for peptide assay
117 
118  double rt_quantile_;
119 
120  double peak_width_;
123 
125 
126  // SVM related parameters
131  Size svm_n_parts_; ///< number of partitions for SVM cross-validation
132  Size svm_n_samples_; ///< number of samples for SVM training
133 
134  // output file (before filtering)
136 
138 
139  void updateMembers_() override;
140 
142  struct RTRegion // region in RT in which a peptide elutes
143  {
144  double start, end; // presumably the lower/upper RT bounds of the region — TODO confirm units and inclusiveness
145  ChargeMap ids; ///< internal/external peptide IDs (per charge) in this region
146  };
147 
150  {
151  bool operator()(const Feature& feature) const // predicate for filtering features by overall quality; const because it holds no state
152  {
153  return feature.getOverallQuality() == 0.0; // matches only an overall quality of exactly 0.0 (exact comparison, no tolerance)
154  }
155  } feature_filter_quality_;
156 
159  {
160  bool operator()(const Feature& feature) const // predicate for filtering features by assigned peptides; const because it holds no state
161  {
162  return feature.getPeptideIdentifications().empty(); // true when the feature carries no peptide identifications
163  }
164  } feature_filter_peptides_;
165 
168  {
170  const PeptideIdentification& p2)
171  {
172  const String& seq1 = p1.getHits()[0].getSequence().toString();
173  const String& seq2 = p2.getHits()[0].getSequence().toString();
174  if (seq1 == seq2)
175  {
176  Int charge1 = p1.getHits()[0].getCharge();
177  Int charge2 = p2.getHits()[0].getCharge();
178  if (charge1 == charge2)
179  {
180  return p1.getRT() < p2.getRT();
181  }
182  return charge1 < charge2;
183  }
184  return seq1 < seq2;
185  }
186  } peptide_compare_;
187 
190  {
191  bool operator()(const Feature& f1, const Feature& f2) const // comparison functor for features; const because it holds no state
192  {
193  const String& ref1 = f1.getMetaValue("PeptideRef"); // primary sort key: the "PeptideRef" meta value
194  const String& ref2 = f2.getMetaValue("PeptideRef");
195  if (ref1 == ref2)
196  {
197  return f1.getRT() < f2.getRT(); // same peptide reference: order by RT
198  }
199  return ref1 < ref2; // otherwise order by the peptide reference string
200  }
201  } feature_compare_;
202 
203  PeakMap ms_data_; ///< input LC-MS data
204  PeakMap chrom_data_; ///< accumulated chromatograms (XICs)
205  TargetedExperiment library_; ///< accumulated assays for peptides
206 
208  std::map<double, std::pair<Size, Size> > svm_probs_internal_; ///< SVM probability -> number of pos./neg. features (for FDR calculation)
210  std::multiset<double> svm_probs_external_; ///< SVM probabilities for "external" features (for FDR calculation)
211  Size n_internal_features_; ///< internal feature counter (for FDR calculation)
212  Size n_external_features_; ///< external feature counter (for FDR calculation)
214  TransformationDescription trafo_external_; ///< transform. to external RT scale
215  std::map<String, double> isotope_probs_; ///< isotope probabilities of transitions
216  MRMFeatureFinderScoring feat_finder_; ///< OpenSWATH feature finder
217 
219 
221  void generateTransitions_(const String& peptide_id, double mz, Int charge,
222  const IsotopeDistribution& iso_dist);
223 
224  void addPeptideRT_(TargetedExperiment::Peptide& peptide, double rt) const;
225 
227  void getRTRegions_(ChargeMap& peptide_data, std::vector<RTRegion>& rt_regions) const;
228 
229  void annotateFeaturesFinalizeAssay_(
230  FeatureMap& features,
231  std::map<Size, std::vector<PeptideIdentification*> >& feat_ids,
232  RTMap& rt_internal);
233 
235  void annotateFeatures_(FeatureMap& features, PeptideRefRTMap& ref_rt_map);
236 
237  void ensureConvexHulls_(Feature& feature);
238 
239  void postProcess_(FeatureMap& features, bool with_external_ids);
240 
242  void statistics_(const FeatureMap& features) const;
243 
244  void createAssayLibrary_(PeptideMap& peptide_map, PeptideRefRTMap& ref_rt_map);
245 
246  void addPeptideToMap_(PeptideIdentification& peptide,
247  PeptideMap& peptide_map,
248  bool external = false) const;
249 
250  void checkNumObservations_(Size n_pos, Size n_neg, const String& note = "") const;
251 
252  void getUnbiasedSample_(const std::multimap<double, std::pair<Size, bool> >& valid_obs,
253  std::map<Size, Int>& training_labels);
254 
255  void getRandomSample_(std::map<Size, Int>& training_labels);
256 
257  void classifyFeatures_(FeatureMap& features);
258 
259  void filterFeaturesFinalizeAssay_(Feature& best_feature, double best_quality,
260  const double quality_cutoff);
261 
262  void filterFeatures_(FeatureMap& features, bool classified);
263 
264  void calculateFDR_(FeatureMap& features);
265 
266  };
267 
268 } // namespace OpenMS
269 
270 #endif
271 
QualityType getOverallQuality() const
Non-mutable access to the overall quality.
TargetedExperiment & getLibrary()
Definition: FeatureFinderIdentificationAlgorithm.h:88
double svm_quality_cutoff
Definition: FeatureFinderIdentificationAlgorithm.h:130
std::multimap< double, PeptideIdentification * > RTMap
mapping: RT (not necessarily unique) -> pointer to peptide
Definition: FeatureFinderIdentificationAlgorithm.h:96
A more convenient string class.
Definition: String.h:57
bool operator()(const PeptideIdentification &p1, const PeptideIdentification &p2)
Definition: FeatureFinderIdentificationAlgorithm.h:169
TransformationDescription trafo_external_
TransformationDescription trafo_; // RT transformation (to range 0-1)
Definition: FeatureFinderIdentificationAlgorithm.h:214
double mapping_tolerance_
Definition: FeatureFinderIdentificationAlgorithm.h:113
ChargeMap ids
Definition: FeatureFinderIdentificationAlgorithm.h:145
Size n_internal_features_
Definition: FeatureFinderIdentificationAlgorithm.h:211
bool operator()(const Feature &f1, const Feature &f2)
Definition: FeatureFinderIdentificationAlgorithm.h:191
double start
Definition: FeatureFinderIdentificationAlgorithm.h:144
Helper struct for a collection of mass traces used in FeatureFinderAlgorithmPicked.
Definition: FeatureFinderAlgorithmPickedHelperStructs.h:109
A container for features.
Definition: FeatureMap.h:93
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
Definition: FeatureFinderIdentificationAlgorithm.h:59
The MRMFeatureFinder finds and scores peaks of transitions that co-elute.
Definition: MRMFeatureFinderScoring.h:93
const PeakMap & getChromatograms() const
Definition: FeatureFinderIdentificationAlgorithm.h:83
Isotope distribution class.
Definition: IsotopeDistribution.h:61
PeakMap chrom_data_
Definition: FeatureFinderIdentificationAlgorithm.h:204
FeatureFinderAlgorithmPickedHelperStructs::MassTraces MassTraces
Definition: FeatureFinderIdentificationAlgorithm.h:93
StringList svm_predictor_names_
Definition: FeatureFinderIdentificationAlgorithm.h:128
const std::vector< PeptideIdentification > & getPeptideIdentifications() const
returns a const reference to the PeptideIdentification vector
region in RT in which a peptide elutes:
Definition: FeatureFinderIdentificationAlgorithm.h:142
double min_peak_width_
Definition: FeatureFinderIdentificationAlgorithm.h:121
Size n_internal_peps_
Definition: FeatureFinderIdentificationAlgorithm.h:106
bool operator()(const Feature &feature)
Definition: FeatureFinderIdentificationAlgorithm.h:160
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
Size n_isotopes_
Definition: FeatureFinderIdentificationAlgorithm.h:116
String elution_model_
Definition: FeatureFinderIdentificationAlgorithm.h:124
Size n_external_features_
Definition: FeatureFinderIdentificationAlgorithm.h:212
predicate for filtering features by assigned peptides:
Definition: FeatureFinderIdentificationAlgorithm.h:158
const DataValue & getMetaValue(const String &name) const
Returns the value corresponding to a string (or DataValue::EMPTY if not found)
bool mz_window_ppm_
Definition: FeatureFinderIdentificationAlgorithm.h:111
PeakMap ms_data_
Definition: FeatureFinderIdentificationAlgorithm.h:203
std::map< AASequence, ChargeMap > PeptideMap
mapping: sequence -> charge -> internal/external ID information
Definition: FeatureFinderIdentificationAlgorithm.h:100
double rt_quantile_
Definition: FeatureFinderIdentificationAlgorithm.h:118
String svm_xval_out_
Definition: FeatureFinderIdentificationAlgorithm.h:129
const PeakMap & getMSData() const
Definition: FeatureFinderIdentificationAlgorithm.h:80
double isotope_pmin_
Definition: FeatureFinderIdentificationAlgorithm.h:115
double rt_window_
Definition: FeatureFinderIdentificationAlgorithm.h:109
double getRT() const
returns the RT of the MS2 spectrum where the identification occurred
PeakMap & getMSData()
Definition: FeatureFinderIdentificationAlgorithm.h:79
std::map< double, std::pair< Size, Size > > svm_probs_internal_
SVM probability -> number of pos./neg. features (for FDR calculation):
Definition: FeatureFinderIdentificationAlgorithm.h:208
MRMFeatureFinderScoring feat_finder_
Definition: FeatureFinderIdentificationAlgorithm.h:216
ProgressLogger prog_log_
Definition: FeatureFinderIdentificationAlgorithm.h:218
Size svm_n_parts_
Definition: FeatureFinderIdentificationAlgorithm.h:131
const TargetedExperiment & getLibrary() const
Definition: FeatureFinderIdentificationAlgorithm.h:89
std::map< String, double > isotope_probs_
Definition: FeatureFinderIdentificationAlgorithm.h:215
const ProgressLogger & getProgressLogger() const
Definition: FeatureFinderIdentificationAlgorithm.h:86
predicate for filtering features by overall quality:
Definition: FeatureFinderIdentificationAlgorithm.h:149
An LC-MS feature.
Definition: Feature.h:69
double peak_width_
Definition: FeatureFinderIdentificationAlgorithm.h:120
String candidates_out_
Definition: FeatureFinderIdentificationAlgorithm.h:135
ProgressLogger & getProgressLogger()
Definition: FeatureFinderIdentificationAlgorithm.h:85
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:73
TargetedExperiment library_
Definition: FeatureFinderIdentificationAlgorithm.h:205
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:208
PeakMap & getChromatograms()
Definition: FeatureFinderIdentificationAlgorithm.h:82
PeptideMap peptide_map_
Definition: FeatureFinderIdentificationAlgorithm.h:104
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
FeatureFinderAlgorithmPickedHelperStructs::MassTrace MassTrace
Definition: FeatureFinderIdentificationAlgorithm.h:92
comparison functor for (unassigned) peptide IDs
Definition: FeatureFinderIdentificationAlgorithm.h:167
comparison functor for features
Definition: FeatureFinderIdentificationAlgorithm.h:189
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
bool operator()(const Feature &feature)
Definition: FeatureFinderIdentificationAlgorithm.h:151
A description of a targeted experiment containing precursor and product ions.
Definition: TargetedExperiment.h:61
Size n_external_peps_
Definition: FeatureFinderIdentificationAlgorithm.h:107
double signal_to_noise_
Definition: FeatureFinderIdentificationAlgorithm.h:122
double svm_min_prob_
Definition: FeatureFinderIdentificationAlgorithm.h:127
Size svm_n_samples_
Definition: FeatureFinderIdentificationAlgorithm.h:132
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:60
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
std::map< Int, std::pair< RTMap, RTMap > > ChargeMap
mapping: charge -> internal/external: (RT -> pointer to peptide)
Definition: FeatureFinderIdentificationAlgorithm.h:98
std::map< String, std::pair< RTMap, RTMap > > PeptideRefRTMap
mapping: peptide ref. -> int./ext.: (RT -> pointer to peptide)
Definition: FeatureFinderIdentificationAlgorithm.h:102
int Int
Signed integer type.
Definition: Types.h:102
double mz_window_
Definition: FeatureFinderIdentificationAlgorithm.h:110
std::multiset< double > svm_probs_external_
SVM probabilities for "external" features (for FDR calculation):
Definition: FeatureFinderIdentificationAlgorithm.h:210
Size debug_level_
Definition: FeatureFinderIdentificationAlgorithm.h:137
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
Helper struct for mass traces used in FeatureFinderAlgorithmPicked.
Definition: FeatureFinderAlgorithmPickedHelperStructs.h:79
Definition: TargetedExperimentHelper.h:429

OpenMS / TOPP release 2.3.0 Documentation generated on Wed Apr 18 2018 19:29:04 using doxygen 1.8.14