OpenMS
Loading...
Searching...
No Matches
FeatureFinderIdentificationAlgorithm.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: Timo Sachsenberg $
6// $Authors: Hendrik Weisser $
7// --------------------------------------------------------------------------
8
9#pragma once
10
20
21#include <vector>
22#include <fstream>
23#include <map>
24
25namespace OpenMS {
80{
81public:
84
99 void run(
101 const std::vector<ProteinIdentification>& proteins,
102 PeptideIdentificationList peptides_ext,
103 std::vector<ProteinIdentification> proteins_ext,
104 FeatureMap& features,
105 const FeatureMap& seeds = FeatureMap(),
106 const std::string& spectra_file = ""
107 );
108
111
/// Read-only access to the cached MS1 data.
121 const PeakMap& getMSData() const;
122
/// Copy the MS data into the algorithm instance; useful from pyOpenMS where moving is awkward.
127 void setMSData(const PeakMap& ms_data); // for pyOpenMS
/// Move the MS data into the algorithm instance.
132 void setMSData(PeakMap&& ms_data); // moves peak data and saves the copy. Note that getMSData() will give back a processed/modified version.
133
/// Read-only access to the accumulated extracted chromatograms.
143 const PeakMap& getChromatograms() const;
144
155
167
168protected:
169
172
/// mapping: RT (not necessarily unique) -> pointer to peptide
174 typedef std::multimap<double, PeptideIdentification*> RTMap;
/// mapping: charge -> internal/external: (RT -> pointer to peptide)
176 typedef std::map<Int, std::pair<RTMap, RTMap> > ChargeMap;
/// mapping: sequence -> charge -> internal/external ID information
178 typedef std::map<AASequence, ChargeMap> PeptideMap;
/// mapping: peptide ref. -> int./ext.: (RT -> pointer to peptide)
180 typedef std::map<std::string, std::pair<RTMap, RTMap> > PeptideRefRTMap;
181
183
186
/// RT window width
188 double rt_window_;
/// m/z window width
189 double mz_window_;
191
193
196
198
202
203 std::string elution_model_;
204
205 // SVM related parameters
208 std::string svm_xval_out_;
212
213 // output file (before filtering)
214 std::string candidates_out_;
215
217
218 void updateMembers_() override;
219
221 struct RTRegion
222 {
223 double start, end;
225 };
226
/// Ion mobility statistics for a peptide in a specific RT region and charge state.
/// All three fields are initialised to -1.0 (presumably a "not available" marker — TODO confirm against the implementation).
238 struct IMStats
239 {
240 double median = -1.0;
241 double min = -1.0;
242 double max = -1.0;
243 };
244
247 {
/// Predicate for filtering features by overall quality:
/// returns true when the feature's overall quality compares equal to exactly 0.0.
248 bool operator()(const Feature& feature)
249 {
250 return feature.getOverallQuality() == 0.0;
251 }
252 } feature_filter_quality_;
253
256 {
/// Predicate for filtering features by assigned peptides:
/// returns true when the feature's list of peptide identifications is empty.
257 bool operator()(const Feature& feature)
258 {
259 return feature.getPeptideIdentifications().empty();
260 }
261 } feature_filter_peptides_;
262
265 {
267 const PeptideIdentification& p2)
268 {
269 const std::string& seq1 = p1.getHits()[0].getSequence().toString();
270 const std::string& seq2 = p2.getHits()[0].getSequence().toString();
271 if (seq1 == seq2)
272 {
273 Int charge1 = p1.getHits()[0].getCharge();
274 Int charge2 = p2.getHits()[0].getCharge();
275 if (charge1 == charge2)
276 {
277 return p1.getRT() < p2.getRT();
278 }
279 return charge1 < charge2;
280 }
281 return seq1 < seq2;
282 }
283 } peptide_compare_;
284
287 {
/// Comparison functor for features ("less than"):
/// orders by the string form of the "PeptideRef" meta value; features with an
/// identical reference are ordered by RT.
288 bool operator()(const Feature& f1, const Feature& f2)
289 {
290 const std::string ref1 = StringUtils::toStr(f1.getMetaValue("PeptideRef"));
291 const std::string ref2 = StringUtils::toStr(f2.getMetaValue("PeptideRef"));
292 if (ref1 == ref2)
293 {
// same peptide reference: fall back to retention time
294 return f1.getRT() < f2.getRT();
295 }
296 return ref1 < ref2;
297 }
298 } feature_compare_;
299
304
306 double add_mass_offset_peptides_{0.0};
307 double seed_apex_rt_tolerance_{5.0};
311
312 const double seed_rt_window_ = 60.0;
313
/// SVM probability -> number of pos./neg. features (for FDR calculation)
315 std::map<double, std::pair<Size, Size> > svm_probs_internal_;
/// SVM probabilities for "external" features (for FDR calculation)
317 std::multiset<double> svm_probs_external_;
/// NOTE(review): presumably isotope probabilities keyed by transition name — confirm against the implementation
321 std::map<std::string, double> isotope_probs_;
/// Ion mobility statistics per peptide reference (peptide sequence/charge:region)
329 std::map<std::string, IMStats> im_stats_;
330
339
342
344
/// generate transitions (isotopic traces) for a peptide ion and add them to the library
346 void generateTransitions_(const std::string& peptide_id, double mz, Int charge,
347 const IsotopeDistribution& iso_dist);
348
349 void addPeptideRT_(TargetedExperiment::Peptide& peptide, double rt) const;
350
/// get regions in which peptide elutes (ideally only one) by clustering RT elution times
352 void getRTRegions_(ChargeMap& peptide_data, std::vector<RTRegion>& rt_regions, bool clear_IDs = true) const;
353
377
391
393 FeatureMap& features,
394 std::map<Size, std::vector<PeptideIdentification*> >& feat_ids,
395 RTMap& rt_internal);
396
/// annotate identified features with m/z, isotope probabilities, etc.
398 void annotateFeatures_(FeatureMap& features, PeptideRefRTMap& ref_rt_map);
399
400 void ensureConvexHulls_(Feature& feature) const;
401
402 void postProcess_(FeatureMap& features, bool with_external_ids);
403
407 double calculateRTWindow_(double rt_uncertainty) const;
409
/// Helper function to check if a peptide hit is a seed pseudo-ID.
411 static bool isSeedPseudoHit_(const PeptideHit& hit);
412
/// Calculate RT bounds with optional tolerance expansion.
414 std::pair<double, double> calculateRTBounds_(double rt_min, double rt_max) const;
415
/// some statistics on detected features
417 void statistics_(const FeatureMap& features) const;
418
422 void createAssayLibrary_(const PeptideMap::iterator& begin, const PeptideMap::iterator& end, PeptideRefRTMap& ref_rt_map, bool clear_IDs = true);
423
428 PeptideMap& peptide_map,
429 bool external = false);
430
431 void filterFeatures_(FeatureMap& features, bool classified);
432
437 const std::vector<ProteinIdentification>& proteins,
438 PeptideIdentificationList peptides_ext,
439 std::vector<ProteinIdentification> proteins_ext,
440 FeatureMap& features,
441 const FeatureMap& seeds,
442 const std::string& spectra_file);
443
444 // seeds for untargeted extraction
446
447 // quant. decoys
449
/**
  @brief Splits the range [range_from, range_to) into consecutive batches.

  The range is cut into floor(total / batch_size) batches of @p batch_size
  elements each; the end of the last batch is then moved to @p range_to, so
  any remainder is absorbed by the last batch (which may therefore hold more
  than @p batch_size elements).

  If no full batch fits (empty range, range shorter than @p batch_size) or if
  @p batch_size is not positive, a single batch covering the whole range is
  returned. The result is never empty.

  @param range_from Iterator to the first element
  @param range_to   Past-the-end iterator
  @param batch_size Desired number of elements per batch
  @return Vector of (begin, end) iterator pairs, in order
*/
template <typename It>
std::vector<std::pair<It,It>>
chunk_(It range_from, It range_to, const std::ptrdiff_t batch_size)
{
  using diff_t = std::ptrdiff_t;

  const diff_t total = std::distance(range_from, range_to);

  // Guard against division by zero (batch_size == 0) and against a negative
  // batch count (batch_size < 0): both degrade to "one batch for everything".
  const diff_t num = (batch_size > 0) ? total / batch_size : 0;

  std::vector<std::pair<It,It>> chunks;
  chunks.reserve(num > 0 ? static_cast<std::size_t>(num) : 1u);

  It batch_end = range_from;
  for (diff_t i = 0; i < num; ++i)
  {
    const It batch_start = batch_end;
    std::advance(batch_end, batch_size);
    chunks.emplace_back(batch_start, batch_end);
  }

  if (chunks.empty())
  {
    // not even one full batch: process the whole range at once
    chunks.emplace_back(range_from, range_to);
  }
  else
  {
    // the last batch always ends at 'range_to' (absorbs the remainder)
    chunks.back().second = range_to;
  }

  return chunks;
}
492}; // class FeatureFinderIdentificationAlgorithm
493} // namespace OpenMS
494
const PeptideIdentificationList & getPeptideIdentifications() const
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
bool empty() const noexcept
Definition ExposedVector.h:140
ID-guided MS1 feature finder; the algorithm behind FeatureFinderIdentification.
Definition FeatureFinderIdentificationAlgorithm.h:80
const TargetedExperiment & getLibrary() const
Read-only access to the assay library used / produced by the last run.
FeatureFinderAlgorithmPickedHelperStructs::MassTraces MassTraces
Definition FeatureFinderIdentificationAlgorithm.h:171
void postProcess_(FeatureMap &features, bool with_external_ids)
double rt_window_
RT window width.
Definition FeatureFinderIdentificationAlgorithm.h:188
void getRTRegions_(ChargeMap &peptide_data, std::vector< RTRegion > &rt_regions, bool clear_IDs=true) const
get regions in which peptide elutes (ideally only one) by clustering RT elution times
std::map< std::string, double > isotope_probs_
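Isotope probabilities (string key -> probability). Note: the text extracted here is a leftover commented-out member, not a description: "TransformationDescription trafo_; // RT transformation (to range 0-1)".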
Definition FeatureFinderIdentificationAlgorithm.h:321
void runOnCandidates(FeatureMap &features)
Re-score / filter an existing candidate FeatureMap in place using the configured classifier and quali...
std::map< AASequence, ChargeMap > PeptideMap
mapping: sequence -> charge -> internal/external ID information
Definition FeatureFinderIdentificationAlgorithm.h:178
void validateSVMParameters_() const
Helper functions for run()
void createAssayLibrary_(const PeptideMap::iterator &begin, const PeptideMap::iterator &end, PeptideRefRTMap &ref_rt_map, bool clear_IDs=true)
static bool isSeedPseudoHit_(const PeptideHit &hit)
Helper function to check if a peptide hit is a seed pseudo-ID.
IMStats global_im_stats_
Global ion mobility statistics from all peptide identifications.
Definition FeatureFinderIdentificationAlgorithm.h:338
PeptideMap peptide_map_
Definition FeatureFinderIdentificationAlgorithm.h:182
std::string candidates_out_
Definition FeatureFinderIdentificationAlgorithm.h:214
double end
Definition FeatureFinderIdentificationAlgorithm.h:223
Internal::FFIDAlgoExternalIDHandler external_id_handler_
Handler for external peptide IDs.
Definition FeatureFinderIdentificationAlgorithm.h:341
MRMFeatureFinderScoring feat_finder_
OpenSWATH feature finder.
Definition FeatureFinderIdentificationAlgorithm.h:340
FeatureFinderIdentificationAlgorithm()
Default constructor; installs the FFid parameters (see class docs)
void generateTransitions_(const std::string &peptide_id, double mz, Int charge, const IsotopeDistribution &iso_dist)
generate transitions (isotopic traces) for a peptide ion and add them to the library:
void filterFeatures_(FeatureMap &features, bool classified)
Size n_external_peps_
number of external peptides
Definition FeatureFinderIdentificationAlgorithm.h:185
double signal_to_noise_
Definition FeatureFinderIdentificationAlgorithm.h:201
std::string svm_xval_out_
Definition FeatureFinderIdentificationAlgorithm.h:208
bool quantify_decoys_
Definition FeatureFinderIdentificationAlgorithm.h:305
TargetedExperiment library_
assays for peptides (cleared per chunk during processing)
Definition FeatureFinderIdentificationAlgorithm.h:302
void annotateFeaturesFinalizeAssay_(FeatureMap &features, std::map< Size, std::vector< PeptideIdentification * > > &feat_ids, RTMap &rt_internal)
double min_peak_width_
Definition FeatureFinderIdentificationAlgorithm.h:200
const PeakMap & getMSData() const
Read-only access to the cached MS1 data.
StringList svm_predictor_names_
Definition FeatureFinderIdentificationAlgorithm.h:207
ProgressLogger & getProgressLogger()
Mutable access to the progress logger used by the algorithm.
const ProgressLogger & getProgressLogger() const
Read-only access to the progress logger.
void setMSData(const PeakMap &ms_data)
Copy the MS data into the algorithm instance; useful from pyOpenMS where moving is awkward.
std::map< Int, std::pair< RTMap, RTMap > > ChargeMap
mapping: charge -> internal/external: (RT -> pointer to peptide)
Definition FeatureFinderIdentificationAlgorithm.h:176
Size svm_n_parts_
number of partitions for SVM cross-validation
Definition FeatureFinderIdentificationAlgorithm.h:210
Size svm_n_samples_
number of samples for SVM training
Definition FeatureFinderIdentificationAlgorithm.h:211
Size n_internal_peps_
number of internal peptides
Definition FeatureFinderIdentificationAlgorithm.h:184
Size addSeeds_(PeptideIdentificationList &peptides, const FeatureMap &seeds)
Size n_internal_features_
internal feature counter (for FDR calculation)
Definition FeatureFinderIdentificationAlgorithm.h:318
ProgressLogger prog_log_
Definition FeatureFinderIdentificationAlgorithm.h:343
PeakMap ms_data_
input LC-MS data
Definition FeatureFinderIdentificationAlgorithm.h:300
void statistics_(const FeatureMap &features) const
some statistics on detected features
void setMSData(PeakMap &&ms_data)
Move the MS data into the algorithm instance (no copy).
std::vector< std::pair< It, It > > chunk_(It range_from, It range_to, const std::ptrdiff_t batch_size)
Definition FeatureFinderIdentificationAlgorithm.h:454
const PeakMap & getChromatograms() const
Read-only access to the accumulated extracted chromatograms.
void calculateGlobalIMStats_()
Calculate global IM statistics from MS data and peptide identifications.
Size batch_size_
nr of peptides to use at the same time during chromatogram extraction
Definition FeatureFinderIdentificationAlgorithm.h:187
void run(PeptideIdentificationList peptides, const std::vector< ProteinIdentification > &proteins, PeptideIdentificationList peptides_ext, std::vector< ProteinIdentification > proteins_ext, FeatureMap &features, const FeatureMap &seeds=FeatureMap(), const std::string &spectra_file="")
Run the FFid pipeline; for FAIMS data this dispatches one run per CV group and merges results.
double mz_window_
m/z window width
Definition FeatureFinderIdentificationAlgorithm.h:189
double svm_min_prob_
Definition FeatureFinderIdentificationAlgorithm.h:206
double peak_width_
Definition FeatureFinderIdentificationAlgorithm.h:199
Size n_external_features_
Definition FeatureFinderIdentificationAlgorithm.h:319
double calculateRTWindow_(double rt_uncertainty) const
FeatureFinderAlgorithmPickedHelperStructs::MassTrace MassTrace
Definition FeatureFinderIdentificationAlgorithm.h:170
PeakMap & getMSData()
Mutable access to the cached MS1 data.
Size n_isotopes_
number of isotopes for peptide assay
Definition FeatureFinderIdentificationAlgorithm.h:195
double mapping_tolerance_
RT tolerance for mapping IDs to features.
Definition FeatureFinderIdentificationAlgorithm.h:192
std::string elution_model_
Definition FeatureFinderIdentificationAlgorithm.h:203
Size debug_level_
Definition FeatureFinderIdentificationAlgorithm.h:216
double rt_quantile_
Definition FeatureFinderIdentificationAlgorithm.h:197
std::multiset< double > svm_probs_external_
SVM probabilities for "external" features (for FDR calculation):
Definition FeatureFinderIdentificationAlgorithm.h:317
ChargeMap ids
internal/external peptide IDs (per charge) in this region
Definition FeatureFinderIdentificationAlgorithm.h:224
std::pair< double, double > calculateRTBounds_(double rt_min, double rt_max) const
Calculate RT bounds with optional tolerance expansion.
void removeSeedPseudoIDs_(FeatureMap &features)
Size addOffsetPeptides_(PeptideIdentificationList &peptides, double offset)
bool mz_window_ppm_
m/z window width is given in PPM (not Da)?
Definition FeatureFinderIdentificationAlgorithm.h:190
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
std::map< std::string, std::pair< RTMap, RTMap > > PeptideRefRTMap
mapping: peptide ref. -> int./ext.: (RT -> pointer to peptide)
Definition FeatureFinderIdentificationAlgorithm.h:180
TargetedExperiment & getLibrary()
Mutable access to the assay library used / produced by the last run.
TargetedExperiment output_library_
accumulated assays for output (populated from library_ before clearing)
Definition FeatureFinderIdentificationAlgorithm.h:303
bool use_psm_cutoff_
Definition FeatureFinderIdentificationAlgorithm.h:308
std::map< std::string, IMStats > im_stats_
Ion mobility statistics per peptide reference (peptide sequence/charge:region)
Definition FeatureFinderIdentificationAlgorithm.h:329
IMStats getRTRegionIMStats_(const RTRegion &r)
Calculate ion mobility statistics for peptide identifications in an RT region.
void addPeptideRT_(TargetedExperiment::Peptide &peptide, double rt) const
PeakMap chrom_data_
accumulated chromatograms (XICs)
Definition FeatureFinderIdentificationAlgorithm.h:301
void ensureConvexHulls_(Feature &feature) const
std::multimap< double, PeptideIdentification * > RTMap
mapping: RT (not necessarily unique) -> pointer to peptide
Definition FeatureFinderIdentificationAlgorithm.h:174
PeptideIdentificationList unassignedIDs_
Definition FeatureFinderIdentificationAlgorithm.h:310
void addPeptideToMap_(PeptideIdentification &peptide, PeptideMap &peptide_map, bool external=false)
double psm_score_cutoff_
Definition FeatureFinderIdentificationAlgorithm.h:309
void annotateFeatures_(FeatureMap &features, PeptideRefRTMap &ref_rt_map)
annotate identified features with m/z, isotope probabilities, etc.
PeakMap & getChromatograms()
Mutable access to the accumulated extracted chromatograms (XICs) from the last run.
void runSingleGroup_(PeptideIdentificationList peptides, const std::vector< ProteinIdentification > &proteins, PeptideIdentificationList peptides_ext, std::vector< ProteinIdentification > proteins_ext, FeatureMap &features, const FeatureMap &seeds, const std::string &spectra_file)
double isotope_pmin_
min. isotope probability for peptide assay
Definition FeatureFinderIdentificationAlgorithm.h:194
double svm_quality_cutoff
Definition FeatureFinderIdentificationAlgorithm.h:209
std::map< double, std::pair< Size, Size > > svm_probs_internal_
SVM probability -> number of pos./neg. features (for FDR calculation):
Definition FeatureFinderIdentificationAlgorithm.h:315
Ion mobility statistics for a peptide in a specific RT region and charge state.
Definition FeatureFinderIdentificationAlgorithm.h:239
region in RT in which a peptide elutes:
Definition FeatureFinderIdentificationAlgorithm.h:222
A container for features.
Definition FeatureMap.h:78
An LC-MS feature.
Definition Feature.h:46
QualityType getOverallQuality() const
Non-mutable access to the overall quality.
Class for handling external peptide identifications in feature finding.
Definition FFIDAlgoExternalIDHandler.h:38
Definition IsotopeDistribution.h:40
The MRMFeatureFinder finds and scores peaks of transitions that co-elute.
Definition MRMFeatureFinderScoring.h:69
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
const DataValue & getMetaValue(const std::string &name) const
Returns the value corresponding to a string, or DataValue::EMPTY if not found.
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition Peak2D.h:185
Represents a single spectrum match (candidate) for a specific tandem mass spectrum (MS/MS).
Definition PeptideHit.h:52
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Represents the set of candidates (SpectrumMatches) identified for a single precursor spectrum.
Definition PeptideIdentification.h:66
double getRT() const
returns the RT of the MS2 spectrum where the identification occurred
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
Represents a peptide (amino acid sequence)
Definition TargetedExperimentHelper.h:335
A description of a targeted experiment containing precursor and product ions.
Definition TargetedExperiment.h:40
int Int
Signed integer type.
Definition Types.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< std::string > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Helper struct for mass traces used in FeatureFinderAlgorithmPicked.
Definition FeatureFinderAlgorithmPickedHelperStructs.h:54
Helper struct for a collection of mass traces used in FeatureFinderAlgorithmPicked.
Definition FeatureFinderAlgorithmPickedHelperStructs.h:85
comparison functor for features
Definition FeatureFinderIdentificationAlgorithm.h:287
bool operator()(const Feature &f1, const Feature &f2)
Definition FeatureFinderIdentificationAlgorithm.h:288
predicate for filtering features by assigned peptides:
Definition FeatureFinderIdentificationAlgorithm.h:256
bool operator()(const Feature &feature)
Definition FeatureFinderIdentificationAlgorithm.h:257
predicate for filtering features by overall quality:
Definition FeatureFinderIdentificationAlgorithm.h:247
bool operator()(const Feature &feature)
Definition FeatureFinderIdentificationAlgorithm.h:248
comparison functor for (unassigned) peptide IDs
Definition FeatureFinderIdentificationAlgorithm.h:265
bool operator()(const PeptideIdentification &p1, const PeptideIdentification &p2)
Definition FeatureFinderIdentificationAlgorithm.h:266