OpenMS
FeatureFinderIdentificationAlgorithm Class Reference

#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/FeatureFinderIdentificationAlgorithm.h>

Inheritance diagram for FeatureFinderIdentificationAlgorithm:
[legend]
Collaboration diagram for FeatureFinderIdentificationAlgorithm:
[legend]

Classes

struct  FeatureCompare
 comparison functor for features More...
 
struct  FeatureFilterPeptides
 predicate for filtering features by assigned peptides: More...
 
struct  FeatureFilterQuality
 predicate for filtering features by overall quality: More...
 
struct  PeptideCompare
 comparison functor for (unassigned) peptide IDs More...
 
struct  RTRegion
 region in RT in which a peptide elutes: More...
 

Public Member Functions

 FeatureFinderIdentificationAlgorithm ()
 default constructor More...
 
void run (std::vector< PeptideIdentification > peptides, const std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > peptides_ext, std::vector< ProteinIdentification > proteins_ext, FeatureMap &features, const FeatureMap &seeds=FeatureMap(), const String &spectra_file="")
 
void runOnCandidates (FeatureMap &features)
 
PeakMapgetMSData ()
 
const PeakMapgetMSData () const
 
void setMSData (const PeakMap &ms_data)
 set the MS data used for feature detection More...
 
void setMSData (PeakMap &&ms_data)
 
PeakMapgetChromatograms ()
 
const PeakMapgetChromatograms () const
 
ProgressLoggergetProgressLogger ()
 
const ProgressLoggergetProgressLogger () const
 
TargetedExperimentgetLibrary ()
 
const TargetedExperimentgetLibrary () const
 
- Public Member Functions inherited from DefaultParamHandler
 DefaultParamHandler (const String &name)
 Constructor with name that is displayed in error messages. More...
 
 DefaultParamHandler (const DefaultParamHandler &rhs)
 Copy constructor. More...
 
virtual ~DefaultParamHandler ()
 Destructor. More...
 
DefaultParamHandleroperator= (const DefaultParamHandler &rhs)
 Assignment operator. More...
 
virtual bool operator== (const DefaultParamHandler &rhs) const
 Equality operator. More...
 
void setParameters (const Param &param)
 Sets the parameters. More...
 
const ParamgetParameters () const
 Non-mutable access to the parameters. More...
 
const ParamgetDefaults () const
 Non-mutable access to the default parameters. More...
 
const StringgetName () const
 Non-mutable access to the name. More...
 
void setName (const String &name)
 Mutable access to the name. More...
 
const std::vector< String > & getSubsections () const
 Non-mutable access to the registered subsections. More...
 

Protected Types

typedef FeatureFinderAlgorithmPickedHelperStructs::MassTrace MassTrace
 
typedef FeatureFinderAlgorithmPickedHelperStructs::MassTraces MassTraces
 
typedef std::multimap< double, PeptideIdentification * > RTMap
 mapping: RT (not necessarily unique) -> pointer to peptide More...
 
typedef std::map< Int, std::pair< RTMap, RTMap > > ChargeMap
 mapping: charge -> internal/external: (RT -> pointer to peptide) More...
 
typedef std::map< AASequence, ChargeMapPeptideMap
 mapping: sequence -> charge -> internal/external ID information More...
 
typedef std::map< String, std::pair< RTMap, RTMap > > PeptideRefRTMap
 mapping: peptide ref. -> int./ext.: (RT -> pointer to peptide) More...
 

Protected Member Functions

void updateMembers_ () override
 This method is used to update extra member variables at the end of the setParameters() method. More...
 
void generateTransitions_ (const String &peptide_id, double mz, Int charge, const IsotopeDistribution &iso_dist)
 generate transitions (isotopic traces) for a peptide ion and add them to the library: More...
 
void addPeptideRT_ (TargetedExperiment::Peptide &peptide, double rt) const
 
void getRTRegions_ (ChargeMap &peptide_data, std::vector< RTRegion > &rt_regions, bool clear_IDs=true) const
 get regions in which peptide eludes (ideally only one) by clustering RT elution times More...
 
void annotateFeaturesFinalizeAssay_ (FeatureMap &features, std::map< Size, std::vector< PeptideIdentification * > > &feat_ids, RTMap &rt_internal)
 
void annotateFeatures_ (FeatureMap &features, PeptideRefRTMap &ref_rt_map)
 annotate identified features with m/z, isotope probabilities, etc. More...
 
void ensureConvexHulls_ (Feature &feature) const
 
void postProcess_ (FeatureMap &features, bool with_external_ids)
 
void statistics_ (const FeatureMap &features) const
 some statistics on detected features More...
 
void createAssayLibrary_ (const PeptideMap::iterator &begin, const PeptideMap::iterator &end, PeptideRefRTMap &ref_rt_map, bool clear_IDs=true)
 
void addPeptideToMap_ (PeptideIdentification &peptide, PeptideMap &peptide_map, bool external=false)
 
void checkNumObservations_ (Size n_pos, Size n_neg, const String &note="") const
 
void getUnbiasedSample_ (const std::multimap< double, std::pair< Size, bool > > &valid_obs, std::map< Size, double > &training_labels)
 
void getRandomSample_ (std::map< Size, double > &training_labels) const
 
void classifyFeatures_ (FeatureMap &features)
 
void filterFeaturesFinalizeAssay_ (Feature &best_feature, double best_quality, const double quality_cutoff)
 
void filterFeatures_ (FeatureMap &features, bool classified)
 
void calculateFDR_ (FeatureMap &features)
 
template<typename It >
std::vector< std::pair< It, It > > chunk_ (It range_from, It range_to, const std::ptrdiff_t batch_size)
 
- Protected Member Functions inherited from DefaultParamHandler
void defaultsToParam_ ()
 Updates the parameters after the defaults have been set in the constructor. More...
 

Protected Attributes

PeptideMap peptide_map_
 
Size n_internal_peps_
 number of internal peptide More...
 
Size n_external_peps_
 number of external peptides More...
 
Size batch_size_
 nr of peptides to use at the same time during chromatogram extraction More...
 
double rt_window_
 RT window width. More...
 
double mz_window_
 m/z window width More...
 
bool mz_window_ppm_
 m/z window width is given in PPM (not Da)? More...
 
double mapping_tolerance_
 RT tolerance for mapping IDs to features. More...
 
double isotope_pmin_
 min. isotope probability for peptide assay More...
 
Size n_isotopes_
 number of isotopes for peptide assay More...
 
double rt_quantile_
 
double peak_width_
 
double min_peak_width_
 
double signal_to_noise_
 
String elution_model_
 
double svm_min_prob_
 
StringList svm_predictor_names_
 
String svm_xval_out_
 
double svm_quality_cutoff
 
Size svm_n_parts_
 number of partitions for SVM cross-validation More...
 
Size svm_n_samples_
 number of samples for SVM training More...
 
String candidates_out_
 
Size debug_level_
 
struct OpenMS::FeatureFinderIdentificationAlgorithm::FeatureFilterQuality feature_filter_quality_
 
struct OpenMS::FeatureFinderIdentificationAlgorithm::FeatureFilterPeptides feature_filter_peptides_
 
struct OpenMS::FeatureFinderIdentificationAlgorithm::PeptideCompare peptide_compare_
 
struct OpenMS::FeatureFinderIdentificationAlgorithm::FeatureCompare feature_compare_
 
PeakMap ms_data_
 input LC-MS data More...
 
PeakMap chrom_data_
 accumulated chromatograms (XICs) More...
 
TargetedExperiment library_
 accumulated assays for peptides More...
 
bool quantify_decoys_
 
bool use_psm_cutoff_
 
double psm_score_cutoff_
 
std::vector< PeptideIdentificationunassignedIDs_
 
const double seed_rt_window_ = 60.0
 extraction window used for seeds (smaller than rt_window_ as we know the exact apex positions) More...
 
std::map< double, std::pair< Size, Size > > svm_probs_internal_
 SVM probability -> number of pos./neg. features (for FDR calculation): More...
 
std::multiset< double > svm_probs_external_
 SVM probabilities for "external" features (for FDR calculation): More...
 
Size n_internal_features_
 internal feature counter (for FDR calculation) More...
 
Size n_external_features_
 
TransformationDescription trafo_external_
 TransformationDescription trafo_; // RT transformation (to range 0-1) More...
 
std::map< String, double > isotope_probs_
 isotope probabilities of transitions More...
 
MRMFeatureFinderScoring feat_finder_
 OpenSWATH feature finder. More...
 
ProgressLogger prog_log_
 
- Protected Attributes inherited from DefaultParamHandler
Param param_
 Container for current parameters. More...
 
Param defaults_
 Container for default parameters. This member should be filled in the constructor of derived classes! More...
 
std::vector< Stringsubsections_
 Container for registered subsections. This member should be filled in the constructor of derived classes! More...
 
String error_name_
 Name that is displayed in error messages during the parameter checking. More...
 
bool check_defaults_
 If this member is set to false no checking if parameters in done;. More...
 
bool warn_empty_defaults_
 If this member is set to false no warning is emitted when defaults are empty;. More...
 

Additional Inherited Members

- Static Public Member Functions inherited from DefaultParamHandler
static void writeParametersToMetaValues (const Param &write_this, MetaInfoInterface &write_here, const String &key_prefix="")
 Writes all parameters to meta values. More...
 

Class Documentation

◆ OpenMS::FeatureFinderIdentificationAlgorithm::RTRegion

struct OpenMS::FeatureFinderIdentificationAlgorithm::RTRegion

region in RT in which a peptide elutes:

Collaboration diagram for FeatureFinderIdentificationAlgorithm::RTRegion:
[legend]
Class Members
double end
ChargeMap ids internal/external peptide IDs (per charge) in this region
double start

Member Typedef Documentation

◆ ChargeMap

typedef std::map<Int, std::pair<RTMap, RTMap> > ChargeMap
protected

mapping: charge -> internal/external: (RT -> pointer to peptide)

◆ MassTrace

◆ MassTraces

◆ PeptideMap

typedef std::map<AASequence, ChargeMap> PeptideMap
protected

mapping: sequence -> charge -> internal/external ID information

◆ PeptideRefRTMap

typedef std::map<String, std::pair<RTMap, RTMap> > PeptideRefRTMap
protected

mapping: peptide ref. -> int./ext.: (RT -> pointer to peptide)

◆ RTMap

typedef std::multimap<double, PeptideIdentification*> RTMap
protected

mapping: RT (not necessarily unique) -> pointer to peptide

Constructor & Destructor Documentation

◆ FeatureFinderIdentificationAlgorithm()

default constructor

Member Function Documentation

◆ addPeptideRT_()

void addPeptideRT_ ( TargetedExperiment::Peptide peptide,
double  rt 
) const
protected

◆ addPeptideToMap_()

void addPeptideToMap_ ( PeptideIdentification peptide,
PeptideMap peptide_map,
bool  external = false 
)
protected

CAUTION: This method stores a pointer to the given peptide reference in internals Make sure it stays valid until destruction of the class.

Todo:
find better solution

◆ annotateFeatures_()

void annotateFeatures_ ( FeatureMap features,
PeptideRefRTMap ref_rt_map 
)
protected

annotate identified features with m/z, isotope probabilities, etc.

◆ annotateFeaturesFinalizeAssay_()

void annotateFeaturesFinalizeAssay_ ( FeatureMap features,
std::map< Size, std::vector< PeptideIdentification * > > &  feat_ids,
RTMap rt_internal 
)
protected

◆ calculateFDR_()

void calculateFDR_ ( FeatureMap features)
protected

◆ checkNumObservations_()

void checkNumObservations_ ( Size  n_pos,
Size  n_neg,
const String note = "" 
) const
protected

◆ chunk_()

std::vector<std::pair<It,It> > chunk_ ( It  range_from,
It  range_to,
const std::ptrdiff_t  batch_size 
)
inlineprotected

Chunks an iterator range (allowing advance and distance) into batches of size batch_size. Last batch might be smaller.

◆ classifyFeatures_()

void classifyFeatures_ ( FeatureMap features)
protected

◆ createAssayLibrary_()

void createAssayLibrary_ ( const PeptideMap::iterator &  begin,
const PeptideMap::iterator &  end,
PeptideRefRTMap ref_rt_map,
bool  clear_IDs = true 
)
protected

creates an assay library out of the peptide sequences and their RT elution windows the PeptideMap is mutable since we clear it on-the-go

Parameters
clear_IDsset to false to keep IDs in internal charge maps (only needed for debugging purposes)

◆ ensureConvexHulls_()

void ensureConvexHulls_ ( Feature feature) const
protected

◆ filterFeatures_()

void filterFeatures_ ( FeatureMap features,
bool  classified 
)
protected

◆ filterFeaturesFinalizeAssay_()

void filterFeaturesFinalizeAssay_ ( Feature best_feature,
double  best_quality,
const double  quality_cutoff 
)
protected

◆ generateTransitions_()

void generateTransitions_ ( const String peptide_id,
double  mz,
Int  charge,
const IsotopeDistribution iso_dist 
)
protected

generate transitions (isotopic traces) for a peptide ion and add them to the library:

◆ getChromatograms() [1/2]

PeakMap& getChromatograms ( )

◆ getChromatograms() [2/2]

const PeakMap& getChromatograms ( ) const

◆ getLibrary() [1/2]

TargetedExperiment& getLibrary ( )

◆ getLibrary() [2/2]

const TargetedExperiment& getLibrary ( ) const

◆ getMSData() [1/2]

PeakMap& getMSData ( )

◆ getMSData() [2/2]

const PeakMap& getMSData ( ) const

◆ getProgressLogger() [1/2]

ProgressLogger& getProgressLogger ( )

◆ getProgressLogger() [2/2]

const ProgressLogger& getProgressLogger ( ) const

◆ getRandomSample_()

void getRandomSample_ ( std::map< Size, double > &  training_labels) const
protected

◆ getRTRegions_()

void getRTRegions_ ( ChargeMap peptide_data,
std::vector< RTRegion > &  rt_regions,
bool  clear_IDs = true 
) const
protected

get regions in which peptide eludes (ideally only one) by clustering RT elution times

◆ getUnbiasedSample_()

void getUnbiasedSample_ ( const std::multimap< double, std::pair< Size, bool > > &  valid_obs,
std::map< Size, double > &  training_labels 
)
protected

◆ postProcess_()

void postProcess_ ( FeatureMap features,
bool  with_external_ids 
)
protected

◆ run()

void run ( std::vector< PeptideIdentification peptides,
const std::vector< ProteinIdentification > &  proteins,
std::vector< PeptideIdentification peptides_ext,
std::vector< ProteinIdentification proteins_ext,
FeatureMap features,
const FeatureMap seeds = FeatureMap(),
const String spectra_file = "" 
)

Main method for actual FeatureFinder External IDs (peptides_ext, proteins_ext) may be empty, in which case no machine learning or FDR estimation will be performed. Optional seeds from e.g. untargeted FeatureFinders can be added with seeds. Results will be written to features. Note: The primaryMSRunPath of features will be updated to the primaryMSRunPath stored in the MSExperiment. If that path is not a valid and readable mzML spectra_file will be annotated as a fall-back. Caution: peptide IDs will be shrunk to best hit, FFid metavalues added and potential seed IDs added.

◆ runOnCandidates()

void runOnCandidates ( FeatureMap features)

◆ setMSData() [1/2]

void setMSData ( const PeakMap ms_data)

set the MS data used for feature detection

◆ setMSData() [2/2]

void setMSData ( PeakMap &&  ms_data)

◆ statistics_()

void statistics_ ( const FeatureMap features) const
protected

some statistics on detected features

◆ updateMembers_()

void updateMembers_ ( )
overrideprotectedvirtual

This method is used to update extra member variables at the end of the setParameters() method.

Also call it at the end of the derived classes' copy constructor and assignment operator.

The default implementation is empty.

Reimplemented from DefaultParamHandler.

Member Data Documentation

◆ batch_size_

Size batch_size_
protected

nr of peptides to use at the same time during chromatogram extraction

◆ candidates_out_

String candidates_out_
protected

◆ chrom_data_

PeakMap chrom_data_
protected

accumulated chromatograms (XICs)

◆ debug_level_

Size debug_level_
protected

◆ elution_model_

String elution_model_
protected

◆ feat_finder_

MRMFeatureFinderScoring feat_finder_
protected

OpenSWATH feature finder.

◆ feature_compare_

◆ feature_filter_peptides_

◆ feature_filter_quality_

◆ isotope_pmin_

double isotope_pmin_
protected

min. isotope probability for peptide assay

◆ isotope_probs_

std::map<String, double> isotope_probs_
protected

isotope probabilities of transitions

◆ library_

TargetedExperiment library_
protected

accumulated assays for peptides

◆ mapping_tolerance_

double mapping_tolerance_
protected

RT tolerance for mapping IDs to features.

◆ min_peak_width_

double min_peak_width_
protected

◆ ms_data_

PeakMap ms_data_
protected

input LC-MS data

◆ mz_window_

double mz_window_
protected

m/z window width

◆ mz_window_ppm_

bool mz_window_ppm_
protected

m/z window width is given in PPM (not Da)?

◆ n_external_features_

Size n_external_features_
protected

external feature counter (for FDR calculation)

◆ n_external_peps_

Size n_external_peps_
protected

number of external peptides

◆ n_internal_features_

Size n_internal_features_
protected

internal feature counter (for FDR calculation)

◆ n_internal_peps_

Size n_internal_peps_
protected

number of internal peptide

◆ n_isotopes_

Size n_isotopes_
protected

number of isotopes for peptide assay

◆ peak_width_

double peak_width_
protected

◆ peptide_compare_

◆ peptide_map_

PeptideMap peptide_map_
protected

◆ prog_log_

ProgressLogger prog_log_
protected

◆ psm_score_cutoff_

double psm_score_cutoff_
protected

◆ quantify_decoys_

bool quantify_decoys_
protected

◆ rt_quantile_

double rt_quantile_
protected

◆ rt_window_

double rt_window_
protected

RT window width.

◆ seed_rt_window_

const double seed_rt_window_ = 60.0
protected

extraction window used for seeds (smaller than rt_window_ as we know the exact apex positions)

◆ signal_to_noise_

double signal_to_noise_
protected

◆ svm_min_prob_

double svm_min_prob_
protected

◆ svm_n_parts_

Size svm_n_parts_
protected

number of partitions for SVM cross-validation

◆ svm_n_samples_

Size svm_n_samples_
protected

number of samples for SVM training

◆ svm_predictor_names_

StringList svm_predictor_names_
protected

◆ svm_probs_external_

std::multiset<double> svm_probs_external_
protected

SVM probabilities for "external" features (for FDR calculation):

◆ svm_probs_internal_

std::map<double, std::pair<Size, Size> > svm_probs_internal_
protected

SVM probability -> number of pos./neg. features (for FDR calculation):

◆ svm_quality_cutoff

double svm_quality_cutoff
protected

◆ svm_xval_out_

String svm_xval_out_
protected

◆ trafo_external_

TransformationDescription trafo_external_
protected

TransformationDescription trafo_; // RT transformation (to range 0-1)

transform. to external RT scale

◆ unassignedIDs_

std::vector<PeptideIdentification> unassignedIDs_
protected

◆ use_psm_cutoff_

bool use_psm_cutoff_
protected