|
OpenMS
2.5.0
|
Go to the documentation of this file.
35 #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H
36 #define OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H
52 class IsotopeDistribution;
67 std::vector<PeptideIdentification> peptides,
68 const std::vector<ProteinIdentification>& proteins,
69 std::vector<PeptideIdentification> peptides_ext,
70 std::vector<ProteinIdentification> proteins_ext,
94 typedef std::multimap<double, PeptideIdentification*>
RTMap;
96 typedef std::map<Int, std::pair<RTMap, RTMap> >
ChargeMap;
138 void updateMembers_()
override;
154 } feature_filter_quality_;
163 } feature_filter_peptides_;
171 const String& seq1 = p1.
getHits()[0].getSequence().toString();
172 const String& seq2 = p2.
getHits()[0].getSequence().toString();
177 if (charge1 == charge2)
181 return charge1 < charge2;
220 void generateTransitions_(
const String& peptide_id,
double mz,
Int charge,
226 void getRTRegions_(
ChargeMap& peptide_data, std::vector<RTRegion>& rt_regions)
const;
228 void annotateFeaturesFinalizeAssay_(
230 std::map<
Size, std::vector<PeptideIdentification*> >& feat_ids,
236 void ensureConvexHulls_(
Feature& feature);
238 void postProcess_(
FeatureMap& features,
bool with_external_ids);
241 void statistics_(
const FeatureMap& features)
const;
245 void createAssayLibrary_(
const PeptideMap::iterator& begin,
const PeptideMap::iterator& end,
PeptideRefRTMap& ref_rt_map);
249 bool external =
false)
const;
251 void checkNumObservations_(
Size n_pos,
Size n_neg,
const String& note =
"")
const;
253 void getUnbiasedSample_(
const std::multimap<
double, std::pair<Size, bool> >& valid_obs,
254 std::map<Size, Int>& training_labels);
256 void getRandomSample_(std::map<Size, Int>& training_labels);
260 void filterFeaturesFinalizeAssay_(
Feature& best_feature,
double best_quality,
261 const double quality_cutoff);
263 void filterFeatures_(
FeatureMap& features,
bool classified);
269 template <
typename It>
270 std::vector<std::pair<It,It>>
271 chunk_(It range_from, It range_to,
const std::ptrdiff_t batch_size)
276 using std::make_pair;
278 using diff_t = std::ptrdiff_t;
281 const diff_t total {distance(range_from, range_to)};
282 const diff_t num {total / batch_size};
284 vector<pair<It,It>> chunks(num);
286 It batch_end {range_from};
289 std::generate(begin(chunks), end(chunks), [&batch_end, batch_size]()
291 It batch_start {batch_end };
293 std::advance(batch_end, batch_size);
294 return make_pair(batch_start, batch_end);
300 chunks.emplace_back(range_from, range_to);
304 chunks.back().second = range_to;
void addMSLevel(int level)
adds a desired MS level for peaks to load
std::map< String, double > isotope_probs_
isotope probabilities of transitions
Definition: FeatureFinderIdentificationAlgorithm.h:214
predicate for filtering features by overall quality:
Definition: FeatureFinderIdentificationAlgorithm.h:148
TargetedExperiment library_
accumulated assays for peptides
Definition: FeatureFinderIdentificationAlgorithm.h:204
double rt_quantile_
Definition: FeatureFinderIdentificationAlgorithm.h:117
Management and storage of parameters / INI files.
Definition: Param.h:73
PeakMap & getChromatograms()
Definition: FeatureFinderIdentificationAlgorithm.h:80
Size n_internal_peps_
number of internal peptides
Definition: FeatureFinderIdentificationAlgorithm.h:104
void store(const String &filename, const TargetedExperiment &id) const
Stores a map in a TraML file.
This class provides Input/Output functionality for feature maps.
Definition: FeatureXMLFile.h:68
Definition: FeatureFinderIdentificationAlgorithm.h:56
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::multimap< double, PeptideIdentification * > RTMap
mapping: RT (not necessarily unique) -> pointer to peptide
Definition: FeatureFinderIdentificationAlgorithm.h:94
ProgressLogger & getProgressLogger()
Definition: FeatureFinderIdentificationAlgorithm.h:83
#define OPENMS_LOG_INFO
Macro to use if information, e.g. a status, should be reported.
Definition: LogStream.h:465
Helper struct for a collection of mass traces used in FeatureFinderAlgorithmPicked.
Definition: FeatureFinderAlgorithmPickedHelperStructs.h:109
std::map< AASequence, ChargeMap > PeptideMap
mapping: sequence -> charge -> internal/external ID information
Definition: FeatureFinderIdentificationAlgorithm.h:98
double peak_width_
Definition: FeatureFinderIdentificationAlgorithm.h:119
Size batch_size_
number of peptides to use at the same time during chromatogram extraction
Definition: FeatureFinderIdentificationAlgorithm.h:107
void load(const String &filename, FeatureMap &feature_map)
loads the file with name filename into map and calls updateRanges().
String candidates_out_
Definition: FeatureFinderIdentificationAlgorithm.h:134
void store(const String &filename, const FeatureMap &feature_map)
stores the map feature_map in file with name filename.
PeakMap chrom_data_
accumulated chromatograms (XICs)
Definition: FeatureFinderIdentificationAlgorithm.h:203
const DataValue & getMetaValue(const String &name, const DataValue &default_value=DataValue::EMPTY) const
Returns the value corresponding to a string, or a default value (default: DataValue::EMPTY) if not fo...
std::map< String, std::pair< RTMap, RTMap > > PeptideRefRTMap
mapping: peptide ref. -> int./ext.: (RT -> pointer to peptide)
Definition: FeatureFinderIdentificationAlgorithm.h:100
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:61
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
bool operator()(const PeptideIdentification &p1, const PeptideIdentification &p2)
Definition: FeatureFinderIdentificationAlgorithm.h:168
A container for features.
Definition: FeatureMap.h:95
StringList svm_predictor_names_
Definition: FeatureFinderIdentificationAlgorithm.h:127
PeakMap & getMSData()
Definition: FeatureFinderIdentificationAlgorithm.h:77
void clear(bool clear_meta_data)
Clears all data and meta data.
Helper struct for mass traces used in FeatureFinderAlgorithmPicked.
Definition: FeatureFinderAlgorithmPickedHelperStructs.h:79
TargetedExperiment & getLibrary()
Definition: FeatureFinderIdentificationAlgorithm.h:86
double start
Definition: FeatureFinderIdentificationAlgorithm.h:143
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:208
const std::vector< PeptideIdentification > & getPeptideIdentifications() const
returns a const reference to the PeptideIdentification vector
void insert(const String &prefix, const Param &param)
double isotope_pmin_
min. isotope probability for peptide assay
Definition: FeatureFinderIdentificationAlgorithm.h:114
Base class for TOPP applications.
Definition: TOPPBase.h:144
Data filtering or extraction.
Definition: DataProcessing.h:71
The MRMFeatureFinder finds and scores peaks of transitions that co-elute.
Definition: MRMFeatureFinderScoring.h:93
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
double rt_window_
RT window width.
Definition: FeatureFinderIdentificationAlgorithm.h:108
An LC-MS feature.
Definition: Feature.h:70
File adapter for HUPO PSI TraML files.
Definition: TraMLFile.h:63
region in RT in which a peptide elutes:
Definition: FeatureFinderIdentificationAlgorithm.h:141
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
Quantitation.
Definition: DataProcessing.h:72
String svm_xval_out_
Definition: FeatureFinderIdentificationAlgorithm.h:128
FeatureFinderAlgorithmPickedHelperStructs::MassTraces MassTraces
Definition: FeatureFinderIdentificationAlgorithm.h:91
Size debug_level_
Definition: FeatureFinderIdentificationAlgorithm.h:136
std::multiset< double > svm_probs_external_
SVM probabilities for "external" features (for FDR calculation):
Definition: FeatureFinderIdentificationAlgorithm.h:209
Size svm_n_samples_
number of samples for SVM training
Definition: FeatureFinderIdentificationAlgorithm.h:131
bool operator()(const Feature &feature)
Definition: FeatureFinderIdentificationAlgorithm.h:150
void setPrimaryMSRunPath(const StringList &s)
set the file path to the primary MS run (usually the mzML file obtained after data conversion from ra...
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
Size n_isotopes_
number of isotopes for peptide assay
Definition: FeatureFinderIdentificationAlgorithm.h:115
double svm_min_prob_
Definition: FeatureFinderIdentificationAlgorithm.h:126
std::vector< std::pair< It, It > > chunk_(It range_from, It range_to, const std::ptrdiff_t batch_size)
Definition: FeatureFinderIdentificationAlgorithm.h:271
predicate for filtering features by assigned peptides:
Definition: FeatureFinderIdentificationAlgorithm.h:157
const std::vector< PeptideHit > & getHits() const
returns the peptide hits as const
std::map< Int, std::pair< RTMap, RTMap > > ChargeMap
mapping: charge -> internal/external: (RT -> pointer to peptide)
Definition: FeatureFinderIdentificationAlgorithm.h:96
const TargetedExperiment & getLibrary() const
Definition: FeatureFinderIdentificationAlgorithm.h:87
void setParameters(const Param &param)
Sets the parameters.
Size n_external_features_
Definition: FeatureFinderIdentificationAlgorithm.h:211
ChargeMap ids
internal/external peptide IDs (per charge) in this region
Definition: FeatureFinderIdentificationAlgorithm.h:144
const ProgressLogger & getProgressLogger() const
Definition: FeatureFinderIdentificationAlgorithm.h:84
bool mz_window_ppm_
m/z window width is given in PPM (not Da)?
Definition: FeatureFinderIdentificationAlgorithm.h:110
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
Size n_external_peps_
number of external peptides
Definition: FeatureFinderIdentificationAlgorithm.h:105
void runOnCandidates(FeatureMap &features)
void remove(const String &key)
Remove the entry key or a section key (when suffix is ':')
PeakMap ms_data_
input LC-MS data
Definition: FeatureFinderIdentificationAlgorithm.h:202
bool operator()(const Feature &feature)
Definition: FeatureFinderIdentificationAlgorithm.h:159
void run(std::vector< PeptideIdentification > peptides, const std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > peptides_ext, std::vector< ProteinIdentification > proteins_ext, FeatureMap &features, const FeatureMap &seeds=FeatureMap())
void store(const String &filename, const PeakMap &map) const
Stores a map in an MzML file.
const Param & getDefaults() const
Non-mutable access to the default parameters.
Size n_internal_features_
internal feature counter (for FDR calculation)
Definition: FeatureFinderIdentificationAlgorithm.h:210
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:62
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
const PeakMap & getMSData() const
Definition: FeatureFinderIdentificationAlgorithm.h:78
std::map< double, std::pair< Size, Size > > svm_probs_internal_
SVM probability -> number of pos./neg. features (for FDR calculation):
Definition: FeatureFinderIdentificationAlgorithm.h:207
void load(const String &filename, PeakMap &map)
Loads a map from a MzML file. Spectra and chromatograms are sorted by default (this can be disabled u...
Used to load and store idXML files.
Definition: IdXMLFile.h:63
double signal_to_noise_
Definition: FeatureFinderIdentificationAlgorithm.h:121
File adapter for MzML files.
Definition: MzMLFile.h:55
A more convenient string class.
Definition: String.h:58
Definition: IsotopeDistribution.h:64
comparison functor for features
Definition: FeatureFinderIdentificationAlgorithm.h:188
double getRT() const
returns the RT of the MS2 spectrum where the identification occurred
MRMFeatureFinderScoring feat_finder_
OpenSWATH feature finder.
Definition: FeatureFinderIdentificationAlgorithm.h:215
bool operator()(const Feature &f1, const Feature &f2)
Definition: FeatureFinderIdentificationAlgorithm.h:190
PeptideMap peptide_map_
Definition: FeatureFinderIdentificationAlgorithm.h:102
QualityType getOverallQuality() const
Non-mutable access to the overall quality.
double mapping_tolerance_
RT tolerance for mapping IDs to features.
Definition: FeatureFinderIdentificationAlgorithm.h:112
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
Size svm_n_parts_
number of partitions for SVM cross-validation
Definition: FeatureFinderIdentificationAlgorithm.h:130
double min_peak_width_
Definition: FeatureFinderIdentificationAlgorithm.h:120
String elution_model_
Definition: FeatureFinderIdentificationAlgorithm.h:123
double svm_quality_cutoff
Definition: FeatureFinderIdentificationAlgorithm.h:129
comparison functor for (unassigned) peptide IDs
Definition: FeatureFinderIdentificationAlgorithm.h:166
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:73
A description of a targeted experiment containing precursor and product ions.
Definition: TargetedExperiment.h:64
FeatureFinderAlgorithmPickedHelperStructs::MassTrace MassTrace
Definition: FeatureFinderIdentificationAlgorithm.h:90
ProgressLogger prog_log_
Definition: FeatureFinderIdentificationAlgorithm.h:217
Represents a peptide (amino acid sequence)
Definition: TargetedExperimentHelper.h:370
PeakFileOptions & getOptions()
Mutable access to the options for loading/storing.
const PeakMap & getChromatograms() const
Definition: FeatureFinderIdentificationAlgorithm.h:81
double mz_window_
m/z window width
Definition: FeatureFinderIdentificationAlgorithm.h:109