35 #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H 36 #define OPENMS_TRANSFORMATIONS_FEATUREFINDER_FEATUREFINDERIDENTIFICATIONALGORITHM_H 52 class IsotopeDistribution;
76 std::vector<PeptideIdentification> peptides,
77 const std::vector<ProteinIdentification>& proteins,
78 std::vector<PeptideIdentification> peptides_ext,
79 std::vector<ProteinIdentification> proteins_ext,
82 const String& spectra_file =
"" 88 const PeakMap& getMSData()
const;
91 void setMSData(
const PeakMap& ms_data);
92 void setMSData(
PeakMap&& ms_data);
95 const PeakMap& getChromatograms()
const;
108 typedef std::multimap<double, PeptideIdentification*>
RTMap;
110 typedef std::map<Int, std::pair<RTMap, RTMap> >
ChargeMap;
152 void updateMembers_()
override;
168 } feature_filter_quality_;
177 } feature_filter_peptides_;
185 const String& seq1 = p1.
getHits()[0].getSequence().toString();
186 const String& seq2 = p2.
getHits()[0].getSequence().toString();
191 if (charge1 == charge2)
195 return charge1 < charge2;
225 const double seed_rt_window_ = 60.0;
241 void generateTransitions_(
const String& peptide_id,
double mz,
Int charge,
247 void getRTRegions_(
ChargeMap& peptide_data, std::vector<RTRegion>& rt_regions,
bool clear_IDs =
true)
const;
249 void annotateFeaturesFinalizeAssay_(
251 std::map<
Size, std::vector<PeptideIdentification*> >& feat_ids,
257 void ensureConvexHulls_(
Feature& feature)
const;
259 void postProcess_(
FeatureMap& features,
bool with_external_ids);
262 void statistics_(
const FeatureMap& features)
const;
267 void createAssayLibrary_(
const PeptideMap::iterator& begin,
const PeptideMap::iterator& end,
PeptideRefRTMap& ref_rt_map,
bool clear_IDs =
true);
274 bool external =
false);
276 void checkNumObservations_(
Size n_pos,
Size n_neg,
const String& note =
"")
const;
278 void getUnbiasedSample_(
const std::multimap<
double, std::pair<Size, bool> >& valid_obs,
279 std::map<Size, double>& training_labels);
281 void getRandomSample_(std::map<Size, double>& training_labels)
const;
285 void filterFeaturesFinalizeAssay_(
Feature& best_feature,
double best_quality,
286 const double quality_cutoff);
288 void filterFeatures_(
FeatureMap& features,
bool classified);
294 template <
typename It>
295 std::vector<std::pair<It,It>>
296 chunk_(It range_from, It range_to,
const std::ptrdiff_t batch_size)
301 using std::make_pair;
303 using diff_t = std::ptrdiff_t;
306 const diff_t total {distance(range_from, range_to)};
307 const diff_t num {total / batch_size};
309 vector<pair<It,It>> chunks(num);
311 It batch_end {range_from};
314 std::generate(begin(chunks), end(chunks), [&batch_end, batch_size]()
316 It batch_start {batch_end };
318 std::advance(batch_end, batch_size);
319 return make_pair(batch_start, batch_end);
325 chunks.emplace_back(range_from, range_to);
329 chunks.back().second = range_to;
QualityType getOverallQuality() const
Non-mutable access to the overall quality.
double svm_quality_cutoff
Definition: FeatureFinderIdentificationAlgorithm.h:143
std::multimap< double, PeptideIdentification * > RTMap
mapping: RT (not necessarily unique) -> pointer to peptide
Definition: FeatureFinderIdentificationAlgorithm.h:108
std::vector< PeptideIdentification > unassignedIDs_
Definition: FeatureFinderIdentificationAlgorithm.h:223
A more convenient string class.
Definition: String.h:58
region in RT in which a peptide elutes:
Definition: FeatureFinderIdentificationAlgorithm.h:155
bool operator()(const PeptideIdentification &p1, const PeptideIdentification &p2)
Definition: FeatureFinderIdentificationAlgorithm.h:182
double mapping_tolerance_
RT tolerance for mapping IDs to features.
Definition: FeatureFinderIdentificationAlgorithm.h:126
Size n_internal_features_
internal feature counter (for FDR calculation)
Definition: FeatureFinderIdentificationAlgorithm.h:231
bool operator()(const Feature &f1, const Feature &f2)
Definition: FeatureFinderIdentificationAlgorithm.h:204
Helper struct for a collection of mass traces used in FeatureFinderAlgorithmPicked.
Definition: FeatureFinderAlgorithmPickedHelperStructs.h:109
A container for features.
Definition: FeatureMap.h:98
const std::vector< PeptideHit > & getHits() const
returns the peptide hits (read-only access)
Definition: FeatureFinderIdentificationAlgorithm.h:56
The MRMFeatureFinder finds and scores peaks of transitions that co-elute.
Definition: MRMFeatureFinderScoring.h:89
Definition: IsotopeDistribution.h:64
PeakMap chrom_data_
accumulated chromatograms (XICs)
Definition: FeatureFinderIdentificationAlgorithm.h:217
FeatureFinderAlgorithmPickedHelperStructs::MassTraces MassTraces
Definition: FeatureFinderIdentificationAlgorithm.h:105
StringList svm_predictor_names_
Definition: FeatureFinderIdentificationAlgorithm.h:141
const std::vector< PeptideIdentification > & getPeptideIdentifications() const
double min_peak_width_
Definition: FeatureFinderIdentificationAlgorithm.h:134
Size n_internal_peps_
number of internal peptides
Definition: FeatureFinderIdentificationAlgorithm.h:118
bool operator()(const Feature &feature)
Definition: FeatureFinderIdentificationAlgorithm.h:173
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Size n_isotopes_
number of isotopes for peptide assay
Definition: FeatureFinderIdentificationAlgorithm.h:129
String elution_model_
Definition: FeatureFinderIdentificationAlgorithm.h:137
Size n_external_features_
Definition: FeatureFinderIdentificationAlgorithm.h:232
predicate for filtering features by assigned peptides:
Definition: FeatureFinderIdentificationAlgorithm.h:171
Size batch_size_
number of peptides to use at the same time during chromatogram extraction
Definition: FeatureFinderIdentificationAlgorithm.h:121
bool mz_window_ppm_
m/z window width is given in PPM (not Da)?
Definition: FeatureFinderIdentificationAlgorithm.h:124
PeakMap ms_data_
input LC-MS data
Definition: FeatureFinderIdentificationAlgorithm.h:216
std::map< AASequence, ChargeMap > PeptideMap
mapping: sequence -> charge -> internal/external ID information
Definition: FeatureFinderIdentificationAlgorithm.h:112
double psm_score_cutoff_
Definition: FeatureFinderIdentificationAlgorithm.h:222
double rt_quantile_
Definition: FeatureFinderIdentificationAlgorithm.h:131
std::vector< std::pair< It, It > > chunk_(It range_from, It range_to, const std::ptrdiff_t batch_size)
Definition: FeatureFinderIdentificationAlgorithm.h:296
String svm_xval_out_
Definition: FeatureFinderIdentificationAlgorithm.h:142
double isotope_pmin_
min. isotope probability for peptide assay
Definition: FeatureFinderIdentificationAlgorithm.h:128
double rt_window_
RT window width.
Definition: FeatureFinderIdentificationAlgorithm.h:122
double getRT() const
returns the RT of the MS2 spectrum where the identification occurred
std::map< double, std::pair< Size, Size > > svm_probs_internal_
SVM probability -> number of pos./neg. features (for FDR calculation):
Definition: FeatureFinderIdentificationAlgorithm.h:228
MRMFeatureFinderScoring feat_finder_
OpenSWATH feature finder.
Definition: FeatureFinderIdentificationAlgorithm.h:236
ProgressLogger prog_log_
Definition: FeatureFinderIdentificationAlgorithm.h:238
Size svm_n_parts_
number of partitions for SVM cross-validation
Definition: FeatureFinderIdentificationAlgorithm.h:144
std::map< String, double > isotope_probs_
isotope probabilities of transitions
Definition: FeatureFinderIdentificationAlgorithm.h:235
predicate for filtering features by overall quality:
Definition: FeatureFinderIdentificationAlgorithm.h:162
An LC-MS feature.
Definition: Feature.h:70
double peak_width_
Definition: FeatureFinderIdentificationAlgorithm.h:133
double start
Definition: FeatureFinderIdentificationAlgorithm.h:157
String candidates_out_
Definition: FeatureFinderIdentificationAlgorithm.h:148
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
TargetedExperiment library_
accumulated assays for peptides
Definition: FeatureFinderIdentificationAlgorithm.h:218
ChargeMap ids
internal/external peptide IDs (per charge) in this region
Definition: FeatureFinderIdentificationAlgorithm.h:158
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:70
CoordinateType getRT() const
Returns the RT coordinate (index 0)
Definition: Peak2D.h:210
bool quantify_decoys_
Definition: FeatureFinderIdentificationAlgorithm.h:220
PeptideMap peptide_map_
Definition: FeatureFinderIdentificationAlgorithm.h:116
size_t Size
Size type, e.g. used for variables that hold the result of size()
Definition: Types.h:127
FeatureFinderAlgorithmPickedHelperStructs::MassTrace MassTrace
Definition: FeatureFinderIdentificationAlgorithm.h:104
comparison functor for (unassigned) peptide IDs
Definition: FeatureFinderIdentificationAlgorithm.h:180
comparison functor for features
Definition: FeatureFinderIdentificationAlgorithm.h:202
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:52
bool operator()(const Feature &feature)
Definition: FeatureFinderIdentificationAlgorithm.h:164
A description of a targeted experiment containing precursor and product ions.
Definition: TargetedExperiment.h:64
Size n_external_peps_
number of external peptides
Definition: FeatureFinderIdentificationAlgorithm.h:119
double signal_to_noise_
Definition: FeatureFinderIdentificationAlgorithm.h:135
double svm_min_prob_
Definition: FeatureFinderIdentificationAlgorithm.h:140
Size svm_n_samples_
number of samples for SVM training
Definition: FeatureFinderIdentificationAlgorithm.h:145
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
std::map< Int, std::pair< RTMap, RTMap > > ChargeMap
mapping: charge -> internal/external: (RT -> pointer to peptide)
Definition: FeatureFinderIdentificationAlgorithm.h:110
std::map< String, std::pair< RTMap, RTMap > > PeptideRefRTMap
mapping: peptide ref. -> int./ext.: (RT -> pointer to peptide)
Definition: FeatureFinderIdentificationAlgorithm.h:114
int Int
Signed integer type.
Definition: Types.h:102
double mz_window_
m/z window width
Definition: FeatureFinderIdentificationAlgorithm.h:123
std::multiset< double > svm_probs_external_
SVM probabilities for "external" features (for FDR calculation):
Definition: FeatureFinderIdentificationAlgorithm.h:230
Size debug_level_
Definition: FeatureFinderIdentificationAlgorithm.h:150
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63
Helper struct for mass traces used in FeatureFinderAlgorithmPicked.
Definition: FeatureFinderAlgorithmPickedHelperStructs.h:79
Represents a peptide (amino acid sequence)
Definition: TargetedExperimentHelper.h:358
bool use_psm_cutoff_
Definition: FeatureFinderIdentificationAlgorithm.h:221