28class TheoreticalSpectrumGenerator;
75 bool is_open_search =
false;
118 std::vector<FASTAFile::FASTAEntry>
db;
130 bool release_fragment_index_after_scoring =
false;
155 const std::string& in_db,
156 std::vector<ProteinIdentification>& prot_ids,
205 const std::string& in_db,
206 const std::string& output_base_name =
"")
const;
226 const std::vector<FASTAFile::FASTAEntry>& fasta_db,
227 std::vector<ProteinIdentification>& prot_ids,
272 std::vector<ProteinIdentification>& prot_ids,
286 const std::vector<FASTAFile::FASTAEntry>& fasta_db,
287 const std::string& output_base_name =
"")
const;
316 const std::vector<std::string>& in_spectra_files,
317 const std::vector<FASTAFile::FASTAEntry>& fasta_db,
318 const std::vector<std::string>& output_base_names = {},
319 const std::string& aggregate_base_name =
"")
const;
332 const std::vector<std::string>& in_spectra_files,
333 const std::string& in_db,
334 const std::vector<std::string>& output_base_names = {},
335 const std::string& aggregate_base_name =
"")
const;
350 double delta_mass = 0.0;
351 float prefix_fraction = 0;
352 float suffix_fraction = 0;
353 float mean_error = 0.0f;
354 int isotope_error = 0;
355 uint16_t applied_charge = 0;
356 uint16_t matched_prefix_ions = 0;
357 uint16_t matched_suffix_ions = 0;
377 const std::vector<FASTAFile::FASTAEntry>& fasta_db)
const;
393 const std::vector<FASTAFile::FASTAEntry>& full_db)
const;
409 std::vector<FASTAFile::FASTAEntry>& full_db,
410 std::vector<ProteinIdentification>& protein_ids,
424 const std::vector<FASTAFile::FASTAEntry>& db,
426 double effective_fragment_tol,
427 bool fragment_mass_tolerance_unit_ppm,
428 bool open_search_mode,
429 std::vector<std::vector<AnnotatedHit_>>& annotated_hits,
430 const std::string& progress_label)
const;
457 std::vector<std::vector<ProSEAlgorithm::AnnotatedHit_> >& annotated_hits,
458 std::vector<ProteinIdentification>& protein_ids,
463 Int peptide_missed_cleavages,
464 double precursor_mass_tolerance,
465 double fragment_mass_tolerance,
466 const std::string& precursor_mass_tolerance_unit_ppm,
467 const std::string& fragment_mass_tolerance_unit_ppm,
468 const Int precursor_min_charge,
469 const Int precursor_max_charge,
470 const std::string& enzyme,
471 const std::string& database_name)
const;
476 mutable double precursor_mass_tolerance_lower_{20.0};
477 mutable double precursor_mass_tolerance_upper_{20.0};
478 std::string precursor_mass_tolerance_unit_{
"ppm"};
500 double fdr_psm_{0.0};
501 double fdr_protein_{0.0};
514 bool add_a_ions_{
false};
515 bool add_b_ions_{
true};
516 bool add_c_ions_{
false};
517 bool add_x_ions_{
false};
518 bool add_y_ions_{
true};
519 bool add_z_ions_{
false};
523 bool calibration_enabled_{
false};
524 double calibration_subset_ratio_{0.1};
525 Size calibration_min_psms_{50};
536 double precursor_shift{0};
537 double precursor_spread{0};
540 double fragment_tolerance{0};
541 double fragment_shift{0};
542 bool extreme_bias{
false};
558 mutable double last_mod_match_tolerance_used_{-1.0};
572 if (precursor_mass_tolerance_lower_ <= 0.0)
return precursor_mass_tolerance_upper_;
573 if (precursor_mass_tolerance_upper_ <= 0.0)
return precursor_mass_tolerance_lower_;
574 return std::min(precursor_mass_tolerance_lower_, precursor_mass_tolerance_upper_);
591 const std::vector<FASTAFile::FASTAEntry>& db)
const;
595 const std::string& output_base_name)
const;
599 const std::vector<ProteinIdentification>& protein_ids,
605 return FragmentIndex::isOpenSearchMode(precursor_mass_tolerance_lower_,
606 precursor_mass_tolerance_upper_,
607 precursor_mass_tolerance_unit_ ==
"ppm");
Representation of a peptide/protein sequence.
Definition AASequence.h:88
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Specificity
When querying for valid digestion products, this determines if the specificity of the two peptide end...
Definition EnzymaticDigestion.h:42
Generates, from a set of FASTA files, a 2D data structure that stores all theoretical masses of all b a...
Definition FragmentIndex.h:35
In-Memory representation of a mass spectrometry run.
Definition MSExperiment.h:49
Combined result of open search modification analysis.
Definition OpenSearchModificationAnalysis.h:104
Container for peptide identifications from multiple spectra.
Definition PeptideIdentificationList.h:66
Fragment-index-based peptide database search algorithm (experimental).
Definition ProSEAlgorithm.h:47
ExitCodes search(PeakMap &spectra, const std::vector< FASTAFile::FASTAEntry > &fasta_db, std::vector< ProteinIdentification > &prot_ids, PeptideIdentificationList &pep_ids) const
In-memory search: search spectra against a protein database without file I/O.
static void preprocessSpectra_(PeakMap &exp, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm)
Filter, deisotope, and decharge spectra.
std::string enzyme_
Definition ProSEAlgorithm.h:495
FragmentIndex fragment_index
Definition ProSEAlgorithm.h:119
void logSearchDiagnostics_(const PeakMap &spectra, const std::vector< ProteinIdentification > &protein_ids, const PeptideIdentificationList &peptide_ids) const
Helper: log search summary statistics and per-run tolerance estimation.
Size peptide_max_size_
Definition ProSEAlgorithm.h:506
SearchResult searchWithModificationAnalysis(const std::string &in_spectra, const std::string &in_db, const std::string &output_base_name="") const
Search with comprehensive results including modification analysis tables.
Size precursor_max_charge_
Definition ProSEAlgorithm.h:481
Size precursor_min_charge_
Definition ProSEAlgorithm.h:480
std::string decoy_prefix_
Definition ProSEAlgorithm.h:498
Size report_top_hits_
Definition ProSEAlgorithm.h:512
Size modifications_max_variable_mods_per_peptide_
Definition ProSEAlgorithm.h:493
SearchResult searchWithModificationAnalysis(PeakMap &spectra, const std::vector< FASTAFile::FASTAEntry > &fasta_db, const std::string &output_base_name="") const
In-memory search with modification analysis: no file I/O required.
std::string peptide_motif_
Definition ProSEAlgorithm.h:510
StringList modifications_fixed_
Definition ProSEAlgorithm.h:489
CalibrationResult_ runCalibrationPass_(PeakMap &spectra, FragmentIndex &fragment_index, const std::vector< FASTAFile::FASTAEntry > &db) const
Run a fast calibration pass on a subset of spectra to estimate mass accuracy.
void postProcessHits_(const PeakMap &exp, std::vector< std::vector< ProSEAlgorithm::AnnotatedHit_ > > &annotated_hits, std::vector< ProteinIdentification > &protein_ids, PeptideIdentificationList &peptide_ids, Size top_hits, const StringList &modifications_fixed, const StringList &modifications_variable, Int peptide_missed_cleavages, double precursor_mass_tolerance, double fragment_mass_tolerance, const std::string &precursor_mass_tolerance_unit_ppm, const std::string &fragment_mass_tolerance_unit_ppm, const Int precursor_min_charge, const Int precursor_max_charge, const std::string &enzyme, const std::string &database_name) const
Filter and annotate search results.
ExitCodes searchChunked_(PeakMap &spectra, std::vector< FASTAFile::FASTAEntry > &full_db, std::vector< ProteinIdentification > &protein_ids, PeptideIdentificationList &peptide_ids) const
Chunked database search implementation.
std::vector< ProteinIdentification > protein_ids
Definition ProSEAlgorithm.h:72
Size peptide_min_size_
Definition ProSEAlgorithm.h:505
SearchResult aggregate
Definition ProSEAlgorithm.h:105
IntList precursor_isotopes_
Definition ProSEAlgorithm.h:483
bool decoys_
Definition ProSEAlgorithm.h:497
MultiFileSearchResult searchWithModificationAnalysis(const std::vector< std::string > &in_spectra_files, const std::string &in_db, const std::vector< std::string > &output_base_names={}, const std::string &aggregate_base_name="") const
Multi-file search with modification analysis (FASTA file path).
MultiFileSearchResult searchWithModificationAnalysis(const std::vector< std::string > &in_spectra_files, const std::vector< FASTAFile::FASTAEntry > &fasta_db, const std::vector< std::string > &output_base_names={}, const std::string &aggregate_base_name="") const
Multi-file search with modification analysis (in-memory FASTA).
std::string fragment_mass_tolerance_unit_
Definition ProSEAlgorithm.h:487
StringList annotate_psm_
Definition ProSEAlgorithm.h:503
OpenSearchModificationAnalysis::OpenSearchAnalysisResult modification_analysis
Definition ProSEAlgorithm.h:74
ExitCodes search(const std::string &in_spectra, const std::string &in_db, std::vector< ProteinIdentification > &prot_ids, PeptideIdentificationList &pep_ids) const
Search spectra in a spectrum file (mzML or Bruker .d) against a protein database using an FI-backed w...
std::vector< FASTAFile::FASTAEntry > db
Definition ProSEAlgorithm.h:118
ExitCodes search(PeakMap &spectra, SearchContext &ctx, std::vector< ProteinIdentification > &prot_ids, PeptideIdentificationList &pep_ids) const
In-memory search using a pre-built SearchContext.
bool isOpenSearchMode_() const
Helper function to determine if open search should be used based on tolerance.
Definition ProSEAlgorithm.h:603
std::vector< FASTAFile::FASTAEntry > buildCalibrationSample_(const std::vector< FASTAFile::FASTAEntry > &full_db) const
Build a strided protein sample for chunked calibration.
PeptideIdentificationList peptide_ids
Definition ProSEAlgorithm.h:73
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
ExitCodes
Exit codes.
Definition ProSEAlgorithm.h:53
StringList modifications_variable_
Definition ProSEAlgorithm.h:491
std::vector< SearchResult > per_file
Definition ProSEAlgorithm.h:104
double fragment_mass_tolerance_
Definition ProSEAlgorithm.h:485
SearchContext prepareContext(const std::vector< FASTAFile::FASTAEntry > &fasta_db) const
Build a SearchContext (decoy-augmented database + FragmentIndex) for reuse.
void scoreSpectraAgainstIndex_(const PeakMap &spectra, FragmentIndex &fi, const std::vector< FASTAFile::FASTAEntry > &db, const TheoreticalSpectrumGenerator &spectrum_generator, double effective_fragment_tol, bool fragment_mass_tolerance_unit_ppm, bool open_search_mode, std::vector< std::vector< AnnotatedHit_ > > &annotated_hits, const std::string &progress_label) const
Score all spectra against one FragmentIndex.
Size peptide_missed_cleavages_
Definition ProSEAlgorithm.h:507
CalibrationResult_ last_calibration_result_
Definition ProSEAlgorithm.h:549
double computeModMatchTolerance_() const
Definition ProSEAlgorithm.h:570
std::vector< FASTAFile::FASTAEntry > buildDecoyAugmentedDB_(const std::vector< FASTAFile::FASTAEntry > &fasta_db) const
Build a decoy-augmented copy of the input FASTA.
void logModificationAnalysisSummary_(const SearchResult &result, const std::string &output_base_name) const
Helper: log the modification analysis summary (shared by in-memory and file-based paths)
Result of a calibration pass.
Definition ProSEAlgorithm.h:535
Multi-file search result bundle.
Definition ProSEAlgorithm.h:103
Prepared per-database state shared across multiple spectrum files.
Definition ProSEAlgorithm.h:117
Comprehensive search result including modification analysis.
Definition ProSEAlgorithm.h:70
Base class for all classes that want to report their progress.
Definition ProgressLogger.h:27
Generates theoretical spectra for peptides with various options.
Definition TheoreticalSpectrumGenerator.h:45
int Int
Signed integer type.
Definition Types.h:72
size_t Size
Size type, e.g. used for a variable that can hold the result of size().
Definition Types.h:97
std::vector< Int > IntList
Vector of signed integers.
Definition ListUtils.h:29
std::vector< std::string > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
Slimmer structure, because storing all scored candidates in PeptideHit objects takes too much space.
Definition ProSEAlgorithm.h:342
static bool hasBetterScore(const AnnotatedHit_ &a, const AnnotatedHit_ &b)
Definition ProSEAlgorithm.h:359
double score
main score
Definition ProSEAlgorithm.h:349
AASequence sequence
Definition ProSEAlgorithm.h:343