OpenMS
Loading...
Searching...
No Matches
FragmentIndex.h
Go to the documentation of this file.
1// Copyright (c) 2002-present, OpenMS Inc. -- EKU Tuebingen, ETH Zurich, and FU Berlin
2// SPDX-License-Identifier: BSD-3-Clause
3//
4// --------------------------------------------------------------------------
5// $Maintainer: $
6// $Authors: $
7// --------------------------------------------------------------------------
8
9#pragma once
10
18
19
20#include <array>
21#include <mutex>
22#include <vector>
23#include <functional>
24#include <algorithm> // std::max (used by inline static isOpenSearchMode)
25
26namespace OpenMS
27{
34 class OPENMS_DLLAPI FragmentIndex : public DefaultParamHandler
35 {
36 public:
37
38
52 struct Peptide {
53
54 // We need a constructor in order to emplace back
55 Peptide(UInt32 protein_idx, uint32_t mod_bitmask, std::pair<uint16_t , uint16_t> sequence, float precursor_mz):
56 protein_idx(protein_idx),
57 mod_bitmask_(mod_bitmask),
58 sequence_(sequence),
59 precursor_mz_(precursor_mz)
60 {}
61
63 uint32_t mod_bitmask_;
64 std::pair<uint16_t , uint16_t> sequence_;
66 };
67
/**
 * @brief Match between a query peak and an entry in the DB.
 *
 * Every field carries a `{}` default member initializer, so a default-constructed
 * SpectrumMatch has all members zero.
 */
struct SpectrumMatch
{
  uint32_t num_matched_{};       ///< match counter (NOTE(review): presumably number of matched peaks -- confirm)
  uint32_t subset_bitmask_{};    ///< NOTE(review): presumably the modification-slot subset of the match -- confirm
  float sigma_delta_{};          ///< NOTE(review): presumably the summed modification mass delta -- confirm
  uint16_t precursor_charge_{};  ///< precursor charge associated with this match
  int16_t isotope_error_{};      ///< isotope error associated with this match (signed)
  size_t peptide_idx_{};         ///< NOTE(review): presumably an index into the peptide container -- confirm
};
80
81
86 {
87 std::vector<SpectrumMatch> hits_;
88
89
91
99 {
100
101 this->hits_.insert(this->hits_.end(), other.hits_.begin(), other.hits_.end());
102 return *this;
103 }
104
105 void clear()
106 {
107 hits_.clear();
108
109 }
110 };
121
128 ~FragmentIndex() override = default;
129
139 bool isBuild() const;
140
153 const std::vector<Peptide>& getPeptides() const;
154
155#ifdef DEBUG_FRAGMENT_INDEX
179 void addSpecialPeptide(AASequence& peptide, Size source_idx);
180#endif
181
188 void build(const std::vector<FASTAFile::FASTAEntry> & fasta_entries);
189
191 void clear();
192
193
205 std::pair<size_t, size_t> getPeptidesInMassWindow(float precursor_mass,
206 const std::pair<float, float>& window) const;
207
/**
 * @brief Decide whether a precursor tolerance window counts as an "open search".
 *
 * The larger of the two bounds is compared against a fixed threshold:
 * 1000.0 when the bounds are given in ppm, 1.0 otherwise
 * (NOTE(review): the non-ppm unit is presumably Dalton -- confirm).
 *
 * @param lower_magnitude lower tolerance bound
 * @param upper_magnitude upper tolerance bound
 * @param unit_ppm        true if both bounds are expressed in ppm
 * @return true if max(lower_magnitude, upper_magnitude) is strictly greater than the threshold
 */
static bool isOpenSearchMode(double lower_magnitude,
                             double upper_magnitude,
                             bool unit_ppm) noexcept
{
  const double threshold = unit_ppm ? 1000.0 : 1.0;
  // Strict comparison: a window sitting exactly on the threshold is still a closed search.
  return std::max(lower_magnitude, upper_magnitude) > threshold;
}
219
/// Bit 31 of a 32-bit mod bitmask; isSingleCMother() / isSingleNMother() test exactly this bit.
static constexpr uint32_t SNES_KIND_BIT_MASK = 1u << 31;
/// Complement of SNES_KIND_BIT_MASK: selects the lower 31 bits of a mod bitmask.
static constexpr uint32_t SNES_SLOT_MASK = ~SNES_KIND_BIT_MASK;
241
/// Anchor kinds used in SNES mode.
/// NOTE(review): presumably names the protein terminus a SNES mother peptide is anchored to -- confirm against the implementation.
enum class SnesAnchor
{
  NONE,        ///< no anchor
  PROT_NTERM,  ///< protein N-terminus
  PROT_CTERM   ///< protein C-terminus
};
251
254 static bool isSingleCMother(uint32_t mod_bitmask) noexcept
255 {
256 return (mod_bitmask & SNES_KIND_BIT_MASK) != 0;
257 }
259 static bool isSingleNMother(uint32_t mod_bitmask) noexcept
260 {
261 return (mod_bitmask & SNES_KIND_BIT_MASK) == 0;
262 }
263
269 bool isSnesMode() const noexcept { return is_snes_mode_; }
270
271
275 struct Hit
276 {
277 Hit(UInt32 peptide_idx, float fragment_mz) :
278 peptide_idx(peptide_idx),
279 fragment_mz(fragment_mz)
280 {}
281 UInt32 peptide_idx; // index in database
283 };
284
291 std::vector<Hit> query(const Peak1D& peak,
292 const std::pair<size_t,size_t>& peptide_idx_range,
293 uint16_t peak_charge);
294
302 void querySpectrum(const MSSpectrum& spectrum,
304
316 void querySpectrum(const MSSpectrum& spectrum,
317 const std::vector<FASTAFile::FASTAEntry>& fasta_entries,
319
331 const std::vector<FASTAFile::FASTAEntry>& fasta_entries) const;
332
354 int realizeSNESLength(const Peptide& mother,
355 const std::vector<FASTAFile::FASTAEntry>& fasta_entries,
356 double target_mh_plus,
357 double tolerance_lower_magnitude,
358 double tolerance_upper_magnitude,
359 bool tolerance_ppm) const;
360
369 const std::vector<FASTAFile::FASTAEntry>& fasta_entries,
370 size_t realized_length,
371 uint32_t subset_bitmask = 0) const;
372
373protected:
374
375
378 struct Fragment
379 {
380 Fragment() = default;
381 Fragment(UInt32 peptide_idx, float fragment_mz):
382 peptide_idx_(peptide_idx),
383 fragment_mz_(fragment_mz)
384 {}
385 UInt32 peptide_idx_{}; // 32 bit in sage
386 float fragment_mz_{};
387 };
388
389 bool is_build_{false};
390
391 void updateMembers_() override;
392
399 void generatePeptides(const std::vector<FASTAFile::FASTAEntry>& fasta_entries);
400
422 void generateSNESMothers_(const std::vector<FASTAFile::FASTAEntry>& fasta_entries);
423
431
437 struct ModSlot
438 {
439 uint16_t position;
440 double delta_mass;
442
443 static constexpr uint16_t NTERM_SLOT = UINT16_MAX - 1;
444 static constexpr uint16_t CTERM_SLOT = UINT16_MAX;
445 };
446
447 static constexpr size_t MAX_MOD_SLOTS = 32;
448
452
462 size_t buildModSlots_(const char* sequence, size_t seq_len, ModSlot* out_slots,
463 bool is_protein_nterm = false, bool is_protein_cterm = false) const;
464
473 std::vector<double> computeSnesSigmaDeltaSet_(bool include_prot_nterm_mods,
474 bool include_prot_cterm_mods) const;
475
477 std::array<double, 128> fixed_mod_deltas_{};
479 std::array<const ResidueModification*, 128> fixed_mod_ptrs_{};
480 double fixed_nterm_delta_{0.0};
481 double fixed_cterm_delta_{0.0};
482 const ResidueModification* fixed_nterm_mod_ptr_{nullptr};
483 const ResidueModification* fixed_cterm_mod_ptr_{nullptr};
484
486 std::array<std::vector<VarModEntry>, 128> variable_mod_table_{};
488 std::vector<VarModEntry> variable_nterm_mods_;
490 std::vector<VarModEntry> variable_cterm_mods_;
491
492 bool mod_tables_initialized_{false};
493
499 bool is_snes_mode_{false};
500
506 bool snes_enabled_{false};
507
510 std::vector<double> snes_sigma_delta_set_;
517
520 static std::array<double, 128> residue_mass_table_;
521 static std::once_flag mass_table_once_flag_;
523
/// Precomputed ion-type mass offsets (from Residue::getInternalTo*Ion formulas).
/// Every offset defaults to 0.0 until it is filled in.
struct IonOffsets
{
  double b_offset{0.0};
  double y_offset{0.0};
  double a_offset{0.0};
  double c_offset{0.0};
  double x_offset{0.0};
  double z_offset{0.0};
};
535
549 std::vector<Fragment>& fragments,
550 const char* sequence,
551 size_t seq_len,
552 UInt32 peptide_idx,
553 double n_term_mod_mass,
554 double c_term_mod_mass,
555 const double* residue_mod_masses) const;
556
578 std::vector<Fragment>& fragments,
579 const char* sequence,
580 size_t seq_len,
581 UInt32 peptide_idx,
582 double n_term_mod_mass,
583 double c_term_mod_mass,
584 const double* residue_mod_masses,
585 bool add_b,
586 bool add_a,
587 bool add_c,
588 bool add_y,
589 bool add_x,
590 bool add_z) const;
591
592 std::vector<Peptide> fi_peptides_;
593 std::vector<Fragment> fi_fragments_;
594
597 std::vector<uint32_t> protein_lengths_;
598
601 size_t min_ion_index_{0};
602 size_t bucketsize_;
603 std::vector<float> bucket_min_mz_;
604 double precursor_mass_tolerance_lower_{20.0};
605 double precursor_mass_tolerance_upper_{20.0};
606 bool precursor_mass_tolerance_unit_ppm_{true};
608 bool fragment_mz_tolerance_unit_ppm_{true};
609private:
610
611
653 void querySpectrumSNES_(const MSSpectrum& spectrum,
654 const std::vector<FASTAFile::FASTAEntry>& fasta_entries,
656
667 const MSSpectrum& spectrum,
668 const std::pair<size_t, size_t>& candidates_range,
669 const int16_t isotope_error,
670 const uint16_t precursor_charge);
680 float precursor_mass,
682 uint16_t charge);
683
687 void trimHits(SpectrumMatchesTopN& init_hits) const;
688
689 //since we work with TheoreticalSpectrumGenerator, we must transfer some of those member variables
696
697 // SpectrumGenerator-independent member variables
698 std::string digestion_enzyme_;
699 EnzymaticDigestion::Specificity enzyme_specificity_{EnzymaticDigestion::SPEC_FULL};
700
706
710
711 // Search Related member variables
712
720
722 bool isOpenSearchMode_() const noexcept
723 {
724 return isOpenSearchMode(precursor_mass_tolerance_lower_,
725 precursor_mass_tolerance_upper_,
726 precursor_mass_tolerance_unit_ppm_);
727 }
728
733 std::pair<float, float> computeMassWindow_(float precursor_mass) const;
734
735
736 };
737
738}
Representation of a peptide/protein sequence.
Definition AASequence.h:88
A base class for all classes handling default parameters.
Definition DefaultParamHandler.h:66
Specificity
when querying for valid digestion products, this determines if the specificity of the two peptide end...
Definition EnzymaticDigestion.h:42
Generates from a set of Fasta files a 2D-datastructure which stores all theoretical masses of all b a...
Definition FragmentIndex.h:35
void generateFragmentsForSeries_(std::vector< Fragment > &fragments, const char *sequence, size_t seq_len, UInt32 peptide_idx, double n_term_mod_mass, double c_term_mod_mass, const double *residue_mod_masses, bool add_b, bool add_a, bool add_c, bool add_y, bool add_x, bool add_z) const
size_t bucketsize_
number of fragments per outer node
Definition FragmentIndex.h:602
uint16_t min_matched_peaks_
PSMs with fewer hits are discarded.
Definition FragmentIndex.h:713
bool add_x_ions_
Definition FragmentIndex.h:694
void generateSNESMothers_(const std::vector< FASTAFile::FASTAEntry > &fasta_entries)
SNES-mode peptide enumeration: emit Single-N + Single-C mother peptides.
AASequence reconstructRealizedSubSequence(const Peptide &mother, const std::vector< FASTAFile::FASTAEntry > &fasta_entries, size_t realized_length, uint32_t subset_bitmask=0) const
const ResidueModification * mod_ptr
pointer to the modification (for AASequence reconstruction)
Definition FragmentIndex.h:428
bool add_a_ions_
Definition FragmentIndex.h:692
void querySpectrum(const MSSpectrum &spectrum, SpectrumMatchesTopN &sms)
Queries one complete experimental spectrum against the database. Loops over all precursor charges St...
bool add_b_ions_
Definition FragmentIndex.h:690
static bool isOpenSearchMode(double lower_magnitude, double upper_magnitude, bool unit_ppm) noexcept
Definition FragmentIndex.h:212
void queryPeaks(SpectrumMatchesTopN &candidates, const MSSpectrum &spectrum, const std::pair< size_t, size_t > &candidates_range, const int16_t isotope_error, const uint16_t precursor_charge)
queries peaks for a given experimental spectrum with a set range of potential peptides,...
static IonOffsets ion_offsets_
Definition FragmentIndex.h:534
size_t buildModSlots_(const char *sequence, size_t seq_len, ModSlot *out_slots, bool is_protein_nterm=false, bool is_protein_cterm=false) const
ResidueModification::TermSpecificity term_spec
where this mod can be applied
Definition FragmentIndex.h:429
static std::once_flag mass_table_once_flag_
Definition FragmentIndex.h:521
std::pair< float, float > computeMassWindow_(float precursor_mass) const
StringList modifications_fixed_
Modifications that are on all peptides.
Definition FragmentIndex.h:707
static bool isSingleCMother(uint32_t mod_bitmask) noexcept
Definition FragmentIndex.h:254
float fragment_mz_tolerance_
Definition FragmentIndex.h:607
bool add_y_ions_
Definition FragmentIndex.h:691
std::vector< Peptide > fi_peptides_
vector of all (digested) peptides
Definition FragmentIndex.h:592
std::vector< VarModEntry > variable_cterm_mods_
Pure C-terminal variable mods (not residue-specific)
Definition FragmentIndex.h:490
size_t missed_cleavages_
number of missed cleavages
Definition FragmentIndex.h:701
float fragment_min_mz_
smallest fragment mz
Definition FragmentIndex.h:599
uint16_t min_precursor_charge_
minimal possible precursor charge (usually always 1)
Definition FragmentIndex.h:716
uint32_t max_processed_hits_
The number of PSMs that will be used; the rest are filtered out.
Definition FragmentIndex.h:719
static bool isSingleNMother(uint32_t mod_bitmask) noexcept
Definition FragmentIndex.h:259
float peptide_max_mass_
Definition FragmentIndex.h:703
void querySpectrumSNES_(const MSSpectrum &spectrum, const std::vector< FASTAFile::FASTAEntry > &fasta_entries, SpectrumMatchesTopN &sms)
SNES-mode spectrum query (MetaMorpheus-style: byte-count + b-ion filter).
uint16_t max_fragment_charge_
The maximal possible charge of the fragments.
Definition FragmentIndex.h:718
std::pair< size_t, size_t > getPeptidesInMassWindow(float precursor_mass, const std::pair< float, float > &window) const
std::vector< double > snes_sigma_delta_set_with_prot_cterm_
Definition FragmentIndex.h:516
std::vector< Hit > query(const Peak1D &peak, const std::pair< size_t, size_t > &peptide_idx_range, uint16_t peak_charge)
Queries one peak.
void generateFragmentsLightweight_(std::vector< Fragment > &fragments, const char *sequence, size_t seq_len, UInt32 peptide_idx, double n_term_mod_mass, double c_term_mod_mass, const double *residue_mod_masses) const
bool isOpenSearchMode_() const noexcept
Instance delegate — same rule, reads the member bounds.
Definition FragmentIndex.h:722
std::vector< Fragment > fi_fragments_
vector of all theoretical fragments (b- and y- ions)
Definition FragmentIndex.h:593
void querySpectrum(const MSSpectrum &spectrum, const std::vector< FASTAFile::FASTAEntry > &fasta_entries, SpectrumMatchesTopN &sms)
Query a spectrum against the fragment index with FASTA context.
std::vector< double > snes_sigma_delta_set_
Definition FragmentIndex.h:510
int16_t max_isotope_error_
Maximal possible isotope error (both only used for closed search)
Definition FragmentIndex.h:715
float fragment_max_mz_
largest fragment mz
Definition FragmentIndex.h:600
std::string digestion_enzyme_
Definition FragmentIndex.h:698
bool isBuild() const
Indicates whether the fragment index has been built.
size_t peptide_max_length_
Definition FragmentIndex.h:705
~FragmentIndex() override=default
Default destructor.
std::vector< double > snes_sigma_delta_set_with_prot_nterm_
Definition FragmentIndex.h:513
void generatePeptides(const std::vector< FASTAFile::FASTAEntry > &fasta_entries)
Generates all peptides from given fasta entries. If Bottom-up is set to false skips digestion....
std::vector< float > bucket_min_mz_
vector of the smallest fragment mz of each bucket
Definition FragmentIndex.h:603
double delta_mass
mass delta from this modification
Definition FragmentIndex.h:427
float peptide_min_mass_
Definition FragmentIndex.h:702
void searchDifferentPrecursorRanges(const MSSpectrum &spectrum, float precursor_mass, SpectrumMatchesTopN &sms, uint16_t charge)
If closed search loops over all isotope errors. For each iteration loop over all peaks with queryPeak...
int16_t min_isotope_error_
Minimal possible isotope error.
Definition FragmentIndex.h:714
uint16_t max_precursor_charge_
maximal possible precursor charge
Definition FragmentIndex.h:717
std::vector< uint32_t > protein_lengths_
Definition FragmentIndex.h:597
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
void clear()
Delete fragment index. Sets is_build=false.
StringList modifications_variable_
Variable modifications -> all possible combinations are created.
Definition FragmentIndex.h:708
size_t max_variable_mods_per_peptide_
Definition FragmentIndex.h:709
void build(const std::vector< FASTAFile::FASTAEntry > &fasta_entries)
Given a set of Fasta files, builds the Fragment Index datastructure (FID). First all fragments are so...
int realizeSNESLength(const Peptide &mother, const std::vector< FASTAFile::FASTAEntry > &fasta_entries, double target_mh_plus, double tolerance_lower_magnitude, double tolerance_upper_magnitude, bool tolerance_ppm) const
Find the realized sub-peptide length of a SNES mother that best matches the observed precursor mass.
bool isSnesMode() const noexcept
Definition FragmentIndex.h:269
void trimHits(SpectrumMatchesTopN &init_hits) const
places the k-largest elements in the front of the input array. Inside of the k-largest elements and o...
const std::vector< Peptide > & getPeptides() const
Returns a reference to the internal peptide container.
AASequence reconstructModifiedSequence(const Peptide &peptide, const std::vector< FASTAFile::FASTAEntry > &fasta_entries) const
Reconstruct a fully modified AASequence from a Peptide's bitmask.
std::vector< VarModEntry > variable_nterm_mods_
Pure N-terminal variable mods (not residue-specific)
Definition FragmentIndex.h:488
size_t peptide_min_length_
Definition FragmentIndex.h:704
static void initResidueMassTable_()
bool add_c_ions_
Definition FragmentIndex.h:693
FragmentIndex()
Default constructor.
SnesAnchor
Definition FragmentIndex.h:246
bool add_z_ions_
Definition FragmentIndex.h:695
static std::array< double, 128 > residue_mass_table_
Definition FragmentIndex.h:520
std::vector< double > computeSnesSigmaDeltaSet_(bool include_prot_nterm_mods, bool include_prot_cterm_mods) const
Precomputed ion-type mass offsets (from Residue::getInternalTo*Ion formulas)
Definition FragmentIndex.h:526
Match between a query peak and an entry in the DB.
Definition FragmentIndex.h:72
Entry in the per-AA variable modification lookup table.
Definition FragmentIndex.h:426
The representation of a 1D spectrum.
Definition MSSpectrum.h:44
A 1-dimensional raw data point or peak.
Definition Peak1D.h:30
Representation of a modification on an amino acid residue.
Definition ResidueModification.h:55
TermSpecificity
Position where the modification is allowed to occur.
Definition ResidueModification.h:74
uint32_t UInt32
Unsigned integer type (32bit)
Definition Types.h:33
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition Types.h:97
std::vector< std::string > StringList
Vector of String.
Definition ListUtils.h:44
Main OpenMS namespace.
Definition openswathalgo/include/OpenMS/OPENSWATHALGO/DATAACCESS/ISpectrumAccess.h:19
One entry in the fragment index.
Definition FragmentIndex.h:379
Fragment(UInt32 peptide_idx, float fragment_mz)
Definition FragmentIndex.h:381
Definition FragmentIndex.h:276
UInt32 peptide_idx
Definition FragmentIndex.h:281
Hit(UInt32 peptide_idx, float fragment_mz)
Definition FragmentIndex.h:277
float fragment_mz
Definition FragmentIndex.h:282
A candidate modification slot for a specific peptide.
Definition FragmentIndex.h:438
const ResidueModification * mod_ptr
for AASequence reconstruction
Definition FragmentIndex.h:441
uint16_t position
residue index, or NTERM_SLOT/CTERM_SLOT
Definition FragmentIndex.h:439
double delta_mass
mass delta
Definition FragmentIndex.h:440
Compact descriptor of a peptide instance held by the FragmentIndex.
Definition FragmentIndex.h:52
std::pair< uint16_t, uint16_t > sequence_
{start, length} within the source protein sequence (start is 0-based; length in residues)
Definition FragmentIndex.h:64
uint32_t mod_bitmask_
Bitmask of active variable mod slots (0 = unmodified/fixed-only; up to 32 slots)
Definition FragmentIndex.h:63
UInt32 protein_idx
0-based index into FASTA entries provided to build(); identifies the source protein
Definition FragmentIndex.h:62
float precursor_mz_
Mono-isotopic m/z at charge 1 (M+H)+ of this peptide; used for sorting/filtering.
Definition FragmentIndex.h:65
Peptide(UInt32 protein_idx, uint32_t mod_bitmask, std::pair< uint16_t, uint16_t > sequence, float precursor_mz)
Definition FragmentIndex.h:55
container for SpectrumMatch. Also keeps count of total number of candidates and total number of match...
Definition FragmentIndex.h:86
SpectrumMatchesTopN & operator+=(const SpectrumMatchesTopN &other)
Appends one SpectrumMatchesTopN to another one. Adds the number of all matched peaks up....
Definition FragmentIndex.h:98
void clear()
Definition FragmentIndex.h:105
std::vector< SpectrumMatch > hits_
The preliminary candidates.
Definition FragmentIndex.h:87