OpenMS  3.0.0
FLASHDeconvAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Kyowon Jeong, Jihyung Kim $
32 // $Authors: Kyowon Jeong, Jihyung Kim $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
44 #include <iostream>
45 #include <boost/dynamic_bitset.hpp>
46 
47 namespace OpenMS
48 {
59  class OPENMS_DLLAPI FLASHDeconvAlgorithm :
60  public DefaultParamHandler
61  {
62  public:
65 
68 
71 
74 
77 
87  const std::vector<DeconvolvedSpectrum>& survey_scans,
88  const int scan_number,
89  const bool write_detail,
90  const std::map<int, std::vector<std::vector<double>>>& precursor_map_for_FLASHIda);
91 
96 
99 
102 
104  void setTargetMasses(const std::vector<double>& masses);
105 
109  void calculateAveragine(const bool use_RNA_averagine);
110 
111  void addExcludedMonoMass(const double m);
112 
114 
116  static int getNominalMass(const double mass);
117 
126  static float getCosine(const std::vector<float>& a,
127  int a_start,
128  int a_end,
129  const IsotopeDistribution& b,
130  int b_size,
131  int offset);
132 
143  static float getIsotopeCosineAndDetermineIsotopeIndex(const double mono_mass,
144  const std::vector<float>& per_isotope_intensities,
145  int& offset,
146  int& second_best_iso_offset,
148  int window_width = -1, int allowed_iso_error = 1);
149 
151  void setDecoyFlag(int flag);
152 
153  protected:
154  void updateMembers_() override;
155 
156  private:
158 
160  const static int min_iso_size_ = 2;
161 
163  int allowed_iso_error_ = 1;
164 
166  double min_rt_, max_rt_;
168  double min_mz_, max_mz_;
170  int min_abs_charge_, max_abs_charge_;
174  bool write_detail_ = false;
176  double min_mass_, max_mass_;
188  const IntList min_support_peak_count_ = {3,3,3,3,3,3,3,3};
196  //IntList max_mass_count_;
197 
199  int decoy_run_flag_ = 0;
200 
201  static const int charge_decoy_ = 1;
202  static const int noise_decoy_ = 2;
203  static const int isotope_decoy_ = 3;
204 
208  std::vector<std::vector<Size>> prev_mass_bins_ms1_;
209  std::vector<std::map<int, std::vector<Size>>> prev_mass_bins_ms2_;
210  std::vector<double> prev_rts_ms1_;
211  std::vector<double> prev_rts_ms2_;
212 
214  boost::dynamic_bitset<> target_mass_bins_;
215  std::vector<double> target_masses_;
216 
218  boost::dynamic_bitset<> excluded_mass_bins_;
219  std::vector<double> excluded_masses_;
220 
222  const std::vector<int> harmonic_charges_{2, 3, 5, 7};
224  std::vector<LogMzPeak> log_mz_peaks_;
230  boost::dynamic_bitset<> mass_bins_;
232  boost::dynamic_bitset<> mz_bins_;
234  boost::dynamic_bitset<> mz_bins_for_edge_effect_;
235 
237  std::vector<double> filter_;
240 
243 
245  std::vector<int> bin_offsets_;
248 
252 
255 
257  const int low_charge_ = 6; //10 inclusive
258 
261 
263  const int max_peak_count_ = 30000;//30000
264 
271  static double getBinValue_(const Size bin, const double min_value, const double bin_width);
272 
279  static Size getBinNumber_(const double value, const double min_value, const double bin_width);
280 
282  void updateLogMzPeaks_(const MSSpectrum *spec);
283 
288  void updateMzBins_(const Size bin_number, std::vector<float>& mz_bin_intensities);
289 
292 
293 
296 
301  Matrix<int> updateMassBins_(const std::vector<float>& mz_intensities);
302 
307  Matrix<int> filterMassBins_(const std::vector<float>& mass_intensities);
308 
313  void updateCandidateMassBins_(std::vector<float>& mass_intensities, const std::vector<float>& mz_intensities);
314 
318  void getCandidatePeakGroups_(const Matrix<int>& per_mass_abs_charge_ranges);
319 
321  void setFilters_();
322 
325 
327 
329  void removeOverlappingPeakGroups_(DeconvolvedSpectrum& dpec, const double tol, const int iso_length = 1);
330 
332  void filterPeakGroupsByIsotopeCosine_(const int current_max_mass_count);
333 
342  bool registerPrecursor(const std::vector<DeconvolvedSpectrum>& survey_scans,
343  const std::map<int, std::vector<std::vector<double>>>& precursor_map_for_real_time_acquisition);
344 
345  };
346 }
A class representing a deconvolved spectrum. DeconvolvedSpectrum consists of PeakGroups representing ...
Definition: DeconvolvedSpectrum.h:56
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
FLASHDeconv algorithm: ultrafast mass deconvolution algorithm for top down mass spectrometry dataset ...
Definition: FLASHDeconvAlgorithm.h:61
static int getNominalMass(const double mass)
convert double to nominal mass
std::vector< double > excluded_masses_
Definition: FLASHDeconvAlgorithm.h:219
DeconvolvedSpectrum & getDeconvolvedSpectrum()
return deconvolved spectrum
std::vector< double > prev_rts_ms2_
Definition: FLASHDeconvAlgorithm.h:211
FLASHDeconvAlgorithm()
default constructor
const PrecalculatedAveragine & getAveragine()
get calculated averagine. This should be called after calculateAveragine is called.
std::vector< double > filter_
This stores the "universal pattern".
Definition: FLASHDeconvAlgorithm.h:237
void calculateAveragine(const bool use_RNA_averagine)
precalculate averagine (for predefined mass bins) to speed up averagine generation
Matrix< int > updateMassBins_(const std::vector< float > &mz_intensities)
Update mass_bins_. It selects candidate mass bins using the universal pattern, eliminates possible harm...
DeconvolvedSpectrum deconvolved_spectrum_
deconvolved_spectrum_ stores the deconvolved mass peak groups
Definition: FLASHDeconvAlgorithm.h:226
double max_mz_
Definition: FLASHDeconvAlgorithm.h:168
double current_max_mass_
max mass is controlled by precursor mass for MSn n>1; otherwise just max_mass
Definition: FLASHDeconvAlgorithm.h:182
FLASHDeconvHelperStructs::LogMzPeak LogMzPeak
Definition: FLASHDeconvAlgorithm.h:64
boost::dynamic_bitset excluded_mass_bins_
mass bins that are excluded for decoy
Definition: FLASHDeconvAlgorithm.h:218
bool is_positive_
is positive mode
Definition: FLASHDeconvAlgorithm.h:172
void updateCandidateMassBins_(std::vector< float > &mass_intensities, const std::vector< float > &mz_intensities)
Subfunction of updateMassBins_. It selects candidate masses and updates mass_bins_ using the universal ...
static Size getBinNumber_(const double value, const double min_value, const double bin_width)
static function that converts value to bin
int current_min_charge_
current_min_charge_: 1 for MSn n>1; otherwise just min_abs_charge_
Definition: FLASHDeconvAlgorithm.h:178
FLASHDeconvAlgorithm(FLASHDeconvAlgorithm &&other)=default
move constructor
void addExcludedMonoMass(const double m)
void updateLogMzPeaks_(const MSSpectrum *spec)
generate log mz peaks from the input spectrum
std::vector< double > target_masses_
Definition: FLASHDeconvAlgorithm.h:215
void removeHarmonicsPeakGroups_(DeconvolvedSpectrum &dpec)
boost::dynamic_bitset target_mass_bins_
mass bins that are targeted for FLASHIda global targeting mode
Definition: FLASHDeconvAlgorithm.h:214
void setDecoyFlag(int flag)
set decoy_run_flag_
FLASHDeconvHelperStructs::PrecalculatedAveragine avg_
precalculated averagine distributions for fast averagine generation
Definition: FLASHDeconvAlgorithm.h:206
std::vector< std::map< int, std::vector< Size > > > prev_mass_bins_ms2_
Definition: FLASHDeconvAlgorithm.h:209
void updateMzBins_(const Size bin_number, std::vector< float > &mz_bin_intensities)
generate mz bins and intensity per mz bin from log mz peaks
Matrix< int > filterMassBins_(const std::vector< float > &mass_intensities)
Subfunction of updateMassBins_.
void setTargetMasses(const std::vector< double > &masses)
set targeted masses for targeted deconvolution. Masses are targeted in all ms levels
void removeOverlappingPeakGroups_(DeconvolvedSpectrum &dpec, const double tol, const int iso_length=1)
filter out overlapping masses
double mass_bin_min_value_
minimum mass and mz values representing the first bin of massBin and mzBin, respectively: to save mem...
Definition: FLASHDeconvAlgorithm.h:250
int current_max_charge_
current_max_charge_: controlled by precursor charge for MSn n>1; otherwise just max_abs_charge_
Definition: FLASHDeconvAlgorithm.h:180
double max_mass_
Definition: FLASHDeconvAlgorithm.h:176
void setAveragine(const PrecalculatedAveragine &avg)
set calculated averagine
std::vector< double > prev_rts_ms1_
Definition: FLASHDeconvAlgorithm.h:210
FLASHDeconvAlgorithm(const FLASHDeconvAlgorithm &)=default
copy constructor
static float getIsotopeCosineAndDetermineIsotopeIndex(const double mono_mass, const std::vector< float > &per_isotope_intensities, int &offset, int &second_best_iso_offset, const PrecalculatedAveragine &avg, int window_width=-1, int allowed_iso_error=1)
Examine intensity distribution over isotope indices. Also determines the most plausible isotope index...
double max_rt_
Definition: FLASHDeconvAlgorithm.h:166
DeconvolvedSpectrum & getDecoyDeconvolvedSpectrum()
return decoy deconvolved spectrum
DeconvolvedSpectrum decoy_deconvolved_spectrum_
decoy_deconvolved_spectrum_ stores the deconvolved decoy mass peak groups
Definition: FLASHDeconvAlgorithm.h:228
DoubleList bin_width_
bin size for first stage of mass selection - for fast convolution, binning is used
Definition: FLASHDeconvAlgorithm.h:192
FLASHDeconvAlgorithm & operator=(const FLASHDeconvAlgorithm &fd)=default
assignment operator
double isolation_window_size_
default precursor isolation window size.
Definition: FLASHDeconvAlgorithm.h:260
void performSpectrumDeconvolution(const MSSpectrum &spec, const std::vector< DeconvolvedSpectrum > &survey_scans, const int scan_number, const bool write_detail, const std::map< int, std::vector< std::vector< double >>> &precursor_map_for_FLASHIda)
main deconvolution function that generates the deconvolved and decoy deconvolved spectrum from the or...
DoubleList min_isotope_cosine_
cosine threshold between observed and theoretical isotope patterns for each MS level
Definition: FLASHDeconvAlgorithm.h:194
Matrix< int > harmonic_bin_offset_matrix_
This stores the patterns for harmonic reduction in binned dimension.
Definition: FLASHDeconvAlgorithm.h:247
boost::dynamic_bitset mz_bins_for_edge_effect_
mz_bins_for_edge_effect_ stores the binned log mz peaks, considering edge effect
Definition: FLASHDeconvAlgorithm.h:234
std::vector< std::vector< Size > > prev_mass_bins_ms1_
The data structures for spectra overlapping.
Definition: FLASHDeconvAlgorithm.h:208
bool registerPrecursor(const std::vector< DeconvolvedSpectrum > &survey_scans, const std::map< int, std::vector< std::vector< double >>> &precursor_map_for_real_time_acquisition)
register the precursor peak as well as the precursor peak group (or mass) if possible for MSn (n>1) s...
double intensity_threshold_
peak intensity threshold subject to analysis
Definition: FLASHDeconvAlgorithm.h:186
static double getBinValue_(const Size bin, const double min_value, const double bin_width)
static function that converts bin to value
boost::dynamic_bitset mass_bins_
mass_bins_ stores the selected bins for this spectrum + overlapped spectra (a few previous spectra).
Definition: FLASHDeconvAlgorithm.h:230
std::vector< int > bin_offsets_
This stores the "universal pattern" in binned dimension.
Definition: FLASHDeconvAlgorithm.h:245
int ms_level_
current ms Level
Definition: FLASHDeconvAlgorithm.h:254
static float getCosine(const std::vector< float > &a, int a_start, int a_end, const IsotopeDistribution &b, int b_size, int offset)
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
Matrix< double > harmonic_filter_matrix_
This stores the patterns for harmonic reduction.
Definition: FLASHDeconvAlgorithm.h:239
void filterPeakGroupsByIsotopeCosine_(const int current_max_mass_count)
Filter out masses with low isotope cosine scores, only retaining current_max_mass_count masses.
double iso_da_distance_
isotope dalton distance
Definition: FLASHDeconvAlgorithm.h:242
boost::dynamic_bitset mz_bins_
mz_bins_ stores the binned log mz peaks
Definition: FLASHDeconvAlgorithm.h:232
std::vector< LogMzPeak > log_mz_peaks_
Stores log mz peaks.
Definition: FLASHDeconvAlgorithm.h:224
void unionPrevMassBins_()
this function takes the previous deconvolution results (from overlapped spectra) for sensitive deconvo...
double current_min_mass_
min mass is min_mass for MS1 and 50 for MS2
Definition: FLASHDeconvAlgorithm.h:184
void scoreAndFilterPeakGroups_()
function for peak group scoring and filtering
double mz_bin_min_value_
Definition: FLASHDeconvAlgorithm.h:251
FLASHDeconvHelperStructs::PrecalculatedAveragine PrecalculatedAveragine
Definition: FLASHDeconvAlgorithm.h:63
DoubleList tolerance_
tolerance in ppm for each MS level
Definition: FLASHDeconvAlgorithm.h:190
void setFilters_()
Make the universal pattern.
void generatePeakGroupsFromSpectrum_()
Generate peak groups from the input spectrum.
void getCandidatePeakGroups_(const Matrix< int > &per_mass_abs_charge_ranges)
For selected masses in mass_bins_, select the peaks from the original spectrum. Also isotopic peaks a...
int max_abs_charge_
Definition: FLASHDeconvAlgorithm.h:170
log transformed peak. After deconvolution, all necessary information from deconvolution such as charg...
Definition: FLASHDeconvHelperStructs.h:192
Averagine patterns pre-calculated for speed up. Other variables are also calculated for fast cosine c...
Definition: FLASHDeconvHelperStructs.h:60
Definition: IsotopeDistribution.h:65
The representation of a 1D spectrum.
Definition: MSSpectrum.h:70
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:55
std::vector< double > DoubleList
Vector of double precision real types.
Definition: ListUtils.h:62
FLASHIda C++ to C# (or vice versa) bridge functions. The functions here are called in C# to invoke fun...
Definition: FeatureDeconvolution.h:48
static FLASHDeconvHelperStructs::PrecalculatedAveragine avg
keeps the precalculated averagine to calculate average masses from monoisotopic masses
Definition: FLASHIdaBridgeFunctions.h:81