OpenMS  2.7.0
SvmTheoreticalSpectrumGenerator.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Sandro Andreotti $
33 // --------------------------------------------------------------------------
34 
35 
36 #pragma once
37 
38 #include <OpenMS/config.h>
41 
46 
47 #include <boost/random/mersenne_twister.hpp>
48 
49 
50 
51 namespace OpenMS
52 {
71  class OPENMS_DLLAPI SvmTheoreticalSpectrumGenerator :
72  public DefaultParamHandler
73  {
75 public:
76 
82  struct IonType
83  {
87 
90  //Default constructor
91  IonType() :
92  residue((Residue::ResidueType) 0),
93  loss(),
94  charge(0)
95  {
96  }
97 
98  //Custom constructor
99  IonType(Residue::ResidueType local_residue, EmpiricalFormula local_loss = EmpiricalFormula(), Int local_charge = 1) :
100  residue(local_residue),
101  loss(local_loss),
102  charge(local_charge)
103  {
104  }
105 
106  //Copy constructor
107  IonType(const IonType & rhs) :
108  residue(rhs.residue),
109  loss(rhs.loss),
110  charge(rhs.charge)
111  {
112  }
113 
114  //Assignment operator
115  IonType & operator=(const IonType & rhs)
116  {
117  if (this != &rhs)
118  {
119  residue = rhs.residue;
120  loss = rhs.loss;
121  charge = rhs.charge;
122  }
123  return *this;
124  }
125 
126  bool operator<(const IonType & rhs) const
127  {
128  if (residue != rhs.residue)
129  return residue < rhs.residue;
130  else if (loss.toString() != rhs.loss.toString())
131  return loss.toString() < rhs.loss.toString();
132  else
133  return charge < rhs.charge;
134  }
135 
136  };
138 
141  {
142  typedef std::vector<svm_node> DescriptorSetType;
144  };
145 
146 
149  {
150  //pointers to the svm classification models (one per ion_type)
151  std::vector<boost::shared_ptr<SVMWrapper> > class_models;
152 
153  //pointers to the svm regression models (one per ion_type)
154  std::vector<boost::shared_ptr<SVMWrapper> > reg_models;
155 
156  //The intensity for each ion type for the SVC mode
157  std::map<Residue::ResidueType, double> static_intensities;
158 
159  //The selected primary IonTypes
160  std::vector<IonType> ion_types;
161 
162  //The selected secondary IonTypes
163  std::map<IonType, std::vector<IonType> > secondary_types;
164 
165  //The number of intensity levels
167 
168  //The number of regions for every spectrum
170 
171  //upper limits (required for scaling)
172  std::vector<double> feature_max;
173 
174  //lower limits (required for scaling)
175  std::vector<double> feature_min;
176 
177  //lower bound for scaling
179 
180  //upper bound for scaling
182 
183  //border values for binning secondary types intensity
184  std::vector<double> intensity_bin_boarders;
185 
186  //intensity values for binned secondary types intensity
187  std::vector<double> intensity_bin_values;
188 
189  //conditional probabilities for secondary types
190  std::map<std::pair<IonType, Size>, std::vector<std::vector<double> > > conditional_prob;
191  };
192 
193 
194 
200 
203 
206 
207 
211 
212 
214  void simulate(PeakSpectrum & spectrum, const AASequence & peptide, boost::random::mt19937_64& rng, Size precursor_charge);
215 
217  void load();
218 
220  const std::vector<IonType> & getIonTypes()
221  {
222  return mp_.ion_types;
223  }
224 
225 protected:
226  typedef std::map<IonType, double> IntensityMap;
227 
230 
233 
235  static std::map<String, Size> aa_to_index_;
236 
238  static std::map<String, double> hydrophobicity_;
239 
241  static std::map<String, double> helicity_;
242 
244  static std::map<String, double> basicity_;
245 
247  std::map<IonType, bool> hide_type_;
248 
// Scales value to the interval [lower, upper] given the minimal and maximal entries for a feature.
// NOTE(review): the inline definition of this function further down in this file names its
// parameters in the order (value, lower, upper, feature_min, feature_max). Arguments bind by
// position, so the defaults -1.0 / 1.0 declared here end up in the definition's
// feature_min / feature_max -- confirm which parameter order is intended.
250  inline void scaleSingleFeature_(double & value, double feature_min, double feature_max, double lower = -1.0, double upper = 1.0);
251 
253  void scaleDescriptorSet_(DescriptorSet & desc, double lower, double upper);
254 
256  Size generateDescriptorSet_(AASequence peptide, Size position, IonType type, Size precursor_charge, DescriptorSet & desc_set);
257 
260 
262  static void initializeMaps_();
263 
265  static bool initializedMaps_;
266 
267  void updateMembers_() override;
268  };
269 
270  void inline SvmTheoreticalSpectrumGenerator::scaleSingleFeature_(double & value, double lower, double upper, double feature_min, double feature_max)
271  {
272  double prev = value;
273  if (feature_max == feature_min)
274  {
275  return;
276  }
277 
278  if (value <= feature_min)
279  {
280  value = lower;
281  }
282  else if (value >= feature_max)
283  {
284  value = upper;
285  }
286  else
287  {
288  value = lower + (upper - lower) *
289  (value - feature_min) /
290  (feature_max - feature_min);
291  }
292 
293  if (value < 0)
294  {
295  std::cerr << "negative value!! " << value << " l: " << lower << " u: " << upper << " fm: " << feature_min << " fma: " << feature_max << " prev: " << prev << std::endl;
296  }
297  }
298 
299 } // namespace OpenMS
300 
Representation of a peptide/protein sequence.
Definition: AASequence.h:112
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
Representation of an empirical formula.
Definition: EmpiricalFormula.h:83
String toString() const
returns the formula as a string (charges are not included)
The representation of a 1D spectrum.
Definition: MSSpectrum.h:71
Representation of a residue.
Definition: Residue.h:63
ResidueType
Definition: Residue.h:152
A more convenient string class.
Definition: String.h:61
Train SVM models that are used by SvmTheoreticalSpectrumGenerator.
Definition: SvmTheoreticalSpectrumGeneratorTrainer.h:67
Simulates MS2 spectra with support vector machines.
Definition: SvmTheoreticalSpectrumGenerator.h:73
void scaleSingleFeature_(double &value, double feature_min, double feature_max, double lower=-1.0, double upper=1.0)
Scales value to the interval [lower, upper] given the minimal and maximal entries for a feature.
Definition: SvmTheoreticalSpectrumGenerator.h:270
double scaling_upper
Definition: SvmTheoreticalSpectrumGenerator.h:181
String ResidueTypeToString_(Residue::ResidueType type)
Returns the ResidueType (e.g. AIon, BIon) as string for peak annotation.
static std::map< String, double > basicity_
basicity values for each AA
Definition: SvmTheoreticalSpectrumGenerator.h:244
std::vector< svm_node > DescriptorSetType
Definition: SvmTheoreticalSpectrumGenerator.h:142
void simulate(PeakSpectrum &spectrum, const AASequence &peptide, boost::random::mt19937_64 &rng, Size precursor_charge)
Generate the MS/MS according to the given probabilistic model.
std::vector< double > intensity_bin_values
Definition: SvmTheoreticalSpectrumGenerator.h:187
std::map< Residue::ResidueType, double > static_intensities
Definition: SvmTheoreticalSpectrumGenerator.h:157
SvmModelParameterSet mp_
set of model parameters read from model file
Definition: SvmTheoreticalSpectrumGenerator.h:232
Size number_regions
Definition: SvmTheoreticalSpectrumGenerator.h:169
std::vector< double > feature_max
Definition: SvmTheoreticalSpectrumGenerator.h:172
std::vector< boost::shared_ptr< SVMWrapper > > reg_models
Definition: SvmTheoreticalSpectrumGenerator.h:154
double scaling_lower
Definition: SvmTheoreticalSpectrumGenerator.h:178
DescriptorSetType descriptors
Definition: SvmTheoreticalSpectrumGenerator.h:143
std::map< std::pair< IonType, Size >, std::vector< std::vector< double > > > conditional_prob
Definition: SvmTheoreticalSpectrumGenerator.h:190
static std::map< String, Size > aa_to_index_
map AA to integers
Definition: SvmTheoreticalSpectrumGenerator.h:235
Size generateDescriptorSet_(AASequence peptide, Size position, IonType type, Size precursor_charge, DescriptorSet &desc_set)
generate the descriptors for an input peptide and a given fragmentation position
std::map< IonType, std::vector< IonType > > secondary_types
Definition: SvmTheoreticalSpectrumGenerator.h:163
void load()
Loads the trained SVM and probabilistic models.
const std::vector< IonType > & getIonTypes()
return the set of ion types that are modeled by the loaded SVMs
Definition: SvmTheoreticalSpectrumGenerator.h:220
void scaleDescriptorSet_(DescriptorSet &desc, double lower, double upper)
Scales the values of a descriptor set to the interval [lower, upper] given the minimal and maximal entries for each feature.
std::vector< double > feature_min
Definition: SvmTheoreticalSpectrumGenerator.h:175
SvmTheoreticalSpectrumGenerator(const SvmTheoreticalSpectrumGenerator &source)
Copy constructor.
std::vector< double > intensity_bin_boarders
Definition: SvmTheoreticalSpectrumGenerator.h:184
static std::map< String, double > hydrophobicity_
hydrophobicity values for each AA
Definition: SvmTheoreticalSpectrumGenerator.h:238
static bool initializedMaps_
flag to indicate if the hydrophobicity, helicity, and basicity maps were already initialized
Definition: SvmTheoreticalSpectrumGenerator.h:265
static void initializeMaps_()
Initializes the maps.
Size precursor_charge_
charge of the precursors used for training
Definition: SvmTheoreticalSpectrumGenerator.h:229
Size number_intensity_levels
Definition: SvmTheoreticalSpectrumGenerator.h:166
SvmTheoreticalSpectrumGenerator & operator=(const SvmTheoreticalSpectrumGenerator &tsg)
Assignment operator.
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
std::map< IonType, double > IntensityMap
Definition: SvmTheoreticalSpectrumGenerator.h:226
static std::map< String, double > helicity_
helicity values for each AA
Definition: SvmTheoreticalSpectrumGenerator.h:241
std::vector< boost::shared_ptr< SVMWrapper > > class_models
Definition: SvmTheoreticalSpectrumGenerator.h:151
~SvmTheoreticalSpectrumGenerator() override
Destructor.
std::map< IonType, bool > hide_type_
whether ion types are hidden or not
Definition: SvmTheoreticalSpectrumGenerator.h:247
SvmTheoreticalSpectrumGenerator()
Default constructor.
std::vector< IonType > ion_types
Definition: SvmTheoreticalSpectrumGenerator.h:160
A set of descriptors for a single training row.
Definition: SvmTheoreticalSpectrumGenerator.h:141
Simple container storing the model parameters required for simulation.
Definition: SvmTheoreticalSpectrumGenerator.h:149
int Int
Signed integer type.
Definition: Types.h:102
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Size< TNeedle >::Type position(const PatternAuxData< TNeedle > &dh)
Definition: AhoCorasickAmbiguous.h:563
nested class
Definition: SvmTheoreticalSpectrumGenerator.h:83
Int charge
Definition: SvmTheoreticalSpectrumGenerator.h:86
IonType()
Definition: SvmTheoreticalSpectrumGenerator.h:91
bool operator<(const IonType &rhs) const
Definition: SvmTheoreticalSpectrumGenerator.h:126
IonType & operator=(const IonType &rhs)
Definition: SvmTheoreticalSpectrumGenerator.h:115
IonType(const IonType &rhs)
Definition: SvmTheoreticalSpectrumGenerator.h:107
EmpiricalFormula loss
Definition: SvmTheoreticalSpectrumGenerator.h:85
IonType(Residue::ResidueType local_residue, EmpiricalFormula local_loss=EmpiricalFormula(), Int local_charge=1)
Definition: SvmTheoreticalSpectrumGenerator.h:99
Residue::ResidueType residue
Definition: SvmTheoreticalSpectrumGenerator.h:84