OpenMS  2.7.0
AccurateMassSearchEngine.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Erhan Kenar, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
38 #include <OpenMS/KERNEL/Feature.h>
42 #include <OpenMS/FORMAT/MzTab.h>
47 #include <OpenMS/SYSTEM/File.h>
49 
50 #include <iosfwd>
51 #include <vector>
52 
53 namespace OpenMS
54 {
55  class EmpiricalFormula;
56 
/// Describes a single adduct (name, adduct formula, charge, molecule multiplier)
/// and converts between an observed m/z and the neutral mass of the molecule.
/// NOTE(review): this listing hides some declarations (e.g. getMolMultiplier(),
/// getEmpiricalFormula(), name_, ef_, mol_multiplier_ appear only in the page's
/// member tooltips) - consult the real header for the complete class.
57  class OPENMS_DLLAPI AdductInfo
58  {
59 
60  public:
    /// Construct from the adduct's @p name, its empirical formula @p adduct,
    /// its @p charge and the molecule multiplier @p mol_multiplier (defaults to 1).
71  AdductInfo(const String& name, const EmpiricalFormula& adduct, int charge, UInt mol_multiplier = 1);
72 
    /// returns the neutral mass of the small molecule without adduct for the given @p observed_mz
74  double getNeutralMass(double observed_mz) const;
75 
    /// returns the m/z of the small molecule with neutral mass @p neutral_mass if the adduct is added
77  double getMZ(double neutral_mass) const;
78 
    /// NOTE(review): presumably checks whether this adduct can be applied to the
    /// given database formula @p db_entry - confirm against the implementation.
81  bool isCompatible(EmpiricalFormula db_entry) const;
82 
    /// get charge of adduct
84  int getCharge() const;
85 
    /// original string used for parsing
87  const String& getName() const;
88 
91 
94 
    /// Creates an AdductInfo from its string representation @p adduct.
    /// NOTE(review): the expected syntax is presumably of the form '2M+H;+1'
    /// (the example used in the member tooltips) - confirm.
98  static AdductInfo parseAdductString(const String& adduct);
99 
100  private:
103 
     /// computed from ef_.getMonoWeight(), but stored explicitly for efficiency
107  double mass_;
     /// negative or positive charge; must not be 0
108  int charge_;
110  };
111 
/// Stored information/results of one database query: observed and theoretical
/// m/z, query and found mass, charge, ppm error, RT, intensities, adduct,
/// formula and matching HMDB ids, exposed through the getters/setters below.
/// NOTE(review): constructors, destructor, assignment and several members are
/// hidden by this listing (they appear only in the page's member tooltips).
112  class OPENMS_DLLAPI AccurateMassSearchResult
113  {
114  public:
117 
120 
123 
126 
     /// get the m/z of the small molecule + adduct
128  double getObservedMZ() const;
129 
     /// set the m/z of the small molecule + adduct
131  void setObservedMZ(const double&);
132 
     /// get the theoretical m/z of the small molecule + adduct
134  double getCalculatedMZ() const;
135 
     /// set the theoretical m/z of the small molecule + adduct
137  void setCalculatedMZ(const double&);
138 
     /// get the mass used to query the database (uncharged small molecule)
140  double getQueryMass() const;
141 
     /// set the mass used to query the database (uncharged small molecule)
143  void setQueryMass(const double&);
144 
     /// get the mass returned by the query (uncharged small molecule)
146  double getFoundMass() const;
147 
     /// set the mass returned by the query (uncharged small molecule)
149  void setFoundMass(const double&);
150 
     /// get the charge
152  Int getCharge() const;
153 
     /// set the charge
155  void setCharge(const Int&);
156 
     /// get the error between observed and theoretical m/z in ppm
158  double getMZErrorPPM() const;
159 
     /// set the error between observed and theoretical m/z in ppm
161  void setMZErrorPPM(const double);
162 
     /// get the observed rt
164  double getObservedRT() const;
165 
     /// set the observed rt
167  void setObservedRT(const double& rt);
168 
     /// get the observed intensity
170  double getObservedIntensity() const;
171 
     /// set the observed intensity
173  void setObservedIntensity(const double&);
174 
     /// get the observed intensities (returned by value, i.e. as a copy)
176  std::vector<double> getIndividualIntensities() const;
177 
     /// set the observed intensities
179  void setIndividualIntensities(const std::vector<double>&);
180 
     /// NOTE(review): presumably the index of the matching database entry - confirm.
182  void setMatchingIndex(const Size&);
183 
186 
     /// accessors for the adduct string of the hit
187  const String& getFoundAdduct() const;
188  void setFoundAdduct(const String&);
189 
     /// formula of the hit as a string
190  const String& getFormulaString() const;
192 
     /// accessors for the HMDB ids matching this result
193  const std::vector<String>& getMatchingHMDBids() const;
194  void setMatchingHMDBids(const std::vector<String>&);
195 
     /// return trace intensities of the underlying feature
197  const std::vector<double>& getMasstraceIntensities() const;
198  void setMasstraceIntensities(const std::vector<double>&);
199 
     /// accessors for the isotope similarity score
200  double getIsotopesSimScore() const;
201  void setIsotopesSimScore(const double&);
202 
203  // debug/output functions
204  friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AccurateMassSearchResult& amsr);
205 
206 private:
     /// Stored information/results of DB query.
208  double observed_mz_;
211  double db_mass_;
214  double observed_rt_;
216  std::vector<double> individual_intensities_;
219 
222  std::vector<String> matching_hmdb_ids_;
223 
224  std::vector<double> mass_trace_intensities_;
226  };
227 
/// Stream output operator for AccurateMassSearchResult (debug/output helper, also declared as friend of the class).
228  OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AccurateMassSearchResult& amsr);
229 
261  class OPENMS_DLLAPI AccurateMassSearchEngine :
262  public DefaultParamHandler,
263  public ProgressLogger
264  {
265 public:
266 
268  static const char* search_engine_identifier;
269 
272 
275 
281  void queryByMZ(const double& observed_mz, const Int& observed_charge, const String& ion_mode, std::vector<AccurateMassSearchResult>& results, const EmpiricalFormula& observed_adduct = EmpiricalFormula()) const;
282  void queryByFeature(const Feature& feature, const Size& feature_index, const String& ion_mode, std::vector<AccurateMassSearchResult>& results) const;
283  void queryByConsensusFeature(const ConsensusFeature& cfeat, const Size& cf_index, const Size& number_of_maps, const String& ion_mode, std::vector<AccurateMassSearchResult>& results) const;
284 
287  void run(FeatureMap&, MzTab&) const;
288 
292  void run(ConsensusMap&, MzTab&) const;
293 
295  void init();
296 
297 protected:
298  void updateMembers_() override;
299 
300 private:
302 
305  template <typename MAPTYPE> String resolveAutoMode_(const MAPTYPE& map) const
306  {
307  String ion_mode_internal;
308  String ion_mode_detect_msg = "";
309  if (map.size() > 0)
310  {
311  if (map[0].metaValueExists("scan_polarity"))
312  {
313  StringList pols = ListUtils::create<String>(String(map[0].getMetaValue("scan_polarity")), ';');
314  if (pols.size() == 1 && pols[0].size() > 0)
315  {
316  pols[0].toLower();
317  if (pols[0] == "positive" || pols[0] == "negative")
318  {
319  ion_mode_internal = pols[0];
320  OPENMS_LOG_INFO << "Setting auto ion-mode to '" << ion_mode_internal << "' for file " << File::basename(map.getLoadedFilePath()) << std::endl;
321  }
322  else ion_mode_detect_msg = String("Meta value 'scan_polarity' does not contain unknown ion mode") + String(map[0].getMetaValue("scan_polarity"));
323  }
324  else
325  {
326  ion_mode_detect_msg = String("ambiguous ion mode: ") + String(map[0].getMetaValue("scan_polarity"));
327  }
328  }
329  else
330  {
331  ion_mode_detect_msg = String("Meta value 'scan_polarity' not found in (Consensus-)Feature map");
332  }
333  }
334  else
335  { // do nothing, since map is
336  OPENMS_LOG_INFO << "Meta value 'scan_polarity' cannot be determined since (Consensus-)Feature map is empty!" << std::endl;
337  }
338 
339  if (ion_mode_detect_msg.size() > 0)
340  {
341  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Auto ionization mode could not resolve ion mode of data (") + ion_mode_detect_msg + "!");
342  }
343 
344  return ion_mode_internal;
345  }
346 
349  void parseAdductsFile_(const String& filename, std::vector<AdductInfo>& result);
350  void searchMass_(double neutral_query_mass, double diff_mass, std::pair<Size, Size>& hit_indices) const;
351 
353  void annotate_(const std::vector<AccurateMassSearchResult>&, BaseFeature&) const;
354 
357  double computeCosineSim_(const std::vector<double>& x, const std::vector<double>& y) const;
358 
359  double computeIsotopePatternSimilarity_(const Feature& feat, const EmpiricalFormula& form) const;
360 
361  typedef std::vector<std::vector<AccurateMassSearchResult> > QueryResultsTable;
362 
363  void exportMzTab_(const QueryResultsTable& overall_results, const Size number_of_maps, MzTab& mztab_out) const;
364 
366  typedef std::vector<std::vector<String> > MassIDMapping;
367  typedef std::map<String, std::vector<String> > HMDBPropsMapping;
368 
370  {
371  double mass;
372  std::vector<String> massIDs;
374  };
375  std::vector<MappingEntry_> mass_mappings_;
376 
377  struct CompareEntryAndMass_ // defined here to allow for inlining by compiler
378  {
379  double asMass(const MappingEntry_& v) const
380  {
381  return v.mass;
382  }
383 
384  double asMass(double t) const
385  {
386  return t;
387  }
388 
389  template <typename T1, typename T2>
390  bool operator()(T1 const& t1, T2 const& t2) const
391  {
392  return asMass(t1) < asMass(t2);
393  }
394 
395  };
396 
398 
400 
406 
409 
412 
413  std::vector<AdductInfo> pos_adducts_;
414  std::vector<AdductInfo> neg_adducts_;
415 
418 
420  };
421 
422 }
#define OPENMS_LOG_INFO
Macro to use when information, e.g. a status, should be reported.
Definition: LogStream.h:465
An algorithm to search for exact mass matches from a spectrum against a database (e....
Definition: AccurateMassSearchEngine.h:264
void init()
parse database and adduct files
void parseStructMappingFile_(const StringList &)
std::vector< std::vector< String > > MassIDMapping
private member variables
Definition: AccurateMassSearchEngine.h:366
String pos_adducts_fname_
Definition: AccurateMassSearchEngine.h:407
bool keep_unidentified_masses_
Definition: AccurateMassSearchEngine.h:419
void exportMzTab_(const QueryResultsTable &overall_results, const Size number_of_maps, MzTab &mztab_out) const
double mass
Definition: AccurateMassSearchEngine.h:371
void queryByConsensusFeature(const ConsensusFeature &cfeat, const Size &cf_index, const Size &number_of_maps, const String &ion_mode, std::vector< AccurateMassSearchResult > &results) const
AccurateMassSearchEngine()
Default constructor.
String mass_error_unit_
Definition: AccurateMassSearchEngine.h:403
StringList db_struct_file_
Definition: AccurateMassSearchEngine.h:411
~AccurateMassSearchEngine() override
Default destructor.
double mass_error_value_
parameter stuff
Definition: AccurateMassSearchEngine.h:402
void run(ConsensusMap &, MzTab &) const
void searchMass_(double neutral_query_mass, double diff_mass, std::pair< Size, Size > &hit_indices) const
void queryByMZ(const double &observed_mz, const Int &observed_charge, const String &ion_mode, std::vector< AccurateMassSearchResult > &results, const EmpiricalFormula &observed_adduct=EmpiricalFormula()) const
search for a specific observed mass by enumerating all possible adducts and search M+X against databa...
std::vector< std::vector< AccurateMassSearchResult > > QueryResultsTable
Definition: AccurateMassSearchEngine.h:361
void annotate_(const std::vector< AccurateMassSearchResult > &, BaseFeature &) const
add search results to a Consensus/Feature
static const char * search_engine_identifier
uses 'AccurateMassSearchEngine' as search engine id for protein and peptide ids which are generated b...
Definition: AccurateMassSearchEngine.h:268
bool iso_similarity_
Definition: AccurateMassSearchEngine.h:405
std::vector< AdductInfo > pos_adducts_
Definition: AccurateMassSearchEngine.h:413
String neg_adducts_fname_
Definition: AccurateMassSearchEngine.h:408
HMDBPropsMapping hmdb_properties_mapping_
Definition: AccurateMassSearchEngine.h:397
std::vector< String > massIDs
Definition: AccurateMassSearchEngine.h:372
String ion_mode_
Definition: AccurateMassSearchEngine.h:404
double computeIsotopePatternSimilarity_(const Feature &feat, const EmpiricalFormula &form) const
void parseMappingFile_(const StringList &)
String database_version_
Definition: AccurateMassSearchEngine.h:417
double computeCosineSim_(const std::vector< double > &x, const std::vector< double > &y) const
bool is_initialized_
true if init() was called without any subsequent param changes
Definition: AccurateMassSearchEngine.h:399
StringList db_mapping_file_
Definition: AccurateMassSearchEngine.h:410
std::vector< AdductInfo > neg_adducts_
Definition: AccurateMassSearchEngine.h:414
String database_name_
Definition: AccurateMassSearchEngine.h:416
String formula
Definition: AccurateMassSearchEngine.h:373
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
void run(FeatureMap &, MzTab &) const
String resolveAutoMode_(const MAPTYPE &map) const
private member functions
Definition: AccurateMassSearchEngine.h:305
std::map< String, std::vector< String > > HMDBPropsMapping
Definition: AccurateMassSearchEngine.h:367
void queryByFeature(const Feature &feature, const Size &feature_index, const String &ion_mode, std::vector< AccurateMassSearchResult > &results) const
void parseAdductsFile_(const String &filename, std::vector< AdductInfo > &result)
std::vector< MappingEntry_ > mass_mappings_
Definition: AccurateMassSearchEngine.h:375
Definition: AccurateMassSearchEngine.h:370
Definition: AccurateMassSearchEngine.h:113
const std::vector< String > & getMatchingHMDBids() const
std::vector< double > mass_trace_intensities_
Definition: AccurateMassSearchEngine.h:224
std::vector< double > getIndividualIntensities() const
get the observed intensities
void setMasstraceIntensities(const std::vector< double > &)
std::vector< String > matching_hmdb_ids_
Definition: AccurateMassSearchEngine.h:222
double getObservedMZ() const
get the m/z of the small molecule + adduct
std::vector< double > individual_intensities_
Definition: AccurateMassSearchEngine.h:216
AccurateMassSearchResult & operator=(const AccurateMassSearchResult &)
assignment operator
double getFoundMass() const
get the mass returned by the query (uncharged small molecule)
void setQueryMass(const double &)
set the mass used to query the database (uncharged small molecule)
const String & getFoundAdduct() const
double observed_intensity_
Definition: AccurateMassSearchEngine.h:215
void setIsotopesSimScore(const double &)
void setSourceFeatureIndex(const Size &)
const String & getFormulaString() const
double searched_mass_
Definition: AccurateMassSearchEngine.h:210
void setFoundMass(const double &)
set the mass returned by the query (uncharged small molecule)
double theoretical_mz_
Definition: AccurateMassSearchEngine.h:209
AccurateMassSearchResult()
Default constructor.
double getMZErrorPPM() const
get the error between observed and theoretical m/z in ppm
double observed_rt_
Definition: AccurateMassSearchEngine.h:214
Int getCharge() const
get the charge
double isotopes_sim_score_
Definition: AccurateMassSearchEngine.h:225
~AccurateMassSearchResult()
Default destructor.
String empirical_formula_
Definition: AccurateMassSearchEngine.h:221
void setCharge(const Int &)
set the charge
friend std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
void setFoundAdduct(const String &)
double getQueryMass() const
get the mass used to query the database (uncharged small molecule)
void setObservedMZ(const double &)
set the m/z of the small molecule + adduct
AccurateMassSearchResult(const AccurateMassSearchResult &)
copy constructor
void setEmpiricalFormula(const String &)
void setMZErrorPPM(const double)
set the error between observed and theoretical m/z in ppm
double getObservedRT() const
get the observed rt
double getObservedIntensity() const
get the observed intensity
String found_adduct_
Definition: AccurateMassSearchEngine.h:220
double observed_mz_
Stored information/results of DB query.
Definition: AccurateMassSearchEngine.h:208
double db_mass_
Definition: AccurateMassSearchEngine.h:211
void setMatchingIndex(const Size &)
Size source_feature_index_
Definition: AccurateMassSearchEngine.h:218
Size matching_index_
Definition: AccurateMassSearchEngine.h:217
void setObservedIntensity(const double &)
set the observed intensity
void setMatchingHMDBids(const std::vector< String > &)
double mz_error_ppm_
Definition: AccurateMassSearchEngine.h:213
const std::vector< double > & getMasstraceIntensities() const
return trace intensities of the underlying feature;
void setObservedRT(const double &rt)
set the observed rt
Int charge_
Definition: AccurateMassSearchEngine.h:212
void setCalculatedMZ(const double &)
set the theoretical m/z of the small molecule + adduct
void setIndividualIntensities(const std::vector< double > &)
set the observed intensities
double getCalculatedMZ() const
get the theoretical m/z of the small molecule + adduct
Definition: AccurateMassSearchEngine.h:58
AdductInfo(const String &name, const EmpiricalFormula &adduct, int charge, UInt mol_multiplier=1)
double mass_
computed from ef_.getMonoWeight(), but stored explicitly for efficiency
Definition: AccurateMassSearchEngine.h:107
EmpiricalFormula ef_
EF for the actual adduct e.g. 'H' in 2M+H;+1.
Definition: AccurateMassSearchEngine.h:106
const EmpiricalFormula & getEmpiricalFormula() const
EF of adduct itself. Useful for comparison with feature adduct annotation.
String name_
members
Definition: AccurateMassSearchEngine.h:105
UInt getMolMultiplier() const
get molecular multiplier (mono, dimer, trimer)
double getNeutralMass(double observed_mz) const
returns the neutral mass of the small molecule without adduct (creates monomer from nmer,...
int getCharge() const
get charge of adduct
static AdductInfo parseAdductString(const String &adduct)
UInt mol_multiplier_
Mol multiplier, e.g. 2 in 2M+H;+1.
Definition: AccurateMassSearchEngine.h:109
int charge_
negative or positive charge; must not be 0
Definition: AccurateMassSearchEngine.h:108
const String & getName() const
original string used for parsing
bool isCompatible(EmpiricalFormula db_entry) const
double getMZ(double neutral_mass) const
returns the m/z of the small molecule with neutral mass neutral_mass if the adduct is added (given ma...
AdductInfo()
hide default C'tor
A basic LC-MS feature.
Definition: BaseFeature.h:58
A consensus feature spanning multiple LC-MS/MS experiments.
Definition: ConsensusFeature.h:71
A container for consensus elements.
Definition: ConsensusMap.h:88
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
Representation of an empirical formula.
Definition: EmpiricalFormula.h:83
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:341
A container for features.
Definition: FeatureMap.h:105
An LC-MS feature.
Definition: Feature.h:72
static String basename(const String &file)
Data model of MzTab files. Please see the official MzTab specification at https://code....
Definition: MzTab.h:809
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:55
A more convenient string class.
Definition: String.h:61
int Int
Signed integer type.
Definition: Types.h:102
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
Definition: AccurateMassSearchEngine.h:378
bool operator()(T1 const &t1, T2 const &t2) const
Definition: AccurateMassSearchEngine.h:390
double asMass(double t) const
Definition: AccurateMassSearchEngine.h:384
double asMass(const MappingEntry_ &v) const
Definition: AccurateMassSearchEngine.h:379