OpenMS  2.4.0
SimpleSearchEngineAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2018.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg $
33 // --------------------------------------------------------------------------
34 
37 
41 
45 
47 
49 
50 // preprocessing and filtering
57 
59 #include <OpenMS/FORMAT/MzMLFile.h>
61 #include <OpenMS/FORMAT/MzMLFile.h>
62 
65 #include <OpenMS/KERNEL/Peak1D.h>
67 
69 
70 #include <map>
71 #include <algorithm>
72 
// Thread-count helper macro.
// When the compiler defines _OPENMP, NUMBER_OF_THREADS expands to a call to
// omp_get_num_threads(); otherwise it expands to the constant 1.
// NOTE(review): per the OpenMP API, omp_get_num_threads() reports the size of
// the *current* team, i.e. 1 when evaluated outside a parallel region. Confirm
// that every use site evaluates the macro inside a parallel region (or switch
// to omp_get_max_threads() if an upper bound is what is wanted).
73 #ifdef _OPENMP
74  #include <omp.h>
75  #define NUMBER_OF_THREADS (omp_get_num_threads())
76 #else
77  #define NUMBER_OF_THREADS (1)
78 #endif
79 
80 
81 namespace OpenMS
82 {
83 
// Algorithm class that searches spectra against a database (see search()).
// It inherits from DefaultParamHandler (parameter handling) and ProgressLogger
// (progress reporting).
// NOTE(review): this is a source-browser listing; several numbered lines are
// missing (e.g. 89, 91, 109-110, 112, 150-176), so not every declaration of the
// class is visible here.
84 class OPENMS_DLLAPI SimpleSearchEngineAlgorithm :
85  public DefaultParamHandler,
86  public ProgressLogger
87 {
88  public:
90 
 // Exit codes returned by search().
92  enum class ExitCodes
93  {
94  EXECUTION_OK,
95  INPUT_FILE_EMPTY,
96  UNEXPECTED_RESULT,
97  UNKNOWN_ERROR,
98  ILLEGAL_PARAMETERS
99  };
100 
101  // @brief search spectra against database
 // @param in_mzML   input spectra (presumably the path of an mzML file - confirm)
 // @param in_db     database to search against (presumably a file path - confirm)
 // @param prot_ids  protein identifications, passed by non-const reference (presumably filled by the search)
 // @param pep_ids   peptide identifications, passed by non-const reference (presumably filled by the search)
 // @return one of the ExitCodes values
102  ExitCodes search(const String& in_mzML,
103  const String& in_db,
104  std::vector<ProteinIdentification>& prot_ids,
105  std::vector<PeptideIdentification>& pep_ids) const;
106  protected:
 // Overrides the base-class hook; the definition is not part of this header listing.
107  void updateMembers_() override;
108 
 // NOTE(review): the listing jumps from line 108 to 111. The cross-reference
 // index at the end of this page places a "slimmer structure" for scored
 // candidates at line 110 and a `StringView sequence` member at line 112, so
 // the brace below appears to open the AnnotatedHit_ struct whose header line
 // is missing from the listing.
111  {
113  SignedSize peptide_mod_index; // enumeration index of the non-RNA peptide modification
114  double score = 0; // main score
 // peak annotations attached to this hit
115  std::vector<PeptideHit::PeakAnnotation> fragment_annotations;
 // Comparator: true if and only if a's score is strictly greater than b's
 // (presumably used to order hits from best to worst - confirm at call sites).
116  static bool hasBetterScore(const AnnotatedHit_& a, const AnnotatedHit_& b)
117  {
118  return a.score > b.score;
119  }
120  };
121 
122  // @brief filter, deisotope, decharge spectra
 // @param exp                               spectra, passed by non-const reference
 // @param fragment_mass_tolerance           fragment mass tolerance
 // @param fragment_mass_tolerance_unit_ppm  presumably true if the tolerance is given in ppm - confirm
123  static void preprocessSpectra_(PeakMap& exp, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm);
124 
125  // @brief filter and annotate search results
126  // most of the parameters are used to properly add meta data to the id objects
 // NOTE(review): precursor_mass_tolerance_unit_ppm and
 // fragment_mass_tolerance_unit_ppm are Strings here, whereas
 // preprocessSpectra_ takes fragment_mass_tolerance_unit_ppm as a bool;
 // the "_ppm" suffix on a String parameter looks misleading - confirm intent.
127  static void postProcessHits_(const PeakMap& exp,
128  std::vector<std::vector<SimpleSearchEngineAlgorithm::AnnotatedHit_> >& annotated_hits,
129  std::vector<ProteinIdentification>& protein_ids,
130  std::vector<PeptideIdentification>& peptide_ids,
131  Size top_hits,
132  const std::vector<ResidueModification>& fixed_modifications,
133  const std::vector<ResidueModification>& variable_modifications,
134  Size max_variable_mods_per_peptide,
135  const StringList& modifications_fixed,
136  const StringList& modifications_variable,
137  Int peptide_missed_cleavages,
138  double precursor_mass_tolerance,
139  double fragment_mass_tolerance,
140  const String& precursor_mass_tolerance_unit_ppm,
141  const String& fragment_mass_tolerance_unit_ppm,
142  const Int precursor_min_charge,
143  const Int precursor_max_charge,
144  const String& enzyme,
145  const String& database_name);
146 
147  // @brief helper to retrieve modifications by name
 // @param modNames  names of the modifications to look up
 // @return the matching ResidueModification objects
148  static std::vector<ResidueModification> getModifications_(const StringList& modNames);
149 
 // NOTE(review): listing lines 150-176 are not shown. According to the
 // cross-reference index at the end of this page they declare the data
 // members (precursor_mass_tolerance_, precursor_mass_tolerance_unit_,
 // precursor_min_charge_, precursor_max_charge_, precursor_isotopes_,
 // fragment_mass_tolerance_, fragment_mass_tolerance_unit_,
 // modifications_fixed_, modifications_variable_,
 // modifications_max_variable_mods_per_peptide_, enzyme_, peptide_min_size_,
 // peptide_max_size_, peptide_missed_cleavages_, peptide_motif_,
 // report_top_hits_).
152 
155 
157 
159 
161 
163 
165 
167 
169 
173 
175 
177 };
178 
179 } // namespace
180 
String precursor_mass_tolerance_unit_
Definition: SimpleSearchEngineAlgorithm.h:151
StringView sequence
Definition: SimpleSearchEngineAlgorithm.h:112
A more convenient string class.
Definition: String.h:58
Slimmer structure, used because storing all scored candidates in PeptideHit objects takes too much space.
Definition: SimpleSearchEngineAlgorithm.h:110
String peptide_motif_
Definition: SimpleSearchEngineAlgorithm.h:174
std::vector< Int > IntList
Vector of signed integers.
Definition: ListUtils.h:58
ptrdiff_t SignedSize
Signed Size type, e.g. used as a pointer difference.
Definition: Types.h:134
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
Size peptide_min_size_
Definition: SimpleSearchEngineAlgorithm.h:170
static bool hasBetterScore(const AnnotatedHit_ &a, const AnnotatedHit_ &b)
Definition: SimpleSearchEngineAlgorithm.h:116
String enzyme_
Definition: SimpleSearchEngineAlgorithm.h:168
std::vector< PeptideHit::PeakAnnotation > fragment_annotations
Definition: SimpleSearchEngineAlgorithm.h:115
double fragment_mass_tolerance_
Definition: SimpleSearchEngineAlgorithm.h:158
Size peptide_missed_cleavages_
Definition: SimpleSearchEngineAlgorithm.h:172
SignedSize peptide_mod_index
Definition: SimpleSearchEngineAlgorithm.h:113
double precursor_mass_tolerance_
Definition: SimpleSearchEngineAlgorithm.h:150
Size modifications_max_variable_mods_per_peptide_
Definition: SimpleSearchEngineAlgorithm.h:166
StringList modifications_variable_
Definition: SimpleSearchEngineAlgorithm.h:164
Size precursor_min_charge_
Definition: SimpleSearchEngineAlgorithm.h:153
Size precursor_max_charge_
Definition: SimpleSearchEngineAlgorithm.h:154
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:73
In-Memory representation of a mass spectrometry experiment.
Definition: MSExperiment.h:77
Size report_top_hits_
Definition: SimpleSearchEngineAlgorithm.h:176
double score
Definition: SimpleSearchEngineAlgorithm.h:114
IntList precursor_isotopes_
Definition: SimpleSearchEngineAlgorithm.h:156
size_t Size
Size type, e.g. used as a variable that can hold the result of size().
Definition: Types.h:127
String fragment_mass_tolerance_unit_
Definition: SimpleSearchEngineAlgorithm.h:160
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:91
Definition: SimpleSearchEngineAlgorithm.h:84
StringList modifications_fixed_
Definition: SimpleSearchEngineAlgorithm.h:162
int Int
Signed integer type.
Definition: Types.h:102
StringView provides a non-owning view on an existing string.
Definition: String.h:489
Size peptide_max_size_
Definition: SimpleSearchEngineAlgorithm.h:171
ExitCodes
Exit codes.
Definition: SimpleSearchEngineAlgorithm.h:92