OpenMS  3.0.0
NuXLModificationsGenerator.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
41 #include <vector>
42 #include <map>
43 #include <set>
44 #include <iostream>
45 
46 namespace OpenMS
47 {
48  class AASequence;
49 
50  /*
51  formula2mass holds the map from empirical formula to mass
52 
53  mod_combinations holds the map from empirical formula to (potentially ambiguous) nucleotide formulae
54  e.g.,:
55  C10H14N5O7P -> {A}
56  C10H14N5O8P -> {G}
57  C18H22N4O16P2 -> { CU-H3N1, UU-H2O1 }
58  */
// Result container: holds the formula -> mass map and declares the type aliases used for the
// empirical-formula -> nucleotide-formula(s) mapping.
// NOTE(review): the embedded line numbering of this listing jumps 60 -> 62 and 79 -> 81, so two
// original source lines are not shown here. Presumably they are the header of the nested comparator
// (the alias below refers to it as MyStringLengthCompare) and a data member of type
// MapSumFormulaToNucleotideFormulas — confirm against the real header.
59  struct OPENMS_DLLAPI NuXLModificationMassesResult
60  {
// Comparator body: orders strings by length first; strings of equal length are ordered by operator<.
62  {
// Returns true if p_lhs sorts before p_rhs under the length-then-content ordering.
63  bool operator () (const std::string & p_lhs, const std::string & p_rhs) const
64  {
65  const size_t lhsLength = p_lhs.length() ;
66  const size_t rhsLength = p_rhs.length() ;
67  if(lhsLength == rhsLength)
68  {
69  return (p_lhs < p_rhs) ; // when two strings have the same
70  // length, defaults to the normal
71  // string comparison
72  }
73  return (lhsLength < rhsLength) ; // compares with the length
74  }
75  };
// empirical formula -> mass
76  std::map<String, double> formula2mass;
77 
// Set of nucleotide formulas, kept in the length-then-content order defined by the comparator above.
78  using NucleotideFormulas = std::set<String, MyStringLengthCompare>;
// Maps an empirical (sum) formula to the set of nucleotide formulas sharing that formula.
79  using MapSumFormulaToNucleotideFormulas = std::map<String, NucleotideFormulas>;
81  };
82 
// Collection of static functions for generating precursor adduct combinations; only declarations
// are visible in this header.
83  class OPENMS_DLLAPI NuXLModificationsGenerator
84  {
85  public:
86  /* @brief generate all combinations of precursor adducts
87  @param target_nucleotides the list of nucleotides: e.g., "U", "C", "G", "A" or "U", "T", "G", "A"
 @param nt_groups not described in the original documentation; NOTE(review): meaning is not visible from this header - confirm against the implementation
88  @param can_xl the set of cross-linkable nucleotides
89  @param mappings no description given in the original documentation - TODO document (format not visible from this header)
90  @param modifications additional losses associated with the precursor adduct: e.g., "-H2O"
91  @param sequence_restriction only precursor adducts that are substrings of this NA sequence are generated (default: "")
92  @param cysteine_adduct special DTT adduct (default: false)
93  @param max_length maximum oligo length (default: 4)
 @return a NuXLModificationMassesResult (see that struct for the formula -> mass map and related aliases)
94  */
95  static NuXLModificationMassesResult initModificationMassesNA(const StringList& target_nucleotides,
96  const StringList& nt_groups,
97  const std::set<char>& can_xl,
98  const StringList& mappings,
99  const StringList& modifications,
100  String sequence_restriction = "",
101  bool cysteine_adduct = false,
102  Int max_length = 4);
103  private:
// NOTE(review): the embedded line numbering jumps 103 -> 105, so one original source line
// (possibly a comment for the declaration below) is not shown in this listing.
// Presumably reports whether `query` does NOT occur in `res_seq` - inferred from the name only; confirm.
105  static bool notInSeq(const String& res_seq, const String& query);
106 
// `target_sequences` is the only non-const reference parameter, so it is presumably the output list;
// `map_source2target` maps one character to a list of characters. NOTE(review): how `param_pos`
// is used is not visible from this declaration - confirm against the implementation.
107  static void generateTargetSequences(const String& res_seq, Size param_pos, const std::map<char, std::vector<char> >& map_source2target, StringList& target_sequences);
108  };
109 }
110 
A more convenient string class.
Definition: String.h:58
Definition: NuXLModificationsGenerator.h:59
Definition: NuXLModificationsGenerator.h:83
std::map< String, NucleotideFormulas > MapSumFormulaToNucleotideFormulas
Definition: NuXLModificationsGenerator.h:79
MapSumFormulaToNucleotideFormulas mod_combinations
empirical formula -> nucleotide formula(s) (formulas if modifications lead to ambiguities) ...
Definition: NuXLModificationsGenerator.h:80
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
std::set< String, MyStringLengthCompare > NucleotideFormulas
Definition: NuXLModificationsGenerator.h:78
Definition: NuXLModificationsGenerator.h:61
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
std::map< String, double > formula2mass
empirical formula -> mass
Definition: NuXLModificationsGenerator.h:76
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
int Int
Signed integer type.
Definition: Types.h:102