OpenMS  3.0.0
NuXLFeatureAugmentation.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 
#include <algorithm>
#include <iterator>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
43 
44 namespace OpenMS
45 {
46  class OPENMS_DLLAPI NuXLFeatureAugmentation
47  {
48  public:
49  static void augment(std::vector<PeptideIdentification>& pep_ids,
50  std::vector<std::string> positive_weights,
51  std::vector<std::string> negative_weights)
52  {
53  // only for XLs? because they are fewer?
54  if (pep_ids.empty()) return;
55 
56  if (pep_ids[0].getHits().empty()) return;
57 
58  // feature names may not intersect between postive/negative constrained
59  std::sort(positive_weights.begin(), positive_weights.end());
60  std::sort(negative_weights.begin(), negative_weights.end());
61  std::vector<std::string> v_intersection;
62 
63  std::set_intersection(positive_weights.begin(), positive_weights.end(),
64  negative_weights.begin(), negative_weights.end(),
65  std::back_inserter(v_intersection));
66 
67  if (!v_intersection.empty()) throw std::runtime_error("Positive and negative weights may not overlap.");
68 
69  // use first PSM as template
70  auto p_template = pep_ids[0].getHits()[0];
71  p_template.setScore(0);
72  std::vector<String> keys;
73  p_template.getKeys(keys);
74  p_template.setMetaValue("NuXL:augmented", "true");
75 
76  // clear scores of template
77  for (const auto& k : keys)
78  {
79  if (p_template.getMetaValue(k).valueType() == DataValue::INT_VALUE) p_template.setMetaValue(k, 0);
80  if (p_template.getMetaValue(k).valueType() == DataValue::DOUBLE_VALUE) p_template.setMetaValue(k, 0.0);
81  }
82 
83  // determine minium and maximum for each of the positive constrained features
84  std::map<String, double> minima;
85  std::map<String, double> maxima;
86 
87  for (const auto& k : positive_weights)
88  {
89  minima[k] = 1e32;
90  maxima[k] = -1e32;
91  }
92 
93  for (const auto& pid : pep_ids)
94  {
95  for (const auto& ph : pid.getHits())
96  {
97  for (const auto& k : positive_weights)
98  {
99  auto dv = ph.getMetaValue(k);
100  if (dv.valueType() == DataValue::INT_VALUE)
101  {
102  if (minima[k] > (int)dv) minima[k] = (int)dv;
103  if (maxima[k] < (int)dv) maxima[k] = (int)dv;
104  };
105  if (dv.valueType() == DataValue::DOUBLE_VALUE)
106  {
107  if (minima[k] > (double)dv) minima[k] = (double)dv;
108  if (maxima[k] < (double)dv) maxima[k] = (double)dv;
109  };
110  }
111  }
112  }
113 
114  size_t c = 0;
115  // for each positive_weight feature, create one example with that feature set to max value
116  for (const auto& s : positive_weights)
117  {
118  auto p = p_template;
119  p.setMetaValue(s, maxima[s] + 1000.0 * (maxima[s] - minima[s])); // set feature value to >> maximum of observed ones
120  std::vector<PeptideHit> phs;
121  phs.push_back(p);
122  PeptideIdentification pid = pep_ids[0];
123  pid.setRT(1e6 + c); // RT of augmented example
124  pid.setHits(phs);
125  pep_ids.push_back(pid);
126  ++c;
127  }
128 
129  // for each negative_weight feature, create one example with that feature set to min value
130  for (const auto& s : negative_weights)
131  {
132  auto p = p_template;
133  p.setMetaValue(s, minima[s] - 1000.0 * (maxima[s] - minima[s])); // set feature value to << min of observed ones
134  std::vector<PeptideHit> phs;
135  phs.push_back(p);
136  PeptideIdentification pid = pep_ids[0];
137  pid.setRT(1e6 + c); // RT of augmented example
138  pid.setHits(phs);
139  pep_ids.push_back(pid);
140  ++c;
141  }
142  }
143 
144  static void removeAugmented(std::vector<PeptideIdentification>& pep_ids)
145  {
146  // remove augmented features again
147  for (auto& pid : pep_ids)
148  {
149  auto& phs = pid.getHits();
150  phs.erase(remove_if(phs.begin(), phs.end(), [](const PeptideHit& ph){ return ph.metaValueExists("NuXL:augmented"); }), phs.end());
151  }
152  }
153  };
154 }
155 
const double k
Definition: Constants.h:158
Definition: NuXLFeatureAugmentation.h:46
void setRT(double rt)
sets the RT of the MS2 spectrum where the identification occurred
const double c
Definition: Constants.h:214
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Representation of a peptide hit.
Definition: PeptideHit.h:55
void setHits(const std::vector< PeptideHit > &hits)
Sets the peptide hits.
integer value
Definition: DataValue.h:70
static void removeAugmented(std::vector< PeptideIdentification > &pep_ids)
Definition: NuXLFeatureAugmentation.h:144
static void augment(std::vector< PeptideIdentification > &pep_ids, std::vector< std::string > positive_weights, std::vector< std::string > negative_weights)
Definition: NuXLFeatureAugmentation.h:49
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63
double value
Definition: DataValue.h:71