OpenMS  2.7.0
IDScoreSwitcherAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2021.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Julianus Pfeuffer $
32 // $Authors: Julianus Pfeuffer $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 
41 #include <vector>
42 #include <set>
43 
44 namespace OpenMS
45 {
46 
47  class OPENMS_DLLAPI IDScoreSwitcherAlgorithm:
48  public DefaultParamHandler
49  {
50  public:
52 
/// Generic score categories under which concrete, engine-specific score names are grouped.
/// The enumerator order is significant: the enum is used as key of ordered maps.
enum class ScoreType
{
  RAW,      ///< engine-specific raw score (known names include "XTandem", "OMSSA", "Mascot"); treated as higher-is-better
  RAW_EVAL, ///< expectation value reported by a search engine (e.g. "expect", "E-Value"); lower is better
  PP,       ///< posterior probability; higher is better
  PEP,      ///< posterior error probability; lower is better
  FDR,      ///< false discovery rate; lower is better
  QVAL      ///< q-value; lower is better
};
66 
68  bool isScoreType(const String& score_name, const ScoreType& type)
69  {
70  const std::set<String>& possible_types = type_to_str_[type];
71  return possible_types.find(score_name) != possible_types.end();
72  }
73 
76  template <typename IDType>
77  void switchScores(IDType& id, Size& counter)
78  {
79  for (typename std::vector<typename IDType::HitType>::iterator hit_it = id.getHits().begin();
80  hit_it != id.getHits().end(); ++hit_it, ++counter)
81  {
82  if (!hit_it->metaValueExists(new_score_))
83  {
84  std::stringstream msg;
85  msg << "Meta value '" << new_score_ << "' not found for " << *hit_it;
86  throw Exception::MissingInformation(__FILE__, __LINE__,
87  OPENMS_PRETTY_FUNCTION, msg.str());
88  }
89 
90  const String& old_score_meta = (old_score_.empty() ? id.getScoreType() :
91  old_score_);
92  const DataValue& dv = hit_it->getMetaValue(old_score_meta);
93  if (!dv.isEmpty()) // meta value for old score already exists
94  {
95  if (fabs((double(dv) - hit_it->getScore()) * 2.0 /
96  (double(dv) + hit_it->getScore())) > tolerance_)
97  {
98  std::stringstream msg;
99  msg << "Meta value '" << old_score_meta << "' already exists "
100  << "with a conflicting value for " << *hit_it;
101  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
102  msg.str(), dv.toString());
103  } // else: values match, nothing to do
104  }
105  else
106  {
107  hit_it->setMetaValue(old_score_meta, hit_it->getScore());
108  }
109  hit_it->setScore(hit_it->getMetaValue(new_score_));
110  }
111  id.setScoreType(new_score_type_);
112  id.setHigherScoreBetter(higher_better_);
113  }
114 
118  void switchToGeneralScoreType(std::vector<PeptideIdentification>& id, ScoreType type, Size& counter)
119  {
120  if (id.empty()) return;
121  String t = findScoreType(id[0], type);
122  if (t.empty())
123  {
124  String msg = "First encountered ID does not have the requested score type.";
125  throw Exception::MissingInformation(__FILE__, __LINE__,
126  OPENMS_PRETTY_FUNCTION, msg);
127  }
128  else if (t == id[0].getScoreType())
129  {
130  // we assume that all the other peptide ids
131  // also already have the correct score set
132  return;
133  }
134 
135  if (t.hasSuffix("_score"))
136  {
137  new_score_type_ = t.chop(6);
138  }
139  else
140  {
141  new_score_type_ = t;
142  }
143  new_score_ = t;
144 
145  if (type != ScoreType::RAW && higher_better_ != type_to_better_[type])
146  {
147  OPENMS_LOG_WARN << "Requested non-raw score type does not match the expected score direction. Correcting!\n";
148  higher_better_ = type_to_better_[type];
149  }
150  for (auto& i : id)
151  {
152  switchScores(i, counter);
153  }
154  }
155 
159  void switchToGeneralScoreType(ConsensusMap& cmap, ScoreType type, Size& counter, bool unassigned_peptides_too = true)
160  {
161  String new_type = "";
162  for (const auto& f : cmap)
163  {
164  const auto& ids = f.getPeptideIdentifications();
165  if (!ids.empty())
166  {
167  new_type = findScoreType(ids[0], type);
168  if (new_type == ids[0].getScoreType())
169  {
170  return;
171  }
172  else
173  {
174  break;
175  }
176  }
177  }
178 
179  if (new_type.empty())
180  {
181  String msg = "First encountered ID does not have the requested score type.";
182  throw Exception::MissingInformation(__FILE__, __LINE__,
183  OPENMS_PRETTY_FUNCTION, msg);
184  }
185 
186  if (new_type.hasSuffix("_score"))
187  {
188  new_score_type_ = new_type.chop(6);
189  }
190  else
191  {
192  new_score_type_ = new_type;
193  }
194  new_score_ = new_type;
195 
196  if (type != ScoreType::RAW && higher_better_ != type_to_better_[type])
197  {
198  OPENMS_LOG_WARN << "Requested non-raw score type does not match the expected score direction. Correcting!\n";
199  higher_better_ = type_to_better_[type];
200  }
201 
202  const auto switchScoresSingle = [&counter,this](PeptideIdentification& id){switchScores(id,counter);};
203  cmap.applyFunctionOnPeptideIDs(switchScoresSingle, unassigned_peptides_too);
204  }
205 
206 
208  template <typename IDType>
210  {
211  const String& curr_score_type = id.getScoreType();
212  const std::set<String>& possible_types = type_to_str_[type];
213  if (possible_types.find(curr_score_type) != possible_types.end())
214  {
215  OPENMS_LOG_INFO << "Requested score type already set as main score: " + curr_score_type + "\n";
216  return curr_score_type;
217  }
218  else
219  {
220  if (id.getHits().empty())
221  {
222  OPENMS_LOG_WARN << "Identification entry used to check for alternative score was empty.\n";
223  return "";
224  }
225  const auto& hit = id.getHits()[0];
226  for (const auto& poss_str : possible_types)
227  {
228  if (hit.metaValueExists(poss_str)) return poss_str;
229  else if (hit.metaValueExists(poss_str + "_score")) return poss_str + "_score";
230  }
231  OPENMS_LOG_WARN << "Score of requested type not found in the UserParams of the checked ID object.\n";
232  return "";
233  }
234  }
235 
236  private:
237  void updateMembers_() override;
238 
240  const double tolerance_ = 1e-6;
241 
243  String new_score_, new_score_type_, old_score_;
245  bool higher_better_; // for the new scores, are higher ones better?
246 
248  std::map<ScoreType, std::set<String>> type_to_str_ =
249  {
250  {ScoreType::RAW, {"XTandem", "OMSSA", "SEQUEST:xcorr", "Mascot", "mvh"}},
251  //TODO find out reasonable raw scores for SES that provide E-Values as main score or see below
252  //TODO there is no test for spectraST idXML, so I don't know its score
253  //TODO check if we should combine RAW and RAW_EVAL:
254  // What if a SE does not have an e-value score (spectrast, OMSSA, crux/sequest, myrimatch),
255  // then you need additional if's/try's
256  {ScoreType::RAW_EVAL, {"expect", "SpecEValue", "E-Value", "evalue", "MS:1002053", "MS:1002257"}},
257  {ScoreType::PP, {"Posterior Probability"}},
258  {ScoreType::PEP, {"Posterior Error Probability", "pep", "MS:1001493"}}, // TODO add CV terms
259  {ScoreType::FDR, {"FDR", "fdr", "false discovery rate"}},
260  {ScoreType::QVAL, {"q-value", "qvalue", "MS:1001491", "q-Value", "qval"}}
261  };
262 
264  std::map<ScoreType, bool> type_to_better_ =
265  {
266  {ScoreType::RAW, true}, //TODO this might actually not always be true
267  {ScoreType::RAW_EVAL, false},
268  {ScoreType::PP, true},
269  {ScoreType::PEP, false},
270  {ScoreType::FDR, false},
271  {ScoreType::QVAL, false}
272  };
273  };
274 } // namespace OpenMS
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
#define OPENMS_LOG_INFO
Macro to use if a piece of information, e.g. a status, should be reported.
Definition: LogStream.h:465
A container for consensus elements.
Definition: ConsensusMap.h:88
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:59
String toString(bool full_precision=true) const
Conversion to String. full_precision controls the number of fractional digits for all double types or list...
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:379
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:93
Invalid value exception.
Definition: Exception.h:329
Not all required information provided.
Definition: Exception.h:189
Definition: IDScoreSwitcherAlgorithm.h:49
String findScoreType(IDType &id, IDScoreSwitcherAlgorithm::ScoreType type)
finds a certain score type in an ID and its metavalues if present, otherwise returns empty string
Definition: IDScoreSwitcherAlgorithm.h:209
void switchToGeneralScoreType(ConsensusMap &cmap, ScoreType type, Size &counter, bool unassigned_peptides_too=true)
Definition: IDScoreSwitcherAlgorithm.h:159
void switchScores(IDType &id, Size &counter)
Definition: IDScoreSwitcherAlgorithm.h:77
ScoreType
Definition: IDScoreSwitcherAlgorithm.h:58
void updateMembers_() override
This method is used to update extra member variables at the end of the setParameters() method.
String new_score_
will be set according to the algorithm parameters
Definition: IDScoreSwitcherAlgorithm.h:243
void switchToGeneralScoreType(std::vector< PeptideIdentification > &id, ScoreType type, Size &counter)
Definition: IDScoreSwitcherAlgorithm.h:118
bool higher_better_
will be set according to the algorithm parameters
Definition: IDScoreSwitcherAlgorithm.h:245
bool isScoreType(const String &score_name, const ScoreType &type)
Checks if the given score_name is of ScoreType type.
Definition: IDScoreSwitcherAlgorithm.h:68
void applyFunctionOnPeptideIDs(T &&f, bool include_unassigned=true)
applies a function on all PeptideIDs or only assigned ones
Definition: MapUtilities.h:68
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:65
A more convenient string class.
Definition: String.h:61
String chop(Size n) const
Returns a substring where n characters were removed from the end of the string.
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47